/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX System V Release 4 Programmer's Guide: ANSI C and Programming
 * Support Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
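
/*
 * For example, assuming ELF_MIN_ALIGN == 4096 (0x1000):
 *   ELF_PAGESTART(0x12345)  == 0x12000   (round down to the page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345     (byte offset within that page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000   (round up to the next boundary)
 */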

static struct linux_binfmt elf_format = {
                .module         = THIS_MODULE,
                .load_binary    = load_elf_binary,
                .load_shlib     = load_elf_library,
                .core_dump      = elf_core_dump,
                .min_coredump   = ELF_EXEC_PAGESIZE,
                .hasvdso        = 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                down_write(&current->mm->mmap_sem);
                addr = do_brk(start, end - start);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}
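
/*
 * For example, set_brk(0x0804a123, 0x0804c000) page-aligns the range to
 * [0x0804b000, 0x0804c000) (assuming 4K pages), maps those anonymous
 * zero pages with do_brk(), and leaves start_brk == brk == 0x0804c000.
 */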

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
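
/*
 * For example, with ELF_MIN_ALIGN == 0x1000, padzero(0x0804a123) clears
 * the 0xedd bytes from 0x0804a123 up to the page boundary at 0x0804b000.
 */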

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
#endif
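
/*
 * For example, on a grows-down stack with 32-bit elf_addr_t and
 * p == 0xbffff810: STACK_ALLOC(p, 16) moves p down to 0xbffff800 and
 * returns that (new) address; STACK_ADD(p, 2) is 0xbffff800 - 2*4 ==
 * 0xbffff7f8; and STACK_ROUND() masks the result down to a 16-byte
 * boundary, as some ABIs require.
 */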

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, cred->uid);
        NEW_AUX_ENT(AT_EUID, cred->euid);
        NEW_AUX_ENT(AT_GID, cred->gid);
        NEW_AUX_ENT(AT_EGID, cred->egid);
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
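
/*
 * The resulting initial stack, from low to high addresses, looks like
 * (each slot one elf_addr_t):
 *
 *      argc
 *      argv[0] ... argv[argc - 1]      pointers into the string area
 *      NULL
 *      envp[0] ... envp[envc - 1]      pointers into the string area
 *      NULL
 *      auxv: { a_type, a_val } pairs, terminated by an AT_NULL pair
 *      ...                             argument/environment strings,
 *                                      platform strings, AT_RANDOM bytes
 */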

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        down_write(&current->mm->mmap_sem);
        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        do_munmap(current->mm, map_addr+size, total_size-size);
        } else
                map_addr = do_mmap(filep, addr, size, prot, type, off);

        up_write(&current->mm->mmap_sem);
        return map_addr;
}
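
/*
 * For example, a PT_LOAD with p_vaddr == 0x08048100, p_offset == 0x100
 * and p_filesz == 0x1234 maps at addr 0x08048000, file offset 0, and
 * size ELF_PAGEALIGN(0x1234 + 0x100) == 0x2000 (assuming 4K pages), so
 * the in-page start of the segment lands at the right virtual address.
 */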

#endif /* !elf_map */

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
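
/*
 * For example, with two PT_LOAD segments, one at p_vaddr 0x400000 and
 * one at p_vaddr 0x600e10 with p_memsz 0x430, the total mapping size is
 * 0x600e10 + 0x430 - 0x400000 == 0x201240: the span from the first
 * segment's page start to the end of the last segment.
 */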

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                down_write(&current->mm->mmap_sem);
                error = do_brk(elf_bss, last_bss - elf_bss);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
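
/*
 * With the default STACK_RND_MASK and 4K pages, random_variable is up
 * to 0x7ff pages, i.e. the stack top is shifted by as much as 8MB - 4K
 * below (or above, under CONFIG_STACK_GROWSUP) the page-aligned
 * STACK_TOP.
 */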

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        /*
                         * The early SET_PERSONALITY here is so that the lookup
                         * for the interpreter happens in the namespace of the
                         * to-be-execed image.  SET_PERSONALITY can select an
                         * alternate root.
                         *
                         * However, SET_PERSONALITY is NOT allowed to switch
                         * this task into the new image's memory mapping
                         * policy - that is, TASK_SIZE must still evaluate to
                         * that which is appropriate to the execing application.
                         * This is because exit_mmap() needs to have TASK_SIZE
                         * evaluate to the size of the old image.
                         *
                         * So if (say) a 64-bit application is execing a 32-bit
                         * application it is the architecture's responsibility
                         * to defer changing the value of TASK_SIZE until the
                         * switch really is going to happen - do this in
                         * flush_thread().      - akpm
                         */
                        SET_PERSONALITY(loc->elf_ex);

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        if (file_permission(interpreter, MAY_READ) < 0)
                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        } else {
                /* Executables without an interpreter also need a personality */
                SET_PERSONALITY(loc->elf_ex);
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, This is the point of no return */
        current->flags &= ~PF_FORKNOEXEC;
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;
        arch_pick_mmap_layout(current->mm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmaping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
                        load_bias = 0;
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }
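                /*
                 * For ET_DYN objects the first mapping is thus requested
                 * near ELF_ET_DYN_BASE, an arch-defined address (e.g.
                 * TASK_SIZE / 3 * 2 on i386); on x86 load_bias stays 0,
                 * so the first elf_map() below lets mmap place the object
                 * at its default (randomizable) base instead.
                 */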

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long uninitialized_var(interp_map_addr);

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        current->flags &= ~PF_FORKNOEXEC;
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                down_write(&current->mm->mmap_sem);
                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
                up_write(&current->mm->mmap_sem);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        down_write(&current->mm->mmap_sem);
        error = do_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        up_write(&current->mm->mmap_sem);
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len) {
                down_write(&current->mm->mmap_sem);
                do_brk(len, bss - len);
                up_write(&current->mm->mmap_sem);
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
        int ret = 1;

        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
                        return 0;
        } else {
                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
                if (!buf)
                        return 0;
                while (off > 0) {
                        unsigned long n = off;
                        if (n > PAGE_SIZE)
                                n = PAGE_SIZE;
                        if (!dump_write(file, buf, n)) {
                                /* don't leak the zeroed page on failure */
                                ret = 0;
                                break;
                        }
                        off -= n;
                }
                free_page((unsigned long)buf);
        }
        return ret;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* The vma can be set up to tell us the answer directly.  */
        if (vma->vm_flags & VM_ALWAYSDUMP)
                goto whole;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & (VM_IO | VM_RESERVED))
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
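                /*
                 * On a little-endian machine magic.cmp now reads as
                 * 0x464c457f, the u32 encoding of "\177ELF"; on big-endian
                 * it is 0x7f454c46, so the comparison works either way.
                 */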
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
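
/*
 * For example, a note named "CORE" (5 bytes with the NUL, padded to 8)
 * carrying a 336-byte descriptor occupies 12 + 8 + 336 == 356 bytes:
 * the 12-byte elf_note header, the padded name, then the descriptor,
 * each rounded up to a 4-byte boundary.
 */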

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while (0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE
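
/*
 * The resulting on-disk record for, say, an NT_PRSTATUS note is
 *
 *      n_namesz  n_descsz  n_type  "CORE\0"  pad  descriptor  pad
 *
 * with both the name and the descriptor padded out to 4-byte
 * boundaries, matching the size computed by notesize() above.
 */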

#define DUMP_WRITE(addr, nr)    \
        if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
                goto end_coredump;

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags, u8 osabi)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = osabi;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}
1363
1364static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1365                       struct mm_struct *mm)
1366{
1367        const struct cred *cred;
1368        unsigned int i, len;
1369        
1370        /* first copy the parameters from user space */
1371        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1372
1373        len = mm->arg_end - mm->arg_start;
1374        if (len >= ELF_PRARGSZ)
1375                len = ELF_PRARGSZ-1;
1376        if (copy_from_user(&psinfo->pr_psargs,
1377                           (const char __user *)mm->arg_start, len))
1378                return -EFAULT;
1379        for(i = 0; i < len; i++)
1380                if (psinfo->pr_psargs[i] == 0)
1381                        psinfo->pr_psargs[i] = ' ';
1382        psinfo->pr_psargs[len] = 0;
1383
1384        rcu_read_lock();
1385        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1386        rcu_read_unlock();
1387        psinfo->pr_pid = task_pid_vnr(p);
1388        psinfo->pr_pgrp = task_pgrp_vnr(p);
1389        psinfo->pr_sid = task_session_vnr(p);
1390
1391        i = p->state ? ffz(~p->state) + 1 : 0;
1392        psinfo->pr_state = i;
1393        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1394        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1395        psinfo->pr_nice = task_nice(p);
1396        psinfo->pr_flag = p->flags;
1397        rcu_read_lock();
1398        cred = __task_cred(p);
1399        SET_UID(psinfo->pr_uid, cred->uid);
1400        SET_GID(psinfo->pr_gid, cred->gid);
1401        rcu_read_unlock();
1402        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1403        
1404        return 0;
1405}
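/*
 * Example of the pr_psargs transformation above: if the raw argv area
 * [mm->arg_start, mm->arg_end) holds "ls\0-l\0/tmp\0", pr_psargs becomes
 * the single printable string "ls -l /tmp " (every NUL within len,
 * including the trailing one, turned into a space), truncated to
 * ELF_PRARGSZ - 1 bytes and NUL-terminated.
 */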
1406
1407static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1408{
1409        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1410        int i = 0;
1411        do
1412                i += 2;
1413        while (auxv[i - 2] != AT_NULL);
1414        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1415}
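/*
 * The loop above sizes the saved auxiliary vector, which is laid out as
 * (type, value) pairs of elf_addr_t, for example:
 *
 *   AT_PHDR   <phdr address>
 *   AT_PAGESZ <page size>
 *   ...
 *   AT_NULL   0              <- terminator
 *
 * i advances two words per entry, and the AT_NULL pair is included in
 * the note since the test runs after the increment.
 */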
1416
1417#ifdef CORE_DUMP_USE_REGSET
1418#include <linux/regset.h>
1419
1420struct elf_thread_core_info {
1421        struct elf_thread_core_info *next;
1422        struct task_struct *task;
1423        struct elf_prstatus prstatus;
1424        struct memelfnote notes[0];
1425};
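/*
 * notes[0] above is a zero-length (flexible) array member; the
 * allocation site in fill_note_info() sizes it for thread_notes
 * entries:
 *
 *   t = kzalloc(offsetof(struct elf_thread_core_info,
 *                        notes[info->thread_notes]), GFP_KERNEL);
 */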
1426
1427struct elf_note_info {
1428        struct elf_thread_core_info *thread;
1429        struct memelfnote psinfo;
1430        struct memelfnote auxv;
1431        size_t size;
1432        int thread_notes;
1433};
1434
1435/*
1436 * When a regset has a writeback hook, we call it on each thread before
1437 * dumping user memory.  On register window machines, this makes sure the
1438 * user memory backing the register data is up to date before we read it.
1439 */
1440static void do_thread_regset_writeback(struct task_struct *task,
1441                                       const struct user_regset *regset)
1442{
1443        if (regset->writeback)
1444                regset->writeback(task, regset, 1);
1445}
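/*
 * For reference, the writeback hook is declared in <linux/regset.h> as:
 *
 *   int (*writeback)(struct task_struct *target,
 *                    const struct user_regset *regset,
 *                    int immediate);
 *
 * Passing immediate = 1, as above, asks the architecture to flush the
 * state now rather than at the next context switch.
 */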
1446
1447static int fill_thread_core_info(struct elf_thread_core_info *t,
1448                                 const struct user_regset_view *view,
1449                                 long signr, size_t *total)
1450{
1451        unsigned int i;
1452
1453        /*
1454         * NT_PRSTATUS is the one special case, because the regset data
1455         * goes into the pr_reg field inside the note contents, rather
1456         * than being the whole note contents.  We fill the rest in here.
1457         * We assume that regset 0 is NT_PRSTATUS.
1458         */
1459        fill_prstatus(&t->prstatus, t->task, signr);
1460        (void) view->regsets[0].get(t->task, &view->regsets[0],
1461                                    0, sizeof(t->prstatus.pr_reg),
1462                                    &t->prstatus.pr_reg, NULL);
1463
1464        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1465                  sizeof(t->prstatus), &t->prstatus);
1466        *total += notesize(&t->notes[0]);
1467
1468        do_thread_regset_writeback(t->task, &view->regsets[0]);
1469
1470        /*
1471         * Each other regset might generate a note too.  For each regset
1472         * that has no core_note_type or is inactive, we leave t->notes[i]
1473         * all zero and we'll know to skip writing it later.
1474         */
1475        for (i = 1; i < view->n; ++i) {
1476                const struct user_regset *regset = &view->regsets[i];
1477                do_thread_regset_writeback(t->task, regset);
1478                if (regset->core_note_type &&
1479                    (!regset->active || regset->active(t->task, regset))) {
1480                        int ret;
1481                        size_t size = regset->n * regset->size;
1482                        void *data = kmalloc(size, GFP_KERNEL);
1483                        if (unlikely(!data))
1484                                return 0;
1485                        ret = regset->get(t->task, regset,
1486                                          0, size, data, NULL);
1487                        if (unlikely(ret))
1488                                kfree(data);
1489                        else {
1490                                if (regset->core_note_type != NT_PRFPREG)
1491                                        fill_note(&t->notes[i], "LINUX",
1492                                                  regset->core_note_type,
1493                                                  size, data);
1494                                else {
1495                                        t->prstatus.pr_fpvalid = 1;
1496                                        fill_note(&t->notes[i], "CORE",
1497                                                  NT_PRFPREG, size, data);
1498                                }
1499                                *total += notesize(&t->notes[i]);
1500                        }
1501                }
1502        }
1503
1504        return 1;
1505}
1506
1507static int fill_note_info(struct elfhdr *elf, int phdrs,
1508                          struct elf_note_info *info,
1509                          long signr, struct pt_regs *regs)
1510{
1511        struct task_struct *dump_task = current;
1512        const struct user_regset_view *view = task_user_regset_view(dump_task);
1513        struct elf_thread_core_info *t;
1514        struct elf_prpsinfo *psinfo;
1515        struct core_thread *ct;
1516        unsigned int i;
1517
1518        info->size = 0;
1519        info->thread = NULL;
1520
1521        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1522        if (psinfo == NULL)
1523                return 0;
1524
1525        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1526
1527        /*
1528         * Figure out how many notes we're going to need for each thread.
1529         */
1530        info->thread_notes = 0;
1531        for (i = 0; i < view->n; ++i)
1532                if (view->regsets[i].core_note_type != 0)
1533                        ++info->thread_notes;
1534
1535        /*
1536         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1537         * since it is our one special case.
1538         */
1539        if (unlikely(info->thread_notes == 0) ||
1540            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1541                WARN_ON(1);
1542                return 0;
1543        }
1544
1545        /*
1546         * Initialize the ELF file header.
1547         */
1548        fill_elf_header(elf, phdrs,
1549                        view->e_machine, view->e_flags, view->ei_osabi);
1550
1551        /*
1552         * Allocate a structure for each thread.
1553         */
1554        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1555                t = kzalloc(offsetof(struct elf_thread_core_info,
1556                                     notes[info->thread_notes]),
1557                            GFP_KERNEL);
1558                if (unlikely(!t))
1559                        return 0;
1560
1561                t->task = ct->task;
1562                if (ct->task == dump_task || !info->thread) {
1563                        t->next = info->thread;
1564                        info->thread = t;
1565                } else {
1566                        /*
1567                         * Make sure to keep the original task at
1568                         * the head of the list.
1569                         */
1570                        t->next = info->thread->next;
1571                        info->thread->next = t;
1572                }
1573        }
1574
1575        /*
1576         * Now fill in each thread's information.
1577         */
1578        for (t = info->thread; t != NULL; t = t->next)
1579                if (!fill_thread_core_info(t, view, signr, &info->size))
1580                        return 0;
1581
1582        /*
1583         * Fill in the two process-wide notes.
1584         */
1585        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1586        info->size += notesize(&info->psinfo);
1587
1588        fill_auxv_note(&info->auxv, current->mm);
1589        info->size += notesize(&info->auxv);
1590
1591        return 1;
1592}
1593
1594static size_t get_note_info_size(struct elf_note_info *info)
1595{
1596        return info->size;
1597}
1598
1599/*
1600 * Write all the notes for each thread.  When writing the first thread, the
1601 * process-wide notes are interleaved after the first thread-specific note.
1602 */
1603static int write_note_info(struct elf_note_info *info,
1604                           struct file *file, loff_t *foffset)
1605{
1606        bool first = true;
1607        struct elf_thread_core_info *t = info->thread;
1608
1609        do {
1610                int i;
1611
1612                if (!writenote(&t->notes[0], file, foffset))
1613                        return 0;
1614
1615                if (first && !writenote(&info->psinfo, file, foffset))
1616                        return 0;
1617                if (first && !writenote(&info->auxv, file, foffset))
1618                        return 0;
1619
1620                for (i = 1; i < info->thread_notes; ++i)
1621                        if (t->notes[i].data &&
1622                            !writenote(&t->notes[i], file, foffset))
1623                                return 0;
1624
1625                first = 0;
1626                t = t->next;
1627        } while (t);
1628
1629        return 1;
1630}
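/*
 * For a three-thread process (assuming only the PRSTATUS and FPREGS
 * regsets produce notes), the interleaving above yields:
 *
 *   NT_PRSTATUS (dumping thread)
 *   NT_PRPSINFO  \  process-wide notes, written once,
 *   NT_AUXV      /  right after the first thread's PRSTATUS
 *   NT_PRFPREG  (dumping thread)
 *   NT_PRSTATUS (thread 2), NT_PRFPREG (thread 2)
 *   NT_PRSTATUS (thread 3), NT_PRFPREG (thread 3)
 */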
1631
1632static void free_note_info(struct elf_note_info *info)
1633{
1634        struct elf_thread_core_info *threads = info->thread;
1635        while (threads) {
1636                unsigned int i;
1637                struct elf_thread_core_info *t = threads;
1638                threads = t->next;
1639                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1640                for (i = 1; i < info->thread_notes; ++i)
1641                        kfree(t->notes[i].data);
1642                kfree(t);
1643        }
1644        kfree(info->psinfo.data);
1645}
1646
1647#else
1648
1649/* Here is the structure in which the status of each thread is captured. */
1650struct elf_thread_status
1651{
1652        struct list_head list;
1653        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1654        elf_fpregset_t fpu;             /* NT_PRFPREG */
1655        struct task_struct *thread;
1656#ifdef ELF_CORE_COPY_XFPREGS
1657        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1658#endif
1659        struct memelfnote notes[3];
1660        int num_notes;
1661};
1662
1663/*
1664 * In order to add the per-thread information required by the ELF file
1665 * format, we need to keep a linked list of every thread's pr_status and
1666 * then create a single section for them in the final core file.
1667 */
1668static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1669{
1670        int sz = 0;
1671        struct task_struct *p = t->thread;
1672        t->num_notes = 0;
1673
1674        fill_prstatus(&t->prstatus, p, signr);
1675        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1676        
1677        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1678                  &(t->prstatus));
1679        t->num_notes++;
1680        sz += notesize(&t->notes[0]);
1681
1682        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1683                                                                &t->fpu))) {
1684                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1685                          &(t->fpu));
1686                t->num_notes++;
1687                sz += notesize(&t->notes[1]);
1688        }
1689
1690#ifdef ELF_CORE_COPY_XFPREGS
1691        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1692                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1693                          sizeof(t->xfpu), &t->xfpu);
1694                t->num_notes++;
1695                sz += notesize(&t->notes[2]);
1696        }
1697#endif  
1698        return sz;
1699}
1700
1701struct elf_note_info {
1702        struct memelfnote *notes;
1703        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1704        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1705        struct list_head thread_list;
1706        elf_fpregset_t *fpu;
1707#ifdef ELF_CORE_COPY_XFPREGS
1708        elf_fpxregset_t *xfpu;
1709#endif
1710        int thread_status_size;
1711        int numnote;
1712};
1713
1714static int elf_note_info_init(struct elf_note_info *info)
1715{
1716        memset(info, 0, sizeof(*info));
1717        INIT_LIST_HEAD(&info->thread_list);
1718
1719        /* Allocate space for six ELF notes */
1720        info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1721        if (!info->notes)
1722                return 0;
1723        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1724        if (!info->psinfo)
1725                goto notes_free;
1726        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1727        if (!info->prstatus)
1728                goto psinfo_free;
1729        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1730        if (!info->fpu)
1731                goto prstatus_free;
1732#ifdef ELF_CORE_COPY_XFPREGS
1733        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1734        if (!info->xfpu)
1735                goto fpu_free;
1736#endif
1737        return 1;
1738#ifdef ELF_CORE_COPY_XFPREGS
1739 fpu_free:
1740        kfree(info->fpu);
1741#endif
1742 prstatus_free:
1743        kfree(info->prstatus);
1744 psinfo_free:
1745        kfree(info->psinfo);
1746 notes_free:
1747        kfree(info->notes);
1748        return 0;
1749}
1750
1751static int fill_note_info(struct elfhdr *elf, int phdrs,
1752                          struct elf_note_info *info,
1753                          long signr, struct pt_regs *regs)
1754{
1755        struct list_head *t;
1756
1757        if (!elf_note_info_init(info))
1758                return 0;
1759
1760        if (signr) {
1761                struct core_thread *ct;
1762                struct elf_thread_status *ets;
1763
1764                for (ct = current->mm->core_state->dumper.next;
1765                                                ct; ct = ct->next) {
1766                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1767                        if (!ets)
1768                                return 0;
1769
1770                        ets->thread = ct->task;
1771                        list_add(&ets->list, &info->thread_list);
1772                }
1773
1774                list_for_each(t, &info->thread_list) {
1775                        int sz;
1776
1777                        ets = list_entry(t, struct elf_thread_status, list);
1778                        sz = elf_dump_thread_status(signr, ets);
1779                        info->thread_status_size += sz;
1780                }
1781        }
1782        /* now collect the dump for the current task */
1783        memset(info->prstatus, 0, sizeof(*info->prstatus));
1784        fill_prstatus(info->prstatus, current, signr);
1785        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1786
1787        /* Set up header */
1788        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1789
1790        /*
1791         * Set up the notes in similar form to SVR4 core dumps made
1792         * with info from their /proc.
1793         */
1794
1795        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1796                  sizeof(*info->prstatus), info->prstatus);
1797        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1798        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1799                  sizeof(*info->psinfo), info->psinfo);
1800
1801        info->numnote = 2;
1802
1803        fill_auxv_note(&info->notes[info->numnote++], current->mm);
1804
1805        /* Try to dump the FPU. */
1806        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1807                                                               info->fpu);
1808        if (info->prstatus->pr_fpvalid)
1809                fill_note(info->notes + info->numnote++,
1810                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1811#ifdef ELF_CORE_COPY_XFPREGS
1812        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1813                fill_note(info->notes + info->numnote++,
1814                          "LINUX", ELF_CORE_XFPREG_TYPE,
1815                          sizeof(*info->xfpu), info->xfpu);
1816#endif
1817
1818        return 1;
1819}
1820
1821static size_t get_note_info_size(struct elf_note_info *info)
1822{
1823        int sz = 0;
1824        int i;
1825
1826        for (i = 0; i < info->numnote; i++)
1827                sz += notesize(info->notes + i);
1828
1829        sz += info->thread_status_size;
1830
1831        return sz;
1832}
1833
1834static int write_note_info(struct elf_note_info *info,
1835                           struct file *file, loff_t *foffset)
1836{
1837        int i;
1838        struct list_head *t;
1839
1840        for (i = 0; i < info->numnote; i++)
1841                if (!writenote(info->notes + i, file, foffset))
1842                        return 0;
1843
1844        /* write out the thread status notes section */
1845        list_for_each(t, &info->thread_list) {
1846                struct elf_thread_status *tmp =
1847                                list_entry(t, struct elf_thread_status, list);
1848
1849                for (i = 0; i < tmp->num_notes; i++)
1850                        if (!writenote(&tmp->notes[i], file, foffset))
1851                                return 0;
1852        }
1853
1854        return 1;
1855}
1856
1857static void free_note_info(struct elf_note_info *info)
1858{
1859        while (!list_empty(&info->thread_list)) {
1860                struct list_head *tmp = info->thread_list.next;
1861                list_del(tmp);
1862                kfree(list_entry(tmp, struct elf_thread_status, list));
1863        }
1864
1865        kfree(info->prstatus);
1866        kfree(info->psinfo);
1867        kfree(info->notes);
1868        kfree(info->fpu);
1869#ifdef ELF_CORE_COPY_XFPREGS
1870        kfree(info->xfpu);
1871#endif
1872}
1873
1874#endif
1875
1876static struct vm_area_struct *first_vma(struct task_struct *tsk,
1877                                        struct vm_area_struct *gate_vma)
1878{
1879        struct vm_area_struct *ret = tsk->mm->mmap;
1880
1881        if (ret)
1882                return ret;
1883        return gate_vma;
1884}
1885/*
1886 * Helper function for iterating across a vma list.  It ensures that the caller
1887 * will visit `gate_vma' prior to terminating the search.
1888 */
1889static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1890                                        struct vm_area_struct *gate_vma)
1891{
1892        struct vm_area_struct *ret;
1893
1894        ret = this_vma->vm_next;
1895        if (ret)
1896                return ret;
1897        if (this_vma == gate_vma)
1898                return NULL;
1899        return gate_vma;
1900}
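/*
 * Typical usage, as in elf_core_dump() below; the pair visits every vma
 * in mm->mmap order and then gate_vma exactly once, if there is one:
 *
 *   for (vma = first_vma(current, gate_vma); vma != NULL;
 *                   vma = next_vma(vma, gate_vma))
 *           ...
 */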
1901
1902/*
1903 * Actual dumper
1904 *
1905 * This is a two-pass process: first we lay out the offsets of all the
1906 * pieces, and then they are actually written out.  If we hit the core
1907 * file size limit, we just truncate.
1908 */
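/*
 * A sketch of the file layout the code below produces:
 *
 *   +---------------------------------------+  offset 0
 *   | ELF header                            |
 *   +---------------------------------------+
 *   | program headers: one PT_NOTE, then    |
 *   | one PT_LOAD per vma (plus any extras) |
 *   +---------------------------------------+
 *   | note data (PRSTATUS, PRPSINFO, ...)   |
 *   +---------------------------------------+  padded to ELF_EXEC_PAGESIZE
 *   | vma contents, back to back; each      |  <- dataoff
 *   | PT_LOAD's p_offset points in here     |
 *   +---------------------------------------+
 */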
1909static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1910{
1911        int has_dumped = 0;
1912        mm_segment_t fs;
1913        int segs;
1914        size_t size = 0;
1915        struct vm_area_struct *vma, *gate_vma;
1916        struct elfhdr *elf = NULL;
1917        loff_t offset = 0, dataoff, foffset;
1918        unsigned long mm_flags;
1919        struct elf_note_info info;
1920
1921        /*
1922         * We no longer stop all VM operations.
1923         * 
1924         * This is because those processes that could possibly change map_count
1925         * or the mmap / vma pages are now blocked in do_exit on current
1926         * finishing this core dump.
1927         *
1928         * Only ptrace can touch these memory addresses, but it doesn't change
1929         * the map_count or the pages allocated. So no possibility of crashing
1930         * exists while dumping the mm->vm_next areas to the core file.
1931         */
1932  
1933        /* alloc memory for large data structures: too large to be on stack */
1934        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1935        if (!elf)
1936                goto out;
1937        /*
1938         * The number of segs is recorded in the ELF header as a 16-bit value.
1939         * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying here.
1940         */
1941        segs = current->mm->map_count;
1942#ifdef ELF_CORE_EXTRA_PHDRS
1943        segs += ELF_CORE_EXTRA_PHDRS;
1944#endif
1945
1946        gate_vma = get_gate_vma(current);
1947        if (gate_vma != NULL)
1948                segs++;
1949
1950        /*
1951         * Collect all the non-memory information about the process for the
1952         * notes.  This also sets up the file header.
1953         */
1954        if (!fill_note_info(elf, segs + 1, /* including notes section */
1955                            &info, signr, regs))
1956                goto cleanup;
1957
1958        has_dumped = 1;
1959        current->flags |= PF_DUMPCORE;
1960  
1961        fs = get_fs();
1962        set_fs(KERNEL_DS);
1963
1964        DUMP_WRITE(elf, sizeof(*elf));
1965        offset += sizeof(*elf);                         /* Elf header */
1966        offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1967        foffset = offset;
1968
1969        /* Write notes phdr entry */
1970        {
1971                struct elf_phdr phdr;
1972                size_t sz = get_note_info_size(&info);
1973
1974                sz += elf_coredump_extra_notes_size();
1975
1976                fill_elf_note_phdr(&phdr, sz, offset);
1977                offset += sz;
1978                DUMP_WRITE(&phdr, sizeof(phdr));
1979        }
1980
1981        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
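        /*
         * For example, if the header and note data end at offset 0x2f34
         * and ELF_EXEC_PAGESIZE is 0x1000, dataoff becomes 0x3000, so
         * the first PT_LOAD body starts on a page boundary.
         */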
1982
1983        /*
1984         * We must use the same mm->flags while dumping core to avoid
1985         * inconsistency between the program headers and bodies, otherwise an
1986         * unusable core file can be generated.
1987         */
1988        mm_flags = current->mm->flags;
1989
1990        /* Write program headers for segments dump */
1991        for (vma = first_vma(current, gate_vma); vma != NULL;
1992                        vma = next_vma(vma, gate_vma)) {
1993                struct elf_phdr phdr;
1994
1995                phdr.p_type = PT_LOAD;
1996                phdr.p_offset = offset;
1997                phdr.p_vaddr = vma->vm_start;
1998                phdr.p_paddr = 0;
1999                phdr.p_filesz = vma_dump_size(vma, mm_flags);
2000                phdr.p_memsz = vma->vm_end - vma->vm_start;
2001                offset += phdr.p_filesz;
2002                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2003                if (vma->vm_flags & VM_WRITE)
2004                        phdr.p_flags |= PF_W;
2005                if (vma->vm_flags & VM_EXEC)
2006                        phdr.p_flags |= PF_X;
2007                phdr.p_align = ELF_EXEC_PAGESIZE;
2008
2009                DUMP_WRITE(&phdr, sizeof(phdr));
2010        }
2011
2012#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2013        ELF_CORE_WRITE_EXTRA_PHDRS;
2014#endif
2015
2016        /* write out the notes section */
2017        if (!write_note_info(&info, file, &foffset))
2018                goto end_coredump;
2019
2020        if (elf_coredump_extra_notes_write(file, &foffset))
2021                goto end_coredump;
2022
2023        /* Align to page */
2024        if (!dump_seek(file, dataoff - foffset))
2025                goto end_coredump;
2026
2027        for (vma = first_vma(current, gate_vma); vma != NULL;
2028                        vma = next_vma(vma, gate_vma)) {
2029                unsigned long addr;
2030                unsigned long end;
2031
2032                end = vma->vm_start + vma_dump_size(vma, mm_flags);
2033
2034                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2035                        struct page *page;
2036                        int stop;
2037
2038                        page = get_dump_page(addr);
2039                        if (page) {
2040                                void *kaddr = kmap(page);
2041                                stop = ((size += PAGE_SIZE) > limit) ||
2042                                        !dump_write(file, kaddr, PAGE_SIZE);
2043                                kunmap(page);
2044                                page_cache_release(page);
2045                        } else
2046                                stop = !dump_seek(file, PAGE_SIZE);
2047                        if (stop)
2048                                goto end_coredump;
2049                }
2050        }
2051
2052#ifdef ELF_CORE_WRITE_EXTRA_DATA
2053        ELF_CORE_WRITE_EXTRA_DATA;
2054#endif
2055
2056end_coredump:
2057        set_fs(fs);
2058
2059cleanup:
2060        free_note_info(&info);
2061        kfree(elf);
2062out:
2063        return has_dumped;
2064}
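/*
 * A userspace sketch (hypothetical helper, not part of this file;
 * assumes <stdio.h> and <elf.h>) of how a consumer walks the PT_NOTE
 * segment this dumper emits.  Name and descriptor are each padded to a
 * 4-byte boundary, matching writenote() above.
 */
#if 0
static void walk_core_notes(const char *seg, size_t len)
{
        size_t off = 0;

        while (off + sizeof(Elf64_Nhdr) <= len) {
                const Elf64_Nhdr *nh = (const Elf64_Nhdr *)(seg + off);
                const char *name = seg + off + sizeof(*nh);
                const char *desc = name + ((nh->n_namesz + 3) & ~3UL);

                /* e.g. prints "note CORE type 1 ..." for NT_PRSTATUS */
                printf("note %s type %u, %u bytes\n",
                       name, nh->n_type, nh->n_descsz);
                off = (size_t)(desc - seg) + ((nh->n_descsz + 3) & ~3UL);
        }
}
#endif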
2065
2066#endif          /* USE_ELF_CORE_DUMP */
2067
2068static int __init init_elf_binfmt(void)
2069{
2070        return register_binfmt(&elf_format);
2071}
2072
2073static void __exit exit_elf_binfmt(void)
2074{
2075        /* Remove the ELF loader. */
2076        unregister_binfmt(&elf_format);
2077}
2078
2079core_initcall(init_elf_binfmt);
2080module_exit(exit_elf_binfmt);
2081MODULE_LICENSE("GPL");
2082