linux/fs/binfmt_elf.c
<<
>>
Prefs
   1/*
   2 * linux/fs/binfmt_elf.c
   3 *
   4 * These are the functions used to load ELF format executables as used
   5 * on SVr4 machines.  Information on the format may be found in the book
   6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
   7 * Tools".
   8 *
   9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
  10 */
  11
  12#include <linux/module.h>
  13#include <linux/kernel.h>
  14#include <linux/fs.h>
  15#include <linux/stat.h>
  16#include <linux/time.h>
  17#include <linux/mm.h>
  18#include <linux/mman.h>
  19#include <linux/a.out.h>
  20#include <linux/errno.h>
  21#include <linux/signal.h>
  22#include <linux/binfmts.h>
  23#include <linux/string.h>
  24#include <linux/file.h>
  25#include <linux/fcntl.h>
  26#include <linux/ptrace.h>
  27#include <linux/slab.h>
  28#include <linux/shm.h>
  29#include <linux/personality.h>
  30#include <linux/elfcore.h>
  31#include <linux/init.h>
  32#include <linux/highuid.h>
  33#include <linux/smp.h>
  34#include <linux/compiler.h>
  35#include <linux/highmem.h>
  36#include <linux/pagemap.h>
  37#include <linux/security.h>
  38#include <linux/syscalls.h>
  39#include <linux/random.h>
  40#include <linux/elf.h>
  41#include <linux/utsname.h>
  42#include <asm/uaccess.h>
  43#include <asm/param.h>
  44#include <asm/page.h>
  45
   /*
    * Forward declarations.  load_elf_binary() and load_elf_library() are
    * referenced by the elf_format initialiser further down; elf_map() is
    * declared here ahead of its (conditionally compiled) definition.
    */
   46static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
   47static int load_elf_library(struct file *);
   48static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int);
  49
   50/*
   51 * If we don't support core dumping, then supply a NULL so we
   52 * don't even try.
      *
      * The real elf_core_dump() is only declared when both
      * USE_ELF_CORE_DUMP and CONFIG_ELF_CORE are defined; otherwise the
      * name expands to NULL, which is the value elf_format.core_dump is
      * then initialised with.
   53 */
   54#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
   55static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
   56#else
   57#define elf_core_dump   NULL
   58#endif
  59
   /*
    * ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE; the
    * ELF_PAGE* helpers below all work relative to it.
    */
   60#if ELF_EXEC_PAGESIZE > PAGE_SIZE
   61#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
   62#else
   63#define ELF_MIN_ALIGN   PAGE_SIZE
   64#endif
   65
   /* Fall back to 0 when nothing included earlier defined ELF_CORE_EFLAGS. */
   66#ifndef ELF_CORE_EFLAGS
   67#define ELF_CORE_EFLAGS 0
   68#endif
   69
   /*
    * ELF_PAGESTART(_v):  _v with the low (ELF_MIN_ALIGN - 1) bits cleared.
    * ELF_PAGEOFFSET(_v): only the low (ELF_MIN_ALIGN - 1) bits of _v.
    * ELF_PAGEALIGN(_v):  _v + ELF_MIN_ALIGN - 1 with the low bits cleared.
    *
    * NOTE(review): these act as round-down / offset / round-up only if
    * ELF_MIN_ALIGN is a power of two -- assumed here, not checked.
    */
   70#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
   71#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
   72#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
  73
   /*
    * Binary-format descriptor for ELF.  It wires up the loader callbacks
    * declared above; .core_dump is either elf_core_dump() or NULL depending
    * on the preprocessor selection earlier in this file.
    */
   74static struct linux_binfmt elf_format = {
   75                .module         = THIS_MODULE,
   76                .load_binary    = load_elf_binary,
   77                .load_shlib     = load_elf_library,
   78                .core_dump      = elf_core_dump,
   79                .min_coredump   = ELF_EXEC_PAGESIZE,
   80                .hasvdso        = 1
   81};
   82
   /*
    * True when x, converted to unsigned long, is at or above TASK_SIZE.
    * In this file it is applied to the results of do_brk() and elf_map()
    * to decide whether the call failed.
    */
   83#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
  84
  85static int set_brk(unsigned long start, unsigned long end)
  86{
  87        start = ELF_PAGEALIGN(start);
  88        end = ELF_PAGEALIGN(end);
  89        if (end > start) {
  90                unsigned long addr;
  91                down_write(&current->mm->mmap_sem);
  92                addr = do_brk(start, end - start);
  93                up_write(&current->mm->mmap_sem);
  94                if (BAD_ADDR(addr))
  95                        return addr;
  96        }
  97        current->mm->start_brk = current->mm->brk = end;
  98        return 0;
  99}
 100
 101/* We need to explicitly zero any fractional pages
 102   after the data section (i.e. bss).  This would
 103   contain the junk from the file that should not
 104   be in memory
 105 */
 106static int padzero(unsigned long elf_bss)
 107{
 108        unsigned long nbyte;
 109
 110        nbyte = ELF_PAGEOFFSET(elf_bss);
 111        if (nbyte) {
 112                nbyte = ELF_MIN_ALIGN - nbyte;
 113                if (clear_user((void __user *) elf_bss, nbyte))
 114                        return -EFAULT;
 115        }
 116        return 0;
 117}
 118
  119/* Let's use some macros to make this stack manipulation a little clearer */
  /*
   * STACK_ADD(sp, items):   sp, viewed as an elf_addr_t pointer, moved by
   *                         'items' slots in the direction of stack growth.
   * STACK_ROUND(sp, items): the address 'items' slots further on, rounded
   *                         to a 16-byte boundary (up when the stack grows
   *                         up, down otherwise).
   * STACK_ALLOC(sp, len):   move sp by len (modifying sp) and yield the
   *                         lowest address of the region just reserved.
   */
  120#ifdef CONFIG_STACK_GROWSUP
  121#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
  122#define STACK_ROUND(sp, items) \
  123        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
  124#define STACK_ALLOC(sp, len) ({ \
  125        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
  126        old_sp; })
  127#else
  128#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
  129#define STACK_ROUND(sp, items) \
  130        (((unsigned long) (sp - items)) &~ 15UL)
  131#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
  132#endif
 133
 134static int
 135create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 136                int interp_aout, unsigned long load_addr,
 137                unsigned long interp_load_addr)
 138{
 139        unsigned long p = bprm->p;
 140        int argc = bprm->argc;
 141        int envc = bprm->envc;
 142        elf_addr_t __user *argv;
 143        elf_addr_t __user *envp;
 144        elf_addr_t __user *sp;
 145        elf_addr_t __user *u_platform;
 146        const char *k_platform = ELF_PLATFORM;
 147        int items;
 148        elf_addr_t *elf_info;
 149        int ei_index = 0;
 150        struct task_struct *tsk = current;
 151        struct vm_area_struct *vma;
 152
 153        /*
 154         * In some cases (e.g. Hyper-Threading), we want to avoid L1
 155         * evictions by the processes running on the same package. One
 156         * thing we can do is to shuffle the initial stack for them.
 157         */
 158
 159        p = arch_align_stack(p);
 160
 161        /*
 162         * If this architecture has a platform capability string, copy it
 163         * to userspace.  In some cases (Sparc), this info is impossible
 164         * for userspace to get any other way, in others (i386) it is
 165         * merely difficult.
 166         */
 167        u_platform = NULL;
 168        if (k_platform) {
 169                size_t len = strlen(k_platform) + 1;
 170
 171                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
 172                if (__copy_to_user(u_platform, k_platform, len))
 173                        return -EFAULT;
 174        }
 175
 176        /* Create the ELF interpreter info */
 177        elf_info = (elf_addr_t *)current->mm->saved_auxv;
 178        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
 179#define NEW_AUX_ENT(id, val) \
 180        do { \
 181                elf_info[ei_index++] = id; \
 182                elf_info[ei_index++] = val; \
 183        } while (0)
 184
 185#ifdef ARCH_DLINFO
 186        /* 
 187         * ARCH_DLINFO must come first so PPC can do its special alignment of
 188         * AUXV.
 189         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
 190         * ARCH_DLINFO changes
 191         */
 192        ARCH_DLINFO;
 193#endif
 194        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
 195        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
 196        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
 197        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
 198        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
 199        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
 200        NEW_AUX_ENT(AT_BASE, interp_load_addr);
 201        NEW_AUX_ENT(AT_FLAGS, 0);
 202        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
 203        NEW_AUX_ENT(AT_UID, tsk->uid);
 204        NEW_AUX_ENT(AT_EUID, tsk->euid);
 205        NEW_AUX_ENT(AT_GID, tsk->gid);
 206        NEW_AUX_ENT(AT_EGID, tsk->egid);
 207        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
 208        if (k_platform) {
 209                NEW_AUX_ENT(AT_PLATFORM,
 210                            (elf_addr_t)(unsigned long)u_platform);
 211        }
 212        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
 213                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
 214        }
 215#undef NEW_AUX_ENT
 216        /* AT_NULL is zero; clear the rest too */
 217        memset(&elf_info[ei_index], 0,
 218               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
 219
 220        /* And advance past the AT_NULL entry.  */
 221        ei_index += 2;
 222
 223        sp = STACK_ADD(p, ei_index);
 224
 225        items = (argc + 1) + (envc + 1);
 226        if (interp_aout) {
 227                items += 3; /* a.out interpreters require argv & envp too */
 228        } else {
 229                items += 1; /* ELF interpreters only put argc on the stack */
 230        }
 231        bprm->p = STACK_ROUND(sp, items);
 232
 233        /* Point sp at the lowest address on the stack */
 234#ifdef CONFIG_STACK_GROWSUP
 235        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
 236        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
 237#else
 238        sp = (elf_addr_t __user *)bprm->p;
 239#endif
 240
 241
 242        /*
 243         * Grow the stack manually; some architectures have a limit on how
 244         * far ahead a user-space access may be in order to grow the stack.
 245         */
 246        vma = find_extend_vma(current->mm, bprm->p);
 247        if (!vma)
 248                return -EFAULT;
 249
 250        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
 251        if (__put_user(argc, sp++))
 252                return -EFAULT;
 253        if (interp_aout) {
 254                argv = sp + 2;
 255                envp = argv + argc + 1;
 256                if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
 257                    __put_user((elf_addr_t)(unsigned long)envp, sp++))
 258                        return -EFAULT;
 259        } else {
 260                argv = sp;
 261                envp = argv + argc + 1;
 262        }
 263
 264        /* Populate argv and envp */
 265        p = current->mm->arg_end = current->mm->arg_start;
 266        while (argc-- > 0) {
 267                size_t len;
 268                if (__put_user((elf_addr_t)p, argv++))
 269                        return -EFAULT;
 270                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
 271                if (!len || len > MAX_ARG_STRLEN)
 272                        return 0;
 273                p += len;
 274        }
 275        if (__put_user(0, argv))
 276                return -EFAULT;
 277        current->mm->arg_end = current->mm->env_start = p;
 278        while (envc-- > 0) {
 279                size_t len;
 280                if (__put_user((elf_addr_t)p, envp++))
 281                        return -EFAULT;
 282                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
 283                if (!len || len > MAX_ARG_STRLEN)
 284                        return 0;
 285                p += len;
 286        }
 287        if (__put_user(0, envp))
 288                return -EFAULT;
 289        current->mm->env_end = p;
 290
 291        /* Put the elf_info on the stack in the right place.  */
 292        sp = (elf_addr_t __user *)envp + 1;
 293        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
 294                return -EFAULT;
 295        return 0;
 296}
 297
 298#ifndef elf_map
 299
 300static unsigned long elf_map(struct file *filep, unsigned long addr,
 301                struct elf_phdr *eppnt, int prot, int type)
 302{
 303        unsigned long map_addr;
 304        unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
 305
 306        down_write(&current->mm->mmap_sem);
 307        /* mmap() will return -EINVAL if given a zero size, but a
 308         * segment with zero filesize is perfectly valid */
 309        if (eppnt->p_filesz + pageoffset)
 310                map_addr = do_mmap(filep, ELF_PAGESTART(addr),
 311                                   eppnt->p_filesz + pageoffset, prot, type,
 312                                   eppnt->p_offset - pageoffset);
 313        else
 314                map_addr = ELF_PAGESTART(addr);
 315        up_write(&current->mm->mmap_sem);
 316        return(map_addr);
 317}
 318
 319#endif /* !elf_map */
 320
  321/* This is much more generalized than the library routine read function,
  322   so we keep this separate.  Technically the library read function
  323   is only provided so that we can read a.out libraries that have
  324   an ELF header */
  325
  /*
   * Map an ELF interpreter into the current address space.
   *
   * @interp_elf_ex:    ELF header of the interpreter
   * @interpreter:      open file the PT_LOAD segments are mapped from
   * @interp_load_addr: on success receives the load offset applied to the
   *                    interpreter's segments (remains 0 for ET_EXEC images)
   *
   * Returns e_entry plus the load offset on success.  On failure the value
   * is ~0UL (header sanity checks, oversized or unallocatable program
   * header table), a negative errno converted to unsigned long (-EIO,
   * -ENOMEM, -EFAULT or a negative kernel_read() result), or whatever
   * elf_map() / do_brk() returned when BAD_ADDR() rejected it.
   */
  326static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
  327                struct file *interpreter, unsigned long *interp_load_addr)
  328{
  329        struct elf_phdr *elf_phdata;
  330        struct elf_phdr *eppnt;
  331        unsigned long load_addr = 0;
  332        int load_addr_set = 0;
  333        unsigned long last_bss = 0, elf_bss = 0;
  334        unsigned long error = ~0UL;
  335        int retval, i, size;
  336
  337        /* First of all, some simple consistency checks */
  338        if (interp_elf_ex->e_type != ET_EXEC &&
  339            interp_elf_ex->e_type != ET_DYN)
  340                goto out;
  341        if (!elf_check_arch(interp_elf_ex))
  342                goto out;
  343        if (!interpreter->f_op || !interpreter->f_op->mmap)
  344                goto out;
  345
  346        /*
  347         * If the size of this structure has changed, then punt, since
  348         * we will be doing the wrong thing.
  349         */
  350        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
  351                goto out;
  352        if (interp_elf_ex->e_phnum < 1 ||
  353                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
  354                goto out;
  355
  356        /* Now read in all of the header information */
  357        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
             /* Refuse program header tables larger than ELF_MIN_ALIGN bytes. */
  358        if (size > ELF_MIN_ALIGN)
  359                goto out;
  360        elf_phdata = kmalloc(size, GFP_KERNEL);
  361        if (!elf_phdata)
  362                goto out;
  363
  364        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
  365                             (char *)elf_phdata,size);
             /* A short read that carries no error code is reported as -EIO. */
  366        error = -EIO;
  367        if (retval != size) {
  368                if (retval < 0)
  369                        error = retval; 
  370                goto out_close;
  371        }
  372
  373        eppnt = elf_phdata;
  374        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
  375                if (eppnt->p_type == PT_LOAD) {
  376                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
  377                        int elf_prot = 0;
  378                        unsigned long vaddr = 0;
  379                        unsigned long k, map_addr;
  380
  381                        if (eppnt->p_flags & PF_R)
  382                                elf_prot = PROT_READ;
  383                        if (eppnt->p_flags & PF_W)
  384                                elf_prot |= PROT_WRITE;
  385                        if (eppnt->p_flags & PF_X)
  386                                elf_prot |= PROT_EXEC;
  387                        vaddr = eppnt->p_vaddr;
                             /*
                              * ET_EXEC segments, and every segment after the
                              * first of an ET_DYN image, are requested with
                              * MAP_FIXED.
                              */
  388                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
  389                                elf_type |= MAP_FIXED;
  390
  391                        map_addr = elf_map(interpreter, load_addr + vaddr,
  392                                           eppnt, elf_prot, elf_type);
  393                        error = map_addr;
  394                        if (BAD_ADDR(map_addr))
  395                                goto out_close;
  396
                             /*
                              * The first ET_DYN mapping determines the load
                              * offset used for all remaining segments.
                              */
  397                        if (!load_addr_set &&
  398                            interp_elf_ex->e_type == ET_DYN) {
  399                                load_addr = map_addr - ELF_PAGESTART(vaddr);
  400                                load_addr_set = 1;
  401                        }
  402
  403                        /*
  404                         * Check to see if the section's size will overflow the
  405                         * allowed task size. Note that p_filesz must always be
  406                         * <= p_memsz so it's only necessary to check p_memsz.
  407                         */
  408                        k = load_addr + eppnt->p_vaddr;
  409                        if (BAD_ADDR(k) ||
  410                            eppnt->p_filesz > eppnt->p_memsz ||
  411                            eppnt->p_memsz > TASK_SIZE ||
  412                            TASK_SIZE - eppnt->p_memsz < k) {
  413                                error = -ENOMEM;
  414                                goto out_close;
  415                        }
  416
  417                        /*
  418                         * Find the end of the file mapping for this phdr, and
  419                         * keep track of the largest address we see for this.
  420                         */
  421                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
  422                        if (k > elf_bss)
  423                                elf_bss = k;
  424
  425                        /*
  426                         * Do the same thing for the memory mapping - between
  427                         * elf_bss and last_bss is the bss section.
  428                         */
  429                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
  430                        if (k > last_bss)
  431                                last_bss = k;
  432                }
  433        }
  434
  435        /*
  436         * Now fill out the bss section.  First pad the last page up
  437         * to the page boundary, and then perform a mmap to make sure
  438         * that there are zero-mapped pages up to and including the 
  439         * last bss page.
  440         */
  441        if (padzero(elf_bss)) {
  442                error = -EFAULT;
  443                goto out_close;
  444        }
  445
  446        /* What we have mapped so far */
  447        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
  448
  449        /* Map the last of the bss segment */
  450        if (last_bss > elf_bss) {
  451                down_write(&current->mm->mmap_sem);
  452                error = do_brk(elf_bss, last_bss - elf_bss);
  453                up_write(&current->mm->mmap_sem);
  454                if (BAD_ADDR(error))
  455                        goto out_close;
  456        }
  457
  458        *interp_load_addr = load_addr;
  459        error = ((unsigned long)interp_elf_ex->e_entry) + load_addr;
  460
             /* Success and failure both release the program header copy here. */
  461out_close:
  462        kfree(elf_phdata);
  463out:
  464        return error;
  465}
 466
 467static unsigned long load_aout_interp(struct exec *interp_ex,
 468                struct file *interpreter)
 469{
 470        unsigned long text_data, elf_entry = ~0UL;
 471        char __user * addr;
 472        loff_t offset;
 473
 474        current->mm->end_code = interp_ex->a_text;
 475        text_data = interp_ex->a_text + interp_ex->a_data;
 476        current->mm->end_data = text_data;
 477        current->mm->brk = interp_ex->a_bss + text_data;
 478
 479        switch (N_MAGIC(*interp_ex)) {
 480        case OMAGIC:
 481                offset = 32;
 482                addr = (char __user *)0;
 483                break;
 484        case ZMAGIC:
 485        case QMAGIC:
 486                offset = N_TXTOFF(*interp_ex);
 487                addr = (char __user *)N_TXTADDR(*interp_ex);
 488                break;
 489        default:
 490                goto out;
 491        }
 492
 493        down_write(&current->mm->mmap_sem);     
 494        do_brk(0, text_data);
 495        up_write(&current->mm->mmap_sem);
 496        if (!interpreter->f_op || !interpreter->f_op->read)
 497                goto out;
 498        if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
 499                goto out;
 500        flush_icache_range((unsigned long)addr,
 501                           (unsigned long)addr + text_data);
 502
 503        down_write(&current->mm->mmap_sem);     
 504        do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
 505                interp_ex->a_bss);
 506        up_write(&current->mm->mmap_sem);
 507        elf_entry = interp_ex->a_entry;
 508
 509out:
 510        return elf_entry;
 511}
 512
  513/*
  514 * These are the functions used to load ELF style executables and shared
  515 * libraries.  There is no binary dependent code anywhere else.
  516 */
  517
  /*
   * Interpreter format flags.  load_elf_binary() treats AOUT and ELF as
   * bits: it ORs them together and masks them out individually while
   * working out the interpreter's format.
   */
  518#define INTERPRETER_NONE 0
  519#define INTERPRETER_AOUT 1
  520#define INTERPRETER_ELF 2
  521
  /*
   * Default mask applied to the random page count in
   * randomize_stack_top(); used only when nothing included earlier
   * defined STACK_RND_MASK.
   */
  522#ifndef STACK_RND_MASK
  523#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
  524#endif
 525
 526static unsigned long randomize_stack_top(unsigned long stack_top)
 527{
 528        unsigned int random_variable = 0;
 529
 530        if ((current->flags & PF_RANDOMIZE) &&
 531                !(current->personality & ADDR_NO_RANDOMIZE)) {
 532                random_variable = get_random_int() & STACK_RND_MASK;
 533                random_variable <<= PAGE_SHIFT;
 534        }
 535#ifdef CONFIG_STACK_GROWSUP
 536        return PAGE_ALIGN(stack_top) + random_variable;
 537#else
 538        return PAGE_ALIGN(stack_top) - random_variable;
 539#endif
 540}
 541
 542static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 543{
 544        struct file *interpreter = NULL; /* to shut gcc up */
 545        unsigned long load_addr = 0, load_bias = 0;
 546        int load_addr_set = 0;
 547        char * elf_interpreter = NULL;
 548        unsigned int interpreter_type = INTERPRETER_NONE;
 549        unsigned char ibcs2_interpreter = 0;
 550        unsigned long error;
 551        struct elf_phdr *elf_ppnt, *elf_phdata;
 552        unsigned long elf_bss, elf_brk;
 553        int elf_exec_fileno;
 554        int retval, i;
 555        unsigned int size;
 556        unsigned long elf_entry, interp_load_addr = 0;
 557        unsigned long start_code, end_code, start_data, end_data;
 558        unsigned long reloc_func_desc = 0;
 559        char passed_fileno[6];
 560        struct files_struct *files;
 561        int executable_stack = EXSTACK_DEFAULT;
 562        unsigned long def_flags = 0;
 563        struct {
 564                struct elfhdr elf_ex;
 565                struct elfhdr interp_elf_ex;
 566                struct exec interp_ex;
 567        } *loc;
 568
 569        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
 570        if (!loc) {
 571                retval = -ENOMEM;
 572                goto out_ret;
 573        }
 574        
 575        /* Get the exec-header */
 576        loc->elf_ex = *((struct elfhdr *)bprm->buf);
 577
 578        retval = -ENOEXEC;
 579        /* First of all, some simple consistency checks */
 580        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 581                goto out;
 582
 583        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
 584                goto out;
 585        if (!elf_check_arch(&loc->elf_ex))
 586                goto out;
 587        if (!bprm->file->f_op||!bprm->file->f_op->mmap)
 588                goto out;
 589
 590        /* Now read in all of the header information */
 591        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
 592                goto out;
 593        if (loc->elf_ex.e_phnum < 1 ||
 594                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
 595                goto out;
 596        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
 597        retval = -ENOMEM;
 598        elf_phdata = kmalloc(size, GFP_KERNEL);
 599        if (!elf_phdata)
 600                goto out;
 601
 602        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
 603                             (char *)elf_phdata, size);
 604        if (retval != size) {
 605                if (retval >= 0)
 606                        retval = -EIO;
 607                goto out_free_ph;
 608        }
 609
 610        files = current->files; /* Refcounted so ok */
 611        retval = unshare_files();
 612        if (retval < 0)
 613                goto out_free_ph;
 614        if (files == current->files) {
 615                put_files_struct(files);
 616                files = NULL;
 617        }
 618
 619        /* exec will make our files private anyway, but for the a.out
 620           loader stuff we need to do it earlier */
 621        retval = get_unused_fd();
 622        if (retval < 0)
 623                goto out_free_fh;
 624        get_file(bprm->file);
 625        fd_install(elf_exec_fileno = retval, bprm->file);
 626
 627        elf_ppnt = elf_phdata;
 628        elf_bss = 0;
 629        elf_brk = 0;
 630
 631        start_code = ~0UL;
 632        end_code = 0;
 633        start_data = 0;
 634        end_data = 0;
 635
 636        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
 637                if (elf_ppnt->p_type == PT_INTERP) {
 638                        /* This is the program interpreter used for
 639                         * shared libraries - for now assume that this
 640                         * is an a.out format binary
 641                         */
 642                        retval = -ENOEXEC;
 643                        if (elf_ppnt->p_filesz > PATH_MAX || 
 644                            elf_ppnt->p_filesz < 2)
 645                                goto out_free_file;
 646
 647                        retval = -ENOMEM;
 648                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
 649                                                  GFP_KERNEL);
 650                        if (!elf_interpreter)
 651                                goto out_free_file;
 652
 653                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
 654                                             elf_interpreter,
 655                                             elf_ppnt->p_filesz);
 656                        if (retval != elf_ppnt->p_filesz) {
 657                                if (retval >= 0)
 658                                        retval = -EIO;
 659                                goto out_free_interp;
 660                        }
 661                        /* make sure path is NULL terminated */
 662                        retval = -ENOEXEC;
 663                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
 664                                goto out_free_interp;
 665
 666                        /* If the program interpreter is one of these two,
 667                         * then assume an iBCS2 image. Otherwise assume
 668                         * a native linux image.
 669                         */
 670                        if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
 671                            strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
 672                                ibcs2_interpreter = 1;
 673
 674                        /*
 675                         * The early SET_PERSONALITY here is so that the lookup
 676                         * for the interpreter happens in the namespace of the 
 677                         * to-be-execed image.  SET_PERSONALITY can select an
 678                         * alternate root.
 679                         *
 680                         * However, SET_PERSONALITY is NOT allowed to switch
 681                         * this task into the new images's memory mapping
 682                         * policy - that is, TASK_SIZE must still evaluate to
 683                         * that which is appropriate to the execing application.
 684                         * This is because exit_mmap() needs to have TASK_SIZE
 685                         * evaluate to the size of the old image.
 686                         *
 687                         * So if (say) a 64-bit application is execing a 32-bit
 688                         * application it is the architecture's responsibility
 689                         * to defer changing the value of TASK_SIZE until the
 690                         * switch really is going to happen - do this in
 691                         * flush_thread().      - akpm
 692                         */
 693                        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 694
 695                        interpreter = open_exec(elf_interpreter);
 696                        retval = PTR_ERR(interpreter);
 697                        if (IS_ERR(interpreter))
 698                                goto out_free_interp;
 699
 700                        /*
 701                         * If the binary is not readable then enforce
 702                         * mm->dumpable = 0 regardless of the interpreter's
 703                         * permissions.
 704                         */
 705                        if (file_permission(interpreter, MAY_READ) < 0)
 706                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 707
 708                        retval = kernel_read(interpreter, 0, bprm->buf,
 709                                             BINPRM_BUF_SIZE);
 710                        if (retval != BINPRM_BUF_SIZE) {
 711                                if (retval >= 0)
 712                                        retval = -EIO;
 713                                goto out_free_dentry;
 714                        }
 715
 716                        /* Get the exec headers */
 717                        loc->interp_ex = *((struct exec *)bprm->buf);
 718                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
 719                        break;
 720                }
 721                elf_ppnt++;
 722        }
 723
 724        elf_ppnt = elf_phdata;
 725        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
 726                if (elf_ppnt->p_type == PT_GNU_STACK) {
 727                        if (elf_ppnt->p_flags & PF_X)
 728                                executable_stack = EXSTACK_ENABLE_X;
 729                        else
 730                                executable_stack = EXSTACK_DISABLE_X;
 731                        break;
 732                }
 733
 734        /* Some simple consistency checks for the interpreter */
 735        if (elf_interpreter) {
 736                static int warn;
 737                interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
 738
 739                /* Now figure out which format our binary is */
 740                if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
 741                    (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
 742                    (N_MAGIC(loc->interp_ex) != QMAGIC))
 743                        interpreter_type = INTERPRETER_ELF;
 744
 745                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
 746                        interpreter_type &= ~INTERPRETER_ELF;
 747
 748                if (interpreter_type == INTERPRETER_AOUT && warn < 10) {
 749                        printk(KERN_WARNING "a.out ELF interpreter %s is "
 750                                "deprecated and will not be supported "
 751                                "after Linux 2.6.25\n", elf_interpreter);
 752                        warn++;
 753                }
 754
 755                retval = -ELIBBAD;
 756                if (!interpreter_type)
 757                        goto out_free_dentry;
 758
 759                /* Make sure only one type was selected */
 760                if ((interpreter_type & INTERPRETER_ELF) &&
 761                     interpreter_type != INTERPRETER_ELF) {
 762                        // FIXME - ratelimit this before re-enabling
 763                        // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
 764                        interpreter_type = INTERPRETER_ELF;
 765                }
 766                /* Verify the interpreter has a valid arch */
 767                if ((interpreter_type == INTERPRETER_ELF) &&
 768                    !elf_check_arch(&loc->interp_elf_ex))
 769                        goto out_free_dentry;
 770        } else {
 771                /* Executables without an interpreter also need a personality  */
 772                SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 773        }
 774
 775        /* OK, we are done with that, now set up the arg stuff,
 776           and then start this sucker up */
 777        if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
 778                char *passed_p = passed_fileno;
 779                sprintf(passed_fileno, "%d", elf_exec_fileno);
 780
 781                if (elf_interpreter) {
 782                        retval = copy_strings_kernel(1, &passed_p, bprm);
 783                        if (retval)
 784                                goto out_free_dentry; 
 785                        bprm->argc++;
 786                }
 787        }
 788
 789        /* Flush all traces of the currently running executable */
 790        retval = flush_old_exec(bprm);
 791        if (retval)
 792                goto out_free_dentry;
 793
 794        /* Discard our unneeded old files struct */
 795        if (files) {
 796                put_files_struct(files);
 797                files = NULL;
 798        }
 799
 800        /* OK, This is the point of no return */
 801        current->flags &= ~PF_FORKNOEXEC;
 802        current->mm->def_flags = def_flags;
 803
 804        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
 805           may depend on the personality.  */
 806        SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
 807        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
 808                current->personality |= READ_IMPLIES_EXEC;
 809
 810        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 811                current->flags |= PF_RANDOMIZE;
 812        arch_pick_mmap_layout(current->mm);
 813
 814        /* Do this so that we can load the interpreter, if need be.  We will
 815           change some of these later */
 816        current->mm->free_area_cache = current->mm->mmap_base;
 817        current->mm->cached_hole_size = 0;
 818        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 819                                 executable_stack);
 820        if (retval < 0) {
 821                send_sig(SIGKILL, current, 0);
 822                goto out_free_dentry;
 823        }
 824        
 825        current->mm->start_stack = bprm->p;
 826
 827        /* Now we do a little grungy work by mmaping the ELF image into
 828           the correct location in memory.  At this point, we assume that
 829           the image should be loaded at fixed address, not at a variable
 830           address. */
 831        for(i = 0, elf_ppnt = elf_phdata;
 832            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
 833                int elf_prot = 0, elf_flags;
 834                unsigned long k, vaddr;
 835
 836                if (elf_ppnt->p_type != PT_LOAD)
 837                        continue;
 838
 839                if (unlikely (elf_brk > elf_bss)) {
 840                        unsigned long nbyte;
 841                    
 842                        /* There was a PT_LOAD segment with p_memsz > p_filesz
 843                           before this one. Map anonymous pages, if needed,
 844                           and clear the area.  */
 845                        retval = set_brk (elf_bss + load_bias,
 846                                          elf_brk + load_bias);
 847                        if (retval) {
 848                                send_sig(SIGKILL, current, 0);
 849                                goto out_free_dentry;
 850                        }
 851                        nbyte = ELF_PAGEOFFSET(elf_bss);
 852                        if (nbyte) {
 853                                nbyte = ELF_MIN_ALIGN - nbyte;
 854                                if (nbyte > elf_brk - elf_bss)
 855                                        nbyte = elf_brk - elf_bss;
 856                                if (clear_user((void __user *)elf_bss +
 857                                                        load_bias, nbyte)) {
 858                                        /*
 859                                         * This bss-zeroing can fail if the ELF
 860                                         * file specifies odd protections. So
 861                                         * we don't check the return value
 862                                         */
 863                                }
 864                        }
 865                }
 866
 867                if (elf_ppnt->p_flags & PF_R)
 868                        elf_prot |= PROT_READ;
 869                if (elf_ppnt->p_flags & PF_W)
 870                        elf_prot |= PROT_WRITE;
 871                if (elf_ppnt->p_flags & PF_X)
 872                        elf_prot |= PROT_EXEC;
 873
 874                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
 875
 876                vaddr = elf_ppnt->p_vaddr;
 877                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
 878                        elf_flags |= MAP_FIXED;
 879                } else if (loc->elf_ex.e_type == ET_DYN) {
 880                        /* Try and get dynamic programs out of the way of the
 881                         * default mmap base, as well as whatever program they
 882                         * might try to exec.  This is because the brk will
 883                         * follow the loader, and is not movable.  */
 884                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
 885                }
 886
 887                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
 888                                elf_prot, elf_flags);
 889                if (BAD_ADDR(error)) {
 890                        send_sig(SIGKILL, current, 0);
 891                        retval = IS_ERR((void *)error) ?
 892                                PTR_ERR((void*)error) : -EINVAL;
 893                        goto out_free_dentry;
 894                }
 895
 896                if (!load_addr_set) {
 897                        load_addr_set = 1;
 898                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
 899                        if (loc->elf_ex.e_type == ET_DYN) {
 900                                load_bias += error -
 901                                             ELF_PAGESTART(load_bias + vaddr);
 902                                load_addr += load_bias;
 903                                reloc_func_desc = load_bias;
 904                        }
 905                }
 906                k = elf_ppnt->p_vaddr;
 907                if (k < start_code)
 908                        start_code = k;
 909                if (start_data < k)
 910                        start_data = k;
 911
 912                /*
 913                 * Check to see if the section's size will overflow the
 914                 * allowed task size. Note that p_filesz must always be
 915                 * <= p_memsz so it is only necessary to check p_memsz.
 916                 */
 917                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 918                    elf_ppnt->p_memsz > TASK_SIZE ||
 919                    TASK_SIZE - elf_ppnt->p_memsz < k) {
 920                        /* set_brk can never work. Avoid overflows. */
 921                        send_sig(SIGKILL, current, 0);
 922                        retval = -EINVAL;
 923                        goto out_free_dentry;
 924                }
 925
 926                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
 927
 928                if (k > elf_bss)
 929                        elf_bss = k;
 930                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
 931                        end_code = k;
 932                if (end_data < k)
 933                        end_data = k;
 934                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
 935                if (k > elf_brk)
 936                        elf_brk = k;
 937        }
 938
 939        loc->elf_ex.e_entry += load_bias;
 940        elf_bss += load_bias;
 941        elf_brk += load_bias;
 942        start_code += load_bias;
 943        end_code += load_bias;
 944        start_data += load_bias;
 945        end_data += load_bias;
 946
 947        /* Calling set_brk effectively mmaps the pages that we need
 948         * for the bss and break sections.  We must do this before
 949         * mapping in the interpreter, to make sure it doesn't wind
 950         * up getting placed where the bss needs to go.
 951         */
 952        retval = set_brk(elf_bss, elf_brk);
 953        if (retval) {
 954                send_sig(SIGKILL, current, 0);
 955                goto out_free_dentry;
 956        }
 957        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
 958                send_sig(SIGSEGV, current, 0);
 959                retval = -EFAULT; /* Nobody gets to see this, but.. */
 960                goto out_free_dentry;
 961        }
 962
 963        if (elf_interpreter) {
 964                if (interpreter_type == INTERPRETER_AOUT)
 965                        elf_entry = load_aout_interp(&loc->interp_ex,
 966                                                     interpreter);
 967                else
 968                        elf_entry = load_elf_interp(&loc->interp_elf_ex,
 969                                                    interpreter,
 970                                                    &interp_load_addr);
 971                if (BAD_ADDR(elf_entry)) {
 972                        force_sig(SIGSEGV, current);
 973                        retval = IS_ERR((void *)elf_entry) ?
 974                                        (int)elf_entry : -EINVAL;
 975                        goto out_free_dentry;
 976                }
 977                reloc_func_desc = interp_load_addr;
 978
 979                allow_write_access(interpreter);
 980                fput(interpreter);
 981                kfree(elf_interpreter);
 982        } else {
 983                elf_entry = loc->elf_ex.e_entry;
 984                if (BAD_ADDR(elf_entry)) {
 985                        force_sig(SIGSEGV, current);
 986                        retval = -EINVAL;
 987                        goto out_free_dentry;
 988                }
 989        }
 990
 991        kfree(elf_phdata);
 992
 993        if (interpreter_type != INTERPRETER_AOUT)
 994                sys_close(elf_exec_fileno);
 995
 996        set_binfmt(&elf_format);
 997
 998#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
 999        retval = arch_setup_additional_pages(bprm, executable_stack);
1000        if (retval < 0) {
1001                send_sig(SIGKILL, current, 0);
1002                goto out;
1003        }
1004#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1005
1006        compute_creds(bprm);
1007        current->flags &= ~PF_FORKNOEXEC;
1008        retval = create_elf_tables(bprm, &loc->elf_ex,
1009                          (interpreter_type == INTERPRETER_AOUT),
1010                          load_addr, interp_load_addr);
1011        if (retval < 0) {
1012                send_sig(SIGKILL, current, 0);
1013                goto out;
1014        }
1015        /* N.B. passed_fileno might not be initialized? */
1016        if (interpreter_type == INTERPRETER_AOUT)
1017                current->mm->arg_start += strlen(passed_fileno) + 1;
1018        current->mm->end_code = end_code;
1019        current->mm->start_code = start_code;
1020        current->mm->start_data = start_data;
1021        current->mm->end_data = end_data;
1022        current->mm->start_stack = bprm->p;
1023
1024        if (current->personality & MMAP_PAGE_ZERO) {
1025                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1026                   and some applications "depend" upon this behavior.
1027                   Since we do not have the power to recompile these, we
1028                   emulate the SVr4 behavior. Sigh. */
1029                down_write(&current->mm->mmap_sem);
1030                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1031                                MAP_FIXED | MAP_PRIVATE, 0);
1032                up_write(&current->mm->mmap_sem);
1033        }
1034
1035#ifdef ELF_PLAT_INIT
1036        /*
1037         * The ABI may specify that certain registers be set up in special
1038         * ways (on i386 %edx is the address of a DT_FINI function, for
1039         * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1040         * that the e_entry field is the address of the function descriptor
1041         * for the startup routine, rather than the address of the startup
1042         * routine itself.  This macro performs whatever initialization to
1043         * the regs structure is required as well as any relocations to the
 1044         * function descriptor entries when executing dynamically linked apps.
1045         */
1046        ELF_PLAT_INIT(regs, reloc_func_desc);
1047#endif
1048
1049        start_thread(regs, elf_entry, bprm->p);
1050        if (unlikely(current->ptrace & PT_PTRACED)) {
1051                if (current->ptrace & PT_TRACE_EXEC)
1052                        ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1053                else
1054                        send_sig(SIGTRAP, current, 0);
1055        }
1056        retval = 0;
1057out:
1058        kfree(loc);
1059out_ret:
1060        return retval;
1061
1062        /* error cleanup */
1063out_free_dentry:
1064        allow_write_access(interpreter);
1065        if (interpreter)
1066                fput(interpreter);
1067out_free_interp:
1068        kfree(elf_interpreter);
1069out_free_file:
1070        sys_close(elf_exec_fileno);
1071out_free_fh:
1072        if (files)
1073                reset_files_struct(current, files);
1074out_free_ph:
1075        kfree(elf_phdata);
1076        goto out;
1077}
1078
1079/* This is really simpleminded and specialized - we are loading an
1080   a.out library that is given an ELF header. */
1081static int load_elf_library(struct file *file)
1082{
1083        struct elf_phdr *elf_phdata;
1084        struct elf_phdr *eppnt;
1085        unsigned long elf_bss, bss, len;
1086        int retval, error, i, j;
1087        struct elfhdr elf_ex;
1088
1089        error = -ENOEXEC;
1090        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1091        if (retval != sizeof(elf_ex))
1092                goto out;
1093
1094        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1095                goto out;
1096
1097        /* First of all, some simple consistency checks */
1098        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1099            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1100                goto out;
1101
1102        /* Now read in all of the header information */
1103
1104        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1105        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1106
1107        error = -ENOMEM;
1108        elf_phdata = kmalloc(j, GFP_KERNEL);
1109        if (!elf_phdata)
1110                goto out;
1111
1112        eppnt = elf_phdata;
1113        error = -ENOEXEC;
1114        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1115        if (retval != j)
1116                goto out_free_ph;
1117
1118        for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1119                if ((eppnt + i)->p_type == PT_LOAD)
1120                        j++;
1121        if (j != 1)
1122                goto out_free_ph;
1123
1124        while (eppnt->p_type != PT_LOAD)
1125                eppnt++;
1126
1127        /* Now use mmap to map the library into memory. */
1128        down_write(&current->mm->mmap_sem);
1129        error = do_mmap(file,
1130                        ELF_PAGESTART(eppnt->p_vaddr),
1131                        (eppnt->p_filesz +
1132                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
1133                        PROT_READ | PROT_WRITE | PROT_EXEC,
1134                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1135                        (eppnt->p_offset -
1136                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
1137        up_write(&current->mm->mmap_sem);
1138        if (error != ELF_PAGESTART(eppnt->p_vaddr))
1139                goto out_free_ph;
1140
1141        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1142        if (padzero(elf_bss)) {
1143                error = -EFAULT;
1144                goto out_free_ph;
1145        }
1146
1147        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1148                            ELF_MIN_ALIGN - 1);
1149        bss = eppnt->p_memsz + eppnt->p_vaddr;
1150        if (bss > len) {
1151                down_write(&current->mm->mmap_sem);
1152                do_brk(len, bss - len);
1153                up_write(&current->mm->mmap_sem);
1154        }
1155        error = 0;
1156
1157out_free_ph:
1158        kfree(elf_phdata);
1159out:
1160        return error;
1161}
1162
1163/*
1164 * Note that some platforms still use traditional core dumps and not
1165 * the ELF core dump.  Each platform can select it as appropriate.
1166 */
1167#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1168
1169/*
1170 * ELF core dumper
1171 *
1172 * Modelled on fs/exec.c:aout_core_dump()
1173 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1174 */
1175/*
1176 * These are the only things you should do on a core-file: use only these
1177 * functions to write out all the necessary info.
1178 */
1179static int dump_write(struct file *file, const void *addr, int nr)
1180{
1181        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1182}
1183
1184static int dump_seek(struct file *file, loff_t off)
1185{
1186        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1187                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1188                        return 0;
1189        } else {
1190                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1191                if (!buf)
1192                        return 0;
1193                while (off > 0) {
1194                        unsigned long n = off;
1195                        if (n > PAGE_SIZE)
1196                                n = PAGE_SIZE;
1197                        if (!dump_write(file, buf, n))
1198                                return 0;
1199                        off -= n;
1200                }
1201                free_page((unsigned long)buf);
1202        }
1203        return 1;
1204}
1205
1206/*
1207 * Decide what to dump of a segment, part, all or none.
1208 */
1209static unsigned long vma_dump_size(struct vm_area_struct *vma,
1210                                   unsigned long mm_flags)
1211{
1212        /* The vma can be set up to tell us the answer directly.  */
1213        if (vma->vm_flags & VM_ALWAYSDUMP)
1214                goto whole;
1215
1216        /* Do not dump I/O mapped devices or special mappings */
1217        if (vma->vm_flags & (VM_IO | VM_RESERVED))
1218                return 0;
1219
1220#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1221
1222        /* By default, dump shared memory if mapped from an anonymous file. */
1223        if (vma->vm_flags & VM_SHARED) {
1224                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1225                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1226                        goto whole;
1227                return 0;
1228        }
1229
1230        /* Dump segments that have been written to.  */
1231        if (vma->anon_vma && FILTER(ANON_PRIVATE))
1232                goto whole;
1233        if (vma->vm_file == NULL)
1234                return 0;
1235
1236        if (FILTER(MAPPED_PRIVATE))
1237                goto whole;
1238
1239        /*
1240         * If this looks like the beginning of a DSO or executable mapping,
1241         * check for an ELF header.  If we find one, dump the first page to
1242         * aid in determining what was mapped here.
1243         */
1244        if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
1245                u32 __user *header = (u32 __user *) vma->vm_start;
1246                u32 word;
1247                /*
1248                 * Doing it this way gets the constant folded by GCC.
1249                 */
1250                union {
1251                        u32 cmp;
1252                        char elfmag[SELFMAG];
1253                } magic;
1254                BUILD_BUG_ON(SELFMAG != sizeof word);
1255                magic.elfmag[EI_MAG0] = ELFMAG0;
1256                magic.elfmag[EI_MAG1] = ELFMAG1;
1257                magic.elfmag[EI_MAG2] = ELFMAG2;
1258                magic.elfmag[EI_MAG3] = ELFMAG3;
1259                if (get_user(word, header) == 0 && word == magic.cmp)
1260                        return PAGE_SIZE;
1261        }
1262
1263#undef  FILTER
1264
1265        return 0;
1266
1267whole:
1268        return vma->vm_end - vma->vm_start;
1269}
1270
/* An ELF note held in memory until it is written to the core file */
struct memelfnote {
        const char *name;       /* NUL-terminated note name, e.g. "CORE" */
        int type;               /* note type, written out as n_type */
        unsigned int datasz;    /* size of the payload in bytes */
        void *data;             /* the payload itself */
};
1279
1280static int notesize(struct memelfnote *en)
1281{
1282        int sz;
1283
1284        sz = sizeof(struct elf_note);
1285        sz += roundup(strlen(en->name) + 1, 4);
1286        sz += roundup(en->datasz, 4);
1287
1288        return sz;
1289}
1290
1291#define DUMP_WRITE(addr, nr, foffset)   \
1292        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1293
1294static int alignfile(struct file *file, loff_t *foffset)
1295{
1296        static const char buf[4] = { 0, };
1297        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1298        return 1;
1299}
1300
1301static int writenote(struct memelfnote *men, struct file *file,
1302                        loff_t *foffset)
1303{
1304        struct elf_note en;
1305        en.n_namesz = strlen(men->name) + 1;
1306        en.n_descsz = men->datasz;
1307        en.n_type = men->type;
1308
1309        DUMP_WRITE(&en, sizeof(en), foffset);
1310        DUMP_WRITE(men->name, en.n_namesz, foffset);
1311        if (!alignfile(file, foffset))
1312                return 0;
1313        DUMP_WRITE(men->data, men->datasz, foffset);
1314        if (!alignfile(file, foffset))
1315                return 0;
1316
1317        return 1;
1318}
1319#undef DUMP_WRITE
1320
/*
 * Helpers for the core dump writer.  Both expect a `file' in the
 * expanding scope and jump to a local `end_coredump' label on failure.
 * DUMP_WRITE() additionally charges @nr against the running `size' total
 * and gives up once that exceeds `limit'.
 *
 * The bodies are wrapped in do { } while (0) so that each use forms a
 * single statement and cannot capture a following `else'.
 */
#define DUMP_WRITE(addr, nr)    \
        do { \
                if ((size += (nr)) > limit || \
                    !dump_write(file, (addr), (nr))) \
                        goto end_coredump; \
        } while (0)
#define DUMP_SEEK(off)  \
        do { \
                if (!dump_seek(file, (off))) \
                        goto end_coredump; \
        } while (0)
1327
1328static void fill_elf_header(struct elfhdr *elf, int segs)
1329{
1330        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1331        elf->e_ident[EI_CLASS] = ELF_CLASS;
1332        elf->e_ident[EI_DATA] = ELF_DATA;
1333        elf->e_ident[EI_VERSION] = EV_CURRENT;
1334        elf->e_ident[EI_OSABI] = ELF_OSABI;
1335        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1336
1337        elf->e_type = ET_CORE;
1338        elf->e_machine = ELF_ARCH;
1339        elf->e_version = EV_CURRENT;
1340        elf->e_entry = 0;
1341        elf->e_phoff = sizeof(struct elfhdr);
1342        elf->e_shoff = 0;
1343        elf->e_flags = ELF_CORE_EFLAGS;
1344        elf->e_ehsize = sizeof(struct elfhdr);
1345        elf->e_phentsize = sizeof(struct elf_phdr);
1346        elf->e_phnum = segs;
1347        elf->e_shentsize = 0;
1348        elf->e_shnum = 0;
1349        elf->e_shstrndx = 0;
1350        return;
1351}
1352
1353static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1354{
1355        phdr->p_type = PT_NOTE;
1356        phdr->p_offset = offset;
1357        phdr->p_vaddr = 0;
1358        phdr->p_paddr = 0;
1359        phdr->p_filesz = sz;
1360        phdr->p_memsz = 0;
1361        phdr->p_flags = 0;
1362        phdr->p_align = 0;
1363        return;
1364}
1365
1366static void fill_note(struct memelfnote *note, const char *name, int type, 
1367                unsigned int sz, void *data)
1368{
1369        note->name = name;
1370        note->type = type;
1371        note->datasz = sz;
1372        note->data = data;
1373        return;
1374}
1375
1376/*
1377 * fill up all the fields in prstatus from the given task struct, except
1378 * registers which need to be filled up separately.
1379 */
1380static void fill_prstatus(struct elf_prstatus *prstatus,
1381                struct task_struct *p, long signr)
1382{
1383        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1384        prstatus->pr_sigpend = p->pending.signal.sig[0];
1385        prstatus->pr_sighold = p->blocked.sig[0];
1386        prstatus->pr_pid = task_pid_vnr(p);
1387        prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1388        prstatus->pr_pgrp = task_pgrp_vnr(p);
1389        prstatus->pr_sid = task_session_vnr(p);
1390        if (thread_group_leader(p)) {
1391                /*
1392                 * This is the record for the group leader.  Add in the
1393                 * cumulative times of previous dead threads.  This total
1394                 * won't include the time of each live thread whose state
1395                 * is included in the core dump.  The final total reported
1396                 * to our parent process when it calls wait4 will include
1397                 * those sums as well as the little bit more time it takes
1398                 * this and each other thread to finish dying after the
1399                 * core dump synchronization phase.
1400                 */
1401                cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1402                                   &prstatus->pr_utime);
1403                cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1404                                   &prstatus->pr_stime);
1405        } else {
1406                cputime_to_timeval(p->utime, &prstatus->pr_utime);
1407                cputime_to_timeval(p->stime, &prstatus->pr_stime);
1408        }
1409        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1410        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1411}
1412
1413static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1414                       struct mm_struct *mm)
1415{
1416        unsigned int i, len;
1417        
1418        /* first copy the parameters from user space */
1419        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1420
1421        len = mm->arg_end - mm->arg_start;
1422        if (len >= ELF_PRARGSZ)
1423                len = ELF_PRARGSZ-1;
1424        if (copy_from_user(&psinfo->pr_psargs,
1425                           (const char __user *)mm->arg_start, len))
1426                return -EFAULT;
1427        for(i = 0; i < len; i++)
1428                if (psinfo->pr_psargs[i] == 0)
1429                        psinfo->pr_psargs[i] = ' ';
1430        psinfo->pr_psargs[len] = 0;
1431
1432        psinfo->pr_pid = task_pid_vnr(p);
1433        psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1434        psinfo->pr_pgrp = task_pgrp_vnr(p);
1435        psinfo->pr_sid = task_session_vnr(p);
1436
1437        i = p->state ? ffz(~p->state) + 1 : 0;
1438        psinfo->pr_state = i;
1439        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1440        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1441        psinfo->pr_nice = task_nice(p);
1442        psinfo->pr_flag = p->flags;
1443        SET_UID(psinfo->pr_uid, p->uid);
1444        SET_GID(psinfo->pr_gid, p->gid);
1445        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1446        
1447        return 0;
1448}
1449
1450/* Here is the structure in which status of each thread is captured. */
1451struct elf_thread_status
1452{
1453        struct list_head list;
1454        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1455        elf_fpregset_t fpu;             /* NT_PRFPREG */
1456        struct task_struct *thread;
1457#ifdef ELF_CORE_COPY_XFPREGS
1458        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1459#endif
1460        struct memelfnote notes[3];
1461        int num_notes;
1462};
1463
1464/*
1465 * In order to add the specific thread information for the elf file format,
1466 * we need to keep a linked list of every threads pr_status and then create
1467 * a single section for them in the final core file.
1468 */
1469static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1470{
1471        int sz = 0;
1472        struct task_struct *p = t->thread;
1473        t->num_notes = 0;
1474
1475        fill_prstatus(&t->prstatus, p, signr);
1476        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1477        
1478        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1479                  &(t->prstatus));
1480        t->num_notes++;
1481        sz += notesize(&t->notes[0]);
1482
1483        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1484                                                                &t->fpu))) {
1485                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1486                          &(t->fpu));
1487                t->num_notes++;
1488                sz += notesize(&t->notes[1]);
1489        }
1490
1491#ifdef ELF_CORE_COPY_XFPREGS
1492        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1493                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1494                          sizeof(t->xfpu), &t->xfpu);
1495                t->num_notes++;
1496                sz += notesize(&t->notes[2]);
1497        }
1498#endif  
1499        return sz;
1500}
1501
1502static struct vm_area_struct *first_vma(struct task_struct *tsk,
1503                                        struct vm_area_struct *gate_vma)
1504{
1505        struct vm_area_struct *ret = tsk->mm->mmap;
1506
1507        if (ret)
1508                return ret;
1509        return gate_vma;
1510}
1511/*
1512 * Helper function for iterating across a vma list.  It ensures that the caller
1513 * will visit `gate_vma' prior to terminating the search.
1514 */
1515static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1516                                        struct vm_area_struct *gate_vma)
1517{
1518        struct vm_area_struct *ret;
1519
1520        ret = this_vma->vm_next;
1521        if (ret)
1522                return ret;
1523        if (this_vma == gate_vma)
1524                return NULL;
1525        return gate_vma;
1526}
1527
1528/*
1529 * Actual dumper
1530 *
1531 * This is a two-pass process; first we find the offsets of the bits,
1532 * and then they are actually written out.  If we run out of core limit
1533 * we just truncate.
1534 */
1535static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1536{
1537#define NUM_NOTES       6
1538        int has_dumped = 0;
1539        mm_segment_t fs;
1540        int segs;
1541        size_t size = 0;
1542        int i;
1543        struct vm_area_struct *vma, *gate_vma;
1544        struct elfhdr *elf = NULL;
1545        loff_t offset = 0, dataoff, foffset;
1546        int numnote;
1547        struct memelfnote *notes = NULL;
1548        struct elf_prstatus *prstatus = NULL;   /* NT_PRSTATUS */
1549        struct elf_prpsinfo *psinfo = NULL;     /* NT_PRPSINFO */
1550        struct task_struct *g, *p;
1551        LIST_HEAD(thread_list);
1552        struct list_head *t;
1553        elf_fpregset_t *fpu = NULL;
1554#ifdef ELF_CORE_COPY_XFPREGS
1555        elf_fpxregset_t *xfpu = NULL;
1556#endif
1557        int thread_status_size = 0;
1558        elf_addr_t *auxv;
1559        unsigned long mm_flags;
1560
1561        /*
1562         * We no longer stop all VM operations.
1563         * 
 1564         * This is because those processes that could possibly change map_count
1565         * or the mmap / vma pages are now blocked in do_exit on current
1566         * finishing this core dump.
1567         *
1568         * Only ptrace can touch these memory addresses, but it doesn't change
1569         * the map_count or the pages allocated. So no possibility of crashing
1570         * exists while dumping the mm->vm_next areas to the core file.
1571         */
1572  
1573        /* alloc memory for large data structures: too large to be on stack */
1574        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1575        if (!elf)
1576                goto cleanup;
1577        prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1578        if (!prstatus)
1579                goto cleanup;
1580        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1581        if (!psinfo)
1582                goto cleanup;
1583        notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1584        if (!notes)
1585                goto cleanup;
1586        fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1587        if (!fpu)
1588                goto cleanup;
1589#ifdef ELF_CORE_COPY_XFPREGS
1590        xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1591        if (!xfpu)
1592                goto cleanup;
1593#endif
1594
1595        if (signr) {
1596                struct elf_thread_status *tmp;
1597                rcu_read_lock();
1598                do_each_thread(g,p)
1599                        if (current->mm == p->mm && current != p) {
1600                                tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1601                                if (!tmp) {
1602                                        rcu_read_unlock();
1603                                        goto cleanup;
1604                                }
1605                                tmp->thread = p;
1606                                list_add(&tmp->list, &thread_list);
1607                        }
1608                while_each_thread(g,p);
1609                rcu_read_unlock();
1610                list_for_each(t, &thread_list) {
1611                        struct elf_thread_status *tmp;
1612                        int sz;
1613
1614                        tmp = list_entry(t, struct elf_thread_status, list);
1615                        sz = elf_dump_thread_status(signr, tmp);
1616                        thread_status_size += sz;
1617                }
1618        }
1619        /* now collect the dump for the current */
1620        memset(prstatus, 0, sizeof(*prstatus));
1621        fill_prstatus(prstatus, current, signr);
1622        elf_core_copy_regs(&prstatus->pr_reg, regs);
1623        
1624        segs = current->mm->map_count;
1625#ifdef ELF_CORE_EXTRA_PHDRS
1626        segs += ELF_CORE_EXTRA_PHDRS;
1627#endif
1628
1629        gate_vma = get_gate_vma(current);
1630        if (gate_vma != NULL)
1631                segs++;
1632
1633        /* Set up header */
1634        fill_elf_header(elf, segs + 1); /* including notes section */
1635
1636        has_dumped = 1;
1637        current->flags |= PF_DUMPCORE;
1638
1639        /*
1640         * Set up the notes in similar form to SVR4 core dumps made
1641         * with info from their /proc.
1642         */
1643
1644        fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
1645        fill_psinfo(psinfo, current->group_leader, current->mm);
1646        fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1647        
1648        numnote = 2;
1649
1650        auxv = (elf_addr_t *)current->mm->saved_auxv;
1651
1652        i = 0;
1653        do
1654                i += 2;
1655        while (auxv[i - 2] != AT_NULL);
1656        fill_note(&notes[numnote++], "CORE", NT_AUXV,
1657                  i * sizeof(elf_addr_t), auxv);
1658
1659        /* Try to dump the FPU. */
1660        if ((prstatus->pr_fpvalid =
1661             elf_core_copy_task_fpregs(current, regs, fpu)))
1662                fill_note(notes + numnote++,
1663                          "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1664#ifdef ELF_CORE_COPY_XFPREGS
1665        if (elf_core_copy_task_xfpregs(current, xfpu))
1666                fill_note(notes + numnote++,
1667                          "LINUX", ELF_CORE_XFPREG_TYPE, sizeof(*xfpu), xfpu);
1668#endif  
1669  
1670        fs = get_fs();
1671        set_fs(KERNEL_DS);
1672
1673        DUMP_WRITE(elf, sizeof(*elf));
1674        offset += sizeof(*elf);                         /* Elf header */
1675        offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1676        foffset = offset;
1677
1678        /* Write notes phdr entry */
1679        {
1680                struct elf_phdr phdr;
1681                int sz = 0;
1682
1683                for (i = 0; i < numnote; i++)
1684                        sz += notesize(notes + i);
1685                
1686                sz += thread_status_size;
1687
1688                sz += elf_coredump_extra_notes_size();
1689
1690                fill_elf_note_phdr(&phdr, sz, offset);
1691                offset += sz;
1692                DUMP_WRITE(&phdr, sizeof(phdr));
1693        }
1694
1695        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1696
1697        /*
1698         * We must use the same mm->flags while dumping core to avoid
1699         * inconsistency between the program headers and bodies, otherwise an
1700         * unusable core file can be generated.
1701         */
1702        mm_flags = current->mm->flags;
1703
1704        /* Write program headers for segments dump */
1705        for (vma = first_vma(current, gate_vma); vma != NULL;
1706                        vma = next_vma(vma, gate_vma)) {
1707                struct elf_phdr phdr;
1708
1709                phdr.p_type = PT_LOAD;
1710                phdr.p_offset = offset;
1711                phdr.p_vaddr = vma->vm_start;
1712                phdr.p_paddr = 0;
1713                phdr.p_filesz = vma_dump_size(vma, mm_flags);
1714                phdr.p_memsz = vma->vm_end - vma->vm_start;
1715                offset += phdr.p_filesz;
1716                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1717                if (vma->vm_flags & VM_WRITE)
1718                        phdr.p_flags |= PF_W;
1719                if (vma->vm_flags & VM_EXEC)
1720                        phdr.p_flags |= PF_X;
1721                phdr.p_align = ELF_EXEC_PAGESIZE;
1722
1723                DUMP_WRITE(&phdr, sizeof(phdr));
1724        }
1725
1726#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1727        ELF_CORE_WRITE_EXTRA_PHDRS;
1728#endif
1729
1730        /* write out the notes section */
1731        for (i = 0; i < numnote; i++)
1732                if (!writenote(notes + i, file, &foffset))
1733                        goto end_coredump;
1734
1735        if (elf_coredump_extra_notes_write(file, &foffset))
1736                goto end_coredump;
1737
1738        /* write out the thread status notes section */
1739        list_for_each(t, &thread_list) {
1740                struct elf_thread_status *tmp =
1741                                list_entry(t, struct elf_thread_status, list);
1742
1743                for (i = 0; i < tmp->num_notes; i++)
1744                        if (!writenote(&tmp->notes[i], file, &foffset))
1745                                goto end_coredump;
1746        }
1747
1748        /* Align to page */
1749        DUMP_SEEK(dataoff - foffset);
1750
1751        for (vma = first_vma(current, gate_vma); vma != NULL;
1752                        vma = next_vma(vma, gate_vma)) {
1753                unsigned long addr;
1754                unsigned long end;
1755
1756                end = vma->vm_start + vma_dump_size(vma, mm_flags);
1757
1758                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
1759                        struct page *page;
1760                        struct vm_area_struct *vma;
1761
1762                        if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1763                                                &page, &vma) <= 0) {
1764                                DUMP_SEEK(PAGE_SIZE);
1765                        } else {
1766                                if (page == ZERO_PAGE(0)) {
1767                                        if (!dump_seek(file, PAGE_SIZE)) {
1768                                                page_cache_release(page);
1769                                                goto end_coredump;
1770                                        }
1771                                } else {
1772                                        void *kaddr;
1773                                        flush_cache_page(vma, addr,
1774                                                         page_to_pfn(page));
1775                                        kaddr = kmap(page);
1776                                        if ((size += PAGE_SIZE) > limit ||
1777                                            !dump_write(file, kaddr,
1778                                            PAGE_SIZE)) {
1779                                                kunmap(page);
1780                                                page_cache_release(page);
1781                                                goto end_coredump;
1782                                        }
1783                                        kunmap(page);
1784                                }
1785                                page_cache_release(page);
1786                        }
1787                }
1788        }
1789
1790#ifdef ELF_CORE_WRITE_EXTRA_DATA
1791        ELF_CORE_WRITE_EXTRA_DATA;
1792#endif
1793
1794end_coredump:
1795        set_fs(fs);
1796
1797cleanup:
1798        while (!list_empty(&thread_list)) {
1799                struct list_head *tmp = thread_list.next;
1800                list_del(tmp);
1801                kfree(list_entry(tmp, struct elf_thread_status, list));
1802        }
1803
1804        kfree(elf);
1805        kfree(prstatus);
1806        kfree(psinfo);
1807        kfree(notes);
1808        kfree(fpu);
1809#ifdef ELF_CORE_COPY_XFPREGS
1810        kfree(xfpu);
1811#endif
1812        return has_dumped;
1813#undef NUM_NOTES
1814}
1815
1816#endif          /* USE_ELF_CORE_DUMP */
1817
1818static int __init init_elf_binfmt(void)
1819{
1820        return register_binfmt(&elf_format);
1821}
1822
1823static void __exit exit_elf_binfmt(void)
1824{
1825        /* Remove the COFF and ELF loaders. */
1826        unregister_binfmt(&elf_format);
1827}
1828
1829core_initcall(init_elf_binfmt);
1830module_exit(exit_elf_binfmt);
1831MODULE_LICENSE("GPL");
1832