/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX System V Release 4 Programmer's Guide: ANSI C and Programming
 * Support Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
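
/*
 * A worked example of the page macros above, assuming ELF_MIN_ALIGN is
 * 4096 (0x1000):
 *
 *   ELF_PAGESTART(0x12345)  == 0x12000   (round down to the page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345     (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000   (round up to the next page)
 */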

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should
   not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
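
/*
 * For instance, with ELF_MIN_ALIGN == 4096, an elf_bss of 0x0804a123 has
 * ELF_PAGEOFFSET(elf_bss) == 0x123, so padzero() clears the remaining
 * 0x1000 - 0x123 == 0xedd bytes up to the page boundary at 0x0804b000.
 */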

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len; sp; })
#endif
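
/*
 * Example (stack grows down, the common case): with sp starting at
 * 0x7fff00001000, STACK_ALLOC(sp, 16) moves sp down to 0x7fff00000ff0
 * and yields that address as the destination for a 16-byte copy.
 * STACK_ROUND() masks the final address with ~15UL so the stack pointer
 * is 16-byte aligned before argc/argv/envp are written, as most ABIs
 * require.
 */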

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
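
/*
 * The initial user stack that create_elf_tables() leaves behind, reading
 * from the final stack pointer toward higher addresses (the common
 * stack-grows-down layout), is the classic SysV ABI picture:
 *
 *   sp -> argc
 *         argv[0] ... argv[argc - 1], NULL
 *         envp[0] ... envp[envc - 1], NULL
 *         auxv: the AT_* pairs built above, ending with AT_NULL
 *         (higher still: the argument and environment strings, plus the
 *          random bytes and platform strings allocated earlier)
 */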

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

#endif /* !elf_map */
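
/*
 * To illustrate the total_size path: if an interpreter's PT_LOAD segments
 * span 0x24000 bytes in total but the first segment has only 0x4000 bytes
 * of file data, the first elf_map() call reserves the whole 0x24000-byte
 * window at map_addr and immediately unmaps [map_addr + 0x4000,
 * map_addr + 0x24000).  The remaining segments are then mapped MAP_FIXED
 * into that hole, so mmap randomization cannot scatter them.
 */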

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
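
/*
 * Example: for two PT_LOAD entries, the first at p_vaddr 0x400000 and the
 * second at p_vaddr 0x600000 with p_memsz 0x5000, the mapping spans
 * 0x600000 + 0x5000 - ELF_PAGESTART(0x400000) == 0x205000 bytes.
 */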

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
                                       struct file *elf_file)
{
        struct elf_phdr *elf_phdata = NULL;
        int retval, size, err = -1;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;

        /* Sanity check the number of program headers... */
        if (elf_ex->e_phnum < 1 ||
                elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* ...and their total size. */
        size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;

        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        /* Read in the program headers */
        retval = kernel_read(elf_file, elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                err = (retval < 0) ? retval : -EIO;
                goto out;
        }

        /* Success! */
        err = 0;
out:
        if (err) {
                kfree(elf_phdata);
                elf_phdata = NULL;
        }
        return elf_phdata;
}
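
/*
 * For a 64-bit ELF, sizeof(struct elf_phdr) is 56 bytes, so the e_phnum
 * bound above allows at most 65536 / 56 == 1170 program headers, and the
 * size check keeps the whole table within ELF_MIN_ALIGN bytes, i.e. a
 * single small kmalloc() allocation.
 */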

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (i.e. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:       The main ELF header
 * @phdr:       The program header to check
 * @elf:        The open ELF file
 * @is_interp:  True if the phdr is from the interpreter of the ELF being
 *              loaded, else false.
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
                                   struct elf_phdr *phdr,
                                   struct file *elf, bool is_interp,
                                   struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

/**
 * arch_check_elf() - check an ELF executable as a whole
 * @ehdr:       The main ELF header
 * @has_interp: True if the ELF has an interpreter, else false.
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called
 * after all PT_LOPROC..PT_HIPROC program headers have been checked by
 * arch_elf_pt_proc().
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
                                 struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int i;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        total_size = total_mapping_size(interp_elf_phdata,
                                        interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out;
        }

        eppnt = interp_elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out;
        }

        error = load_addr;
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned long random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = (unsigned long) get_random_int();
                random_variable &= STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
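
/*
 * With the default STACK_RND_MASK of 0x7ff and 4 KiB pages, the stack top
 * moves by at most 0x7ff << 12 == 0x7ff000 bytes, i.e. just under the
 * 8 MiB of virtual address space noted above, always in whole-page units.
 */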

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;
        struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
        if (!elf_phdata)
                goto out;

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NUL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                switch (elf_ppnt->p_type) {
                case PT_GNU_STACK:
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;

                case PT_LOPROC ... PT_HIPROC:
                        retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
                                                  bprm->file, false,
                                                  &arch_state);
                        if (retval)
                                goto out_free_dentry;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;

                /* Load the interpreter program headers */
                interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
                                                   interpreter);
                if (!interp_elf_phdata)
                        goto out_free_dentry;

                /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
                elf_ppnt = interp_elf_phdata;
                for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
                        switch (elf_ppnt->p_type) {
                        case PT_LOPROC ... PT_HIPROC:
                                retval = arch_elf_pt_proc(&loc->interp_elf_ex,
                                                          elf_ppnt, interpreter,
                                                          true, &arch_state);
                                if (retval)
                                        goto out_free_dentry;
                                break;
                        }
        }

        /*
         * Allow arch code to reject the ELF at this point, whilst it's
         * still possible to return an error to the code that invoked
         * the exec syscall.
         */
        retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
        if (retval)
                goto out_free_dentry;

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY2(loc->elf_ex, &arch_state);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0)
                goto out_free_dentry;

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;
                unsigned long total_size = 0;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval)
                                goto out_free_dentry;
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections, so
                                         * we don't check the return value.
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
                        load_bias = ELF_ET_DYN_BASE - vaddr;
                        if (current->flags & PF_RANDOMIZE)
                                load_bias += arch_mmap_rnd();
                        load_bias = ELF_PAGESTART(load_bias);
                        total_size = total_mapping_size(elf_phdata,
                                                        loc->elf_ex.e_phnum);
                        if (!total_size) {
                                retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }
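                /*
                 * Illustrative numbers, assuming x86-64 with the usual
                 * TASK_SIZE: ELF_ET_DYN_BASE works out to roughly
                 * 0x555555554000 after page rounding, which is why
                 * non-randomized PIE binaries load near that address.
                 * With PF_RANDOMIZE set, arch_mmap_rnd() adds a random
                 * page-aligned offset before ELF_PAGESTART() rounds the
                 * bias down.
                 */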

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval)
                goto out_free_dentry;
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias, interp_elf_phdata);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(interp_elf_phdata);
        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0)
                goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0)
                goto out;
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
                current->brk_randomized = 1;
#endif
        }

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        kfree(interp_elf_phdata);
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel
 * mappings that are useful for post-mortem analysis are included in every
 * core dump.  That way the core dump stays fully interpretable later
 * without having to match it against the same kernel and hardware config
 * to see what the PC values meant.  These special mappings include the
 * vDSO, vsyscall, and other architecture specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;

        /*
         * Assume that all vmas with a .name op should always be dumped.
         * If this changes, a new vm_ops field can easily be added.
         */
        if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
                return true;

        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide how much of a segment to dump: all of it, part of it, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}
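
/*
 * The FILTER() bits above are the MMF_DUMP_* flags that userspace controls
 * through /proc/<pid>/coredump_filter; setting bit 4 (ELF_HEADERS) there,
 * for example, is what makes this function dump the first page of any
 * readable file mapping that starts with the "\177ELF" magic.
 */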

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
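
/*
 * A worked example, assuming x86-64 where NT_PRSTATUS carries a 336-byte
 * elf_prstatus descriptor: sizeof(struct elf_note) is 12 bytes, the name
 * "CORE" rounds up from 5 to 8 bytes, and the descriptor is already
 * 4-byte aligned, so notesize() returns 12 + 8 + 336 == 356 bytes.
 */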
1330
1331static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1332{
1333        struct elf_note en;
1334        en.n_namesz = strlen(men->name) + 1;
1335        en.n_descsz = men->datasz;
1336        en.n_type = men->type;
1337
1338        return dump_emit(cprm, &en, sizeof(en)) &&
1339            dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1340            dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1341}
1342
1343static void fill_elf_header(struct elfhdr *elf, int segs,
1344                            u16 machine, u32 flags)
1345{
1346        memset(elf, 0, sizeof(*elf));
1347
1348        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1349        elf->e_ident[EI_CLASS] = ELF_CLASS;
1350        elf->e_ident[EI_DATA] = ELF_DATA;
1351        elf->e_ident[EI_VERSION] = EV_CURRENT;
1352        elf->e_ident[EI_OSABI] = ELF_OSABI;
1353
1354        elf->e_type = ET_CORE;
1355        elf->e_machine = machine;
1356        elf->e_version = EV_CURRENT;
1357        elf->e_phoff = sizeof(struct elfhdr);
1358        elf->e_flags = flags;
1359        elf->e_ehsize = sizeof(struct elfhdr);
1360        elf->e_phentsize = sizeof(struct elf_phdr);
1361        elf->e_phnum = segs;
1362
1363        return;
1364}
1365
1366static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1367{
1368        phdr->p_type = PT_NOTE;
1369        phdr->p_offset = offset;
1370        phdr->p_vaddr = 0;
1371        phdr->p_paddr = 0;
1372        phdr->p_filesz = sz;
1373        phdr->p_memsz = 0;
1374        phdr->p_flags = 0;
1375        phdr->p_align = 0;
1376        return;
1377}
1378
1379static void fill_note(struct memelfnote *note, const char *name, int type, 
1380                unsigned int sz, void *data)
1381{
1382        note->name = name;
1383        note->type = type;
1384        note->datasz = sz;
1385        note->data = data;
1386        return;
1387}
1388
1389/*
1390 * fill up all the fields in prstatus from the given task struct, except
1391 * registers which need to be filled up separately.
1392 */
1393static void fill_prstatus(struct elf_prstatus *prstatus,
1394                struct task_struct *p, long signr)
1395{
1396        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1397        prstatus->pr_sigpend = p->pending.signal.sig[0];
1398        prstatus->pr_sighold = p->blocked.sig[0];
1399        rcu_read_lock();
1400        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1401        rcu_read_unlock();
1402        prstatus->pr_pid = task_pid_vnr(p);
1403        prstatus->pr_pgrp = task_pgrp_vnr(p);
1404        prstatus->pr_sid = task_session_vnr(p);
1405        if (thread_group_leader(p)) {
1406                struct task_cputime cputime;
1407
1408                /*
1409                 * This is the record for the group leader.  It shows the
1410                 * group-wide total, not its individual thread total.
1411                 */
1412                thread_group_cputime(p, &cputime);
1413                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1414                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1415        } else {
1416                cputime_t utime, stime;
1417
1418                task_cputime(p, &utime, &stime);
1419                cputime_to_timeval(utime, &prstatus->pr_utime);
1420                cputime_to_timeval(stime, &prstatus->pr_stime);
1421        }
1422        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1423        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1424}
1425
1426static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1427                       struct mm_struct *mm)
1428{
1429        const struct cred *cred;
1430        unsigned int i, len;
1431
1432        /* first copy the parameters from user space */
1433        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1434
1435        len = mm->arg_end - mm->arg_start;
1436        if (len >= ELF_PRARGSZ)
1437                len = ELF_PRARGSZ-1;
1438        if (copy_from_user(&psinfo->pr_psargs,
1439                           (const char __user *)mm->arg_start, len))
1440                return -EFAULT;
1441        for (i = 0; i < len; i++)
1442                if (psinfo->pr_psargs[i] == 0)
1443                        psinfo->pr_psargs[i] = ' ';
1444        psinfo->pr_psargs[len] = 0;
1445
1446        rcu_read_lock();
1447        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1448        rcu_read_unlock();
1449        psinfo->pr_pid = task_pid_vnr(p);
1450        psinfo->pr_pgrp = task_pgrp_vnr(p);
1451        psinfo->pr_sid = task_session_vnr(p);
1452
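            /*
             * Map the lowest set task-state bit to the conventional
             * ps(1) state letter; states beyond the classic six show
             * up as '.'.
             */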
1453        i = p->state ? ffz(~p->state) + 1 : 0;
1454        psinfo->pr_state = i;
1455        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1456        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1457        psinfo->pr_nice = task_nice(p);
1458        psinfo->pr_flag = p->flags;
1459        rcu_read_lock();
1460        cred = __task_cred(p);
1461        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1462        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1463        rcu_read_unlock();
1464        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1465
1466        return 0;
1467}
1468
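    /*
     * mm->saved_auxv is an array of (a_type, a_val) pairs terminated by
     * an AT_NULL entry; the loop below advances two words at a time so
     * that the terminating pair itself is included in the note.
     */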
1469static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1470{
1471        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1472        int i = 0;
1473        do
1474                i += 2;
1475        while (auxv[i - 2] != AT_NULL);
1476        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1477}
1478
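    /*
     * copy_siginfo_to_user() expects a __user pointer, so the address
     * limit is temporarily widened with set_fs(KERNEL_DS) to let it
     * write into the kernel buffer csigdata.
     */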
1479static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1480                const siginfo_t *siginfo)
1481{
1482        mm_segment_t old_fs = get_fs();
1483        set_fs(KERNEL_DS);
1484        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1485        set_fs(old_fs);
1486        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1487}
1488
1489#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1490/*
1491 * Format of NT_FILE note:
1492 *
1493 * long count     -- how many files are mapped
1494 * long page_size -- units for file_ofs
1495 * array of [COUNT] elements of
1496 *   long start
1497 *   long end
1498 *   long file_ofs
1499 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1500 */
1501static int fill_files_note(struct memelfnote *note)
1502{
1503        struct vm_area_struct *vma;
1504        unsigned count, size, names_ofs, remaining, n;
1505        user_long_t *data;
1506        user_long_t *start_end_ofs;
1507        char *name_base, *name_curpos;
1508
1509        /* *Estimated* file count and total data size needed */
1510        count = current->mm->map_count;
1511        size = count * 64;
1512
1513        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1514 alloc:
1515        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1516                return -EINVAL;
1517        size = round_up(size, PAGE_SIZE);
1518        data = vmalloc(size);
1519        if (!data)
1520                return -ENOMEM;
1521
1522        start_end_ofs = data + 2;
1523        name_base = name_curpos = ((char *)data) + names_ofs;
1524        remaining = size - names_ofs;
1525        count = 0;
1526        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1527                struct file *file;
1528                const char *filename;
1529
1530                file = vma->vm_file;
1531                if (!file)
1532                        continue;
1533                filename = file_path(file, name_curpos, remaining);
1534                if (IS_ERR(filename)) {
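                            /*
                             * The name didn't fit: enlarge the buffer
                             * by 25% and redo the whole vma walk.
                             */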
1535                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1536                                vfree(data);
1537                                size = size * 5 / 4;
1538                                goto alloc;
1539                        }
1540                        continue;
1541                }
1542
1543                /* file_path() fills in the name at the end, move it down */
1544                /* n = strlen(filename) + 1: */
1545                n = (name_curpos + remaining) - filename;
1546                remaining = filename - name_curpos;
1547                memmove(name_curpos, filename, n);
1548                name_curpos += n;
1549
1550                *start_end_ofs++ = vma->vm_start;
1551                *start_end_ofs++ = vma->vm_end;
1552                *start_end_ofs++ = vma->vm_pgoff;
1553                count++;
1554        }
1555
1556        /* Now we know the exact count of files; store it */
1557        data[0] = count;
1558        data[1] = PAGE_SIZE;
1559        /*
1560         * The count is usually less than current->mm->map_count,
1561         * so we need to move the filenames down.
1562         */
1563        n = current->mm->map_count - count;
1564        if (n != 0) {
1565                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1566                memmove(name_base - shift_bytes, name_base,
1567                        name_curpos - name_base);
1568                name_curpos -= shift_bytes;
1569        }
1570
1571        size = name_curpos - (char *)data;
1572        fill_note(note, "CORE", NT_FILE, size, data);
1573        return 0;
1574}
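    /*
     * Illustrative sketch only (hypothetical userspace code, not part of
     * the kernel): walking an NT_FILE descriptor with the layout above,
     * where `desc' is assumed to point at note data already pulled out
     * of a core file:
     *
     *    const long *hdr = (const long *)desc;
     *    long i, count = hdr[0], page_size = hdr[1];
     *    const long *ent = hdr + 2;           /* count triples */
     *    const char *name = (const char *)(ent + 3 * count);
     *
     *    for (i = 0; i < count; i++, ent += 3) {
     *            printf("%lx-%lx @ %lx: %s\n", ent[0], ent[1],
     *                   ent[2] * page_size, name);
     *            name += strlen(name) + 1;
     *    }
     */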
1575
1576#ifdef CORE_DUMP_USE_REGSET
1577#include <linux/regset.h>
1578
1579struct elf_thread_core_info {
1580        struct elf_thread_core_info *next;
1581        struct task_struct *task;
1582        struct elf_prstatus prstatus;
1583        struct memelfnote notes[0];
1584};
1585
1586struct elf_note_info {
1587        struct elf_thread_core_info *thread;
1588        struct memelfnote psinfo;
1589        struct memelfnote signote;
1590        struct memelfnote auxv;
1591        struct memelfnote files;
1592        user_siginfo_t csigdata;
1593        size_t size;
1594        int thread_notes;
1595};
1596
1597/*
1598 * When a regset has a writeback hook, we call it on each thread before
1599 * dumping user memory.  On register window machines, this makes sure the
1600 * user memory backing the register data is up to date before we read it.
1601 */
1602static void do_thread_regset_writeback(struct task_struct *task,
1603                                       const struct user_regset *regset)
1604{
1605        if (regset->writeback)
1606                regset->writeback(task, regset, 1);
1607}
1608
1609#ifndef PR_REG_SIZE
1610#define PR_REG_SIZE(S) sizeof(S)
1611#endif
1612
1613#ifndef PRSTATUS_SIZE
1614#define PRSTATUS_SIZE(S) sizeof(S)
1615#endif
1616
1617#ifndef PR_REG_PTR
1618#define PR_REG_PTR(S) (&((S)->pr_reg))
1619#endif
1620
1621#ifndef SET_PR_FPVALID
1622#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1623#endif
1624
1625static int fill_thread_core_info(struct elf_thread_core_info *t,
1626                                 const struct user_regset_view *view,
1627                                 long signr, size_t *total)
1628{
1629        unsigned int i;
1630
1631        /*
1632         * NT_PRSTATUS is the one special case, because the regset data
1633         * goes into the pr_reg field inside the note contents, rather
1634         * than being the whole note contents.  We fill the rest in here.
1635         * We assume that regset 0 is NT_PRSTATUS.
1636         */
1637        fill_prstatus(&t->prstatus, t->task, signr);
1638        (void) view->regsets[0].get(t->task, &view->regsets[0],
1639                                    0, PR_REG_SIZE(t->prstatus.pr_reg),
1640                                    PR_REG_PTR(&t->prstatus), NULL);
1641
1642        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1643                  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1644        *total += notesize(&t->notes[0]);
1645
1646        do_thread_regset_writeback(t->task, &view->regsets[0]);
1647
1648        /*
1649         * Every other regset might generate a note too.  For each regset
1650         * that has no core_note_type or is inactive, we leave t->notes[i]
1651         * all zero and we'll know to skip writing it later.
1652         */
1653        for (i = 1; i < view->n; ++i) {
1654                const struct user_regset *regset = &view->regsets[i];
1655                do_thread_regset_writeback(t->task, regset);
1656                if (regset->core_note_type && regset->get &&
1657                    (!regset->active || regset->active(t->task, regset))) {
1658                        int ret;
1659                        size_t size = regset->n * regset->size;
1660                        void *data = kmalloc(size, GFP_KERNEL);
1661                        if (unlikely(!data))
1662                                return 0;
1663                        ret = regset->get(t->task, regset,
1664                                          0, size, data, NULL);
1665                        if (unlikely(ret))
1666                                kfree(data);
1667                        else {
1668                                if (regset->core_note_type != NT_PRFPREG)
1669                                        fill_note(&t->notes[i], "LINUX",
1670                                                  regset->core_note_type,
1671                                                  size, data);
1672                                else {
1673                                        SET_PR_FPVALID(&t->prstatus, 1);
1674                                        fill_note(&t->notes[i], "CORE",
1675                                                  NT_PRFPREG, size, data);
1676                                }
1677                                *total += notesize(&t->notes[i]);
1678                        }
1679                }
1680        }
1681
1682        return 1;
1683}
1684
1685static int fill_note_info(struct elfhdr *elf, int phdrs,
1686                          struct elf_note_info *info,
1687                          const siginfo_t *siginfo, struct pt_regs *regs)
1688{
1689        struct task_struct *dump_task = current;
1690        const struct user_regset_view *view = task_user_regset_view(dump_task);
1691        struct elf_thread_core_info *t;
1692        struct elf_prpsinfo *psinfo;
1693        struct core_thread *ct;
1694        unsigned int i;
1695
1696        info->size = 0;
1697        info->thread = NULL;
1698
1699        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1700        if (psinfo == NULL) {
1701                info->psinfo.data = NULL; /* So we don't free this wrongly */
1702                return 0;
1703        }
1704
1705        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1706
1707        /*
1708         * Figure out how many notes we're going to need for each thread.
1709         */
1710        info->thread_notes = 0;
1711        for (i = 0; i < view->n; ++i)
1712                if (view->regsets[i].core_note_type != 0)
1713                        ++info->thread_notes;
1714
1715        /*
1716         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1717         * since it is our one special case.
1718         */
1719        if (unlikely(info->thread_notes == 0) ||
1720            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1721                WARN_ON(1);
1722                return 0;
1723        }
1724
1725        /*
1726         * Initialize the ELF file header.
1727         */
1728        fill_elf_header(elf, phdrs,
1729                        view->e_machine, view->e_flags);
1730
1731        /*
1732         * Allocate a structure for each thread.
1733         */
1734        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1735                t = kzalloc(offsetof(struct elf_thread_core_info,
1736                                     notes[info->thread_notes]),
1737                            GFP_KERNEL);
1738                if (unlikely(!t))
1739                        return 0;
1740
1741                t->task = ct->task;
1742                if (ct->task == dump_task || !info->thread) {
1743                        t->next = info->thread;
1744                        info->thread = t;
1745                } else {
1746                        /*
1747                         * Make sure to keep the original task at
1748                         * the head of the list.
1749                         */
1750                        t->next = info->thread->next;
1751                        info->thread->next = t;
1752                }
1753        }
1754
1755        /*
1756         * Now fill in each thread's information.
1757         */
1758        for (t = info->thread; t != NULL; t = t->next)
1759                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1760                        return 0;
1761
1762        /*
1763         * Fill in the two process-wide notes.
1764         */
1765        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1766        info->size += notesize(&info->psinfo);
1767
1768        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1769        info->size += notesize(&info->signote);
1770
1771        fill_auxv_note(&info->auxv, current->mm);
1772        info->size += notesize(&info->auxv);
1773
1774        if (fill_files_note(&info->files) == 0)
1775                info->size += notesize(&info->files);
1776
1777        return 1;
1778}
1779
1780static size_t get_note_info_size(struct elf_note_info *info)
1781{
1782        return info->size;
1783}
1784
1785/*
1786 * Write all the notes for each thread.  When writing the first thread, the
1787 * process-wide notes are interleaved after the first thread-specific note.
1788 */
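    /*
     * For a two-thread process the note segment thus reads: thread 1
     * NT_PRSTATUS; NT_PRPSINFO, NT_SIGINFO, NT_AUXV (and NT_FILE when
     * present); thread 1's remaining regset notes; then all of thread
     * 2's notes, and so on.
     */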
1789static int write_note_info(struct elf_note_info *info,
1790                           struct coredump_params *cprm)
1791{
1792        bool first = true;
1793        struct elf_thread_core_info *t = info->thread;
1794
1795        do {
1796                int i;
1797
1798                if (!writenote(&t->notes[0], cprm))
1799                        return 0;
1800
1801                if (first && !writenote(&info->psinfo, cprm))
1802                        return 0;
1803                if (first && !writenote(&info->signote, cprm))
1804                        return 0;
1805                if (first && !writenote(&info->auxv, cprm))
1806                        return 0;
1807                if (first && info->files.data &&
1808                                !writenote(&info->files, cprm))
1809                        return 0;
1810
1811                for (i = 1; i < info->thread_notes; ++i)
1812                        if (t->notes[i].data &&
1813                            !writenote(&t->notes[i], cprm))
1814                                return 0;
1815
1816                first = false;
1817                t = t->next;
1818        } while (t);
1819
1820        return 1;
1821}
1822
1823static void free_note_info(struct elf_note_info *info)
1824{
1825        struct elf_thread_core_info *threads = info->thread;
1826        while (threads) {
1827                unsigned int i;
1828                struct elf_thread_core_info *t = threads;
1829                threads = t->next;
1830                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1831                for (i = 1; i < info->thread_notes; ++i)
1832                        kfree(t->notes[i].data);
1833                kfree(t);
1834        }
1835        kfree(info->psinfo.data);
1836        vfree(info->files.data);
1837}
1838
1839#else
1840
1841/* Here is the structure in which the status of each thread is captured. */
1842struct elf_thread_status
1843{
1844        struct list_head list;
1845        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1846        elf_fpregset_t fpu;             /* NT_PRFPREG */
1847        struct task_struct *thread;
1848#ifdef ELF_CORE_COPY_XFPREGS
1849        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1850#endif
1851        struct memelfnote notes[3];
1852        int num_notes;
1853};
1854
1855/*
1856 * In order to add the thread-specific information to the ELF core file,
1857 * we need to keep a linked list of every thread's pr_status and then create
1858 * a single section for them in the final core file.
1859 */
1860static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1861{
1862        int sz = 0;
1863        struct task_struct *p = t->thread;
1864        t->num_notes = 0;
1865
1866        fill_prstatus(&t->prstatus, p, signr);
1867        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1868
1869        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1870                  &(t->prstatus));
1871        t->num_notes++;
1872        sz += notesize(&t->notes[0]);
1873
1874        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1875                                                                &t->fpu))) {
1876                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1877                          &(t->fpu));
1878                t->num_notes++;
1879                sz += notesize(&t->notes[1]);
1880        }
1881
1882#ifdef ELF_CORE_COPY_XFPREGS
1883        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1884                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1885                          sizeof(t->xfpu), &t->xfpu);
1886                t->num_notes++;
1887                sz += notesize(&t->notes[2]);
1888        }
1889#endif
1890        return sz;
1891}
1892
1893struct elf_note_info {
1894        struct memelfnote *notes;
1895        struct memelfnote *notes_files;
1896        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1897        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1898        struct list_head thread_list;
1899        elf_fpregset_t *fpu;
1900#ifdef ELF_CORE_COPY_XFPREGS
1901        elf_fpxregset_t *xfpu;
1902#endif
1903        user_siginfo_t csigdata;
1904        int thread_status_size;
1905        int numnote;
1906};
1907
1908static int elf_note_info_init(struct elf_note_info *info)
1909{
1910        memset(info, 0, sizeof(*info));
1911        INIT_LIST_HEAD(&info->thread_list);
1912
1913        /* Allocate space for ELF notes */
1914        info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1915        if (!info->notes)
1916                return 0;
1917        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1918        if (!info->psinfo)
1919                return 0;
1920        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1921        if (!info->prstatus)
1922                return 0;
1923        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1924        if (!info->fpu)
1925                return 0;
1926#ifdef ELF_CORE_COPY_XFPREGS
1927        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1928        if (!info->xfpu)
1929                return 0;
1930#endif
1931        return 1;
1932}
1933
1934static int fill_note_info(struct elfhdr *elf, int phdrs,
1935                          struct elf_note_info *info,
1936                          const siginfo_t *siginfo, struct pt_regs *regs)
1937{
1938        struct list_head *t;
1939        struct core_thread *ct;
1940        struct elf_thread_status *ets;
1941
1942        if (!elf_note_info_init(info))
1943                return 0;
1944
1945        for (ct = current->mm->core_state->dumper.next;
1946                                        ct; ct = ct->next) {
1947                ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1948                if (!ets)
1949                        return 0;
1950
1951                ets->thread = ct->task;
1952                list_add(&ets->list, &info->thread_list);
1953        }
1954
1955        list_for_each(t, &info->thread_list) {
1956                int sz;
1957
1958                ets = list_entry(t, struct elf_thread_status, list);
1959                sz = elf_dump_thread_status(siginfo->si_signo, ets);
1960                info->thread_status_size += sz;
1961        }
1962        /* now collect the dump for the current task */
1963        memset(info->prstatus, 0, sizeof(*info->prstatus));
1964        fill_prstatus(info->prstatus, current, siginfo->si_signo);
1965        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1966
1967        /* Set up header */
1968        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1969
1970        /*
1971         * Set up the notes in similar form to SVR4 core dumps made
1972         * with info from their /proc.
1973         */
1974
1975        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1976                  sizeof(*info->prstatus), info->prstatus);
1977        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1978        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1979                  sizeof(*info->psinfo), info->psinfo);
1980
1981        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1982        fill_auxv_note(info->notes + 3, current->mm);
1983        info->numnote = 4;
1984
1985        if (fill_files_note(info->notes + info->numnote) == 0) {
1986                info->notes_files = info->notes + info->numnote;
1987                info->numnote++;
1988        }
1989
1990        /* Try to dump the FPU. */
1991        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1992                                                               info->fpu);
1993        if (info->prstatus->pr_fpvalid)
1994                fill_note(info->notes + info->numnote++,
1995                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1996#ifdef ELF_CORE_COPY_XFPREGS
1997        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1998                fill_note(info->notes + info->numnote++,
1999                          "LINUX", ELF_CORE_XFPREG_TYPE,
2000                          sizeof(*info->xfpu), info->xfpu);
2001#endif
2002
2003        return 1;
2004}
2005
2006static size_t get_note_info_size(struct elf_note_info *info)
2007{
2008        int sz = 0;
2009        int i;
2010
2011        for (i = 0; i < info->numnote; i++)
2012                sz += notesize(info->notes + i);
2013
2014        sz += info->thread_status_size;
2015
2016        return sz;
2017}
2018
2019static int write_note_info(struct elf_note_info *info,
2020                           struct coredump_params *cprm)
2021{
2022        int i;
2023        struct list_head *t;
2024
2025        for (i = 0; i < info->numnote; i++)
2026                if (!writenote(info->notes + i, cprm))
2027                        return 0;
2028
2029        /* write out the thread status notes section */
2030        list_for_each(t, &info->thread_list) {
2031                struct elf_thread_status *tmp =
2032                                list_entry(t, struct elf_thread_status, list);
2033
2034                for (i = 0; i < tmp->num_notes; i++)
2035                        if (!writenote(&tmp->notes[i], cprm))
2036                                return 0;
2037        }
2038
2039        return 1;
2040}
2041
2042static void free_note_info(struct elf_note_info *info)
2043{
2044        while (!list_empty(&info->thread_list)) {
2045                struct list_head *tmp = info->thread_list.next;
2046                list_del(tmp);
2047                kfree(list_entry(tmp, struct elf_thread_status, list));
2048        }
2049
2050        /* Free data possibly allocated by fill_files_note(): */
2051        if (info->notes_files)
2052                vfree(info->notes_files->data);
2053
2054        kfree(info->prstatus);
2055        kfree(info->psinfo);
2056        kfree(info->notes);
2057        kfree(info->fpu);
2058#ifdef ELF_CORE_COPY_XFPREGS
2059        kfree(info->xfpu);
2060#endif
2061}
2062
2063#endif
2064
2065static struct vm_area_struct *first_vma(struct task_struct *tsk,
2066                                        struct vm_area_struct *gate_vma)
2067{
2068        struct vm_area_struct *ret = tsk->mm->mmap;
2069
2070        if (ret)
2071                return ret;
2072        return gate_vma;
2073}
2074/*
2075 * Helper function for iterating across a vma list.  It ensures that the caller
2076 * will visit `gate_vma' prior to terminating the search.
2077 */
2078static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2079                                        struct vm_area_struct *gate_vma)
2080{
2081        struct vm_area_struct *ret;
2082
2083        ret = this_vma->vm_next;
2084        if (ret)
2085                return ret;
2086        if (this_vma == gate_vma)
2087                return NULL;
2088        return gate_vma;
2089}
2090
2091static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2092                             elf_addr_t e_shoff, int segs)
2093{
2094        elf->e_shoff = e_shoff;
2095        elf->e_shentsize = sizeof(*shdr4extnum);
2096        elf->e_shnum = 1;
2097        elf->e_shstrndx = SHN_UNDEF;
2098
2099        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2100
2101        shdr4extnum->sh_type = SHT_NULL;
2102        shdr4extnum->sh_size = elf->e_shnum;
2103        shdr4extnum->sh_link = elf->e_shstrndx;
2104        shdr4extnum->sh_info = segs;
2105}
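    /*
     * Illustrative sketch (hypothetical userspace code): recovering the
     * real segment count from a file written this way, with `eh' assumed
     * to be the Elf_Ehdr already read from fd:
     *
     *    if (eh.e_phnum == PN_XNUM) {
     *            Elf_Shdr sh0;
     *
     *            pread(fd, &sh0, sizeof(sh0), eh.e_shoff);
     *            phnum = sh0.sh_info;
     *    } else {
     *            phnum = eh.e_phnum;
     *    }
     */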
2106
2107/*
2108 * Actual dumper
2109 *
2110 * This is a two-pass process; first we compute the offsets of all the
2111 * pieces, and then they are actually written out.  If we hit the core
2112 * limit, we just truncate.
2113 */
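    /*
     * The resulting file layout, with every offset computed in the
     * first pass:
     *
     *    ELF header
     *    program headers (one PT_NOTE, one PT_LOAD per vma, plus extras)
     *    note data
     *    <pad to ELF_EXEC_PAGESIZE>
     *    PT_LOAD segment contents, in vma order
     *    extra arch data and, if needed, the extended-numbering shdr
     */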
2114static int elf_core_dump(struct coredump_params *cprm)
2115{
2116        int has_dumped = 0;
2117        mm_segment_t fs;
2118        int segs, i;
2119        size_t vma_data_size = 0;
2120        struct vm_area_struct *vma, *gate_vma;
2121        struct elfhdr *elf = NULL;
2122        loff_t offset = 0, dataoff;
2123        struct elf_note_info info = { };
2124        struct elf_phdr *phdr4note = NULL;
2125        struct elf_shdr *shdr4extnum = NULL;
2126        Elf_Half e_phnum;
2127        elf_addr_t e_shoff;
2128        elf_addr_t *vma_filesz = NULL;
2129
2130        /*
2131         * We no longer stop all VM operations.
2132         *
2133         * This is because those processes that could possibly change the
2134         * map_count or the mmap / vma pages are now blocked in do_exit until
2135         * current finishes this core dump.
2136         *
2137         * Only ptrace can touch these memory addresses, but it doesn't change
2138         * the map_count or the pages allocated. So no possibility of crashing
2139         * exists while dumping the mm->vm_next areas to the core file.
2140         */
2141
2142        /* alloc memory for large data structures: too large to be on stack */
2143        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2144        if (!elf)
2145                goto out;
2146        /*
2147         * The number of segs is recorded in the ELF header as a 16-bit value.
2148         * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2149         */
2150        segs = current->mm->map_count;
2151        segs += elf_core_extra_phdrs();
2152
2153        gate_vma = get_gate_vma(current->mm);
2154        if (gate_vma != NULL)
2155                segs++;
2156
2157        /* for notes section */
2158        segs++;
2159
2160        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2161         * this, the kernel supports extended numbering. Have a look at
2162         * include/linux/elf.h for further information. */
2163        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2164
2165        /*
2166         * Collect all the non-memory information about the process for the
2167         * notes.  This also sets up the file header.
2168         */
2169        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2170                goto cleanup;
2171
2172        has_dumped = 1;
2173
2174        fs = get_fs();
2175        set_fs(KERNEL_DS);
2176
2177        offset += sizeof(*elf);                         /* Elf header */
2178        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2179
2180        /* Write notes phdr entry */
2181        {
2182                size_t sz = get_note_info_size(&info);
2183
2184                sz += elf_coredump_extra_notes_size();
2185
2186                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2187                if (!phdr4note)
2188                        goto end_coredump;
2189
2190                fill_elf_note_phdr(phdr4note, sz, offset);
2191                offset += sz;
2192        }
2193
2194        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2195
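            /*
             * First pass over the vmas: decide how much of each will be
             * dumped, so that the PT_LOAD offsets written below agree
             * with the data emitted in the final pass.
             */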
2196        vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2197        if (!vma_filesz)
2198                goto end_coredump;
2199
2200        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2201                        vma = next_vma(vma, gate_vma)) {
2202                unsigned long dump_size;
2203
2204                dump_size = vma_dump_size(vma, cprm->mm_flags);
2205                vma_filesz[i++] = dump_size;
2206                vma_data_size += dump_size;
2207        }
2208
2209        offset += vma_data_size;
2210        offset += elf_core_extra_data_size();
2211        e_shoff = offset;
2212
2213        if (e_phnum == PN_XNUM) {
2214                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2215                if (!shdr4extnum)
2216                        goto end_coredump;
2217                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2218        }
2219
2220        offset = dataoff;
2221
2222        if (!dump_emit(cprm, elf, sizeof(*elf)))
2223                goto end_coredump;
2224
2225        if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2226                goto end_coredump;
2227
2228        /* Write program headers for segments dump */
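            /*
             * p_filesz may be smaller than p_memsz where vma_dump_size()
             * chose not to dump a vma in full; consumers treat the
             * undumped tail as absent from the file.
             */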
2229        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2230                        vma = next_vma(vma, gate_vma)) {
2231                struct elf_phdr phdr;
2232
2233                phdr.p_type = PT_LOAD;
2234                phdr.p_offset = offset;
2235                phdr.p_vaddr = vma->vm_start;
2236                phdr.p_paddr = 0;
2237                phdr.p_filesz = vma_filesz[i++];
2238                phdr.p_memsz = vma->vm_end - vma->vm_start;
2239                offset += phdr.p_filesz;
2240                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2241                if (vma->vm_flags & VM_WRITE)
2242                        phdr.p_flags |= PF_W;
2243                if (vma->vm_flags & VM_EXEC)
2244                        phdr.p_flags |= PF_X;
2245                phdr.p_align = ELF_EXEC_PAGESIZE;
2246
2247                if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2248                        goto end_coredump;
2249        }
2250
2251        if (!elf_core_write_extra_phdrs(cprm, offset))
2252                goto end_coredump;
2253
2254        /* write out the notes section */
2255        if (!write_note_info(&info, cprm))
2256                goto end_coredump;
2257
2258        if (elf_coredump_extra_notes_write(cprm))
2259                goto end_coredump;
2260
2261        /* Align to page */
2262        if (!dump_skip(cprm, dataoff - cprm->written))
2263                goto end_coredump;
2264
2265        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2266                        vma = next_vma(vma, gate_vma)) {
2267                unsigned long addr;
2268                unsigned long end;
2269
2270                end = vma->vm_start + vma_filesz[i++];
2271
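                    /*
                     * Dump one page at a time.  Pages that cannot be
                     * fetched (never faulted in, or inaccessible) become
                     * holes in the core file via dump_skip().
                     */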
2272                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2273                        struct page *page;
2274                        int stop;
2275
2276                        page = get_dump_page(addr);
2277                        if (page) {
2278                                void *kaddr = kmap(page);
2279                                stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2280                                kunmap(page);
2281                                page_cache_release(page);
2282                        } else
2283                                stop = !dump_skip(cprm, PAGE_SIZE);
2284                        if (stop)
2285                                goto end_coredump;
2286                }
2287        }
2288
2289        if (!elf_core_write_extra_data(cprm))
2290                goto end_coredump;
2291
2292        if (e_phnum == PN_XNUM) {
2293                if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2294                        goto end_coredump;
2295        }
2296
2297end_coredump:
2298        set_fs(fs);
2299
2300cleanup:
2301        free_note_info(&info);
2302        kfree(shdr4extnum);
2303        kfree(vma_filesz);
2304        kfree(phdr4note);
2305        kfree(elf);
2306out:
2307        return has_dumped;
2308}
2309
2310#endif          /* CONFIG_ELF_CORE */
2311
2312static int __init init_elf_binfmt(void)
2313{
2314        register_binfmt(&elf_format);
2315        return 0;
2316}
2317
2318static void __exit exit_elf_binfmt(void)
2319{
2320        /* Remove the ELF loader. */
2321        unregister_binfmt(&elf_format);
2322}
2323
2324core_initcall(init_elf_binfmt);
2325module_exit(exit_elf_binfmt);
2326MODULE_LICENSE("GPL");
2327