/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX System V Release 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/dax.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
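/*
 * Worked example, assuming ELF_MIN_ALIGN == 0x1000:
 *   ELF_PAGESTART(0x1234)  == 0x1000   (round down to page)
 *   ELF_PAGEOFFSET(0x1234) == 0x234    (offset within page)
 *   ELF_PAGEALIGN(0x1234)  == 0x2000   (round up to page)
 */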

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end, int prot)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                /*
                 * Map the last of the bss segment.
                 * If the header is requesting these pages to be
                 * executable, honour that (ppc32 needs this).
                 */
                addr = vm_brk_flags(start, end - start,
                                prot & PROT_EXEC ? VM_EXEC : 0);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain the junk from the file that should not
   be in memory
 */
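/*
 * For example, with ELF_MIN_ALIGN == 0x1000, padzero(0x804a123) clears
 * nbyte = 0x1000 - 0x123 = 0xedd bytes, i.e. the user range
 * [0x804a123, 0x804b000) at the tail of the last file-backed page.
 */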
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
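/*
 * On the usual downward-growing stack, STACK_ALLOC(sp, len) moves sp
 * down by len bytes and returns the new (lower) address; with
 * CONFIG_STACK_GROWSUP it returns the old sp and advances it by len.
 * Either way the caller gets the start of a fresh len-byte region.
 */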

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)
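/*
 * Each NEW_AUX_ENT() emits one auxiliary-vector entry as two consecutive
 * elf_addr_t slots, e.g. NEW_AUX_ENT(AT_PAGESZ, 4096) stores
 * { AT_PAGESZ, 4096 } into elf_info[]; userspace (e.g. the dynamic
 * linker, or getauxval(3)) walks these (id, value) pairs until AT_NULL.
 */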

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif
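
        /*
         * Rough sketch of what the new process now sees at sp (lowest
         * address first, on a downward-growing stack):
         *
         *      argc
         *      argv[0] ... argv[argc-1], NULL
         *      envp[0] ... envp[envc-1], NULL
         *      auxv (id, value) pairs, terminated by AT_NULL
         *      ... platform strings, AT_RANDOM bytes, arg/env strings ...
         *
         * The exact padding comes from STACK_ROUND() above.
         */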

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;

        /* Populate list of argv pointers back to argv strings. */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, sp++))
                return -EFAULT;
        current->mm->arg_end = p;

        /* Populate list of envp pointers back to envp strings. */
        current->mm->env_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, sp++))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

#endif /* !elf_map */

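/*
 * Span of all PT_LOAD segments, from the page-aligned start of the
 * first to the end of the last.  For example, a first PT_LOAD at
 * p_vaddr 0x1234 and a last one ending at 0x9000 + 0x500 give
 * 0x9500 - ELF_PAGESTART(0x1234) == 0x9500 - 0x1000 == 0x8500.
 */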
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        int bss_prot = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss) {
                                last_bss = k;
                                bss_prot = elf_prot;
                        }
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk_flags(elf_bss, last_bss - elf_bss,
                                bss_prot & PROT_EXEC ? VM_EXEC : 0);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif
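/*
 * With 4K pages (PAGE_SHIFT == 12) this default gives
 * STACK_RND_MASK == 0x7ff, i.e. up to 0x7ff << PAGE_SHIFT (~8MB)
 * of randomization applied by randomize_stack_top() below.
 */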

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned long random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_long();
                random_variable &= STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int bss_prot = 0;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;
                unsigned long total_size = 0;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias,
                                         bss_prot);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                /*
                 * If we are loading ET_EXEC or we have already performed
                 * the ET_DYN load_addr calculations, proceed normally.
                 */
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /*
                         * This logic is run once for the first LOAD Program
                         * Header for ET_DYN binaries to calculate the
                         * randomization (load_bias) for all the LOAD
                         * Program Headers, and to calculate the entire
                         * size of the ELF mapping (total_size). (Note that
                         * load_addr_set is set to true later once the
                         * initial mapping is performed.)
                         *
                         * There are effectively two types of ET_DYN
                         * binaries: programs (i.e. PIE: ET_DYN with INTERP)
                         * and loaders (ET_DYN without INTERP, since they
                         * _are_ the ELF interpreter). The loaders must
                         * be loaded away from programs since the program
                         * may otherwise collide with the loader (especially
                         * for ET_EXEC which does not have a randomized
                         * position). For example to handle invocations of
                         * "./ld.so someprog" to test out a new version of
                         * the loader, the subsequent program that the
                         * loader loads must avoid the loader itself, so
                         * they cannot share the same load range. Sufficient
                         * room for the brk must be allocated with the
                         * loader as well, since brk must be available with
                         * the loader.
                         *
                         * Therefore, programs are loaded offset from
                         * ELF_ET_DYN_BASE and loaders are loaded into the
                         * independently randomized mmap region (0 load_bias
                         * without MAP_FIXED).
                         */
                        if (elf_interpreter) {
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
                                elf_flags |= MAP_FIXED;
                        } else
                                load_bias = 0;

                        /*
                         * Since load_bias is used for all subsequent loading
                         * calculations, we must lower it by the first vaddr
                         * so that the remaining calculations based on the
                         * ELF vaddrs will be correctly offset. The result
                         * is then page aligned.
                         */
                        load_bias = ELF_PAGESTART(load_bias - vaddr);
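                        /*
                         * E.g. with 4K pages, a load_bias of 0x2aab0000 and
                         * a first vaddr of 0x10400:
                         * ELF_PAGESTART(0x2aab0000 - 0x10400) == 0x2aa9f000,
                         * so load_bias + vaddr == 0x2aaaf400, preserving the
                         * vaddr's page offset (0x400) for elf_map().
                         */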

                        total_size = total_mapping_size(elf_phdata,
                                                        loc->elf_ex.e_phnum);
                        if (!total_size) {
                                retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk) {
                        bss_prot = elf_prot;
                        elf_brk = k;
                }
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk, bss_prot);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
                current->brk_randomized = 1;
#endif
        }

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include - vDSO, vsyscall, and other
 * architecture specific mappings
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
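/*
 * FILTER(type) tests the MMF_DUMP_<type> bit in mm_flags; userspace
 * selects these bits per process via /proc/<pid>/coredump_filter
 * (documented in core(5)).
 */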

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* support for DAX */
        if (vma_is_dax(vma)) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
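/*
 * The on-disk note record this size pairs with: a struct elf_note
 * header (n_namesz, n_descsz, n_type), then the NUL-terminated name
 * padded to a 4-byte boundary, then the descriptor data, also 4-byte
 * padded; writenote() below emits exactly this layout.
 */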
1286
1287#define DUMP_WRITE(addr, nr, foffset)   \
1288        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1289
1290static int alignfile(struct file *file, loff_t *foffset)
1291{
1292        static const char buf[4] = { 0, };
1293        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1294        return 1;
1295}
1296
1297static int writenote(struct memelfnote *men, struct file *file,
1298                        loff_t *foffset)
1299{
1300        struct elf_note en;
1301        en.n_namesz = strlen(men->name) + 1;
1302        en.n_descsz = men->datasz;
1303        en.n_type = men->type;
1304
1305        DUMP_WRITE(&en, sizeof(en), foffset);
1306        DUMP_WRITE(men->name, en.n_namesz, foffset);
1307        if (!alignfile(file, foffset))
1308                return 0;
1309        DUMP_WRITE(men->data, men->datasz, foffset);
1310        if (!alignfile(file, foffset))
1311                return 0;
1312
1313        return 1;
1314}
1315#undef DUMP_WRITE
1316
1317static void fill_elf_header(struct elfhdr *elf, int segs,
1318                            u16 machine, u32 flags)
1319{
1320        memset(elf, 0, sizeof(*elf));
1321
1322        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1323        elf->e_ident[EI_CLASS] = ELF_CLASS;
1324        elf->e_ident[EI_DATA] = ELF_DATA;
1325        elf->e_ident[EI_VERSION] = EV_CURRENT;
1326        elf->e_ident[EI_OSABI] = ELF_OSABI;
1327
1328        elf->e_type = ET_CORE;
1329        elf->e_machine = machine;
1330        elf->e_version = EV_CURRENT;
1331        elf->e_phoff = sizeof(struct elfhdr);
1332        elf->e_flags = flags;
1333        elf->e_ehsize = sizeof(struct elfhdr);
1334        elf->e_phentsize = sizeof(struct elf_phdr);
1335        elf->e_phnum = segs;
1336
1337        return;
1338}
1339
1340static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1341{
1342        phdr->p_type = PT_NOTE;
1343        phdr->p_offset = offset;
1344        phdr->p_vaddr = 0;
1345        phdr->p_paddr = 0;
1346        phdr->p_filesz = sz;
1347        phdr->p_memsz = 0;
1348        phdr->p_flags = 0;
1349        phdr->p_align = 0;
1350        return;
1351}
1352
1353static void fill_note(struct memelfnote *note, const char *name, int type, 
1354                unsigned int sz, void *data)
1355{
1356        note->name = name;
1357        note->type = type;
1358        note->datasz = sz;
1359        note->data = data;
1360        return;
1361}
1362
1363/*
1364 * fill up all the fields in prstatus from the given task struct, except
1365 * registers which need to be filled up separately.
1366 */
1367static void fill_prstatus(struct elf_prstatus *prstatus,
1368                struct task_struct *p, long signr)
1369{
1370        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1371        prstatus->pr_sigpend = p->pending.signal.sig[0];
1372        prstatus->pr_sighold = p->blocked.sig[0];
1373        rcu_read_lock();
1374        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1375        rcu_read_unlock();
1376        prstatus->pr_pid = task_pid_vnr(p);
1377        prstatus->pr_pgrp = task_pgrp_vnr(p);
1378        prstatus->pr_sid = task_session_vnr(p);
1379        if (thread_group_leader(p)) {
1380                struct task_cputime cputime;
1381
1382                /*
1383                 * This is the record for the group leader.  It shows the
1384                 * group-wide total, not its individual thread total.
1385                 */
1386                thread_group_cputime(p, &cputime);
1387                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1388                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1389        } else {
1390                cputime_t utime, stime;
1391
1392                task_cputime(p, &utime, &stime);
1393                cputime_to_timeval(utime, &prstatus->pr_utime);
1394                cputime_to_timeval(stime, &prstatus->pr_stime);
1395        }
1396        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1397        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1398}
1399
1400static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1401                       struct mm_struct *mm)
1402{
1403        const struct cred *cred;
1404        unsigned int i, len;
1405        
1406        /* first copy the parameters from user space */
1407        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1408
1409        len = mm->arg_end - mm->arg_start;
1410        if (len >= ELF_PRARGSZ)
1411                len = ELF_PRARGSZ-1;
1412        if (copy_from_user(&psinfo->pr_psargs,
1413                           (const char __user *)mm->arg_start, len))
1414                return -EFAULT;
1415        for (i = 0; i < len; i++)
1416                if (psinfo->pr_psargs[i] == 0)
1417                        psinfo->pr_psargs[i] = ' ';
1418        psinfo->pr_psargs[len] = 0;
1419
1420        rcu_read_lock();
1421        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1422        rcu_read_unlock();
1423        psinfo->pr_pid = task_pid_vnr(p);
1424        psinfo->pr_pgrp = task_pgrp_vnr(p);
1425        psinfo->pr_sid = task_session_vnr(p);
1426
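            /* Translate the task state into the one-letter code used by ps(1). */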
1427        i = p->state ? ffz(~p->state) + 1 : 0;
1428        psinfo->pr_state = i;
1429        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1430        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1431        psinfo->pr_nice = task_nice(p);
1432        psinfo->pr_flag = p->flags;
1433        rcu_read_lock();
1434        cred = __task_cred(p);
1435        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1436        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1437        rcu_read_unlock();
1438        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1439        
1440        return 0;
1441}
1442
1443static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1444{
1445        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1446        int i = 0;
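            /*
             * saved_auxv is an array of (type, value) pairs terminated by an
             * AT_NULL type entry; count the entries, terminator included.
             */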
1447        do
1448                i += 2;
1449        while (auxv[i - 2] != AT_NULL);
1450        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1451}
1452
1453static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1454                siginfo_t *siginfo)
1455{
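            /*
             * copy_siginfo_to_user() expects a user pointer, so temporarily
             * widen the address limit to let it write to the kernel buffer.
             */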
1456        mm_segment_t old_fs = get_fs();
1457        set_fs(KERNEL_DS);
1458        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1459        set_fs(old_fs);
1460        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1461}
1462
1463#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1464/*
1465 * Format of NT_FILE note:
1466 *
1467 * long count     -- how many files are mapped
1468 * long page_size -- units for file_ofs
1469 * array of [COUNT] elements of
1470 *   long start
1471 *   long end
1472 *   long file_ofs
1473 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
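     *
     * For example, two (hypothetical) mappings of /bin/cat could encode as:
     *   count = 2, page_size = PAGE_SIZE,
     *   [0x400000, 0x401000, 0], [0x600000, 0x601000, 1],
     *   "/bin/cat" NUL "/bin/cat" NUL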
1474 */
1475static int fill_files_note(struct memelfnote *note)
1476{
1477        struct vm_area_struct *vma;
1478        unsigned count, size, names_ofs, remaining, n;
1479        user_long_t *data;
1480        user_long_t *start_end_ofs;
1481        char *name_base, *name_curpos;
1482
1483        /* *Estimated* file count and total data size needed */
1484        count = current->mm->map_count;
1485        size = count * 64;
1486
1487        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1488 alloc:
1489        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1490                return -EINVAL;
1491        size = round_up(size, PAGE_SIZE);
1492        data = vmalloc(size);
1493        if (!data)
1494                return -ENOMEM;
1495
1496        start_end_ofs = data + 2;
1497        name_base = name_curpos = ((char *)data) + names_ofs;
1498        remaining = size - names_ofs;
1499        count = 0;
1500        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1501                struct file *file;
1502                const char *filename;
1503
1504                file = vma->vm_file;
1505                if (!file)
1506                        continue;
1507                filename = d_path(&file->f_path, name_curpos, remaining);
1508                if (IS_ERR(filename)) {
1509                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1510                                vfree(data);
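                                    /* Name didn't fit: grow the buffer by 25% and retry */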
1511                                size = size * 5 / 4;
1512                                goto alloc;
1513                        }
1514                        continue;
1515                }
1516
1517                /* d_path() fills the buffer in from the end; move the name down */
1518                /* n = strlen(filename) + 1: */
1519                n = (name_curpos + remaining) - filename;
1520                remaining = filename - name_curpos;
1521                memmove(name_curpos, filename, n);
1522                name_curpos += n;
1523
1524                *start_end_ofs++ = vma->vm_start;
1525                *start_end_ofs++ = vma->vm_end;
1526                *start_end_ofs++ = vma->vm_pgoff;
1527                count++;
1528        }
1529
1530        /* Now we know the exact count of files, so we can store it */
1531        data[0] = count;
1532        data[1] = PAGE_SIZE;
1533        /*
1534         * The final count is usually less than current->mm->map_count,
1535         * so we need to move the filenames down to close the gap.
1536         */
1537        n = current->mm->map_count - count;
1538        if (n != 0) {
1539                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1540                memmove(name_base - shift_bytes, name_base,
1541                        name_curpos - name_base);
1542                name_curpos -= shift_bytes;
1543        }
1544
1545        size = name_curpos - (char *)data;
1546        fill_note(note, "CORE", NT_FILE, size, data);
1547        return 0;
1548}
1549
1550#ifdef CORE_DUMP_USE_REGSET
1551#include <linux/regset.h>
1552
1553struct elf_thread_core_info {
1554        struct elf_thread_core_info *next;
1555        struct task_struct *task;
1556        struct elf_prstatus prstatus;
1557        struct memelfnote notes[];
1558};
1559
1560struct elf_note_info {
1561        struct elf_thread_core_info *thread;
1562        struct memelfnote psinfo;
1563        struct memelfnote signote;
1564        struct memelfnote auxv;
1565        struct memelfnote files;
1566        user_siginfo_t csigdata;
1567        size_t size;
1568        int thread_notes;
1569};
1570
1571/*
1572 * When a regset has a writeback hook, we call it on each thread before
1573 * dumping user memory.  On register window machines, this makes sure the
1574 * user memory backing the register data is up to date before we read it.
1575 */
1576static void do_thread_regset_writeback(struct task_struct *task,
1577                                       const struct user_regset *regset)
1578{
1579        if (regset->writeback)
1580                regset->writeback(task, regset, 1);
1581}
1582
1583#ifndef PR_REG_SIZE
1584#define PR_REG_SIZE(S) sizeof(S)
1585#endif
1586
1587#ifndef PRSTATUS_SIZE
1588#define PRSTATUS_SIZE(S) sizeof(S)
1589#endif
1590
1591#ifndef PR_REG_PTR
1592#define PR_REG_PTR(S) (&((S)->pr_reg))
1593#endif
1594
1595#ifndef SET_PR_FPVALID
1596#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1597#endif
1598
1599static int fill_thread_core_info(struct elf_thread_core_info *t,
1600                                 const struct user_regset_view *view,
1601                                 long signr, size_t *total)
1602{
1603        unsigned int i;
1604
1605        /*
1606         * NT_PRSTATUS is the one special case, because the regset data
1607         * goes into the pr_reg field inside the note contents, rather
1608         * than being the whole note contents.  We fill the rest in here.
1609         * We assume that regset 0 is NT_PRSTATUS.
1610         */
1611        fill_prstatus(&t->prstatus, t->task, signr);
1612        (void) view->regsets[0].get(t->task, &view->regsets[0],
1613                                    0, PR_REG_SIZE(t->prstatus.pr_reg),
1614                                    PR_REG_PTR(&t->prstatus), NULL);
1615
1616        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1617                  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1618        *total += notesize(&t->notes[0]);
1619
1620        do_thread_regset_writeback(t->task, &view->regsets[0]);
1621
1622        /*
1623         * Every other regset might generate a note too.  For any regset
1624         * that has no core_note_type or is inactive, we leave t->notes[i]
1625         * all zero, and we'll know to skip writing it later.
1626         */
1627        for (i = 1; i < view->n; ++i) {
1628                const struct user_regset *regset = &view->regsets[i];
1629                do_thread_regset_writeback(t->task, regset);
1630                if (regset->core_note_type && regset->get &&
1631                    (!regset->active || regset->active(t->task, regset))) {
1632                        int ret;
1633                        size_t size = regset->n * regset->size;
1634                        void *data = kmalloc(size, GFP_KERNEL);
1635                        if (unlikely(!data))
1636                                return 0;
1637                        ret = regset->get(t->task, regset,
1638                                          0, size, data, NULL);
1639                        if (unlikely(ret))
1640                                kfree(data);
1641                        else {
1642                                if (regset->core_note_type != NT_PRFPREG)
1643                                        fill_note(&t->notes[i], "LINUX",
1644                                                  regset->core_note_type,
1645                                                  size, data);
1646                                else {
1647                                        SET_PR_FPVALID(&t->prstatus, 1);
1648                                        fill_note(&t->notes[i], "CORE",
1649                                                  NT_PRFPREG, size, data);
1650                                }
1651                                *total += notesize(&t->notes[i]);
1652                        }
1653                }
1654        }
1655
1656        return 1;
1657}
1658
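    /*
     * Collect everything for the notes except the memory contents: one
     * prstatus note (plus one note per other active regset) per thread,
     * and the process-wide psinfo, siginfo, auxv and file-map notes.
     */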
1659static int fill_note_info(struct elfhdr *elf, int phdrs,
1660                          struct elf_note_info *info,
1661                          siginfo_t *siginfo, struct pt_regs *regs)
1662{
1663        struct task_struct *dump_task = current;
1664        const struct user_regset_view *view = task_user_regset_view(dump_task);
1665        struct elf_thread_core_info *t;
1666        struct elf_prpsinfo *psinfo;
1667        struct core_thread *ct;
1668        unsigned int i;
1669
1670        info->size = 0;
1671        info->thread = NULL;
1672
1673        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1674        if (psinfo == NULL) {
1675                info->psinfo.data = NULL; /* So we don't free this wrongly */
1676                return 0;
1677        }
1678
1679        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1680
1681        /*
1682         * Figure out how many notes we're going to need for each thread.
1683         */
1684        info->thread_notes = 0;
1685        for (i = 0; i < view->n; ++i)
1686                if (view->regsets[i].core_note_type != 0)
1687                        ++info->thread_notes;
1688
1689        /*
1690         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1691         * since it is our one special case.
1692         */
1693        if (unlikely(info->thread_notes == 0) ||
1694            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1695                WARN_ON(1);
1696                return 0;
1697        }
1698
1699        /*
1700         * Initialize the ELF file header.
1701         */
1702        fill_elf_header(elf, phdrs,
1703                        view->e_machine, view->e_flags);
1704
1705        /*
1706         * Allocate a structure for each thread.
1707         */
1708        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1709                t = kzalloc(offsetof(struct elf_thread_core_info,
1710                                     notes[info->thread_notes]),
1711                            GFP_KERNEL);
1712                if (unlikely(!t))
1713                        return 0;
1714
1715                t->task = ct->task;
1716                if (ct->task == dump_task || !info->thread) {
1717                        t->next = info->thread;
1718                        info->thread = t;
1719                } else {
1720                        /*
1721                         * Make sure to keep the original task at
1722                         * the head of the list.
1723                         */
1724                        t->next = info->thread->next;
1725                        info->thread->next = t;
1726                }
1727        }
1728
1729        /*
1730         * Now fill in each thread's information.
1731         */
1732        for (t = info->thread; t != NULL; t = t->next)
1733                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1734                        return 0;
1735
1736        /*
1737         * Fill in the two process-wide notes.
1738         */
1739        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1740        info->size += notesize(&info->psinfo);
1741
1742        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1743        info->size += notesize(&info->signote);
1744
1745        fill_auxv_note(&info->auxv, current->mm);
1746        info->size += notesize(&info->auxv);
1747
1748        if (fill_files_note(&info->files) == 0)
1749                info->size += notesize(&info->files);
1750
1751        return 1;
1752}
1753
1754static size_t get_note_info_size(struct elf_note_info *info)
1755{
1756        return info->size;
1757}
1758
1759/*
1760 * Write all the notes for each thread.  When writing the first thread, the
1761 * process-wide notes are interleaved after the first thread-specific note.
1762 */
1763static int write_note_info(struct elf_note_info *info,
1764                           struct file *file, loff_t *foffset)
1765{
1766        bool first = true;
1767        struct elf_thread_core_info *t = info->thread;
1768
1769        do {
1770                int i;
1771
1772                if (!writenote(&t->notes[0], file, foffset))
1773                        return 0;
1774
1775                if (first && !writenote(&info->psinfo, file, foffset))
1776                        return 0;
1777                if (first && !writenote(&info->signote, file, foffset))
1778                        return 0;
1779                if (first && !writenote(&info->auxv, file, foffset))
1780                        return 0;
1781                if (first && info->files.data &&
1782                                !writenote(&info->files, file, foffset))
1783                        return 0;
1784
1785                for (i = 1; i < info->thread_notes; ++i)
1786                        if (t->notes[i].data &&
1787                            !writenote(&t->notes[i], file, foffset))
1788                                return 0;
1789
1790                first = false;
1791                t = t->next;
1792        } while (t);
1793
1794        return 1;
1795}
1796
1797static void free_note_info(struct elf_note_info *info)
1798{
1799        struct elf_thread_core_info *threads = info->thread;
1800        while (threads) {
1801                unsigned int i;
1802                struct elf_thread_core_info *t = threads;
1803                threads = t->next;
1804                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1805                for (i = 1; i < info->thread_notes; ++i)
1806                        kfree(t->notes[i].data);
1807                kfree(t);
1808        }
1809        kfree(info->psinfo.data);
1810        vfree(info->files.data);
1811}
1812
1813#else
1814
1815/* Here is the structure in which the status of each thread is captured. */
1816struct elf_thread_status
1817{
1818        struct list_head list;
1819        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1820        elf_fpregset_t fpu;             /* NT_PRFPREG */
1821        struct task_struct *thread;
1822#ifdef ELF_CORE_COPY_XFPREGS
1823        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1824#endif
1825        struct memelfnote notes[3];
1826        int num_notes;
1827};
1828
1829/*
1830 * In order to add the per-thread information to the ELF core file, we
1831 * need to keep a linked list of every thread's prstatus and then create
1832 * a single note section for them in the final core file.
1833 */
1834static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1835{
1836        int sz = 0;
1837        struct task_struct *p = t->thread;
1838        t->num_notes = 0;
1839
1840        fill_prstatus(&t->prstatus, p, signr);
1841        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1842        
1843        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1844                  &(t->prstatus));
1845        t->num_notes++;
1846        sz += notesize(&t->notes[0]);
1847
1848        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1849                                                                &t->fpu))) {
1850                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1851                          &(t->fpu));
1852                t->num_notes++;
1853                sz += notesize(&t->notes[1]);
1854        }
1855
1856#ifdef ELF_CORE_COPY_XFPREGS
1857        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1858                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1859                          sizeof(t->xfpu), &t->xfpu);
1860                t->num_notes++;
1861                sz += notesize(&t->notes[2]);
1862        }
1863#endif  
1864        return sz;
1865}
1866
1867struct elf_note_info {
1868        struct memelfnote *notes;
1869        struct memelfnote *notes_files;
1870        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1871        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1872        struct list_head thread_list;
1873        elf_fpregset_t *fpu;
1874#ifdef ELF_CORE_COPY_XFPREGS
1875        elf_fpxregset_t *xfpu;
1876#endif
1877        user_siginfo_t csigdata;
1878        int thread_status_size;
1879        int numnote;
1880};
1881
1882static int elf_note_info_init(struct elf_note_info *info)
1883{
1884        memset(info, 0, sizeof(*info));
1885        INIT_LIST_HEAD(&info->thread_list);
1886
1887        /* Allocate space for the ELF notes (eight slots; at most seven are used) */
1888        info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1889        if (!info->notes)
1890                return 0;
1891        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1892        if (!info->psinfo)
1893                return 0;
1894        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1895        if (!info->prstatus)
1896                return 0;
1897        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1898        if (!info->fpu)
1899                return 0;
1900#ifdef ELF_CORE_COPY_XFPREGS
1901        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1902        if (!info->xfpu)
1903                return 0;
1904#endif
1905        return 1;
1906}
1907
1908static int fill_note_info(struct elfhdr *elf, int phdrs,
1909                          struct elf_note_info *info,
1910                          siginfo_t *siginfo, struct pt_regs *regs)
1911{
1912        struct list_head *t;
1913
1914        if (!elf_note_info_init(info))
1915                return 0;
1916
1917        if (siginfo->si_signo) {
1918                struct core_thread *ct;
1919                struct elf_thread_status *ets;
1920
1921                for (ct = current->mm->core_state->dumper.next;
1922                                                ct; ct = ct->next) {
1923                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1924                        if (!ets)
1925                                return 0;
1926
1927                        ets->thread = ct->task;
1928                        list_add(&ets->list, &info->thread_list);
1929                }
1930
1931                list_for_each(t, &info->thread_list) {
1932                        int sz;
1933
1934                        ets = list_entry(t, struct elf_thread_status, list);
1935                        sz = elf_dump_thread_status(siginfo->si_signo, ets);
1936                        info->thread_status_size += sz;
1937                }
1938        }
1939        /* Now collect the dump for the current task */
1940        memset(info->prstatus, 0, sizeof(*info->prstatus));
1941        fill_prstatus(info->prstatus, current, siginfo->si_signo);
1942        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1943
1944        /* Set up header */
1945        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1946
1947        /*
1948         * Set up the notes in similar form to SVR4 core dumps made
1949         * with info from their /proc.
1950         */
1951
1952        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1953                  sizeof(*info->prstatus), info->prstatus);
1954        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1955        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1956                  sizeof(*info->psinfo), info->psinfo);
1957
1958        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1959        fill_auxv_note(info->notes + 3, current->mm);
1960        info->numnote = 4;
1961
1962        if (fill_files_note(info->notes + info->numnote) == 0) {
1963                info->notes_files = info->notes + info->numnote;
1964                info->numnote++;
1965        }
1966
1967        /* Try to dump the FPU. */
1968        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1969                                                               info->fpu);
1970        if (info->prstatus->pr_fpvalid)
1971                fill_note(info->notes + info->numnote++,
1972                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1973#ifdef ELF_CORE_COPY_XFPREGS
1974        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1975                fill_note(info->notes + info->numnote++,
1976                          "LINUX", ELF_CORE_XFPREG_TYPE,
1977                          sizeof(*info->xfpu), info->xfpu);
1978#endif
1979
1980        return 1;
1981}
1982
1983static size_t get_note_info_size(struct elf_note_info *info)
1984{
1985        int sz = 0;
1986        int i;
1987
1988        for (i = 0; i < info->numnote; i++)
1989                sz += notesize(info->notes + i);
1990
1991        sz += info->thread_status_size;
1992
1993        return sz;
1994}
1995
1996static int write_note_info(struct elf_note_info *info,
1997                           struct file *file, loff_t *foffset)
1998{
1999        int i;
2000        struct list_head *t;
2001
2002        for (i = 0; i < info->numnote; i++)
2003                if (!writenote(info->notes + i, file, foffset))
2004                        return 0;
2005
2006        /* write out the thread status notes section */
2007        list_for_each(t, &info->thread_list) {
2008                struct elf_thread_status *tmp =
2009                                list_entry(t, struct elf_thread_status, list);
2010
2011                for (i = 0; i < tmp->num_notes; i++)
2012                        if (!writenote(&tmp->notes[i], file, foffset))
2013                                return 0;
2014        }
2015
2016        return 1;
2017}
2018
2019static void free_note_info(struct elf_note_info *info)
2020{
2021        while (!list_empty(&info->thread_list)) {
2022                struct list_head *tmp = info->thread_list.next;
2023                list_del(tmp);
2024                kfree(list_entry(tmp, struct elf_thread_status, list));
2025        }
2026
2027        /* Free data possibly allocated by fill_files_note(): */
2028        if (info->notes_files)
2029                vfree(info->notes_files->data);
2030
2031        kfree(info->prstatus);
2032        kfree(info->psinfo);
2033        kfree(info->notes);
2034        kfree(info->fpu);
2035#ifdef ELF_CORE_COPY_XFPREGS
2036        kfree(info->xfpu);
2037#endif
2038}
2039
2040#endif
2041
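    /*
     * The first vma to dump: the head of the mm's vma list, or the gate
     * vma if the process has no mappings at all.
     */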
2042static struct vm_area_struct *first_vma(struct task_struct *tsk,
2043                                        struct vm_area_struct *gate_vma)
2044{
2045        struct vm_area_struct *ret = tsk->mm->mmap;
2046
2047        if (ret)
2048                return ret;
2049        return gate_vma;
2050}
2051/*
2052 * Helper function for iterating across a vma list.  It ensures that the caller
2053 * will visit `gate_vma' prior to terminating the search.
2054 */
2055static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2056                                        struct vm_area_struct *gate_vma)
2057{
2058        struct vm_area_struct *ret;
2059
2060        ret = this_vma->vm_next;
2061        if (ret)
2062                return ret;
2063        if (this_vma == gate_vma)
2064                return NULL;
2065        return gate_vma;
2066}
2067
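    /*
     * With more than PN_XNUM segments, the real program header count no
     * longer fits in e_phnum; ELF extended numbering stores it in the
     * sh_info field of section header 0 instead.
     */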
2068static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2069                             elf_addr_t e_shoff, int segs)
2070{
2071        elf->e_shoff = e_shoff;
2072        elf->e_shentsize = sizeof(*shdr4extnum);
2073        elf->e_shnum = 1;
2074        elf->e_shstrndx = SHN_UNDEF;
2075
2076        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2077
2078        shdr4extnum->sh_type = SHT_NULL;
2079        shdr4extnum->sh_size = elf->e_shnum;
2080        shdr4extnum->sh_link = elf->e_shstrndx;
2081        shdr4extnum->sh_info = segs;
2082}
2083
2084static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2085                                     unsigned long mm_flags)
2086{
2087        struct vm_area_struct *vma;
2088        size_t size = 0;
2089
2090        for (vma = first_vma(current, gate_vma); vma != NULL;
2091             vma = next_vma(vma, gate_vma))
2092                size += vma_dump_size(vma, mm_flags);
2093        return size;
2094}
2095
2096/*
2097 * Actual dumper
2098 *
2099 * This is a two-pass process; first we compute the offsets of all the
2100 * pieces, and then they are actually written out.  If we hit the core
2101 * file size limit, we just truncate.
2102 */
2103static int elf_core_dump(struct coredump_params *cprm)
2104{
2105        int has_dumped = 0;
2106        mm_segment_t fs;
2107        int segs;
2108        size_t size = 0;
2109        struct vm_area_struct *vma, *gate_vma;
2110        struct elfhdr *elf = NULL;
2111        loff_t offset = 0, dataoff, foffset;
2112        struct elf_note_info info = { };
2113        struct elf_phdr *phdr4note = NULL;
2114        struct elf_shdr *shdr4extnum = NULL;
2115        Elf_Half e_phnum;
2116        elf_addr_t e_shoff;
2117
2118        /*
2119         * We no longer stop all VM operations.
2120         * 
2121         * This is because those processes that could possibly change map_count
2122         * or the mmap / vma pages are now blocked in do_exit on current
2123         * finishing this core dump.
2124         *
2125         * Only ptrace can touch these memory addresses, but it doesn't change
2126         * the map_count or the pages allocated. So no possibility of crashing
2127         * exists while dumping the mm->vm_next areas to the core file.
2128         */
2129  
2130        /* alloc memory for large data structures: too large to be on stack */
2131        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2132        if (!elf)
2133                goto out;
2134        /*
2135         * The number of segs is recorded in the ELF header as a 16-bit value.
2136         * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2137         */
2138        segs = current->mm->map_count;
2139        segs += elf_core_extra_phdrs();
2140
2141        gate_vma = get_gate_vma(current->mm);
2142        if (gate_vma != NULL)
2143                segs++;
2144
2145        /* for notes section */
2146        segs++;
2147
2148        /* If segs > PN_XNUM(0xffff), then e_phnum overflows.  To avoid
2149         * this, the kernel supports extended numbering.  Have a look at
2150         * include/linux/elf.h for further information. */
2151        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2152
2153        /*
2154         * Collect all the non-memory information about the process for the
2155         * notes.  This also sets up the file header.
2156         */
2157        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2158                goto cleanup;
2159
2160        has_dumped = 1;
2161
2162        fs = get_fs();
2163        set_fs(KERNEL_DS);
2164
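        /*
         * Lay out the core file: ELF header, program headers, the notes,
         * then the page-aligned memory dump, with the extra section header
         * (if extended numbering is needed) at the very end.
         */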
2165        offset += sizeof(*elf);                         /* Elf header */
2166        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2167        foffset = offset;
2168
2169        /* Write notes phdr entry */
2170        {
2171                size_t sz = get_note_info_size(&info);
2172
2173                sz += elf_coredump_extra_notes_size();
2174
2175                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2176                if (!phdr4note)
2177                        goto end_coredump;
2178
2179                fill_elf_note_phdr(phdr4note, sz, offset);
2180                offset += sz;
2181        }
2182
2183        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2184
2185        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2186        offset += elf_core_extra_data_size();
2187        e_shoff = offset;
2188
2189        if (e_phnum == PN_XNUM) {
2190                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2191                if (!shdr4extnum)
2192                        goto end_coredump;
2193                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2194        }
2195
2196        offset = dataoff;
2197
2198        size += sizeof(*elf);
2199        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2200                goto end_coredump;
2201
2202        size += sizeof(*phdr4note);
2203        if (size > cprm->limit
2204            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2205                goto end_coredump;
2206
2207        /* Write program headers for segments dump */
2208        for (vma = first_vma(current, gate_vma); vma != NULL;
2209                        vma = next_vma(vma, gate_vma)) {
2210                struct elf_phdr phdr;
2211
2212                phdr.p_type = PT_LOAD;
2213                phdr.p_offset = offset;
2214                phdr.p_vaddr = vma->vm_start;
2215                phdr.p_paddr = 0;
2216                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2217                phdr.p_memsz = vma->vm_end - vma->vm_start;
2218                offset += phdr.p_filesz;
2219                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2220                if (vma->vm_flags & VM_WRITE)
2221                        phdr.p_flags |= PF_W;
2222                if (vma->vm_flags & VM_EXEC)
2223                        phdr.p_flags |= PF_X;
2224                phdr.p_align = ELF_EXEC_PAGESIZE;
2225
2226                size += sizeof(phdr);
2227                if (size > cprm->limit
2228                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2229                        goto end_coredump;
2230        }
2231
2232        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2233                goto end_coredump;
2234
2235        /* write out the notes section */
2236        if (!write_note_info(&info, cprm->file, &foffset))
2237                goto end_coredump;
2238
2239        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2240                goto end_coredump;
2241
2242        /* Align to page */
2243        if (!dump_seek(cprm->file, dataoff - foffset))
2244                goto end_coredump;
2245
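        /*
         * Dump the memory contents.  Pages that cannot be fetched (e.g.
         * anonymous memory that was never touched) become holes in the
         * core file via dump_seek() instead of being written out.
         */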
2246        for (vma = first_vma(current, gate_vma); vma != NULL;
2247                        vma = next_vma(vma, gate_vma)) {
2248                unsigned long addr;
2249                unsigned long end;
2250
2251                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2252
2253                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2254                        struct page *page;
2255                        int stop;
2256
2257                        page = get_dump_page(addr);
2258                        if (page) {
2259                                void *kaddr = kmap(page);
2260                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
2261                                        !dump_write(cprm->file, kaddr,
2262                                                    PAGE_SIZE);
2263                                kunmap(page);
2264                                page_cache_release(page);
2265                        } else
2266                                stop = !dump_seek(cprm->file, PAGE_SIZE);
2267                        if (stop)
2268                                goto end_coredump;
2269                }
2270        }
2271
2272        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2273                goto end_coredump;
2274
2275        if (e_phnum == PN_XNUM) {
2276                size += sizeof(*shdr4extnum);
2277                if (size > cprm->limit
2278                    || !dump_write(cprm->file, shdr4extnum,
2279                                   sizeof(*shdr4extnum)))
2280                        goto end_coredump;
2281        }
2282
2283end_coredump:
2284        set_fs(fs);
2285
2286cleanup:
2287        free_note_info(&info);
2288        kfree(shdr4extnum);
2289        kfree(phdr4note);
2290        kfree(elf);
2291out:
2292        return has_dumped;
2293}
2294
2295#endif          /* CONFIG_ELF_CORE */
2296
2297static int __init init_elf_binfmt(void)
2298{
2299        register_binfmt(&elf_format);
2300        return 0;
2301}
2302
2303static void __exit exit_elf_binfmt(void)
2304{
2305        /* Remove the ELF loader. */
2306        unregister_binfmt(&elf_format);
2307}
2308
2309core_initcall(init_elf_binfmt);
2310module_exit(exit_elf_binfmt);
2311MODULE_LICENSE("GPL");
2312