/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/dax.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
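
/*
 * Illustrative example (not from the original source), assuming
 * ELF_MIN_ALIGN == 4096 (0x1000):
 *   ELF_PAGESTART(0x12345)  == 0x12000   (round down to page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345     (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000   (round up to next page)
 */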

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end, int prot)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                /*
                 * Map the last of the bss segment.
                 * If the header is requesting these pages to be
                 * executable, honour that (ppc32 needs this).
                 */
                addr = vm_brk_flags(start, end - start,
                                prot & PROT_EXEC ? VM_EXEC : 0);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}
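
/*
 * Illustrative example (not from the original source), assuming
 * ELF_MIN_ALIGN == 0x1000: set_brk(0x601234, 0x603000, 0) page-aligns
 * both ends to [0x602000, 0x603000), maps that range anonymously, and
 * then sets mm->start_brk = mm->brk = 0x603000.
 */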

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain the junk from the file that should not
   be in memory
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
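
/*
 * Illustrative example (not from the original source), assuming
 * ELF_MIN_ALIGN == 0x1000: padzero(0x601234) computes nbyte =
 * 0x1000 - 0x234 = 0xdcc and zeroes user memory [0x601234, 0x602000),
 * so stale file bytes in the final partial page never leak into bss.
 */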

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
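
/*
 * Illustrative example (not from the original source), for the common
 * grows-down case: with sp == 0x7fff0010, STACK_ALLOC(sp, 16) moves sp
 * down to 0x7fff0000 and yields that address, so the caller can copy
 * 16 bytes there; STACK_ROUND keeps the final stack 16-byte aligned.
 */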

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;

        /* Populate list of argv pointers back to argv strings. */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, sp++))
                return -EFAULT;
        current->mm->arg_end = p;

        /* Populate list of envp pointers back to envp strings. */
        current->mm->env_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, sp++))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
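
/*
 * Illustrative sketch (not from the original source) of the initial
 * user stack that create_elf_tables() builds, lowest address first:
 *
 *   argc
 *   argv[0] ... argv[argc-1], NULL
 *   envp[0] ... envp[envc-1], NULL
 *   auxv pairs (AT_* id, value), terminated by AT_NULL
 *   ... argument/environment strings, random bytes, platform strings
 *
 * The new program (or ld.so) reads argc/argv/envp here, and walks the
 * auxv pairs to find AT_PHDR, AT_ENTRY, AT_RANDOM, etc.
 */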

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

#endif /* !elf_map */
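
/*
 * Illustrative example (not from the original source), assuming
 * ELF_MIN_ALIGN == 0x1000: for a PT_LOAD with p_vaddr == 0x400123,
 * p_offset == 0x123 and p_filesz == 0x1000, ELF_PAGEOFFSET(p_vaddr)
 * is 0x123, so elf_map() maps size == ELF_PAGEALIGN(0x1123) == 0x2000
 * bytes at ELF_PAGESTART(0x400123) == 0x400000 from file offset 0,
 * keeping the segment's in-page offset intact.
 */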

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
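
/*
 * Illustrative example (not from the original source): with two PT_LOAD
 * headers, the first at p_vaddr == 0x400000 and the last covering
 * [0x600000, 0x600000 + 0x2230), total_mapping_size() returns
 * 0x602230 - 0x400000 == 0x202230, i.e. the span from the first
 * segment's page start to the end of the last segment's memory image.
 */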


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        int bss_prot = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss) {
                                last_bss = k;
                                bss_prot = elf_prot;
                        }
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk_flags(elf_bss, last_bss - elf_bss,
                                bss_prot & PROT_EXEC ? VM_EXEC : 0);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned long random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_long();
                random_variable &= STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
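
/*
 * Illustrative example (not from the original source), assuming
 * PAGE_SHIFT == 12: STACK_RND_MASK == 0x7ff, so random_variable is one
 * of 0x800 page-sized values and the stack top is shifted by up to
 * 0x7ff << 12 == 8 MiB - 4 KiB below (or above, on grows-up stacks)
 * the page-aligned STACK_TOP.
 */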

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char * elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int bss_prot = 0;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);
        install_exec_creds(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0)
                goto out_free_dentry;

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;
                unsigned long total_size = 0;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias,
                                         bss_prot);
                        if (retval)
                                goto out_free_dentry;
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                /*
                 * If we are loading ET_EXEC or we have already performed
                 * the ET_DYN load_addr calculations, proceed normally.
                 */
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /*
                         * This logic is run once for the first LOAD Program
                         * Header for ET_DYN binaries to calculate the
                         * randomization (load_bias) for all the LOAD
                         * Program Headers, and to calculate the entire
                         * size of the ELF mapping (total_size). (Note that
                         * load_addr_set is set to true later once the
                         * initial mapping is performed.)
                         *
                         * There are effectively two types of ET_DYN
                         * binaries: programs (i.e. PIE: ET_DYN with INTERP)
                         * and loaders (ET_DYN without INTERP, since they
                         * _are_ the ELF interpreter). The loaders must
                         * be loaded away from programs since the program
                         * may otherwise collide with the loader (especially
                         * for ET_EXEC which does not have a randomized
                         * position). For example to handle invocations of
                         * "./ld.so someprog" to test out a new version of
                         * the loader, the subsequent program that the
                         * loader loads must avoid the loader itself, so
                         * they cannot share the same load range. Sufficient
                         * room for the brk must be allocated with the
                         * loader as well, since brk must be available with
                         * the loader.
                         *
                         * Therefore, programs are loaded offset from
                         * ELF_ET_DYN_BASE and loaders are loaded into the
                         * independently randomized mmap region (0 load_bias
                         * without MAP_FIXED).
                         */
                        if (elf_interpreter) {
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
                                elf_flags |= MAP_FIXED;
                        } else
                                load_bias = 0;

                        /*
                         * Since load_bias is used for all subsequent loading
                         * calculations, we must lower it by the first vaddr
                         * so that the remaining calculations based on the
                         * ELF vaddrs will be correctly offset. The result
                         * is then page aligned.
                         */
                        load_bias = ELF_PAGESTART(load_bias - vaddr);

                        total_size = total_mapping_size(elf_phdata,
                                                        loc->elf_ex.e_phnum);
                        if (!total_size) {
                                retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk) {
                        bss_prot = elf_prot;
                        elf_brk = k;
                }
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk, bss_prot);
        if (retval)
                goto out_free_dentry;
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0)
                goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0)
                goto out;
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
                current->brk_randomized = 1;
#endif
        }

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}
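
/*
 * Illustrative summary (not from the original source) of the load path
 * above: parse headers -> read the PT_INTERP path -> flush_old_exec()
 * -> set up the stack -> mmap each PT_LOAD (choosing load_bias for
 * ET_DYN) -> set_brk()/padzero() for bss -> map the interpreter (if
 * any) -> create_elf_tables() -> start_thread() at the entry point.
 */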

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include - vDSO, vsyscall, and other
 * architecture specific mappings
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* support for DAX */
        if (vma_is_dax(vma)) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
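
/*
 * Illustrative example (not from the original source): for a note named
 * "CORE" (5 bytes with the NUL, padded to 8) carrying 13 bytes of data
 * (padded to 16), notesize() returns sizeof(struct elf_note) + 8 + 16,
 * i.e. 12 + 24 == 36 bytes on typical configurations.
 */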

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
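
/*
 * Illustrative on-disk layout (not from the original source) of one
 * note record as emitted by writenote():
 *
 *   n_namesz | n_descsz | n_type | name bytes (NUL, padded to 4) |
 *   desc bytes (padded to 4)
 *
 * alignfile() writes the zero padding so each field starts 4-byte
 * aligned, matching the ELF note format readers expect.
 */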
1300#undef DUMP_WRITE
1301
1302static void fill_elf_header(struct elfhdr *elf, int segs,
1303                            u16 machine, u32 flags)
1304{
1305        memset(elf, 0, sizeof(*elf));
1306
1307        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1308        elf->e_ident[EI_CLASS] = ELF_CLASS;
1309        elf->e_ident[EI_DATA] = ELF_DATA;
1310        elf->e_ident[EI_VERSION] = EV_CURRENT;
1311        elf->e_ident[EI_OSABI] = ELF_OSABI;
1312
1313        elf->e_type = ET_CORE;
1314        elf->e_machine = machine;
1315        elf->e_version = EV_CURRENT;
1316        elf->e_phoff = sizeof(struct elfhdr);
1317        elf->e_flags = flags;
1318        elf->e_ehsize = sizeof(struct elfhdr);
1319        elf->e_phentsize = sizeof(struct elf_phdr);
1320        elf->e_phnum = segs;
1321
1322        return;
1323}
1324
1325static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1326{
1327        phdr->p_type = PT_NOTE;
1328        phdr->p_offset = offset;
1329        phdr->p_vaddr = 0;
1330        phdr->p_paddr = 0;
1331        phdr->p_filesz = sz;
1332        phdr->p_memsz = 0;
1333        phdr->p_flags = 0;
1334        phdr->p_align = 0;
1335        return;
1336}
1337
1338static void fill_note(struct memelfnote *note, const char *name, int type, 
1339                unsigned int sz, void *data)
1340{
1341        note->name = name;
1342        note->type = type;
1343        note->datasz = sz;
1344        note->data = data;
1345        return;
1346}
1347
1348/*
1349 * fill up all the fields in prstatus from the given task struct, except
1350 * registers which need to be filled up separately.
1351 */
1352static void fill_prstatus(struct elf_prstatus *prstatus,
1353                struct task_struct *p, long signr)
1354{
1355        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1356        prstatus->pr_sigpend = p->pending.signal.sig[0];
1357        prstatus->pr_sighold = p->blocked.sig[0];
1358        rcu_read_lock();
1359        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1360        rcu_read_unlock();
1361        prstatus->pr_pid = task_pid_vnr(p);
1362        prstatus->pr_pgrp = task_pgrp_vnr(p);
1363        prstatus->pr_sid = task_session_vnr(p);
1364        if (thread_group_leader(p)) {
1365                struct task_cputime cputime;
1366
1367                /*
1368                 * This is the record for the group leader.  It shows the
1369                 * group-wide total, not its individual thread total.
1370                 */
1371                thread_group_cputime(p, &cputime);
1372                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1373                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1374        } else {
1375                cputime_t utime, stime;
1376
1377                task_cputime(p, &utime, &stime);
1378                cputime_to_timeval(utime, &prstatus->pr_utime);
1379                cputime_to_timeval(stime, &prstatus->pr_stime);
1380        }
1381        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1382        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1383}
1384
1385static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1386                       struct mm_struct *mm)
1387{
1388        const struct cred *cred;
1389        unsigned int i, len;
1390        
1391        /* first copy the parameters from user space */
1392        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1393
1394        len = mm->arg_end - mm->arg_start;
1395        if (len >= ELF_PRARGSZ)
1396                len = ELF_PRARGSZ-1;
1397        if (copy_from_user(&psinfo->pr_psargs,
1398                           (const char __user *)mm->arg_start, len))
1399                return -EFAULT;
1400        for (i = 0; i < len; i++)
1401                if (psinfo->pr_psargs[i] == 0)
1402                        psinfo->pr_psargs[i] = ' ';
1403        psinfo->pr_psargs[len] = 0;
1404
1405        rcu_read_lock();
1406        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1407        rcu_read_unlock();
1408        psinfo->pr_pid = task_pid_vnr(p);
1409        psinfo->pr_pgrp = task_pgrp_vnr(p);
1410        psinfo->pr_sid = task_session_vnr(p);
1411
1412        i = p->state ? ffz(~p->state) + 1 : 0;
1413        psinfo->pr_state = i;
1414        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1415        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1416        psinfo->pr_nice = task_nice(p);
1417        psinfo->pr_flag = p->flags;
1418        rcu_read_lock();
1419        cred = __task_cred(p);
1420        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1421        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1422        rcu_read_unlock();
1423        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1424        
1425        return 0;
1426}
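
    /*
     * Editor's example (not in the original source): for an argv of
     * "ls", "-l" the region between arg_start and arg_end holds
     * "ls\0-l\0"; the loop above rewrites each NUL, including the final
     * one, to a space, so pr_psargs ends up as the printable "ls -l ".
     */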
1427
1428static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1429{
1430        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1431        int i = 0;
1432        do
1433                i += 2;
1434        while (auxv[i - 2] != AT_NULL);
1435        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1436}
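
    /*
     * Editor's example (assumption, not in the original source): for a
     * minimal saved vector
     *
     *         { AT_PAGESZ, 4096, AT_NULL, 0 }
     *
     * the loop above stops with i == 4, so the size passed to fill_note()
     * deliberately includes the terminating AT_NULL pair.
     */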
1437
1438static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1439                siginfo_t *siginfo)
1440{
1441        mm_segment_t old_fs = get_fs();
1442        set_fs(KERNEL_DS);
1443        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1444        set_fs(old_fs);
1445        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1446}
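
    /*
     * Editor's note: copy_siginfo_to_user() expects a user-space pointer,
     * so the address limit is temporarily raised to KERNEL_DS to let it
     * write into the kernel buffer csigdata instead.
     */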
1447
1448#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1449/*
1450 * Format of NT_FILE note:
1451 *
1452 * long count     -- how many files are mapped
1453 * long page_size -- units for file_ofs
1454 * array of [COUNT] elements of
1455 *   long start
1456 *   long end
1457 *   long file_ofs
1458 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1459 */
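
    /*
     * Editor's example (not in the original source): two mappings of a
     * hypothetical /lib/libfoo.so would be encoded as
     *
     *         data[0] = 2                          count
     *         data[1] = PAGE_SIZE                  page_size
     *         data[2..4] = start, end, file_ofs    first mapping
     *         data[5..7] = start, end, file_ofs    second mapping
     *         "/lib/libfoo.so\0/lib/libfoo.so\0"
     *
     * with file_ofs expressed in page_size units (vm_pgoff below).
     */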
1460static int fill_files_note(struct memelfnote *note)
1461{
1462        struct vm_area_struct *vma;
1463        unsigned count, size, names_ofs, remaining, n;
1464        user_long_t *data;
1465        user_long_t *start_end_ofs;
1466        char *name_base, *name_curpos;
1467
1468        /* *Estimated* file count and total data size needed */
1469        count = current->mm->map_count;
1470        size = count * 64;
1471
1472        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1473 alloc:
1474        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1475                return -EINVAL;
1476        size = round_up(size, PAGE_SIZE);
1477        data = vmalloc(size);
1478        if (!data)
1479                return -ENOMEM;
1480
1481        start_end_ofs = data + 2;
1482        name_base = name_curpos = ((char *)data) + names_ofs;
1483        remaining = size - names_ofs;
1484        count = 0;
1485        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1486                struct file *file;
1487                const char *filename;
1488
1489                file = vma->vm_file;
1490                if (!file)
1491                        continue;
1492                filename = d_path(&file->f_path, name_curpos, remaining);
1493                if (IS_ERR(filename)) {
1494                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1495                                vfree(data);
1496                                size = size * 5 / 4;
1497                                goto alloc;
1498                        }
1499                        continue;
1500                }
1501
1502                /* d_path() fills in the name at the end of the buffer; move it down */
1503                /* n = strlen(filename) + 1: */
1504                n = (name_curpos + remaining) - filename;
1505                remaining = filename - name_curpos;
1506                memmove(name_curpos, filename, n);
1507                name_curpos += n;
1508
1509                *start_end_ofs++ = vma->vm_start;
1510                *start_end_ofs++ = vma->vm_end;
1511                *start_end_ofs++ = vma->vm_pgoff;
1512                count++;
1513        }
1514
1515        /* Now we know the exact count of files, so we can store it */
1516        data[0] = count;
1517        data[1] = PAGE_SIZE;
1518        /*
1519         * The final count is usually less than current->mm->map_count,
1520         * so we need to move the filenames down.
1521         */
1522        n = current->mm->map_count - count;
1523        if (n != 0) {
1524                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1525                memmove(name_base - shift_bytes, name_base,
1526                        name_curpos - name_base);
1527                name_curpos -= shift_bytes;
1528        }
1529
1530        size = name_curpos - (char *)data;
1531        fill_note(note, "CORE", NT_FILE, size, data);
1532        return 0;
1533}
1534
1535#ifdef CORE_DUMP_USE_REGSET
1536#include <linux/regset.h>
1537
1538struct elf_thread_core_info {
1539        struct elf_thread_core_info *next;
1540        struct task_struct *task;
1541        struct elf_prstatus prstatus;
1542        struct memelfnote notes[0];
1543};
1544
1545struct elf_note_info {
1546        struct elf_thread_core_info *thread;
1547        struct memelfnote psinfo;
1548        struct memelfnote signote;
1549        struct memelfnote auxv;
1550        struct memelfnote files;
1551        user_siginfo_t csigdata;
1552        size_t size;
1553        int thread_notes;
1554};
1555
1556/*
1557 * When a regset has a writeback hook, we call it on each thread before
1558 * dumping user memory.  On register window machines, this makes sure the
1559 * user memory backing the register data is up to date before we read it.
1560 */
1561static void do_thread_regset_writeback(struct task_struct *task,
1562                                       const struct user_regset *regset)
1563{
1564        if (regset->writeback)
1565                regset->writeback(task, regset, 1);
1566}
1567
1568#ifndef PR_REG_SIZE
1569#define PR_REG_SIZE(S) sizeof(S)
1570#endif
1571
1572#ifndef PRSTATUS_SIZE
1573#define PRSTATUS_SIZE(S) sizeof(S)
1574#endif
1575
1576#ifndef PR_REG_PTR
1577#define PR_REG_PTR(S) (&((S)->pr_reg))
1578#endif
1579
1580#ifndef SET_PR_FPVALID
1581#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1582#endif
1583
1584static int fill_thread_core_info(struct elf_thread_core_info *t,
1585                                 const struct user_regset_view *view,
1586                                 long signr, size_t *total)
1587{
1588        unsigned int i;
1589
1590        /*
1591         * NT_PRSTATUS is the one special case, because the regset data
1592         * goes into the pr_reg field inside the note contents, rather
1593 * than being the whole note contents.  We fill the rest in here.
1594         * We assume that regset 0 is NT_PRSTATUS.
1595         */
1596        fill_prstatus(&t->prstatus, t->task, signr);
1597        (void) view->regsets[0].get(t->task, &view->regsets[0],
1598                                    0, PR_REG_SIZE(t->prstatus.pr_reg),
1599                                    PR_REG_PTR(&t->prstatus), NULL);
1600
1601        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1602                  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1603        *total += notesize(&t->notes[0]);
1604
1605        do_thread_regset_writeback(t->task, &view->regsets[0]);
1606
1607        /*
1608         * Each other regset might generate a note too.  For each regset
1609         * that has no core_note_type or is inactive, we leave t->notes[i]
1610         * all zero and we'll know to skip writing it later.
1611         */
1612        for (i = 1; i < view->n; ++i) {
1613                const struct user_regset *regset = &view->regsets[i];
1614                do_thread_regset_writeback(t->task, regset);
1615                if (regset->core_note_type && regset->get &&
1616                    (!regset->active || regset->active(t->task, regset))) {
1617                        int ret;
1618                        size_t size = regset->n * regset->size;
1619                        void *data = kmalloc(size, GFP_KERNEL);
1620                        if (unlikely(!data))
1621                                return 0;
1622                        ret = regset->get(t->task, regset,
1623                                          0, size, data, NULL);
1624                        if (unlikely(ret))
1625                                kfree(data);
1626                        else {
1627                                if (regset->core_note_type != NT_PRFPREG)
1628                                        fill_note(&t->notes[i], "LINUX",
1629                                                  regset->core_note_type,
1630                                                  size, data);
1631                                else {
1632                                        SET_PR_FPVALID(&t->prstatus, 1);
1633                                        fill_note(&t->notes[i], "CORE",
1634                                                  NT_PRFPREG, size, data);
1635                                }
1636                                *total += notesize(&t->notes[i]);
1637                        }
1638                }
1639        }
1640
1641        return 1;
1642}
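
    /*
     * Editor's note: the per-thread note group built above is laid out as
     * "CORE"/NT_PRSTATUS first, then one note per remaining active regset,
     * named either "CORE"/NT_PRFPREG (the FPU case) or
     * "LINUX"/<core_note_type>; regsets that produced no data leave
     * t->notes[i] zeroed and are skipped by write_note_info() below.
     */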
1643
1644static int fill_note_info(struct elfhdr *elf, int phdrs,
1645                          struct elf_note_info *info,
1646                          siginfo_t *siginfo, struct pt_regs *regs)
1647{
1648        struct task_struct *dump_task = current;
1649        const struct user_regset_view *view = task_user_regset_view(dump_task);
1650        struct elf_thread_core_info *t;
1651        struct elf_prpsinfo *psinfo;
1652        struct core_thread *ct;
1653        unsigned int i;
1654
1655        info->size = 0;
1656        info->thread = NULL;
1657
1658        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1659        if (psinfo == NULL) {
1660                info->psinfo.data = NULL; /* So we don't free this wrongly */
1661                return 0;
1662        }
1663
1664        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1665
1666        /*
1667         * Figure out how many notes we're going to need for each thread.
1668         */
1669        info->thread_notes = 0;
1670        for (i = 0; i < view->n; ++i)
1671                if (view->regsets[i].core_note_type != 0)
1672                        ++info->thread_notes;
1673
1674        /*
1675         * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1676         * since it is our one special case.
1677         */
1678        if (unlikely(info->thread_notes == 0) ||
1679            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1680                WARN_ON(1);
1681                return 0;
1682        }
1683
1684        /*
1685         * Initialize the ELF file header.
1686         */
1687        fill_elf_header(elf, phdrs,
1688                        view->e_machine, view->e_flags);
1689
1690        /*
1691         * Allocate a structure for each thread.
1692         */
1693        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1694                t = kzalloc(offsetof(struct elf_thread_core_info,
1695                                     notes[info->thread_notes]),
1696                            GFP_KERNEL);
1697                if (unlikely(!t))
1698                        return 0;
1699
1700                t->task = ct->task;
1701                if (ct->task == dump_task || !info->thread) {
1702                        t->next = info->thread;
1703                        info->thread = t;
1704                } else {
1705                        /*
1706                         * Make sure to keep the original task at
1707                         * the head of the list.
1708                         */
1709                        t->next = info->thread->next;
1710                        info->thread->next = t;
1711                }
1712        }
1713
1714        /*
1715         * Now fill in each thread's information.
1716         */
1717        for (t = info->thread; t != NULL; t = t->next)
1718                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1719                        return 0;
1720
1721        /*
1722         * Fill in the two process-wide notes.
1723         */
1724        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1725        info->size += notesize(&info->psinfo);
1726
1727        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1728        info->size += notesize(&info->signote);
1729
1730        fill_auxv_note(&info->auxv, current->mm);
1731        info->size += notesize(&info->auxv);
1732
1733        if (fill_files_note(&info->files) == 0)
1734                info->size += notesize(&info->files);
1735
1736        return 1;
1737}
1738
1739static size_t get_note_info_size(struct elf_note_info *info)
1740{
1741        return info->size;
1742}
1743
1744/*
1745 * Write all the notes for each thread.  When writing the first thread, the
1746 * process-wide notes are interleaved after the first thread-specific note.
1747 */
1748static int write_note_info(struct elf_note_info *info,
1749                           struct file *file, loff_t *foffset)
1750{
1751        bool first = true;
1752        struct elf_thread_core_info *t = info->thread;
1753
1754        do {
1755                int i;
1756
1757                if (!writenote(&t->notes[0], file, foffset))
1758                        return 0;
1759
1760                if (first && !writenote(&info->psinfo, file, foffset))
1761                        return 0;
1762                if (first && !writenote(&info->signote, file, foffset))
1763                        return 0;
1764                if (first && !writenote(&info->auxv, file, foffset))
1765                        return 0;
1766                if (first && info->files.data &&
1767                                !writenote(&info->files, file, foffset))
1768                        return 0;
1769
1770                for (i = 1; i < info->thread_notes; ++i)
1771                        if (t->notes[i].data &&
1772                            !writenote(&t->notes[i], file, foffset))
1773                                return 0;
1774
1775                first = 0;
1776                t = t->next;
1777        } while (t);
1778
1779        return 1;
1780}
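
    /*
     * Editor's sketch (not in the original source) of the resulting note
     * order for two threads T0 (the dumping thread) and T1:
     *
     *         T0 NT_PRSTATUS, NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE,
     *         T0 per-regset notes, T1 NT_PRSTATUS, T1 per-regset notes
     */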
1781
1782static void free_note_info(struct elf_note_info *info)
1783{
1784        struct elf_thread_core_info *threads = info->thread;
1785        while (threads) {
1786                unsigned int i;
1787                struct elf_thread_core_info *t = threads;
1788                threads = t->next;
1789                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1790                for (i = 1; i < info->thread_notes; ++i)
1791                        kfree(t->notes[i].data);
1792                kfree(t);
1793        }
1794        kfree(info->psinfo.data);
1795        vfree(info->files.data);
1796}
1797
1798#else
1799
1800/* Here is the structure in which status of each thread is captured. */
1801struct elf_thread_status
1802{
1803        struct list_head list;
1804        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1805        elf_fpregset_t fpu;             /* NT_PRFPREG */
1806        struct task_struct *thread;
1807#ifdef ELF_CORE_COPY_XFPREGS
1808        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1809#endif
1810        struct memelfnote notes[3];
1811        int num_notes;
1812};
1813
1814/*
1815 * To add per-thread information to the ELF core file, we keep a linked
1816 * list of every thread's pr_status and then create a single section for
1817 * them in the final core file.
1818 */
1819static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1820{
1821        int sz = 0;
1822        struct task_struct *p = t->thread;
1823        t->num_notes = 0;
1824
1825        fill_prstatus(&t->prstatus, p, signr);
1826        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1827        
1828        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1829                  &(t->prstatus));
1830        t->num_notes++;
1831        sz += notesize(&t->notes[0]);
1832
1833        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1834                                                                &t->fpu))) {
1835                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1836                          &(t->fpu));
1837                t->num_notes++;
1838                sz += notesize(&t->notes[1]);
1839        }
1840
1841#ifdef ELF_CORE_COPY_XFPREGS
1842        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1843                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1844                          sizeof(t->xfpu), &t->xfpu);
1845                t->num_notes++;
1846                sz += notesize(&t->notes[2]);
1847        }
1848#endif  
1849        return sz;
1850}
1851
1852struct elf_note_info {
1853        struct memelfnote *notes;
1854        struct memelfnote *notes_files;
1855        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1856        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1857        struct list_head thread_list;
1858        elf_fpregset_t *fpu;
1859#ifdef ELF_CORE_COPY_XFPREGS
1860        elf_fpxregset_t *xfpu;
1861#endif
1862        user_siginfo_t csigdata;
1863        int thread_status_size;
1864        int numnote;
1865};
1866
1867static int elf_note_info_init(struct elf_note_info *info)
1868{
1869        memset(info, 0, sizeof(*info));
1870        INIT_LIST_HEAD(&info->thread_list);
1871
1872        /* Allocate space for ELF notes */
1873        info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1874        if (!info->notes)
1875                return 0;
1876        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1877        if (!info->psinfo)
1878                return 0;
1879        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1880        if (!info->prstatus)
1881                return 0;
1882        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1883        if (!info->fpu)
1884                return 0;
1885#ifdef ELF_CORE_COPY_XFPREGS
1886        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1887        if (!info->xfpu)
1888                return 0;
1889#endif
1890        return 1;
1891}
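
    /*
     * Editor's note: eight slots cover the worst case assembled by
     * fill_note_info() below: PRSTATUS + PRPSINFO + SIGINFO + AUXV +
     * FILE + PRFPREG (+ XFPREG) is at most seven notes.
     */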
1892
1893static int fill_note_info(struct elfhdr *elf, int phdrs,
1894                          struct elf_note_info *info,
1895                          siginfo_t *siginfo, struct pt_regs *regs)
1896{
1897        struct list_head *t;
1898
1899        if (!elf_note_info_init(info))
1900                return 0;
1901
1902        if (siginfo->si_signo) {
1903                struct core_thread *ct;
1904                struct elf_thread_status *ets;
1905
1906                for (ct = current->mm->core_state->dumper.next;
1907                                                ct; ct = ct->next) {
1908                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1909                        if (!ets)
1910                                return 0;
1911
1912                        ets->thread = ct->task;
1913                        list_add(&ets->list, &info->thread_list);
1914                }
1915
1916                list_for_each(t, &info->thread_list) {
1917                        int sz;
1918
1919                        ets = list_entry(t, struct elf_thread_status, list);
1920                        sz = elf_dump_thread_status(siginfo->si_signo, ets);
1921                        info->thread_status_size += sz;
1922                }
1923        }
1924        /* now collect the dump for the current task */
1925        memset(info->prstatus, 0, sizeof(*info->prstatus));
1926        fill_prstatus(info->prstatus, current, siginfo->si_signo);
1927        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1928
1929        /* Set up header */
1930        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1931
1932        /*
1933         * Set up the notes in similar form to SVR4 core dumps made
1934         * with info from their /proc.
1935         */
1936
1937        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1938                  sizeof(*info->prstatus), info->prstatus);
1939        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1940        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1941                  sizeof(*info->psinfo), info->psinfo);
1942
1943        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1944        fill_auxv_note(info->notes + 3, current->mm);
1945        info->numnote = 4;
1946
1947        if (fill_files_note(info->notes + info->numnote) == 0) {
1948                info->notes_files = info->notes + info->numnote;
1949                info->numnote++;
1950        }
1951
1952        /* Try to dump the FPU. */
1953        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1954                                                               info->fpu);
1955        if (info->prstatus->pr_fpvalid)
1956                fill_note(info->notes + info->numnote++,
1957                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1958#ifdef ELF_CORE_COPY_XFPREGS
1959        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1960                fill_note(info->notes + info->numnote++,
1961                          "LINUX", ELF_CORE_XFPREG_TYPE,
1962                          sizeof(*info->xfpu), info->xfpu);
1963#endif
1964
1965        return 1;
1966}
1967
1968static size_t get_note_info_size(struct elf_note_info *info)
1969{
1970        int sz = 0;
1971        int i;
1972
1973        for (i = 0; i < info->numnote; i++)
1974                sz += notesize(info->notes + i);
1975
1976        sz += info->thread_status_size;
1977
1978        return sz;
1979}
1980
1981static int write_note_info(struct elf_note_info *info,
1982                           struct file *file, loff_t *foffset)
1983{
1984        int i;
1985        struct list_head *t;
1986
1987        for (i = 0; i < info->numnote; i++)
1988                if (!writenote(info->notes + i, file, foffset))
1989                        return 0;
1990
1991        /* write out the thread status notes section */
1992        list_for_each(t, &info->thread_list) {
1993                struct elf_thread_status *tmp =
1994                                list_entry(t, struct elf_thread_status, list);
1995
1996                for (i = 0; i < tmp->num_notes; i++)
1997                        if (!writenote(&tmp->notes[i], file, foffset))
1998                                return 0;
1999        }
2000
2001        return 1;
2002}
2003
2004static void free_note_info(struct elf_note_info *info)
2005{
2006        while (!list_empty(&info->thread_list)) {
2007                struct list_head *tmp = info->thread_list.next;
2008                list_del(tmp);
2009                kfree(list_entry(tmp, struct elf_thread_status, list));
2010        }
2011
2012        /* Free data possibly allocated by fill_files_note(): */
2013        if (info->notes_files)
2014                vfree(info->notes_files->data);
2015
2016        kfree(info->prstatus);
2017        kfree(info->psinfo);
2018        kfree(info->notes);
2019        kfree(info->fpu);
2020#ifdef ELF_CORE_COPY_XFPREGS
2021        kfree(info->xfpu);
2022#endif
2023}
2024
2025#endif
2026
2027static struct vm_area_struct *first_vma(struct task_struct *tsk,
2028                                        struct vm_area_struct *gate_vma)
2029{
2030        struct vm_area_struct *ret = tsk->mm->mmap;
2031
2032        if (ret)
2033                return ret;
2034        return gate_vma;
2035}
2036/*
2037 * Helper function for iterating across a vma list.  It ensures that the caller
2038 * will visit `gate_vma' prior to terminating the search.
2039 */
2040static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2041                                        struct vm_area_struct *gate_vma)
2042{
2043        struct vm_area_struct *ret;
2044
2045        ret = this_vma->vm_next;
2046        if (ret)
2047                return ret;
2048        if (this_vma == gate_vma)
2049                return NULL;
2050        return gate_vma;
2051}
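
    /*
     * Editor's sketch: together the two helpers above yield the canonical
     * dump loop used below, visiting every mapping and then the gate vma
     * exactly once:
     *
     *         for (vma = first_vma(current, gate_vma); vma != NULL;
     *              vma = next_vma(vma, gate_vma))
     *                 ...
     */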
2052
2053static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2054                             elf_addr_t e_shoff, int segs)
2055{
2056        elf->e_shoff = e_shoff;
2057        elf->e_shentsize = sizeof(*shdr4extnum);
2058        elf->e_shnum = 1;
2059        elf->e_shstrndx = SHN_UNDEF;
2060
2061        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2062
2063        shdr4extnum->sh_type = SHT_NULL;
2064        shdr4extnum->sh_size = elf->e_shnum;
2065        shdr4extnum->sh_link = elf->e_shstrndx;
2066        shdr4extnum->sh_info = segs;
2067}
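
    /*
     * Editor's sketch (not in the original source): a consumer honouring
     * the extended-numbering convention set up above would read roughly
     *
     *         int phnum = elf->e_phnum;
     *         if (phnum == PN_XNUM)
     *                 phnum = shdr0->sh_info;
     *
     * where shdr0 is the first entry of the section header table.
     */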
2068
2069static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2070                                     unsigned long mm_flags)
2071{
2072        struct vm_area_struct *vma;
2073        size_t size = 0;
2074
2075        for (vma = first_vma(current, gate_vma); vma != NULL;
2076             vma = next_vma(vma, gate_vma))
2077                size += vma_dump_size(vma, mm_flags);
2078        return size;
2079}
2080
2081/*
2082 * Actual dumper
2083 *
2084 * This is a two-pass process; first we find the offsets of the bits,
2085 * and then they are actually written out.  If we run out of core limit
2086 * we just truncate.
2087 */
2088static int elf_core_dump(struct coredump_params *cprm)
2089{
2090        int has_dumped = 0;
2091        mm_segment_t fs;
2092        int segs;
2093        size_t size = 0;
2094        struct vm_area_struct *vma, *gate_vma;
2095        struct elfhdr *elf = NULL;
2096        loff_t offset = 0, dataoff, foffset;
2097        struct elf_note_info info = { };
2098        struct elf_phdr *phdr4note = NULL;
2099        struct elf_shdr *shdr4extnum = NULL;
2100        Elf_Half e_phnum;
2101        elf_addr_t e_shoff;
2102
2103        /*
2104         * We no longer stop all VM operations.
2105         *
2106         * This is because any processes that could change map_count or
2107         * the mmap / vma pages are now blocked in do_exit until current
2108         * finishes this core dump.
2109         *
2110         * Only ptrace can touch these memory addresses, but it doesn't change
2111         * the map_count or the pages allocated. So no possibility of crashing
2112         * exists while dumping the mm->vm_next areas to the core file.
2113         */
2114  
2115        /* alloc memory for large data structures: too large to be on stack */
2116        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2117        if (!elf)
2118                goto out;
2119        /*
2120         * The number of segs is recorded in the ELF header as a 16-bit value.
2121         * Please check the DEFAULT_MAX_MAP_COUNT definition if you modify this.
2122         */
2123        segs = current->mm->map_count;
2124        segs += elf_core_extra_phdrs();
2125
2126        gate_vma = get_gate_vma(current->mm);
2127        if (gate_vma != NULL)
2128                segs++;
2129
2130        /* for notes section */
2131        segs++;
2132
2133        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2134         * this, the kernel supports extended numbering. Have a look at
2135         * include/linux/elf.h for further information. */
2136        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2137
2138        /*
2139         * Collect all the non-memory information about the process for the
2140         * notes.  This also sets up the file header.
2141         */
2142        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2143                goto cleanup;
2144
2145        has_dumped = 1;
2146
2147        fs = get_fs();
2148        set_fs(KERNEL_DS);
2149
2150        offset += sizeof(*elf);                         /* Elf header */
2151        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2152        foffset = offset;
2153
2154        /* Write notes phdr entry */
2155        {
2156                size_t sz = get_note_info_size(&info);
2157
2158                sz += elf_coredump_extra_notes_size();
2159
2160                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2161                if (!phdr4note)
2162                        goto end_coredump;
2163
2164                fill_elf_note_phdr(phdr4note, sz, offset);
2165                offset += sz;
2166        }
2167
2168        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2169
2170        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2171        offset += elf_core_extra_data_size();
2172        e_shoff = offset;
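
            /*
             * Editor's sketch of the layout just computed (assumption, not
             * in the original source):
             *
             *         [ELF header][program headers][notes]
             *         [padding to ELF_EXEC_PAGESIZE][segment data]
             *         [extra arch data][extnum section header, if any]
             */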
2173
2174        if (e_phnum == PN_XNUM) {
2175                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2176                if (!shdr4extnum)
2177                        goto end_coredump;
2178                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2179        }
2180
2181        offset = dataoff;
2182
2183        size += sizeof(*elf);
2184        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2185                goto end_coredump;
2186
2187        size += sizeof(*phdr4note);
2188        if (size > cprm->limit
2189            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2190                goto end_coredump;
2191
2192        /* Write program headers for segments dump */
2193        for (vma = first_vma(current, gate_vma); vma != NULL;
2194                        vma = next_vma(vma, gate_vma)) {
2195                struct elf_phdr phdr;
2196
2197                phdr.p_type = PT_LOAD;
2198                phdr.p_offset = offset;
2199                phdr.p_vaddr = vma->vm_start;
2200                phdr.p_paddr = 0;
2201                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2202                phdr.p_memsz = vma->vm_end - vma->vm_start;
2203                offset += phdr.p_filesz;
2204                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2205                if (vma->vm_flags & VM_WRITE)
2206                        phdr.p_flags |= PF_W;
2207                if (vma->vm_flags & VM_EXEC)
2208                        phdr.p_flags |= PF_X;
2209                phdr.p_align = ELF_EXEC_PAGESIZE;
2210
2211                size += sizeof(phdr);
2212                if (size > cprm->limit
2213                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2214                        goto end_coredump;
2215        }
2216
2217        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2218                goto end_coredump;
2219
2220        /* write out the notes section */
2221        if (!write_note_info(&info, cprm->file, &foffset))
2222                goto end_coredump;
2223
2224        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2225                goto end_coredump;
2226
2227        /* Align to page */
2228        if (!dump_seek(cprm->file, dataoff - foffset))
2229                goto end_coredump;
2230
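            /*
             * Editor's note: pages that cannot be dumped come back NULL
             * from get_dump_page() and are skipped with dump_seek(),
             * leaving a hole (or zeroes) in the core file rather than
             * failing the dump.
             */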
2231        for (vma = first_vma(current, gate_vma); vma != NULL;
2232                        vma = next_vma(vma, gate_vma)) {
2233                unsigned long addr;
2234                unsigned long end;
2235
2236                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2237
2238                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2239                        struct page *page;
2240                        int stop;
2241
2242                        page = get_dump_page(addr);
2243                        if (page) {
2244                                void *kaddr = kmap(page);
2245                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
2246                                        !dump_write(cprm->file, kaddr,
2247                                                    PAGE_SIZE);
2248                                kunmap(page);
2249                                page_cache_release(page);
2250                        } else
2251                                stop = !dump_seek(cprm->file, PAGE_SIZE);
2252                        if (stop)
2253                                goto end_coredump;
2254                }
2255        }
2256
2257        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2258                goto end_coredump;
2259
2260        if (e_phnum == PN_XNUM) {
2261                size += sizeof(*shdr4extnum);
2262                if (size > cprm->limit
2263                    || !dump_write(cprm->file, shdr4extnum,
2264                                   sizeof(*shdr4extnum)))
2265                        goto end_coredump;
2266        }
2267
2268end_coredump:
2269        set_fs(fs);
2270
2271cleanup:
2272        free_note_info(&info);
2273        kfree(shdr4extnum);
2274        kfree(phdr4note);
2275        kfree(elf);
2276out:
2277        return has_dumped;
2278}
2279
2280#endif          /* CONFIG_ELF_CORE */
2281
2282static int __init init_elf_binfmt(void)
2283{
2284        register_binfmt(&elf_format);
2285        return 0;
2286}
2287
2288static void __exit exit_elf_binfmt(void)
2289{
2290        /* Remove the ELF loader. */
2291        unregister_binfmt(&elf_format);
2292}
2293
2294core_initcall(init_elf_binfmt);
2295module_exit(exit_elf_binfmt);
2296MODULE_LICENSE("GPL");
2297