// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef ELF_COMPAT
#define ELF_COMPAT 0
#endif

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
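
/*
 * A worked example of the macros above, assuming ELF_MIN_ALIGN is 4096
 * (0x1000), as it is on most configurations:
 *
 *   ELF_PAGESTART(0x12345)  == 0x12000   (round down to the page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345     (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000   (round up to the next page)
 */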

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))

static int set_brk(unsigned long start, unsigned long end, int prot)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                /*
                 * Map the last of the bss segment.
                 * If the header is requesting these pages to be
                 * executable, honour that (ppc32 needs this).
                 */
                int error = vm_brk_flags(start, end - start,
                                prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        return error;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/*
 * We need to explicitly zero any fractional pages
 * after the data section (i.e. bss).  Otherwise they
 * would contain junk from the file that should not
 * be in memory.
 */
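/*
 * For example (numbers assumed for illustration, with ELF_MIN_ALIGN of
 * 0x1000): if elf_bss is 0x404234, the partial page 0x404234..0x404fff
 * was mapped from the file but belongs to the bss, so the 0xdcc bytes
 * from 0x404234 up to the page boundary are cleared here.
 */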
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
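
/*
 * A sketch of the common (downward-growing) case, for illustration only:
 * with sp == 0x7ffffffff000, STACK_ALLOC(sp, 16) moves sp down to
 * 0x7fffffffeff0 and yields that new value, so the caller can copy 16
 * bytes there; STACK_ROUND aligns the final sp down to a 16-byte
 * boundary, as most ABIs require.
 */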

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

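/*
 * create_elf_tables() builds the initial process stack.  A sketch of the
 * resulting layout in the common stack-grows-down case (low addresses
 * first; this follows the System V ABI and is descriptive rather than
 * taken verbatim from the code below):
 *
 *   sp ->  argc
 *          argv[0] ... argv[argc - 1], NULL
 *          envp[0] ... envp[envc - 1], NULL
 *          auxv pairs (a_type, a_val) ... terminated by AT_NULL
 *          ... padding, random bytes, platform strings and the
 *              argv/envp strings, ending at the stack top
 */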
static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr,
                unsigned long e_entry)
{
        struct mm_struct *mm = current->mm;
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                *elf_info++ = id; \
                *elf_info++ = val; \
        } while (0)

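/*
 * Each NEW_AUX_ENT() emits one auxiliary-vector entry: a (type, value)
 * pair of elf_addr_t words.  For example, NEW_AUX_ENT(AT_PAGESZ,
 * ELF_EXEC_PAGESIZE) is what lets userspace later do
 * getauxval(AT_PAGESZ).  The vector is terminated by an AT_NULL pair,
 * provided here by the memset() further below.
 */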
#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->have_execfd) {
                NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(elf_info, 0, (char *)mm->saved_auxv +
                        sizeof(mm->saved_auxv) - (char *)elf_info);

        /* And advance past the AT_NULL entry.  */
        elf_info += 2;

        ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (put_user(argc, sp++))
                return -EFAULT;

        /* Populate list of argv pointers back to argv strings. */
        p = mm->arg_end = mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (put_user(0, sp++))
                return -EFAULT;
        mm->arg_end = p;

        /* Populate list of envp pointers back to envp strings. */
        mm->env_end = mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (put_user(0, sp++))
                return -EFAULT;
        mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

static unsigned long elf_map(struct file *filep, unsigned long addr,
                const struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        if ((type & MAP_FIXED_NOREPLACE) &&
            PTR_ERR((void *)map_addr) == -EEXIST)
                pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
                        task_pid_nr(current), current->comm, (void *)addr);

        return map_addr;
}

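/*
 * total_mapping_size() returns the span from the page containing the
 * first PT_LOAD segment to the end of the last one.  A hypothetical
 * example for illustration: with PT_LOAD segments at p_vaddr 0x400000
 * (p_memsz 0x1000) and p_vaddr 0x601000 (p_memsz 0x2230), the result is
 * 0x601000 + 0x2230 - 0x400000 = 0x203230 bytes.
 */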
static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
{
        ssize_t rv;

        rv = kernel_read(file, buf, len, &pos);
        if (unlikely(rv != len)) {
                return (rv < 0) ? rv : -EIO;
        }
        return 0;
}

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
                                       struct file *elf_file)
{
        struct elf_phdr *elf_phdata = NULL;
        int retval, err = -1;
        unsigned int size;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;

        /* Sanity check the number of program headers and their total size. */
        size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
        if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
                goto out;

        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        /* Read in the program headers */
        retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
        if (retval < 0) {
                err = retval;
                goto out;
        }

        /* Success! */
        err = 0;
out:
        if (err) {
                kfree(elf_phdata);
                elf_phdata = NULL;
        }
        return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (i.e. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:       The main ELF header
 * @phdr:       The program header to check
 * @elf:        The open ELF file
 * @is_interp:  True if the phdr is from the interpreter of the ELF being
 *              loaded, else false.
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
                                   struct elf_phdr *phdr,
                                   struct file *elf, bool is_interp,
                                   struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:       The main ELF header
 * @has_interp: True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called
 * after all program headers in the PT_LOPROC..PT_HIPROC range have been
 * checked by arch_elf_pt_proc().
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
                                 struct elfhdr *interp_ehdr,
                                 struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
                            bool has_interp, bool is_interp)
{
        int prot = 0;

        if (p_flags & PF_R)
                prot |= PROT_READ;
        if (p_flags & PF_W)
                prot |= PROT_WRITE;
        if (p_flags & PF_X)
                prot |= PROT_EXEC;

        return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

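/*
 * A sketch of the file-size vs. memory-size relationship handled below
 * (descriptive only):
 *
 *   file image:    |<------- p_filesz ------->|
 *   memory image:  |<------- p_memsz ------------------------>|
 *                                             ^elf_bss        ^last_bss
 *
 * The bytes between elf_bss and last_bss are the bss: they exist in
 * memory but not in the file, so they are mapped anonymously and
 * zeroed once the file-backed mapping is in place.
 */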
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter,
                unsigned long no_base, struct elf_phdr *interp_elf_phdata,
                struct arch_elf_state *arch_state)
{
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        int bss_prot = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int i;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex) ||
            elf_check_fdpic(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        total_size = total_mapping_size(interp_elf_phdata,
                                        interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out;
        }

        eppnt = interp_elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = make_prot(eppnt->p_flags, arch_state,
                                                 true, true);
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED_NOREPLACE;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
                        if (k > last_bss) {
                                last_bss = k;
                                bss_prot = elf_prot;
                        }
                }
        }

        /*
         * Now fill out the bss section: first pad the last page from
         * the file up to the page boundary, and zero it from elf_bss
         * up to the end of the page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out;
        }
        /*
         * Next, align both the file and mem bss up to the page size,
         * since this is where elf_bss was just zeroed up to, and where
         * last_bss will end after the vm_brk_flags() below.
         */
        elf_bss = ELF_PAGEALIGN(elf_bss);
        last_bss = ELF_PAGEALIGN(last_bss);
        /* Finally, if there is still more bss to allocate, do it. */
        if (last_bss > elf_bss) {
                error = vm_brk_flags(elf_bss, last_bss - elf_bss,
                                bss_prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        goto out;
        }

        error = load_addr;
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

static int parse_elf_property(const char *data, size_t *off, size_t datasz,
                              struct arch_elf_state *arch,
                              bool have_prev_type, u32 *prev_type)
{
        size_t o, step;
        const struct gnu_property *pr;
        int ret;

        if (*off == datasz)
                return -ENOENT;

        if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
                return -EIO;
        o = *off;
        datasz -= *off;

        if (datasz < sizeof(*pr))
                return -ENOEXEC;
        pr = (const struct gnu_property *)(data + o);
        o += sizeof(*pr);
        datasz -= sizeof(*pr);

        if (pr->pr_datasz > datasz)
                return -ENOEXEC;

        WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
        step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
        if (step > datasz)
                return -ENOEXEC;

        /* Properties are supposed to be unique and sorted on pr_type: */
        if (have_prev_type && pr->pr_type <= *prev_type)
                return -ENOEXEC;
        *prev_type = pr->pr_type;

        ret = arch_parse_elf_property(pr->pr_type, data + o,
                                      pr->pr_datasz, ELF_COMPAT, arch);
        if (ret)
                return ret;

        *off = o + step;
        return 0;
}

#define NOTE_DATA_SZ SZ_1K
#define GNU_PROPERTY_TYPE_0_NAME "GNU"
#define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))

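/*
 * The PT_GNU_PROPERTY segment parsed below is a single ELF note.  Its
 * on-disk layout (descriptive, per the ELF gABI note format):
 *
 *   Elf_Nhdr { n_namesz = 4, n_descsz, n_type = NT_GNU_PROPERTY_TYPE_0 }
 *   name:  "GNU\0"
 *   desc:  a sequence of { u32 pr_type; u32 pr_datasz; u8 data[]; }
 *          entries, each padded to ELF_GNU_PROPERTY_ALIGN
 */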
static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
                                struct arch_elf_state *arch)
{
        union {
                struct elf_note nhdr;
                char data[NOTE_DATA_SZ];
        } note;
        loff_t pos;
        ssize_t n;
        size_t off, datasz;
        int ret;
        bool have_prev_type;
        u32 prev_type;

        if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
                return 0;

        /* load_elf_binary() shouldn't call us unless this is true... */
        if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
                return -ENOEXEC;

        /* If the properties are crazy large, that's too bad (for now): */
        if (phdr->p_filesz > sizeof(note))
                return -ENOEXEC;

        pos = phdr->p_offset;
        n = kernel_read(f, &note, phdr->p_filesz, &pos);

        BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
        if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
                return -EIO;

        if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
            note.nhdr.n_namesz != NOTE_NAME_SZ ||
            strncmp(note.data + sizeof(note.nhdr),
                    GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
                return -ENOEXEC;

        off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
                       ELF_GNU_PROPERTY_ALIGN);
        if (off > n)
                return -ENOEXEC;

        if (note.nhdr.n_descsz > n - off)
                return -ENOEXEC;
        datasz = off + note.nhdr.n_descsz;

        have_prev_type = false;
        do {
                ret = parse_elf_property(note.data, &off, datasz, arch,
                                         have_prev_type, &prev_type);
                have_prev_type = true;
        } while (!ret);

        return ret == -ENOENT ? 0 : ret;
}

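/*
 * The main loader.  In outline (a summary of the code below, not a
 * normative description): validate the ELF header, read the program
 * headers, open any PT_INTERP interpreter, and only then commit to the
 * new image with begin_new_exec(); after that point errors are fatal to
 * the process.  The PT_LOAD segments are then mmap()ed (with a
 * randomized load_bias for ET_DYN), the brk is placed after the bss,
 * the interpreter (if any) is mapped, the argv/envp/auxv tables are
 * built on the new stack, and finally start_thread() points the
 * registers at the entry point.
 */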
static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
        struct elf_phdr *elf_property_phdata = NULL;
        unsigned long elf_bss, elf_brk;
        int bss_prot = 0;
        int retval, i;
        unsigned long elf_entry;
        unsigned long e_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
        struct elfhdr *interp_elf_ex = NULL;
        struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
        struct mm_struct *mm;
        struct pt_regs *regs;

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(elf_ex))
                goto out;
        if (elf_check_fdpic(elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
        if (!elf_phdata)
                goto out;

        elf_ppnt = elf_phdata;
        for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
                char *elf_interpreter;

                if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
                        elf_property_phdata = elf_ppnt;
                        continue;
                }

                if (elf_ppnt->p_type != PT_INTERP)
                        continue;

                /*
                 * This is the program interpreter used for shared libraries -
                 * for now assume that this is an a.out format binary.
                 */
                retval = -ENOEXEC;
                if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
                        goto out_free_ph;

                retval = -ENOMEM;
                elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
                if (!elf_interpreter)
                        goto out_free_ph;

                retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
                                  elf_ppnt->p_offset);
                if (retval < 0)
                        goto out_free_interp;
                /* make sure path is NULL terminated */
                retval = -ENOEXEC;
                if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                        goto out_free_interp;

                interpreter = open_exec(elf_interpreter);
                kfree(elf_interpreter);
                retval = PTR_ERR(interpreter);
                if (IS_ERR(interpreter))
                        goto out_free_ph;

                /*
                 * If the binary is not readable then enforce mm->dumpable = 0
                 * regardless of the interpreter's permissions.
                 */
                would_dump(bprm, interpreter);

                interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
                if (!interp_elf_ex) {
                        retval = -ENOMEM;
                        goto out_free_ph;
                }

                /* Get the exec headers */
                retval = elf_read(interpreter, interp_elf_ex,
                                  sizeof(*interp_elf_ex), 0);
                if (retval < 0)
                        goto out_free_dentry;

                break;

out_free_interp:
                kfree(elf_interpreter);
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
                switch (elf_ppnt->p_type) {
                case PT_GNU_STACK:
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;

                case PT_LOPROC ... PT_HIPROC:
                        retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
                                                  bprm->file, false,
                                                  &arch_state);
                        if (retval)
                                goto out_free_dentry;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(interp_elf_ex) ||
                    elf_check_fdpic(interp_elf_ex))
                        goto out_free_dentry;

                /* Load the interpreter program headers */
                interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
                                                   interpreter);
                if (!interp_elf_phdata)
                        goto out_free_dentry;

                /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
                elf_property_phdata = NULL;
                elf_ppnt = interp_elf_phdata;
                for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
                        switch (elf_ppnt->p_type) {
                        case PT_GNU_PROPERTY:
                                elf_property_phdata = elf_ppnt;
                                break;

                        case PT_LOPROC ... PT_HIPROC:
                                retval = arch_elf_pt_proc(interp_elf_ex,
                                                          elf_ppnt, interpreter,
                                                          true, &arch_state);
                                if (retval)
                                        goto out_free_dentry;
                                break;
                        }
        }

        retval = parse_elf_properties(interpreter ?: bprm->file,
                                      elf_property_phdata, &arch_state);
        if (retval)
                goto out_free_dentry;

        /*
         * Allow arch code to reject the ELF at this point, whilst it's
         * still possible to return an error to the code that invoked
         * the exec syscall.
         */
        retval = arch_check_elf(elf_ex,
                                !!interpreter, interp_elf_ex,
                                &arch_state);
        if (retval)
                goto out_free_dentry;

        /* Flush all traces of the currently running executable */
        retval = begin_new_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY2(*elf_ex, &arch_state);
        if (elf_read_implies_exec(*elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0)
                goto out_free_dentry;

        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < elf_ex->e_phnum; i++, elf_ppnt++) {
                int elf_prot, elf_flags;
                unsigned long k, vaddr;
                unsigned long total_size = 0;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias,
                                         bss_prot);
                        if (retval)
                                goto out_free_dentry;
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
                                     !!interpreter, false);

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                /*
                 * If we are loading ET_EXEC or we have already performed
                 * the ET_DYN load_addr calculations, proceed normally.
                 */
                if (elf_ex->e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (elf_ex->e_type == ET_DYN) {
                        /*
                         * This logic is run once for the first LOAD Program
                         * Header for ET_DYN binaries to calculate the
                         * randomization (load_bias) for all the LOAD
                         * Program Headers, and to calculate the entire
                         * size of the ELF mapping (total_size). (Note that
                         * load_addr_set is set to true later once the
                         * initial mapping is performed.)
                         *
                         * There are effectively two types of ET_DYN
                         * binaries: programs (i.e. PIE: ET_DYN with INTERP)
                         * and loaders (ET_DYN without INTERP, since they
                         * _are_ the ELF interpreter). The loaders must
                         * be loaded away from programs since the program
                         * may otherwise collide with the loader (especially
                         * for ET_EXEC which does not have a randomized
                         * position). For example to handle invocations of
                         * "./ld.so someprog" to test out a new version of
                         * the loader, the subsequent program that the
                         * loader loads must avoid the loader itself, so
                         * they cannot share the same load range. Sufficient
                         * room for the brk must be allocated with the
                         * loader as well, since brk must be available with
                         * the loader.
                         *
                         * Therefore, programs are loaded offset from
                         * ELF_ET_DYN_BASE and loaders are loaded into the
                         * independently randomized mmap region (0 load_bias
                         * without MAP_FIXED).
                         */
                        if (interpreter) {
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
                                elf_flags |= MAP_FIXED;
                        } else
                                load_bias = 0;

                        /*
                         * Since load_bias is used for all subsequent loading
                         * calculations, we must lower it by the first vaddr
                         * so that the remaining calculations based on the
                         * ELF vaddrs will be correctly offset. The result
                         * is then page aligned.
                         */
                        load_bias = ELF_PAGESTART(load_bias - vaddr);

                        total_size = total_mapping_size(elf_phdata,
                                                        elf_ex->e_phnum);
                        if (!total_size) {
                                retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }

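                /*
                 * A hypothetical example of the bias arithmetic above,
                 * for illustration only: a PIE whose first PT_LOAD has
                 * p_vaddr 0, with ELF_ET_DYN_BASE near 0x555555554000 (a
                 * common x86-64 value) plus a random offset, ends up with
                 * that sum as load_bias, so a segment with p_vaddr 0x1000
                 * is mapped at load_bias + 0x1000.
                 */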
                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (elf_ex->e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if ((elf_ppnt->p_flags & PF_X) && k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk) {
                        bss_prot = elf_prot;
                        elf_brk = k;
                }
        }

        e_entry = elf_ex->e_entry + load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk, bss_prot);
        if (retval)
                goto out_free_dentry;
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (interpreter) {
                elf_entry = load_elf_interp(interp_elf_ex,
                                            interpreter,
                                            load_bias, interp_elf_phdata,
                                            &arch_state);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += interp_elf_ex->e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);

                kfree(interp_elf_ex);
                kfree(interp_elf_phdata);
        } else {
                elf_entry = e_entry;
                if (BAD_ADDR(elf_entry)) {
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!interpreter);
        if (retval < 0)
                goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        retval = create_elf_tables(bprm, elf_ex,
                          load_addr, interp_load_addr, e_entry);
        if (retval < 0)
                goto out;

        mm = current->mm;
        mm->end_code = end_code;
        mm->start_code = start_code;
        mm->start_data = start_data;
        mm->end_data = end_data;
        mm->start_stack = bprm->p;

        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                /*
                 * For architectures with ELF randomization, when executing
                 * a loader directly (i.e. no interpreter listed in ELF
                 * headers), move the brk area out of the mmap region
                 * (since it grows up, and may collide early with the stack
                 * growing down), and into the unused ELF_ET_DYN_BASE region.
                 */
                if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
                    elf_ex->e_type == ET_DYN && !interpreter) {
                        mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
                }

                mm->brk = mm->start_brk = arch_randomize_brk(mm);
#ifdef compat_brk_randomized
                current->brk_randomized = 1;
#endif
        }

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

        regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization of
         * the regs structure is required, as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        finalize_exec(bprm);
        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        return retval;

        /* error cleanup */
out_free_dentry:
        kfree(interp_elf_ex);
        kfree(interp_elf_phdata);
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
        if (retval < 0)
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                goto out;
        if (elf_check_fdpic(&elf_ex))
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
        if (retval < 0)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
        bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
        if (bss > len) {
                error = vm_brk(len, bss - len);
                if (error)
                        goto out_free_ph;
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}
#endif /* #ifdef CONFIG_USELIB */
1383
1384#ifdef CONFIG_ELF_CORE
1385/*
1386 * ELF core dumper
1387 *
1388 * Modelled on fs/exec.c:aout_core_dump()
1389 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1390 */
1391
1392/*
1393 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1394 * that are useful for post-mortem analysis are included in every core dump.
1395 * In that way we ensure that the core dump is fully interpretable later
1396 * without matching up the same kernel and hardware config to see what PC values
1397 * meant. These special mappings include the vDSO, vsyscall, and other
1398 * architecture-specific mappings.
1399 */
1400static bool always_dump_vma(struct vm_area_struct *vma)
1401{
1402        /* Any vsyscall mappings? */
1403        if (vma == get_gate_vma(vma->vm_mm))
1404                return true;
1405
1406        /*
1407         * Assume that all vmas with a .name op should always be dumped.
1408         * If this changes, a new vm_ops field can easily be added.
1409         */
1410        if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1411                return true;
1412
1413        /*
1414         * arch_vma_name() returns non-NULL for special architecture mappings,
1415         * such as vDSO sections.
1416         */
1417        if (arch_vma_name(vma))
1418                return true;
1419
1420        return false;
1421}
1422
1423/*
1424 * Decide how much of a segment to dump: all of it, part of it, or none.
1425 */
1426static unsigned long vma_dump_size(struct vm_area_struct *vma,
1427                                   unsigned long mm_flags)
1428{
1429#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1430
1431        /* always dump the vdso and vsyscall sections */
1432        if (always_dump_vma(vma))
1433                goto whole;
1434
1435        if (vma->vm_flags & VM_DONTDUMP)
1436                return 0;
1437
1438        /* support for DAX */
1439        if (vma_is_dax(vma)) {
1440                if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1441                        goto whole;
1442                if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1443                        goto whole;
1444                return 0;
1445        }
1446
1447        /* Hugetlb memory check */
1448        if (is_vm_hugetlb_page(vma)) {
1449                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1450                        goto whole;
1451                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1452                        goto whole;
1453                return 0;
1454        }
1455
1456        /* Do not dump I/O mapped devices or special mappings */
1457        if (vma->vm_flags & VM_IO)
1458                return 0;
1459
1460        /* By default, dump shared memory if mapped from an anonymous file. */
1461        if (vma->vm_flags & VM_SHARED) {
1462                if (file_inode(vma->vm_file)->i_nlink == 0 ?
1463                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1464                        goto whole;
1465                return 0;
1466        }
1467
1468        /* Dump segments that have been written to.  */
1469        if (vma->anon_vma && FILTER(ANON_PRIVATE))
1470                goto whole;
1471        if (vma->vm_file == NULL)
1472                return 0;
1473
1474        if (FILTER(MAPPED_PRIVATE))
1475                goto whole;
1476
1477        /*
1478         * If this looks like the beginning of a DSO or executable mapping,
1479         * check for an ELF header.  If we find one, dump the first page to
1480         * aid in determining what was mapped here.
1481         */
1482        if (FILTER(ELF_HEADERS) &&
1483            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1484                u32 __user *header = (u32 __user *) vma->vm_start;
1485                u32 word;
1486                /*
1487                 * Doing it this way gets the constant folded by GCC.
1488                 */
1489                union {
1490                        u32 cmp;
1491                        char elfmag[SELFMAG];
1492                } magic;
1493                BUILD_BUG_ON(SELFMAG != sizeof word);
1494                magic.elfmag[EI_MAG0] = ELFMAG0;
1495                magic.elfmag[EI_MAG1] = ELFMAG1;
1496                magic.elfmag[EI_MAG2] = ELFMAG2;
1497                magic.elfmag[EI_MAG3] = ELFMAG3;
1498                if (unlikely(get_user(word, header)))
1499                        word = 0;
1500                if (word == magic.cmp)
1501                        return PAGE_SIZE;
1502        }
1503
1504#undef  FILTER
1505
1506        return 0;
1507
1508whole:
1509        return vma->vm_end - vma->vm_start;
1510}
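
/*
 * Editor's illustrative sketch (not part of the kernel source): the
 * MMF_DUMP_* bits tested by FILTER() above are controlled from userspace
 * via /proc/<pid>/coredump_filter; the bit layout below follows core(5).
 */
#if 0 /* userspace example, for exposition only */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/self/coredump_filter", "w");

	if (!f)
		return 1;
	/* bit 0: anon private, bit 1: anon shared, bit 4: ELF headers */
	fprintf(f, "0x13\n");
	return fclose(f) ? 1 : 0;
}
#endif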
1511
1512/* An ELF note in memory */
1513struct memelfnote
1514{
1515        const char *name;
1516        int type;
1517        unsigned int datasz;
1518        void *data;
1519};
1520
1521static int notesize(struct memelfnote *en)
1522{
1523        int sz;
1524
1525        sz = sizeof(struct elf_note);
1526        sz += roundup(strlen(en->name) + 1, 4);
1527        sz += roundup(en->datasz, 4);
1528
1529        return sz;
1530}
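
/*
 * Editor's note: for example, a "CORE"-named NT_PRSTATUS note occupies
 * sizeof(struct elf_note) + roundup(strlen("CORE") + 1, 4) +
 * roundup(sizeof(struct elf_prstatus), 4) bytes; the 4-byte rounding of
 * both name and descriptor mirrors the padding emitted by writenote().
 */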
1531
1532static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1533{
1534        struct elf_note en;
1535        en.n_namesz = strlen(men->name) + 1;
1536        en.n_descsz = men->datasz;
1537        en.n_type = men->type;
1538
1539        return dump_emit(cprm, &en, sizeof(en)) &&
1540            dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1541            dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1542}
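
/*
 * Editor's illustrative sketch (not part of the kernel source): the same
 * on-disk note layout that writenote() emits - header, NUL-terminated name
 * padded to 4 bytes, then descriptor padded to 4 bytes - written from
 * userspace with stdio and the Elf64_Nhdr type from <elf.h>.
 */
#if 0 /* userspace example, for exposition only */
#include <elf.h>
#include <stdio.h>
#include <string.h>

static int emit_pad(FILE *f, size_t n)
{
	static const char zeros[4];

	return n == 0 || fwrite(zeros, n, 1, f) == 1;
}

static int emit_note(FILE *f, const char *name, Elf64_Word type,
		     const void *desc, Elf64_Word descsz)
{
	Elf64_Nhdr nhdr = {
		.n_namesz = strlen(name) + 1,
		.n_descsz = descsz,
		.n_type   = type,
	};

	return fwrite(&nhdr, sizeof(nhdr), 1, f) == 1 &&
	       fwrite(name, nhdr.n_namesz, 1, f) == 1 &&
	       emit_pad(f, (4 - (nhdr.n_namesz & 3)) & 3) &&
	       (descsz == 0 || fwrite(desc, descsz, 1, f) == 1) &&
	       emit_pad(f, (4 - (descsz & 3)) & 3);
}
#endif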
1543
1544static void fill_elf_header(struct elfhdr *elf, int segs,
1545                            u16 machine, u32 flags)
1546{
1547        memset(elf, 0, sizeof(*elf));
1548
1549        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1550        elf->e_ident[EI_CLASS] = ELF_CLASS;
1551        elf->e_ident[EI_DATA] = ELF_DATA;
1552        elf->e_ident[EI_VERSION] = EV_CURRENT;
1553        elf->e_ident[EI_OSABI] = ELF_OSABI;
1554
1555        elf->e_type = ET_CORE;
1556        elf->e_machine = machine;
1557        elf->e_version = EV_CURRENT;
1558        elf->e_phoff = sizeof(struct elfhdr);
1559        elf->e_flags = flags;
1560        elf->e_ehsize = sizeof(struct elfhdr);
1561        elf->e_phentsize = sizeof(struct elf_phdr);
1562        elf->e_phnum = segs;
1563}
1564
1565static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1566{
1567        phdr->p_type = PT_NOTE;
1568        phdr->p_offset = offset;
1569        phdr->p_vaddr = 0;
1570        phdr->p_paddr = 0;
1571        phdr->p_filesz = sz;
1572        phdr->p_memsz = 0;
1573        phdr->p_flags = 0;
1574        phdr->p_align = 0;
1575}
1576
1577static void fill_note(struct memelfnote *note, const char *name, int type, 
1578                unsigned int sz, void *data)
1579{
1580        note->name = name;
1581        note->type = type;
1582        note->datasz = sz;
1583        note->data = data;
1584}
1585
1586/*
1587 * fill up all the fields in prstatus from the given task struct, except
1588 * registers which need to be filled up separately.
1589 */
1590static void fill_prstatus(struct elf_prstatus *prstatus,
1591                struct task_struct *p, long signr)
1592{
1593        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1594        prstatus->pr_sigpend = p->pending.signal.sig[0];
1595        prstatus->pr_sighold = p->blocked.sig[0];
1596        rcu_read_lock();
1597        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1598        rcu_read_unlock();
1599        prstatus->pr_pid = task_pid_vnr(p);
1600        prstatus->pr_pgrp = task_pgrp_vnr(p);
1601        prstatus->pr_sid = task_session_vnr(p);
1602        if (thread_group_leader(p)) {
1603                struct task_cputime cputime;
1604
1605                /*
1606                 * This is the record for the group leader.  It shows the
1607                 * group-wide total, not its individual thread total.
1608                 */
1609                thread_group_cputime(p, &cputime);
1610                prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1611                prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1612        } else {
1613                u64 utime, stime;
1614
1615                task_cputime(p, &utime, &stime);
1616                prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1617                prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1618        }
1619
1620        prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1621        prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1622}
1623
1624static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1625                       struct mm_struct *mm)
1626{
1627        const struct cred *cred;
1628        unsigned int i, len;
1629        
1630        /* first copy the parameters from user space */
1631        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1632
1633        len = mm->arg_end - mm->arg_start;
1634        if (len >= ELF_PRARGSZ)
1635                len = ELF_PRARGSZ-1;
1636        if (copy_from_user(&psinfo->pr_psargs,
1637                           (const char __user *)mm->arg_start, len))
1638                return -EFAULT;
1639        for (i = 0; i < len; i++)
1640                if (psinfo->pr_psargs[i] == 0)
1641                        psinfo->pr_psargs[i] = ' ';
1642        psinfo->pr_psargs[len] = 0;
1643
1644        rcu_read_lock();
1645        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1646        rcu_read_unlock();
1647        psinfo->pr_pid = task_pid_vnr(p);
1648        psinfo->pr_pgrp = task_pgrp_vnr(p);
1649        psinfo->pr_sid = task_session_vnr(p);
1650
1651        i = p->state ? ffz(~p->state) + 1 : 0;
1652        psinfo->pr_state = i;
1653        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1654        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1655        psinfo->pr_nice = task_nice(p);
1656        psinfo->pr_flag = p->flags;
1657        rcu_read_lock();
1658        cred = __task_cred(p);
1659        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1660        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1661        rcu_read_unlock();
1662        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1663        
1664        return 0;
1665}
1666
1667static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1668{
1669        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1670        int i = 0;
1671        do
1672                i += 2;
1673        while (auxv[i - 2] != AT_NULL);
1674        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1675}
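
/*
 * Editor's illustrative sketch (not part of the kernel source): the saved
 * auxv walked above is the same AT_NULL-terminated (type, value) pair
 * array that userspace can read back from /proc/self/auxv.
 */
#if 0 /* userspace example, for exposition only */
#include <elf.h>
#include <stdio.h>

int main(void)
{
	Elf64_auxv_t aux;
	FILE *f = fopen("/proc/self/auxv", "r");

	if (!f)
		return 1;
	while (fread(&aux, sizeof(aux), 1, f) == 1 && aux.a_type != AT_NULL)
		printf("type %lu value %#lx\n",
		       (unsigned long)aux.a_type,
		       (unsigned long)aux.a_un.a_val);
	fclose(f);
	return 0;
}
#endif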
1676
1677static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1678                const kernel_siginfo_t *siginfo)
1679{
1680        copy_siginfo_to_external(csigdata, siginfo);
1681        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1682}
1683
1684#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1685/*
1686 * Format of NT_FILE note:
1687 *
1688 * long count     -- how many files are mapped
1689 * long page_size -- units for file_ofs
1690 * array of [COUNT] elements of
1691 *   long start
1692 *   long end
1693 *   long file_ofs
1694 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1695 */
1696static int fill_files_note(struct memelfnote *note)
1697{
1698        struct mm_struct *mm = current->mm;
1699        struct vm_area_struct *vma;
1700        unsigned count, size, names_ofs, remaining, n;
1701        user_long_t *data;
1702        user_long_t *start_end_ofs;
1703        char *name_base, *name_curpos;
1704
1705        /* *Estimated* file count and total data size needed */
1706        count = mm->map_count;
1707        if (count > UINT_MAX / 64)
1708                return -EINVAL;
1709        size = count * 64;
1710
1711        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1712 alloc:
1713        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1714                return -EINVAL;
1715        size = round_up(size, PAGE_SIZE);
1716        /*
1717         * "size" can be 0 here legitimately.
1718         * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
1719         */
1720        data = kvmalloc(size, GFP_KERNEL);
1721        if (ZERO_OR_NULL_PTR(data))
1722                return -ENOMEM;
1723
1724        start_end_ofs = data + 2;
1725        name_base = name_curpos = ((char *)data) + names_ofs;
1726        remaining = size - names_ofs;
1727        count = 0;
1728        for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
1729                struct file *file;
1730                const char *filename;
1731
1732                file = vma->vm_file;
1733                if (!file)
1734                        continue;
1735                filename = file_path(file, name_curpos, remaining);
1736                if (IS_ERR(filename)) {
1737                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1738                                kvfree(data);
1739                                size = size * 5 / 4;
1740                                goto alloc;
1741                        }
1742                        continue;
1743                }
1744
1745                /* file_path() fills the name in at the end of the buffer; move it down */
1746                /* n = strlen(filename) + 1: */
1747                n = (name_curpos + remaining) - filename;
1748                remaining = filename - name_curpos;
1749                memmove(name_curpos, filename, n);
1750                name_curpos += n;
1751
1752                *start_end_ofs++ = vma->vm_start;
1753                *start_end_ofs++ = vma->vm_end;
1754                *start_end_ofs++ = vma->vm_pgoff;
1755                count++;
1756        }
1757
1758        /* Now that we know the exact count of files, we can store it. */
1759        data[0] = count;
1760        data[1] = PAGE_SIZE;
1761        /*
1762         * The count is usually less than mm->map_count,
1763         * so we need to move the filenames down.
1764         */
1765        n = mm->map_count - count;
1766        if (n != 0) {
1767                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1768                memmove(name_base - shift_bytes, name_base,
1769                        name_curpos - name_base);
1770                name_curpos -= shift_bytes;
1771        }
1772
1773        size = name_curpos - (char *)data;
1774        fill_note(note, "CORE", NT_FILE, size, data);
1775        return 0;
1776}
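
/*
 * Editor's illustrative sketch (not part of the kernel source): decoding a
 * finished NT_FILE descriptor according to the layout documented above.
 * "desc" is assumed to point at the start of the note descriptor, with
 * 64-bit user_long_t entries.
 */
#if 0 /* userspace example, for exposition only */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void parse_nt_file(const void *desc)
{
	const uint64_t *p = desc;
	uint64_t count = p[0], page_size = p[1];
	const char *name = (const char *)(p + 2 + 3 * count);
	uint64_t i;

	for (i = 0; i < count; i++) {
		const uint64_t *ent = p + 2 + 3 * i;

		/* file_ofs is stored in page_size units. */
		printf("%#llx-%#llx @ %#llx: %s\n",
		       (unsigned long long)ent[0],
		       (unsigned long long)ent[1],
		       (unsigned long long)(ent[2] * page_size),
		       name);
		name += strlen(name) + 1;
	}
}
#endif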
1777
1778#ifdef CORE_DUMP_USE_REGSET
1779#include <linux/regset.h>
1780
1781struct elf_thread_core_info {
1782        struct elf_thread_core_info *next;
1783        struct task_struct *task;
1784        struct elf_prstatus prstatus;
1785        struct memelfnote notes[0];
1786};
1787
1788struct elf_note_info {
1789        struct elf_thread_core_info *thread;
1790        struct memelfnote psinfo;
1791        struct memelfnote signote;
1792        struct memelfnote auxv;
1793        struct memelfnote files;
1794        user_siginfo_t csigdata;
1795        size_t size;
1796        int thread_notes;
1797};
1798
1799/*
1800 * When a regset has a writeback hook, we call it on each thread before
1801 * dumping user memory.  On register window machines, this makes sure the
1802 * user memory backing the register data is up to date before we read it.
1803 */
1804static void do_thread_regset_writeback(struct task_struct *task,
1805                                       const struct user_regset *regset)
1806{
1807        if (regset->writeback)
1808                regset->writeback(task, regset, 1);
1809}
1810
1811#ifndef PRSTATUS_SIZE
1812#define PRSTATUS_SIZE(S, R) sizeof(S)
1813#endif
1814
1815#ifndef SET_PR_FPVALID
1816#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1817#endif
1818
1819static int fill_thread_core_info(struct elf_thread_core_info *t,
1820                                 const struct user_regset_view *view,
1821                                 long signr, size_t *total)
1822{
1823        unsigned int i;
1824        unsigned int regset0_size = regset_size(t->task, &view->regsets[0]);
1825
1826        /*
1827         * NT_PRSTATUS is the one special case, because the regset data
1828         * goes into the pr_reg field inside the note contents, rather
1829         * than being the whole note contents.  We fill the rest in here.
1830         * We assume that regset 0 is NT_PRSTATUS.
1831         */
1832        fill_prstatus(&t->prstatus, t->task, signr);
1833        (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size,
1834                                    &t->prstatus.pr_reg, NULL);
1835
1836        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1837                  PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
1838        *total += notesize(&t->notes[0]);
1839
1840        do_thread_regset_writeback(t->task, &view->regsets[0]);
1841
1842        /*
1843         * Each other regset might generate a note too.  For each regset
1844         * that has no core_note_type or is inactive, we leave t->notes[i]
1845         * all zero and we'll know to skip writing it later.
1846         */
1847        for (i = 1; i < view->n; ++i) {
1848                const struct user_regset *regset = &view->regsets[i];
1849                do_thread_regset_writeback(t->task, regset);
1850                if (regset->core_note_type && regset->get &&
1851                    (!regset->active || regset->active(t->task, regset) > 0)) {
1852                        int ret;
1853                        size_t size = regset_size(t->task, regset);
1854                        void *data = kzalloc(size, GFP_KERNEL);
1855                        if (unlikely(!data))
1856                                return 0;
1857                        ret = regset->get(t->task, regset,
1858                                          0, size, data, NULL);
1859                        if (unlikely(ret))
1860                                kfree(data);
1861                        else {
1862                                if (regset->core_note_type != NT_PRFPREG)
1863                                        fill_note(&t->notes[i], "LINUX",
1864                                                  regset->core_note_type,
1865                                                  size, data);
1866                                else {
1867                                        SET_PR_FPVALID(&t->prstatus,
1868                                                        1, regset0_size);
1869                                        fill_note(&t->notes[i], "CORE",
1870                                                  NT_PRFPREG, size, data);
1871                                }
1872                                *total += notesize(&t->notes[i]);
1873                        }
1874                }
1875        }
1876
1877        return 1;
1878}
1879
1880static int fill_note_info(struct elfhdr *elf, int phdrs,
1881                          struct elf_note_info *info,
1882                          const kernel_siginfo_t *siginfo, struct pt_regs *regs)
1883{
1884        struct task_struct *dump_task = current;
1885        const struct user_regset_view *view = task_user_regset_view(dump_task);
1886        struct elf_thread_core_info *t;
1887        struct elf_prpsinfo *psinfo;
1888        struct core_thread *ct;
1889        unsigned int i;
1890
1891        info->size = 0;
1892        info->thread = NULL;
1893
1894        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1895        if (psinfo == NULL) {
1896                info->psinfo.data = NULL; /* So we don't free this wrongly */
1897                return 0;
1898        }
1899
1900        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1901
1902        /*
1903         * Figure out how many notes we're going to need for each thread.
1904         */
1905        info->thread_notes = 0;
1906        for (i = 0; i < view->n; ++i)
1907                if (view->regsets[i].core_note_type != 0)
1908                        ++info->thread_notes;
1909
1910        /*
1911         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1912         * since it is our one special case.
1913         */
1914        if (unlikely(info->thread_notes == 0) ||
1915            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1916                WARN_ON(1);
1917                return 0;
1918        }
1919
1920        /*
1921         * Initialize the ELF file header.
1922         */
1923        fill_elf_header(elf, phdrs,
1924                        view->e_machine, view->e_flags);
1925
1926        /*
1927         * Allocate a structure for each thread.
1928         */
1929        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1930                t = kzalloc(offsetof(struct elf_thread_core_info,
1931                                     notes[info->thread_notes]),
1932                            GFP_KERNEL);
1933                if (unlikely(!t))
1934                        return 0;
1935
1936                t->task = ct->task;
1937                if (ct->task == dump_task || !info->thread) {
1938                        t->next = info->thread;
1939                        info->thread = t;
1940                } else {
1941                        /*
1942                         * Make sure to keep the original task at
1943                         * the head of the list.
1944                         */
1945                        t->next = info->thread->next;
1946                        info->thread->next = t;
1947                }
1948        }
1949
1950        /*
1951         * Now fill in each thread's information.
1952         */
1953        for (t = info->thread; t != NULL; t = t->next)
1954                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1955                        return 0;
1956
1957        /*
1958         * Fill in the two process-wide notes.
1959         */
1960        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1961        info->size += notesize(&info->psinfo);
1962
1963        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1964        info->size += notesize(&info->signote);
1965
1966        fill_auxv_note(&info->auxv, current->mm);
1967        info->size += notesize(&info->auxv);
1968
1969        if (fill_files_note(&info->files) == 0)
1970                info->size += notesize(&info->files);
1971
1972        return 1;
1973}
1974
1975static size_t get_note_info_size(struct elf_note_info *info)
1976{
1977        return info->size;
1978}
1979
1980/*
1981 * Write all the notes for each thread.  When writing the first thread, the
1982 * process-wide notes are interleaved after the first thread-specific note.
1983 */
1984static int write_note_info(struct elf_note_info *info,
1985                           struct coredump_params *cprm)
1986{
1987        bool first = true;
1988        struct elf_thread_core_info *t = info->thread;
1989
1990        do {
1991                int i;
1992
1993                if (!writenote(&t->notes[0], cprm))
1994                        return 0;
1995
1996                if (first && !writenote(&info->psinfo, cprm))
1997                        return 0;
1998                if (first && !writenote(&info->signote, cprm))
1999                        return 0;
2000                if (first && !writenote(&info->auxv, cprm))
2001                        return 0;
2002                if (first && info->files.data &&
2003                                !writenote(&info->files, cprm))
2004                        return 0;
2005
2006                for (i = 1; i < info->thread_notes; ++i)
2007                        if (t->notes[i].data &&
2008                            !writenote(&t->notes[i], cprm))
2009                                return 0;
2010
2011                first = false;
2012                t = t->next;
2013        } while (t);
2014
2015        return 1;
2016}
2017
2018static void free_note_info(struct elf_note_info *info)
2019{
2020        struct elf_thread_core_info *threads = info->thread;
2021        while (threads) {
2022                unsigned int i;
2023                struct elf_thread_core_info *t = threads;
2024                threads = t->next;
2025                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
2026                for (i = 1; i < info->thread_notes; ++i)
2027                        kfree(t->notes[i].data);
2028                kfree(t);
2029        }
2030        kfree(info->psinfo.data);
2031        kvfree(info->files.data);
2032}
2033
2034#else
2035
2036/* Here is the structure in which the status of each thread is captured. */
2037struct elf_thread_status
2038{
2039        struct list_head list;
2040        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
2041        elf_fpregset_t fpu;             /* NT_PRFPREG */
2042        struct task_struct *thread;
2043#ifdef ELF_CORE_COPY_XFPREGS
2044        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
2045#endif
2046        struct memelfnote notes[3];
2047        int num_notes;
2048};
2049
2050/*
2051 * In order to add the per-thread information for the ELF file format,
2052 * we need to keep a linked list of every thread's pr_status and then create
2053 * a single section for them in the final core file.
2054 */
2055static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
2056{
2057        int sz = 0;
2058        struct task_struct *p = t->thread;
2059        t->num_notes = 0;
2060
2061        fill_prstatus(&t->prstatus, p, signr);
2062        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
2063        
2064        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
2065                  &(t->prstatus));
2066        t->num_notes++;
2067        sz += notesize(&t->notes[0]);
2068
2069        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
2070                                                                &t->fpu))) {
2071                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
2072                          &(t->fpu));
2073                t->num_notes++;
2074                sz += notesize(&t->notes[1]);
2075        }
2076
2077#ifdef ELF_CORE_COPY_XFPREGS
2078        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
2079                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
2080                          sizeof(t->xfpu), &t->xfpu);
2081                t->num_notes++;
2082                sz += notesize(&t->notes[2]);
2083        }
2084#endif  
2085        return sz;
2086}
2087
2088struct elf_note_info {
2089        struct memelfnote *notes;
2090        struct memelfnote *notes_files;
2091        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
2092        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
2093        struct list_head thread_list;
2094        elf_fpregset_t *fpu;
2095#ifdef ELF_CORE_COPY_XFPREGS
2096        elf_fpxregset_t *xfpu;
2097#endif
2098        user_siginfo_t csigdata;
2099        int thread_status_size;
2100        int numnote;
2101};
2102
2103static int elf_note_info_init(struct elf_note_info *info)
2104{
2105        memset(info, 0, sizeof(*info));
2106        INIT_LIST_HEAD(&info->thread_list);
2107
2108        /* Allocate space for ELF notes */
2109        info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
2110        if (!info->notes)
2111                return 0;
2112        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
2113        if (!info->psinfo)
2114                return 0;
2115        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
2116        if (!info->prstatus)
2117                return 0;
2118        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2119        if (!info->fpu)
2120                return 0;
2121#ifdef ELF_CORE_COPY_XFPREGS
2122        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
2123        if (!info->xfpu)
2124                return 0;
2125#endif
2126        return 1;
2127}
2128
2129static int fill_note_info(struct elfhdr *elf, int phdrs,
2130                          struct elf_note_info *info,
2131                          const kernel_siginfo_t *siginfo, struct pt_regs *regs)
2132{
2133        struct core_thread *ct;
2134        struct elf_thread_status *ets;
2135
2136        if (!elf_note_info_init(info))
2137                return 0;
2138
2139        for (ct = current->mm->core_state->dumper.next;
2140                                        ct; ct = ct->next) {
2141                ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2142                if (!ets)
2143                        return 0;
2144
2145                ets->thread = ct->task;
2146                list_add(&ets->list, &info->thread_list);
2147        }
2148
2149        list_for_each_entry(ets, &info->thread_list, list) {
2150                int sz;
2151
2152                sz = elf_dump_thread_status(siginfo->si_signo, ets);
2153                info->thread_status_size += sz;
2154        }
2155        /* Now collect the dump for the current task. */
2156        memset(info->prstatus, 0, sizeof(*info->prstatus));
2157        fill_prstatus(info->prstatus, current, siginfo->si_signo);
2158        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2159
2160        /* Set up header */
2161        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2162
2163        /*
2164         * Set up the notes in similar form to SVR4 core dumps made
2165         * with info from their /proc.
2166         */
2167
2168        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2169                  sizeof(*info->prstatus), info->prstatus);
2170        fill_psinfo(info->psinfo, current->group_leader, current->mm);
2171        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2172                  sizeof(*info->psinfo), info->psinfo);
2173
2174        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2175        fill_auxv_note(info->notes + 3, current->mm);
2176        info->numnote = 4;
2177
2178        if (fill_files_note(info->notes + info->numnote) == 0) {
2179                info->notes_files = info->notes + info->numnote;
2180                info->numnote++;
2181        }
2182
2183        /* Try to dump the FPU. */
2184        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2185                                                               info->fpu);
2186        if (info->prstatus->pr_fpvalid)
2187                fill_note(info->notes + info->numnote++,
2188                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2189#ifdef ELF_CORE_COPY_XFPREGS
2190        if (elf_core_copy_task_xfpregs(current, info->xfpu))
2191                fill_note(info->notes + info->numnote++,
2192                          "LINUX", ELF_CORE_XFPREG_TYPE,
2193                          sizeof(*info->xfpu), info->xfpu);
2194#endif
2195
2196        return 1;
2197}
2198
2199static size_t get_note_info_size(struct elf_note_info *info)
2200{
2201        int sz = 0;
2202        int i;
2203
2204        for (i = 0; i < info->numnote; i++)
2205                sz += notesize(info->notes + i);
2206
2207        sz += info->thread_status_size;
2208
2209        return sz;
2210}
2211
2212static int write_note_info(struct elf_note_info *info,
2213                           struct coredump_params *cprm)
2214{
2215        struct elf_thread_status *ets;
2216        int i;
2217
2218        for (i = 0; i < info->numnote; i++)
2219                if (!writenote(info->notes + i, cprm))
2220                        return 0;
2221
2222        /* write out the thread status notes section */
2223        list_for_each_entry(ets, &info->thread_list, list) {
2224                for (i = 0; i < ets->num_notes; i++)
2225                        if (!writenote(&ets->notes[i], cprm))
2226                                return 0;
2227        }
2228
2229        return 1;
2230}
2231
2232static void free_note_info(struct elf_note_info *info)
2233{
2234        while (!list_empty(&info->thread_list)) {
2235                struct list_head *tmp = info->thread_list.next;
2236                list_del(tmp);
2237                kfree(list_entry(tmp, struct elf_thread_status, list));
2238        }
2239
2240        /* Free data possibly allocated by fill_files_note(): */
2241        if (info->notes_files)
2242                kvfree(info->notes_files->data);
2243
2244        kfree(info->prstatus);
2245        kfree(info->psinfo);
2246        kfree(info->notes);
2247        kfree(info->fpu);
2248#ifdef ELF_CORE_COPY_XFPREGS
2249        kfree(info->xfpu);
2250#endif
2251}
2252
2253#endif
2254
2255static struct vm_area_struct *first_vma(struct task_struct *tsk,
2256                                        struct vm_area_struct *gate_vma)
2257{
2258        struct vm_area_struct *ret = tsk->mm->mmap;
2259
2260        if (ret)
2261                return ret;
2262        return gate_vma;
2263}
2264/*
2265 * Helper function for iterating across a vma list.  It ensures that the caller
2266 * will visit `gate_vma' prior to terminating the search.
2267 */
2268static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2269                                        struct vm_area_struct *gate_vma)
2270{
2271        struct vm_area_struct *ret;
2272
2273        ret = this_vma->vm_next;
2274        if (ret)
2275                return ret;
2276        if (this_vma == gate_vma)
2277                return NULL;
2278        return gate_vma;
2279}
2280
2281static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2282                             elf_addr_t e_shoff, int segs)
2283{
2284        elf->e_shoff = e_shoff;
2285        elf->e_shentsize = sizeof(*shdr4extnum);
2286        elf->e_shnum = 1;
2287        elf->e_shstrndx = SHN_UNDEF;
2288
2289        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2290
2291        shdr4extnum->sh_type = SHT_NULL;
2292        shdr4extnum->sh_size = elf->e_shnum;
2293        shdr4extnum->sh_link = elf->e_shstrndx;
2294        shdr4extnum->sh_info = segs;
2295}
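
/*
 * Editor's illustrative sketch (not part of the kernel source): a reader
 * undoes the extended-numbering scheme set up above by consulting section
 * header 0 whenever e_phnum holds the PN_XNUM escape value.
 */
#if 0 /* userspace example, for exposition only */
#include <elf.h>

static unsigned int real_phnum(const Elf64_Ehdr *ehdr,
			       const Elf64_Shdr *shdr0)
{
	if (ehdr->e_phnum != PN_XNUM)
		return ehdr->e_phnum;
	/* fill_extnum_info() stored the true segment count in sh_info. */
	return shdr0->sh_info;
}
#endif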
2296
2297/*
2298 * Actual dumper
2299 *
2300 * This is a two-pass process; first we find the offsets of the bits,
2301 * and then they are actually written out.  If we run out of core limit
2302 * we just truncate.
2303 */
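
/*
 * Editor's note - the resulting core file is laid out as:
 *
 *	ELF header
 *	program headers (one PT_NOTE, one PT_LOAD per dumped vma,
 *			 plus any arch-specific extra phdrs)
 *	note data
 *	padding up to ELF_EXEC_PAGESIZE
 *	vma contents, emitted page by page
 *	arch-specific extra data, if any
 *	extra section header (only when e_phnum == PN_XNUM)
 */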
2304static int elf_core_dump(struct coredump_params *cprm)
2305{
2306        int has_dumped = 0;
2307        int segs, i;
2308        size_t vma_data_size = 0;
2309        struct vm_area_struct *vma, *gate_vma;
2310        struct elfhdr elf;
2311        loff_t offset = 0, dataoff;
2312        struct elf_note_info info = { };
2313        struct elf_phdr *phdr4note = NULL;
2314        struct elf_shdr *shdr4extnum = NULL;
2315        Elf_Half e_phnum;
2316        elf_addr_t e_shoff;
2317        elf_addr_t *vma_filesz = NULL;
2318
2319        /*
2320         * We no longer stop all VM operations.
2321         * 
2322         * This is because those processes that could possibly change map_count
2323         * or the mmap / vma pages are now blocked in do_exit on current
2324         * finishing this core dump.
2325         *
2326         * Only ptrace can touch these memory addresses, but it doesn't change
2327         * the map_count or the pages allocated. So no possibility of crashing
2328         * exists while dumping the mm->vm_next areas to the core file.
2329         */
2330  
2331        /*
2332         * The number of segs is recorded in the ELF header as a 16-bit value.
2333         * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2334         */
2335        segs = current->mm->map_count;
2336        segs += elf_core_extra_phdrs();
2337
2338        gate_vma = get_gate_vma(current->mm);
2339        if (gate_vma != NULL)
2340                segs++;
2341
2342        /* for notes section */
2343        segs++;
2344
2345        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2346         * this, the kernel supports extended numbering. Have a look at
2347         * include/linux/elf.h for further information. */
2348        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2349
2350        /*
2351         * Collect all the non-memory information about the process for the
2352         * notes.  This also sets up the file header.
2353         */
2354        if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2355                goto end_coredump;
2356
2357        has_dumped = 1;
2358
2359        offset += sizeof(elf);                          /* Elf header */
2360        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2361
2362        /* Write notes phdr entry */
2363        {
2364                size_t sz = get_note_info_size(&info);
2365
2366                sz += elf_coredump_extra_notes_size();
2367
2368                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2369                if (!phdr4note)
2370                        goto end_coredump;
2371
2372                fill_elf_note_phdr(phdr4note, sz, offset);
2373                offset += sz;
2374        }
2375
2376        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2377
2378        /*
2379         * A process with zero VMAs will get ZERO_SIZE_PTR here.
2380         * Let the coredump continue, for the register state at least.
2381         */
2382        vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
2383                              GFP_KERNEL);
2384        if (!vma_filesz)
2385                goto end_coredump;
2386
2387        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2388                        vma = next_vma(vma, gate_vma)) {
2389                unsigned long dump_size;
2390
2391                dump_size = vma_dump_size(vma, cprm->mm_flags);
2392                vma_filesz[i++] = dump_size;
2393                vma_data_size += dump_size;
2394        }
2395
2396        offset += vma_data_size;
2397        offset += elf_core_extra_data_size();
2398        e_shoff = offset;
2399
2400        if (e_phnum == PN_XNUM) {
2401                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2402                if (!shdr4extnum)
2403                        goto end_coredump;
2404                fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2405        }
2406
2407        offset = dataoff;
2408
2409        if (!dump_emit(cprm, &elf, sizeof(elf)))
2410                goto end_coredump;
2411
2412        if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2413                goto end_coredump;
2414
2415        /* Write program headers for segments dump */
2416        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2417                        vma = next_vma(vma, gate_vma)) {
2418                struct elf_phdr phdr;
2419
2420                phdr.p_type = PT_LOAD;
2421                phdr.p_offset = offset;
2422                phdr.p_vaddr = vma->vm_start;
2423                phdr.p_paddr = 0;
2424                phdr.p_filesz = vma_filesz[i++];
2425                phdr.p_memsz = vma->vm_end - vma->vm_start;
2426                offset += phdr.p_filesz;
2427                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2428                if (vma->vm_flags & VM_WRITE)
2429                        phdr.p_flags |= PF_W;
2430                if (vma->vm_flags & VM_EXEC)
2431                        phdr.p_flags |= PF_X;
2432                phdr.p_align = ELF_EXEC_PAGESIZE;
2433
2434                if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2435                        goto end_coredump;
2436        }
2437
2438        if (!elf_core_write_extra_phdrs(cprm, offset))
2439                goto end_coredump;
2440
2441        /* write out the notes section */
2442        if (!write_note_info(&info, cprm))
2443                goto end_coredump;
2444
2445        if (elf_coredump_extra_notes_write(cprm))
2446                goto end_coredump;
2447
2448        /* Align to page */
2449        if (!dump_skip(cprm, dataoff - cprm->pos))
2450                goto end_coredump;
2451
2452        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2453                        vma = next_vma(vma, gate_vma)) {
2454                unsigned long addr;
2455                unsigned long end;
2456
2457                end = vma->vm_start + vma_filesz[i++];
2458
2459                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2460                        struct page *page;
2461                        int stop;
2462
2463                        page = get_dump_page(addr);
2464                        if (page) {
2465                                void *kaddr = kmap(page);
2466                                stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2467                                kunmap(page);
2468                                put_page(page);
2469                        } else
2470                                stop = !dump_skip(cprm, PAGE_SIZE);
2471                        if (stop)
2472                                goto end_coredump;
2473                }
2474        }
2475        dump_truncate(cprm);
2476
2477        if (!elf_core_write_extra_data(cprm))
2478                goto end_coredump;
2479
2480        if (e_phnum == PN_XNUM) {
2481                if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2482                        goto end_coredump;
2483        }
2484
2485end_coredump:
2486        free_note_info(&info);
2487        kfree(shdr4extnum);
2488        kvfree(vma_filesz);
2489        kfree(phdr4note);
2490        return has_dumped;
2491}
2492
2493#endif          /* CONFIG_ELF_CORE */
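
/*
 * Editor's illustrative sketch (not part of the kernel source): the dumper
 * above only runs when RLIMIT_CORE permits it.  A minimal way to exercise
 * it (the soft limit can only be raised up to the prevailing hard limit):
 */
#if 0 /* userspace example, for exposition only */
#include <stdlib.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };

	setrlimit(RLIMIT_CORE, &rl);	/* allow an unlimited core file */
	abort();			/* SIGABRT triggers elf_core_dump() */
}
#endif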
2494
2495static int __init init_elf_binfmt(void)
2496{
2497        register_binfmt(&elf_format);
2498        return 0;
2499}
2500
2501static void __exit exit_elf_binfmt(void)
2502{
2503        /* Remove the ELF loader. */
2504        unregister_binfmt(&elf_format);
2505}
2506
2507core_initcall(init_elf_binfmt);
2508module_exit(exit_elf_binfmt);
2509MODULE_LICENSE("GPL");
2510