// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef ELF_COMPAT
#define ELF_COMPAT 0
#endif

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
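/*
 * Worked example (editorial, assuming ELF_MIN_ALIGN == 4096):
 *   ELF_PAGESTART(0x12345)  == 0x12000  (round down to page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345    (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000  (round up to page boundary)
 */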

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))

static int set_brk(unsigned long start, unsigned long end, int prot)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                /*
                 * Map the last of the bss segment.
                 * If the header is requesting these pages to be
                 * executable, honour that (ppc32 needs this).
                 */
                int error = vm_brk_flags(start, end - start,
                                prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        return error;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain the junk from the file that should not
   be in memory
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
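/*
 * Editorial illustration (4 KiB pages): if a segment's file-backed data
 * ends at elf_bss == 0x0804a123, the remaining 0xedd bytes of that page
 * still hold bytes mapped from the file and must be zeroed so they do
 * not leak stale file contents into what should be zero-filled bss.
 */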

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
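/*
 * Editorial note: on the common grow-down stacks, STACK_ALLOC() moves sp
 * down by len and returns the new, lower address; with
 * CONFIG_STACK_GROWSUP it returns the old address and advances sp past
 * the allocation. STACK_ROUND() then keeps the final stack pointer
 * 16-byte aligned, which most ABIs require at process entry.
 */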

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr,
                unsigned long e_entry)
{
        struct mm_struct *mm = current->mm;
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        elf_addr_t flags = 0;
        int ei_index;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                *elf_info++ = id; \
                *elf_info++ = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
                flags |= AT_FLAGS_PRESERVE_ARGV0;
        NEW_AUX_ENT(AT_FLAGS, flags);
        NEW_AUX_ENT(AT_ENTRY, e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->have_execfd) {
                NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(elf_info, 0, (char *)mm->saved_auxv +
                        sizeof(mm->saved_auxv) - (char *)elf_info);

        /* And advance past the AT_NULL entry.  */
        elf_info += 2;

        ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        if (mmap_read_lock_killable(mm))
                return -EINTR;
        vma = find_extend_vma(mm, bprm->p);
        mmap_read_unlock(mm);
        if (!vma)
                return -EFAULT;

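        /*
         * Editorial sketch of the block built below (grow-down layout,
         * lowest address first):
         *
         *   sp -> argc
         *         argv[0] ... argv[argc - 1], NULL
         *         envp[0] ... envp[envc - 1], NULL
         *         auxv pairs ... terminated by AT_NULL
         *   (the argv/envp/platform/random strings live higher up)
         */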
        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (put_user(argc, sp++))
                return -EFAULT;

        /* Populate list of argv pointers back to argv strings. */
        p = mm->arg_end = mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (put_user(0, sp++))
                return -EFAULT;
        mm->arg_end = p;

        /* Populate list of envp pointers back to envp strings. */
        mm->env_end = mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (put_user(0, sp++))
                return -EFAULT;
        mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

static unsigned long elf_map(struct file *filep, unsigned long addr,
                const struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        if ((type & MAP_FIXED_NOREPLACE) &&
            PTR_ERR((void *)map_addr) == -EEXIST)
                pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
                        task_pid_nr(current), current->comm, (void *)addr);

        return(map_addr);
}
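/*
 * Worked example (editorial, 4 KiB pages): a segment with p_vaddr 0x400123,
 * p_offset 0x2123 and p_filesz 0x1000 is mapped with addr = 0x400000,
 * off = 0x2000 and size = ELF_PAGEALIGN(0x1000 + 0x123) = 0x2000; vaddr
 * and file offset are rounded down together so they stay congruent
 * modulo the page size, as mmap() requires for file mappings.
 */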

static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
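/*
 * Editorial example: for PT_LOAD headers at vaddr 0x0 (memsz 0x1000) and
 * vaddr 0x200000 (memsz 0x500), this returns 0x200500 - the whole span
 * from the page start of the first segment to the end of the last one,
 * including any holes between them.
 */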

static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
{
        ssize_t rv;

        rv = kernel_read(file, buf, len, &pos);
        if (unlikely(rv != len)) {
                return (rv < 0) ? rv : -EIO;
        }
        return 0;
}

static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
{
        unsigned long alignment = 0;
        int i;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        unsigned long p_align = cmds[i].p_align;

                        /* skip non-power of two alignments as invalid */
                        if (!is_power_of_2(p_align))
                                continue;
                        alignment = max(alignment, p_align);
                }
        }

        /* ensure we align to at least one page */
        return ELF_PAGEALIGN(alignment);
}
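/*
 * Editorial note: the value computed here is later used to mask the
 * randomized load_bias for PIE binaries, so an executable linked with a
 * large p_align (e.g. 2 MiB, for hugepage-friendly mappings) still lands
 * on a correspondingly aligned address after ASLR.
 */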

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
                                       struct file *elf_file)
{
        struct elf_phdr *elf_phdata = NULL;
        int retval, err = -1;
        unsigned int size;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;

        /* Sanity check the number of program headers... */
        /* ...and their total size. */
        size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
        if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
                goto out;

        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        /* Read in the program headers */
        retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
        if (retval < 0) {
                err = retval;
                goto out;
        }

        /* Success! */
        err = 0;
out:
        if (err) {
                kfree(elf_phdata);
                elf_phdata = NULL;
        }
        return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:       The main ELF header
 * @phdr:       The program header to check
 * @elf:        The open ELF file
 * @is_interp:  True if the phdr is from the interpreter of the ELF being
 *              loaded, else false.
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
                                   struct elf_phdr *phdr,
                                   struct file *elf, bool is_interp,
                                   struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:       The main ELF header
 * @has_interp: True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
                                 struct elfhdr *interp_ehdr,
                                 struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
                            bool has_interp, bool is_interp)
{
        int prot = 0;

        if (p_flags & PF_R)
                prot |= PROT_READ;
        if (p_flags & PF_W)
                prot |= PROT_WRITE;
        if (p_flags & PF_X)
                prot |= PROT_EXEC;

        return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter,
                unsigned long no_base, struct elf_phdr *interp_elf_phdata,
                struct arch_elf_state *arch_state)
{
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        int bss_prot = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int i;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex) ||
            elf_check_fdpic(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        total_size = total_mapping_size(interp_elf_phdata,
                                        interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out;
        }

        eppnt = interp_elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE;
                        int elf_prot = make_prot(eppnt->p_flags, arch_state,
                                                 true, true);
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
                        if (k > last_bss) {
                                last_bss = k;
                                bss_prot = elf_prot;
                        }
                }
        }

        /*
         * Now fill out the bss section: first pad the last page from
         * the file up to the page boundary, and zero it from elf_bss
         * up to the end of the page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out;
        }
        /*
         * Next, align both the file and mem bss up to the page size,
         * since this is where elf_bss was just zeroed up to, and where
         * last_bss will end after the vm_brk_flags() below.
         */
        elf_bss = ELF_PAGEALIGN(elf_bss);
        last_bss = ELF_PAGEALIGN(last_bss);
        /* Finally, if there is still more bss to allocate, do it. */
        if (last_bss > elf_bss) {
                error = vm_brk_flags(elf_bss, last_bss - elf_bss,
                                bss_prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        goto out;
        }

        error = load_addr;
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

static int parse_elf_property(const char *data, size_t *off, size_t datasz,
                              struct arch_elf_state *arch,
                              bool have_prev_type, u32 *prev_type)
{
        size_t o, step;
        const struct gnu_property *pr;
        int ret;

        if (*off == datasz)
                return -ENOENT;

        if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
                return -EIO;
        o = *off;
        datasz -= *off;

        if (datasz < sizeof(*pr))
                return -ENOEXEC;
        pr = (const struct gnu_property *)(data + o);
        o += sizeof(*pr);
        datasz -= sizeof(*pr);

        if (pr->pr_datasz > datasz)
                return -ENOEXEC;

        WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
        step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
        if (step > datasz)
                return -ENOEXEC;

        /* Properties are supposed to be unique and sorted on pr_type: */
        if (have_prev_type && pr->pr_type <= *prev_type)
                return -ENOEXEC;
        *prev_type = pr->pr_type;

        ret = arch_parse_elf_property(pr->pr_type, data + o,
                                      pr->pr_datasz, ELF_COMPAT, arch);
        if (ret)
                return ret;

        *off = o + step;
        return 0;
}

#define NOTE_DATA_SZ SZ_1K
#define GNU_PROPERTY_TYPE_0_NAME "GNU"
#define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))
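/*
 * Editorial sketch of the PT_GNU_PROPERTY note parsed below:
 *
 *   Elf_Nhdr { n_namesz = 4, n_descsz, n_type = NT_GNU_PROPERTY_TYPE_0 }
 *   "GNU\0"                          (name, padded to alignment)
 *   { pr_type, pr_datasz, data... }  (one or more gnu_property entries,
 *                                     each padded to ELF_GNU_PROPERTY_ALIGN)
 */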

static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
                                struct arch_elf_state *arch)
{
        union {
                struct elf_note nhdr;
                char data[NOTE_DATA_SZ];
        } note;
        loff_t pos;
        ssize_t n;
        size_t off, datasz;
        int ret;
        bool have_prev_type;
        u32 prev_type;

        if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
                return 0;

        /* load_elf_binary() shouldn't call us unless this is true... */
        if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
                return -ENOEXEC;

        /* If the properties are crazy large, that's too bad (for now): */
        if (phdr->p_filesz > sizeof(note))
                return -ENOEXEC;

        pos = phdr->p_offset;
        n = kernel_read(f, &note, phdr->p_filesz, &pos);

        BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
        if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
                return -EIO;

        if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
            note.nhdr.n_namesz != NOTE_NAME_SZ ||
            strncmp(note.data + sizeof(note.nhdr),
                    GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
                return -ENOEXEC;

        off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
                       ELF_GNU_PROPERTY_ALIGN);
        if (off > n)
                return -ENOEXEC;

        if (note.nhdr.n_descsz > n - off)
                return -ENOEXEC;
        datasz = off + note.nhdr.n_descsz;

        have_prev_type = false;
        do {
                ret = parse_elf_property(note.data, &off, datasz, arch,
                                         have_prev_type, &prev_type);
                have_prev_type = true;
        } while (!ret);

        return ret == -ENOENT ? 0 : ret;
}

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
        struct elf_phdr *elf_property_phdata = NULL;
        unsigned long elf_bss, elf_brk;
        int bss_prot = 0;
        int retval, i;
        unsigned long elf_entry;
        unsigned long e_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
        struct elfhdr *interp_elf_ex = NULL;
        struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
        struct mm_struct *mm;
        struct pt_regs *regs;

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(elf_ex))
                goto out;
        if (elf_check_fdpic(elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
        if (!elf_phdata)
                goto out;

        elf_ppnt = elf_phdata;
        for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
                char *elf_interpreter;

                if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
                        elf_property_phdata = elf_ppnt;
                        continue;
                }

                if (elf_ppnt->p_type != PT_INTERP)
                        continue;

                /*
                 * This is the program interpreter used for shared libraries -
                 * for now assume that this is an a.out format binary.
                 */
                retval = -ENOEXEC;
                if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
                        goto out_free_ph;

                retval = -ENOMEM;
                elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
                if (!elf_interpreter)
                        goto out_free_ph;

                retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
                                  elf_ppnt->p_offset);
                if (retval < 0)
                        goto out_free_interp;
                /* make sure path is NULL terminated */
                retval = -ENOEXEC;
                if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                        goto out_free_interp;

                interpreter = open_exec(elf_interpreter);
                kfree(elf_interpreter);
                retval = PTR_ERR(interpreter);
                if (IS_ERR(interpreter))
                        goto out_free_ph;

                /*
                 * If the binary is not readable then enforce mm->dumpable = 0
                 * regardless of the interpreter's permissions.
                 */
                would_dump(bprm, interpreter);

                interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
                if (!interp_elf_ex) {
                        retval = -ENOMEM;
                        goto out_free_ph;
                }

                /* Get the exec headers */
                retval = elf_read(interpreter, interp_elf_ex,
                                  sizeof(*interp_elf_ex), 0);
                if (retval < 0)
                        goto out_free_dentry;

                break;

out_free_interp:
                kfree(elf_interpreter);
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
                switch (elf_ppnt->p_type) {
                case PT_GNU_STACK:
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;

                case PT_LOPROC ... PT_HIPROC:
                        retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
                                                  bprm->file, false,
                                                  &arch_state);
                        if (retval)
                                goto out_free_dentry;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(interp_elf_ex) ||
                    elf_check_fdpic(interp_elf_ex))
                        goto out_free_dentry;

                /* Load the interpreter program headers */
                interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
                                                   interpreter);
                if (!interp_elf_phdata)
                        goto out_free_dentry;

                /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
                elf_property_phdata = NULL;
                elf_ppnt = interp_elf_phdata;
                for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
                        switch (elf_ppnt->p_type) {
                        case PT_GNU_PROPERTY:
                                elf_property_phdata = elf_ppnt;
                                break;

                        case PT_LOPROC ... PT_HIPROC:
                                retval = arch_elf_pt_proc(interp_elf_ex,
                                                          elf_ppnt, interpreter,
                                                          true, &arch_state);
                                if (retval)
                                        goto out_free_dentry;
                                break;
                        }
        }

        retval = parse_elf_properties(interpreter ?: bprm->file,
                                      elf_property_phdata, &arch_state);
        if (retval)
                goto out_free_dentry;

        /*
         * Allow arch code to reject the ELF at this point, whilst it's
         * still possible to return an error to the code that invoked
         * the exec syscall.
         */
        retval = arch_check_elf(elf_ex,
                                !!interpreter, interp_elf_ex,
                                &arch_state);
        if (retval)
                goto out_free_dentry;

        /* Flush all traces of the currently running executable */
        retval = begin_new_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY2(*elf_ex, &arch_state);
        if (elf_read_implies_exec(*elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0)
                goto out_free_dentry;

        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < elf_ex->e_phnum; i++, elf_ppnt++) {
                int elf_prot, elf_flags;
                unsigned long k, vaddr;
                unsigned long total_size = 0;
                unsigned long alignment;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias,
                                         bss_prot);
                        if (retval)
                                goto out_free_dentry;
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
                                     !!interpreter, false);

                elf_flags = MAP_PRIVATE;

                vaddr = elf_ppnt->p_vaddr;
                /*
                 * If we are loading ET_EXEC or we have already performed
                 * the ET_DYN load_addr calculations, proceed normally.
                 */
                if (elf_ex->e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (elf_ex->e_type == ET_DYN) {
                        /*
                         * This logic is run once for the first LOAD Program
                         * Header for ET_DYN binaries to calculate the
                         * randomization (load_bias) for all the LOAD
                         * Program Headers, and to calculate the entire
                         * size of the ELF mapping (total_size). (Note that
                         * load_addr_set is set to true later once the
                         * initial mapping is performed.)
                         *
                         * There are effectively two types of ET_DYN
                         * binaries: programs (i.e. PIE: ET_DYN with INTERP)
                         * and loaders (ET_DYN without INTERP, since they
                         * _are_ the ELF interpreter). The loaders must
                         * be loaded away from programs since the program
                         * may otherwise collide with the loader (especially
                         * for ET_EXEC which does not have a randomized
                         * position). For example to handle invocations of
                         * "./ld.so someprog" to test out a new version of
                         * the loader, the subsequent program that the
                         * loader loads must avoid the loader itself, so
                         * they cannot share the same load range. Sufficient
                         * room for the brk must be allocated with the
                         * loader as well, since brk must be available with
                         * the loader.
                         *
                         * Therefore, programs are loaded offset from
                         * ELF_ET_DYN_BASE and loaders are loaded into the
                         * independently randomized mmap region (0 load_bias
                         * without MAP_FIXED).
                         */
                        if (interpreter) {
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
                                alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
                                if (alignment)
                                        load_bias &= ~(alignment - 1);
                                elf_flags |= MAP_FIXED;
                        } else
                                load_bias = 0;

                        /*
                         * Since load_bias is used for all subsequent loading
                         * calculations, we must lower it by the first vaddr
                         * so that the remaining calculations based on the
                         * ELF vaddrs will be correctly offset. The result
                         * is then page aligned.
                         */
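                        /*
                         * Editorial example: with load_bias 0x555555554000
                         * and a first PT_LOAD vaddr of 0x1000, the line
                         * below yields 0x555555553000, so that load_bias +
                         * vaddr reproduces the intended placement for every
                         * segment.
                         */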
                        load_bias = ELF_PAGESTART(load_bias - vaddr);

                        total_size = total_mapping_size(elf_phdata,
                                                        elf_ex->e_phnum);
                        if (!total_size) {
                                retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (elf_ex->e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if ((elf_ppnt->p_flags & PF_X) && k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk) {
                        bss_prot = elf_prot;
                        elf_brk = k;
                }
        }

        e_entry = elf_ex->e_entry + load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk, bss_prot);
        if (retval)
                goto out_free_dentry;
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (interpreter) {
                elf_entry = load_elf_interp(interp_elf_ex,
                                            interpreter,
                                            load_bias, interp_elf_phdata,
                                            &arch_state);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += interp_elf_ex->e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);

                kfree(interp_elf_ex);
                kfree(interp_elf_phdata);
        } else {
                elf_entry = e_entry;
                if (BAD_ADDR(elf_entry)) {
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
        if (retval < 0)
                goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        retval = create_elf_tables(bprm, elf_ex,
                          load_addr, interp_load_addr, e_entry);
        if (retval < 0)
                goto out;

        mm = current->mm;
        mm->end_code = end_code;
        mm->start_code = start_code;
        mm->start_data = start_data;
        mm->end_data = end_data;
        mm->start_stack = bprm->p;

        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                /*
                 * For architectures with ELF randomization, when executing
                 * a loader directly (i.e. no interpreter listed in ELF
                 * headers), move the brk area out of the mmap region
                 * (since it grows up, and may collide early with the stack
                 * growing down), and into the unused ELF_ET_DYN_BASE region.
                 */
                if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
                    elf_ex->e_type == ET_DYN && !interpreter) {
                        mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
                }

                mm->brk = mm->start_brk = arch_randomize_brk(mm);
#ifdef compat_brk_randomized
                current->brk_randomized = 1;
#endif
        }

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

        regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        finalize_exec(bprm);
        START_THREAD(elf_ex, regs, elf_entry, bprm->p);
        retval = 0;
out:
        return retval;

        /* error cleanup */
out_free_dentry:
        kfree(interp_elf_ex);
        kfree(interp_elf_phdata);
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}
1329
1330#ifdef CONFIG_USELIB
1331/* This is really simpleminded and specialized - we are loading an
1332   a.out library that is given an ELF header. */
1333static int load_elf_library(struct file *file)
1334{
1335        struct elf_phdr *elf_phdata;
1336        struct elf_phdr *eppnt;
1337        unsigned long elf_bss, bss, len;
1338        int retval, error, i, j;
1339        struct elfhdr elf_ex;
1340
1341        error = -ENOEXEC;
1342        retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
1343        if (retval < 0)
1344                goto out;
1345
1346        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1347                goto out;
1348
1349        /* First of all, some simple consistency checks */
1350        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1351            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1352                goto out;
1353        if (elf_check_fdpic(&elf_ex))
1354                goto out;
1355
1356        /* Now read in all of the header information */
1357
1358        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1359        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1360
1361        error = -ENOMEM;
1362        elf_phdata = kmalloc(j, GFP_KERNEL);
1363        if (!elf_phdata)
1364                goto out;
1365
1366        eppnt = elf_phdata;
1367        error = -ENOEXEC;
1368        retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
1369        if (retval < 0)
1370                goto out_free_ph;
1371
1372        for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1373                if ((eppnt + i)->p_type == PT_LOAD)
1374                        j++;
1375        if (j != 1)
1376                goto out_free_ph;
1377
1378        while (eppnt->p_type != PT_LOAD)
1379                eppnt++;
1380
1381        /* Now use mmap to map the library into memory. */
1382        error = vm_mmap(file,
1383                        ELF_PAGESTART(eppnt->p_vaddr),
1384                        (eppnt->p_filesz +
1385                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
1386                        PROT_READ | PROT_WRITE | PROT_EXEC,
1387                        MAP_FIXED_NOREPLACE | MAP_PRIVATE,
1388                        (eppnt->p_offset -
1389                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
1390        if (error != ELF_PAGESTART(eppnt->p_vaddr))
1391                goto out_free_ph;
1392
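        /*
         * Editorial note - a worked example of the page arithmetic above,
         * assuming ELF_MIN_ALIGN == 0x1000: for p_vaddr == 0x08048100,
         * ELF_PAGESTART() yields 0x08048000 and ELF_PAGEOFFSET() yields
         * 0x100.  The file offset passed to vm_mmap() is rewound by the
         * same 0x100, so the mapping stays congruent with the file
         * modulo the page size.
         */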
1393        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1394        if (padzero(elf_bss)) {
1395                error = -EFAULT;
1396                goto out_free_ph;
1397        }
1398
1399        len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
1400        bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
1401        if (bss > len) {
1402                error = vm_brk(len, bss - len);
1403                if (error)
1404                        goto out_free_ph;
1405        }
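        /*
         * Editorial note - an example of the bss sizing, again assuming
         * ELF_MIN_ALIGN == 0x1000: with p_vaddr == 0x08048000,
         * p_filesz == 0x1234 and p_memsz == 0x5000, len aligns to
         * 0x0804a000 and bss to 0x0804d000, so vm_brk() supplies the
         * remaining 0x3000 bytes as anonymous zero pages.
         */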
1406        error = 0;
1407
1408out_free_ph:
1409        kfree(elf_phdata);
1410out:
1411        return error;
1412}
1413#endif /* #ifdef CONFIG_USELIB */
1414
1415#ifdef CONFIG_ELF_CORE
1416/*
1417 * ELF core dumper
1418 *
1419 * Modelled on fs/exec.c:aout_core_dump()
1420 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1421 */
1422
1423/* An ELF note in memory */
1424struct memelfnote
1425{
1426        const char *name;
1427        int type;
1428        unsigned int datasz;
1429        void *data;
1430};
1431
1432static int notesize(struct memelfnote *en)
1433{
1434        int sz;
1435
1436        sz = sizeof(struct elf_note);
1437        sz += roundup(strlen(en->name) + 1, 4);
1438        sz += roundup(en->datasz, 4);
1439
1440        return sz;
1441}
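/*
 * Editorial note - a worked example: for a note named "CORE" carrying a
 * 20-byte descriptor, sizeof(struct elf_note) is 12, the name costs
 * roundup(4 + 1, 4) = 8 and the data costs roundup(20, 4) = 24, so
 * notesize() returns 44.
 */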
1442
1443static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1444{
1445        struct elf_note en;
1446        en.n_namesz = strlen(men->name) + 1;
1447        en.n_descsz = men->datasz;
1448        en.n_type = men->type;
1449
1450        return dump_emit(cprm, &en, sizeof(en)) &&
1451            dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1452            dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1453}
1454
1455static void fill_elf_header(struct elfhdr *elf, int segs,
1456                            u16 machine, u32 flags)
1457{
1458        memset(elf, 0, sizeof(*elf));
1459
1460        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1461        elf->e_ident[EI_CLASS] = ELF_CLASS;
1462        elf->e_ident[EI_DATA] = ELF_DATA;
1463        elf->e_ident[EI_VERSION] = EV_CURRENT;
1464        elf->e_ident[EI_OSABI] = ELF_OSABI;
1465
1466        elf->e_type = ET_CORE;
1467        elf->e_machine = machine;
1468        elf->e_version = EV_CURRENT;
1469        elf->e_phoff = sizeof(struct elfhdr);
1470        elf->e_flags = flags;
1471        elf->e_ehsize = sizeof(struct elfhdr);
1472        elf->e_phentsize = sizeof(struct elf_phdr);
1473        elf->e_phnum = segs;
1474}
1475
1476static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1477{
1478        phdr->p_type = PT_NOTE;
1479        phdr->p_offset = offset;
1480        phdr->p_vaddr = 0;
1481        phdr->p_paddr = 0;
1482        phdr->p_filesz = sz;
1483        phdr->p_memsz = 0;
1484        phdr->p_flags = 0;
1485        phdr->p_align = 0;
1486}
1487
1488static void fill_note(struct memelfnote *note, const char *name, int type, 
1489                unsigned int sz, void *data)
1490{
1491        note->name = name;
1492        note->type = type;
1493        note->datasz = sz;
1494        note->data = data;
1495}
1496
1497/*
1498 * fill up all the fields in prstatus from the given task struct, except
1499 * registers which need to be filled up separately.
1500 */
1501static void fill_prstatus(struct elf_prstatus_common *prstatus,
1502                struct task_struct *p, long signr)
1503{
1504        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1505        prstatus->pr_sigpend = p->pending.signal.sig[0];
1506        prstatus->pr_sighold = p->blocked.sig[0];
1507        rcu_read_lock();
1508        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1509        rcu_read_unlock();
1510        prstatus->pr_pid = task_pid_vnr(p);
1511        prstatus->pr_pgrp = task_pgrp_vnr(p);
1512        prstatus->pr_sid = task_session_vnr(p);
1513        if (thread_group_leader(p)) {
1514                struct task_cputime cputime;
1515
1516                /*
1517                 * This is the record for the group leader.  It shows the
1518                 * group-wide total, not its individual thread total.
1519                 */
1520                thread_group_cputime(p, &cputime);
1521                prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1522                prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1523        } else {
1524                u64 utime, stime;
1525
1526                task_cputime(p, &utime, &stime);
1527                prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1528                prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1529        }
1530
1531        prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1532        prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1533}
1534
1535static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1536                       struct mm_struct *mm)
1537{
1538        const struct cred *cred;
1539        unsigned int i, len;
1540        unsigned int state;
1541
1542        /* first copy the parameters from user space */
1543        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1544
1545        len = mm->arg_end - mm->arg_start;
1546        if (len >= ELF_PRARGSZ)
1547                len = ELF_PRARGSZ-1;
1548        if (copy_from_user(&psinfo->pr_psargs,
1549                           (const char __user *)mm->arg_start, len))
1550                return -EFAULT;
1551        for (i = 0; i < len; i++)
1552                if (psinfo->pr_psargs[i] == 0)
1553                        psinfo->pr_psargs[i] = ' ';
1554        psinfo->pr_psargs[len] = 0;
1555
1556        rcu_read_lock();
1557        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1558        rcu_read_unlock();
1559        psinfo->pr_pid = task_pid_vnr(p);
1560        psinfo->pr_pgrp = task_pgrp_vnr(p);
1561        psinfo->pr_sid = task_session_vnr(p);
1562
1563        state = READ_ONCE(p->__state);
1564        i = state ? ffz(~state) + 1 : 0;
1565        psinfo->pr_state = i;
1566        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1567        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1568        psinfo->pr_nice = task_nice(p);
1569        psinfo->pr_flag = p->flags;
1570        rcu_read_lock();
1571        cred = __task_cred(p);
1572        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1573        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1574        rcu_read_unlock();
1575        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1576
1577        return 0;
1578}
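/*
 * Editorial note - "RSDTZW" maps the index of the lowest set state bit
 * to the conventional ps(1) letters: a state of 0 gives i == 0 and
 * pr_sname 'R' (running), a state of 1 (TASK_INTERRUPTIBLE) gives
 * i == 1 and 'S' (sleeping), and any index past 'W' falls back to '.'.
 */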
1579
1580static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1581{
1582        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1583        int i = 0;
1584        do
1585                i += 2;
1586        while (auxv[i - 2] != AT_NULL);
1587        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1588}
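/*
 * Editorial note - the loop above counts (type, value) pairs including
 * the terminator.  For a saved_auxv of, say,
 * { AT_PHDR, v, AT_ENTRY, v, AT_NULL, 0 }, it stops with i == 6, so the
 * note covers six elf_addr_t entries, AT_NULL pair included.
 */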
1589
1590static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1591                const kernel_siginfo_t *siginfo)
1592{
1593        copy_siginfo_to_external(csigdata, siginfo);
1594        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1595}
1596
1597#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1598/*
1599 * Format of NT_FILE note:
1600 *
1601 * long count     -- how many files are mapped
1602 * long page_size -- units for file_ofs
1603 * array of [COUNT] elements of
1604 *   long start
1605 *   long end
1606 *   long file_ofs
1607 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1608 */
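/*
 * Editorial illustration - a dump of a process with two file-backed
 * mappings might therefore emit:
 *   count = 2, page_size = 4096
 *   { 0x400000, 0x401000, 0 } { 0x7f0000000000, 0x7f0000021000, 0 }
 *   "/bin/true\0/usr/lib/libc.so.6\0"
 */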
1609static int fill_files_note(struct memelfnote *note)
1610{
1611        struct mm_struct *mm = current->mm;
1612        struct vm_area_struct *vma;
1613        unsigned count, size, names_ofs, remaining, n;
1614        user_long_t *data;
1615        user_long_t *start_end_ofs;
1616        char *name_base, *name_curpos;
1617
1618        /* *Estimated* file count and total data size needed */
1619        count = mm->map_count;
1620        if (count > UINT_MAX / 64)
1621                return -EINVAL;
1622        size = count * 64;
1623
1624        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1625 alloc:
1626        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1627                return -EINVAL;
1628        size = round_up(size, PAGE_SIZE);
1629        /*
1630         * "size" can legitimately be 0 here.
1631         * Let it ENOMEM and omit the NT_FILE note, which would be empty anyway.
1632         */
1633        data = kvmalloc(size, GFP_KERNEL);
1634        if (ZERO_OR_NULL_PTR(data))
1635                return -ENOMEM;
1636
1637        start_end_ofs = data + 2;
1638        name_base = name_curpos = ((char *)data) + names_ofs;
1639        remaining = size - names_ofs;
1640        count = 0;
1641        for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
1642                struct file *file;
1643                const char *filename;
1644
1645                file = vma->vm_file;
1646                if (!file)
1647                        continue;
1648                filename = file_path(file, name_curpos, remaining);
1649                if (IS_ERR(filename)) {
1650                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1651                                kvfree(data);
1652                                size = size * 5 / 4;
1653                                goto alloc;
1654                        }
1655                        continue;
1656                }
1657
1658                /* file_path() fills at the end, move name down */
1659                /* n = strlen(filename) + 1: */
1660                n = (name_curpos + remaining) - filename;
1661                remaining = filename - name_curpos;
1662                memmove(name_curpos, filename, n);
1663                name_curpos += n;
1664
1665                *start_end_ofs++ = vma->vm_start;
1666                *start_end_ofs++ = vma->vm_end;
1667                *start_end_ofs++ = vma->vm_pgoff;
1668                count++;
1669        }
1670
1671        /* Now we know the exact count of files, so we can store it */
1672        data[0] = count;
1673        data[1] = PAGE_SIZE;
1674        /*
1675         * The final count is usually less than mm->map_count,
1676         * so we need to move the filenames down.
1677         */
1678        n = mm->map_count - count;
1679        if (n != 0) {
1680                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1681                memmove(name_base - shift_bytes, name_base,
1682                        name_curpos - name_base);
1683                name_curpos -= shift_bytes;
1684        }
1685
1686        size = name_curpos - (char *)data;
1687        fill_note(note, "CORE", NT_FILE, size, data);
1688        return 0;
1689}
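/*
 * Editorial sketch (hypothetical userspace code, compiled out): how a
 * tool might walk the NT_FILE descriptor built above.  parse_nt_file()
 * and its layout assumptions are for illustration only.
 */
#if 0
static void parse_nt_file(const long *data)
{
        long count = data[0];
        long page_size = data[1];
        const long *ent = data + 2;
        const char *name = (const char *)(ent + 3 * count);
        long i;

        for (i = 0; i < count; i++, ent += 3) {
                /* start, end, then the file offset in page_size units */
                printf("%lx-%lx @ %lx: %s\n", ent[0], ent[1],
                       ent[2] * page_size, name);
                name += strlen(name) + 1;
        }
}
#endif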
1690
1691#ifdef CORE_DUMP_USE_REGSET
1692#include <linux/regset.h>
1693
1694struct elf_thread_core_info {
1695        struct elf_thread_core_info *next;
1696        struct task_struct *task;
1697        struct elf_prstatus prstatus;
1698        struct memelfnote notes[];
1699};
1700
1701struct elf_note_info {
1702        struct elf_thread_core_info *thread;
1703        struct memelfnote psinfo;
1704        struct memelfnote signote;
1705        struct memelfnote auxv;
1706        struct memelfnote files;
1707        user_siginfo_t csigdata;
1708        size_t size;
1709        int thread_notes;
1710};
1711
1712/*
1713 * When a regset has a writeback hook, we call it on each thread before
1714 * dumping user memory.  On register window machines, this makes sure the
1715 * user memory backing the register data is up to date before we read it.
1716 */
1717static void do_thread_regset_writeback(struct task_struct *task,
1718                                       const struct user_regset *regset)
1719{
1720        if (regset->writeback)
1721                regset->writeback(task, regset, 1);
1722}
1723
1724#ifndef PRSTATUS_SIZE
1725#define PRSTATUS_SIZE sizeof(struct elf_prstatus)
1726#endif
1727
1728#ifndef SET_PR_FPVALID
1729#define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
1730#endif
1731
1732static int fill_thread_core_info(struct elf_thread_core_info *t,
1733                                 const struct user_regset_view *view,
1734                                 long signr, size_t *total)
1735{
1736        unsigned int i;
1737
1738        /*
1739         * NT_PRSTATUS is the one special case, because the regset data
1740         * goes into the pr_reg field inside the note contents, rather
1741         * than being the whole note contents.  We fill the rest in here.
1742         * We assume that regset 0 is NT_PRSTATUS.
1743         */
1744        fill_prstatus(&t->prstatus.common, t->task, signr);
1745        regset_get(t->task, &view->regsets[0],
1746                   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);
1747
1748        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1749                  PRSTATUS_SIZE, &t->prstatus);
1750        *total += notesize(&t->notes[0]);
1751
1752        do_thread_regset_writeback(t->task, &view->regsets[0]);
1753
1754        /*
1755         * Each remaining regset might generate a note too.  For each regset
1756         * that has no core_note_type or is inactive, we leave t->notes[i]
1757         * all zero and we'll know to skip writing it later.
1758         */
1759        for (i = 1; i < view->n; ++i) {
1760                const struct user_regset *regset = &view->regsets[i];
1761                int note_type = regset->core_note_type;
1762                bool is_fpreg = note_type == NT_PRFPREG;
1763                void *data;
1764                int ret;
1765
1766                do_thread_regset_writeback(t->task, regset);
1767                if (!note_type) // not for coredumps
1768                        continue;
1769                if (regset->active && regset->active(t->task, regset) <= 0)
1770                        continue;
1771
1772                ret = regset_get_alloc(t->task, regset, ~0U, &data);
1773                if (ret < 0)
1774                        continue;
1775
1776                if (is_fpreg)
1777                        SET_PR_FPVALID(&t->prstatus);
1778
1779                fill_note(&t->notes[i], is_fpreg ? "CORE" : "LINUX",
1780                          note_type, ret, data);
1781
1782                *total += notesize(&t->notes[i]);
1783        }
1784
1785        return 1;
1786}
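/*
 * Editorial note - on a typical x86-64 regset view, for example,
 * regset 0 is the general registers (NT_PRSTATUS), regset 1 the FPU
 * state (NT_PRFPREG), and later regsets things like NT_X86_XSTATE.
 * The exact set is architecture-specific; treat this as an assumption
 * for illustration.
 */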
1787
1788static int fill_note_info(struct elfhdr *elf, int phdrs,
1789                          struct elf_note_info *info,
1790                          const kernel_siginfo_t *siginfo, struct pt_regs *regs)
1791{
1792        struct task_struct *dump_task = current;
1793        const struct user_regset_view *view = task_user_regset_view(dump_task);
1794        struct elf_thread_core_info *t;
1795        struct elf_prpsinfo *psinfo;
1796        struct core_thread *ct;
1797        unsigned int i;
1798
1799        info->size = 0;
1800        info->thread = NULL;
1801
1802        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1803        if (psinfo == NULL) {
1804                info->psinfo.data = NULL; /* So we don't free this wrongly */
1805                return 0;
1806        }
1807
1808        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1809
1810        /*
1811         * Figure out how many notes we're going to need for each thread.
1812         */
1813        info->thread_notes = 0;
1814        for (i = 0; i < view->n; ++i)
1815                if (view->regsets[i].core_note_type != 0)
1816                        ++info->thread_notes;
1817
1818        /*
1819         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1820         * since it is our one special case.
1821         */
1822        if (unlikely(info->thread_notes == 0) ||
1823            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1824                WARN_ON(1);
1825                return 0;
1826        }
1827
1828        /*
1829         * Initialize the ELF file header.
1830         */
1831        fill_elf_header(elf, phdrs,
1832                        view->e_machine, view->e_flags);
1833
1834        /*
1835         * Allocate a structure for each thread.
1836         */
1837        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1838                t = kzalloc(offsetof(struct elf_thread_core_info,
1839                                     notes[info->thread_notes]),
1840                            GFP_KERNEL);
1841                if (unlikely(!t))
1842                        return 0;
1843
1844                t->task = ct->task;
1845                if (ct->task == dump_task || !info->thread) {
1846                        t->next = info->thread;
1847                        info->thread = t;
1848                } else {
1849                        /*
1850                         * Make sure to keep the original task at
1851                         * the head of the list.
1852                         */
1853                        t->next = info->thread->next;
1854                        info->thread->next = t;
1855                }
1856        }
1857
1858        /*
1859         * Now fill in each thread's information.
1860         */
1861        for (t = info->thread; t != NULL; t = t->next)
1862                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1863                        return 0;
1864
1865        /*
1866         * Fill in the two process-wide notes.
1867         */
1868        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1869        info->size += notesize(&info->psinfo);
1870
1871        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1872        info->size += notesize(&info->signote);
1873
1874        fill_auxv_note(&info->auxv, current->mm);
1875        info->size += notesize(&info->auxv);
1876
1877        if (fill_files_note(&info->files) == 0)
1878                info->size += notesize(&info->files);
1879
1880        return 1;
1881}
1882
1883static size_t get_note_info_size(struct elf_note_info *info)
1884{
1885        return info->size;
1886}
1887
1888/*
1889 * Write all the notes for each thread.  When writing the first thread, the
1890 * process-wide notes are interleaved after the first thread-specific note.
1891 */
1892static int write_note_info(struct elf_note_info *info,
1893                           struct coredump_params *cprm)
1894{
1895        bool first = true;
1896        struct elf_thread_core_info *t = info->thread;
1897
1898        do {
1899                int i;
1900
1901                if (!writenote(&t->notes[0], cprm))
1902                        return 0;
1903
1904                if (first && !writenote(&info->psinfo, cprm))
1905                        return 0;
1906                if (first && !writenote(&info->signote, cprm))
1907                        return 0;
1908                if (first && !writenote(&info->auxv, cprm))
1909                        return 0;
1910                if (first && info->files.data &&
1911                                !writenote(&info->files, cprm))
1912                        return 0;
1913
1914                for (i = 1; i < info->thread_notes; ++i)
1915                        if (t->notes[i].data &&
1916                            !writenote(&t->notes[i], cprm))
1917                                return 0;
1918
1919                first = false;
1920                t = t->next;
1921        } while (t);
1922
1923        return 1;
1924}
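/*
 * Editorial note - the resulting note order for, say, two threads is:
 *   PRSTATUS(T0), PRPSINFO, SIGINFO, AUXV, [NT_FILE],
 *   T0's remaining regset notes,
 *   PRSTATUS(T1), T1's remaining regset notes.
 * Debuggers conventionally treat each new PRSTATUS as the start of
 * another thread's run of notes.
 */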
1925
1926static void free_note_info(struct elf_note_info *info)
1927{
1928        struct elf_thread_core_info *threads = info->thread;
1929        while (threads) {
1930                unsigned int i;
1931                struct elf_thread_core_info *t = threads;
1932                threads = t->next;
1933                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1934                for (i = 1; i < info->thread_notes; ++i)
1935                        kfree(t->notes[i].data);
1936                kfree(t);
1937        }
1938        kfree(info->psinfo.data);
1939        kvfree(info->files.data);
1940}
1941
1942#else
1943
1944/* Here is the structure in which the status of each thread is captured. */
1945struct elf_thread_status
1946{
1947        struct list_head list;
1948        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1949        elf_fpregset_t fpu;             /* NT_PRFPREG */
1950        struct task_struct *thread;
1951        struct memelfnote notes[3];
1952        int num_notes;
1953};
1954
1955/*
1956 * In order to add the specific thread information for the ELF file format,
1957 * we need to keep a linked list of every thread's pr_status and then create
1958 * a single section for them in the final core file.
1959 */
1960static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1961{
1962        int sz = 0;
1963        struct task_struct *p = t->thread;
1964        t->num_notes = 0;
1965
1966        fill_prstatus(&t->prstatus.common, p, signr);
1967        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1968
1969        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1970                  &(t->prstatus));
1971        t->num_notes++;
1972        sz += notesize(&t->notes[0]);
1973
1974        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1975                                                                &t->fpu))) {
1976                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1977                          &(t->fpu));
1978                t->num_notes++;
1979                sz += notesize(&t->notes[1]);
1980        }
1981        return sz;
1982}
1983
1984struct elf_note_info {
1985        struct memelfnote *notes;
1986        struct memelfnote *notes_files;
1987        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1988        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1989        struct list_head thread_list;
1990        elf_fpregset_t *fpu;
1991        user_siginfo_t csigdata;
1992        int thread_status_size;
1993        int numnote;
1994};
1995
1996static int elf_note_info_init(struct elf_note_info *info)
1997{
1998        memset(info, 0, sizeof(*info));
1999        INIT_LIST_HEAD(&info->thread_list);
2000
2001        /* Allocate space for ELF notes */
2002        info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
2003        if (!info->notes)
2004                return 0;
2005        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
2006        if (!info->psinfo)
2007                return 0;
2008        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
2009        if (!info->prstatus)
2010                return 0;
2011        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2012        if (!info->fpu)
2013                return 0;
2014        return 1;
2015}
2016
2017static int fill_note_info(struct elfhdr *elf, int phdrs,
2018                          struct elf_note_info *info,
2019                          const kernel_siginfo_t *siginfo, struct pt_regs *regs)
2020{
2021        struct core_thread *ct;
2022        struct elf_thread_status *ets;
2023
2024        if (!elf_note_info_init(info))
2025                return 0;
2026
2027        for (ct = current->mm->core_state->dumper.next;
2028                                        ct; ct = ct->next) {
2029                ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2030                if (!ets)
2031                        return 0;
2032
2033                ets->thread = ct->task;
2034                list_add(&ets->list, &info->thread_list);
2035        }
2036
2037        list_for_each_entry(ets, &info->thread_list, list) {
2038                int sz;
2039
2040                sz = elf_dump_thread_status(siginfo->si_signo, ets);
2041                info->thread_status_size += sz;
2042        }
2043        /* now collect the dump for the current task */
2044        memset(info->prstatus, 0, sizeof(*info->prstatus));
2045        fill_prstatus(&info->prstatus->common, current, siginfo->si_signo);
2046        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2047
2048        /* Set up header */
2049        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2050
2051        /*
2052         * Set up the notes in similar form to SVR4 core dumps made
2053         * with info from their /proc.
2054         */
2055
2056        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2057                  sizeof(*info->prstatus), info->prstatus);
2058        fill_psinfo(info->psinfo, current->group_leader, current->mm);
2059        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2060                  sizeof(*info->psinfo), info->psinfo);
2061
2062        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2063        fill_auxv_note(info->notes + 3, current->mm);
2064        info->numnote = 4;
2065
2066        if (fill_files_note(info->notes + info->numnote) == 0) {
2067                info->notes_files = info->notes + info->numnote;
2068                info->numnote++;
2069        }
2070
2071        /* Try to dump the FPU. */
2072        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2073                                                               info->fpu);
2074        if (info->prstatus->pr_fpvalid)
2075                fill_note(info->notes + info->numnote++,
2076                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2077        return 1;
2078}
2079
2080static size_t get_note_info_size(struct elf_note_info *info)
2081{
2082        int sz = 0;
2083        int i;
2084
2085        for (i = 0; i < info->numnote; i++)
2086                sz += notesize(info->notes + i);
2087
2088        sz += info->thread_status_size;
2089
2090        return sz;
2091}
2092
2093static int write_note_info(struct elf_note_info *info,
2094                           struct coredump_params *cprm)
2095{
2096        struct elf_thread_status *ets;
2097        int i;
2098
2099        for (i = 0; i < info->numnote; i++)
2100                if (!writenote(info->notes + i, cprm))
2101                        return 0;
2102
2103        /* write out the thread status notes section */
2104        list_for_each_entry(ets, &info->thread_list, list) {
2105                for (i = 0; i < ets->num_notes; i++)
2106                        if (!writenote(&ets->notes[i], cprm))
2107                                return 0;
2108        }
2109
2110        return 1;
2111}
2112
2113static void free_note_info(struct elf_note_info *info)
2114{
2115        while (!list_empty(&info->thread_list)) {
2116                struct list_head *tmp = info->thread_list.next;
2117                list_del(tmp);
2118                kfree(list_entry(tmp, struct elf_thread_status, list));
2119        }
2120
2121        /* Free data possibly allocated by fill_files_note(): */
2122        if (info->notes_files)
2123                kvfree(info->notes_files->data);
2124
2125        kfree(info->prstatus);
2126        kfree(info->psinfo);
2127        kfree(info->notes);
2128        kfree(info->fpu);
2129}
2130
2131#endif
2132
2133static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2134                             elf_addr_t e_shoff, int segs)
2135{
2136        elf->e_shoff = e_shoff;
2137        elf->e_shentsize = sizeof(*shdr4extnum);
2138        elf->e_shnum = 1;
2139        elf->e_shstrndx = SHN_UNDEF;
2140
2141        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2142
2143        shdr4extnum->sh_type = SHT_NULL;
2144        shdr4extnum->sh_size = elf->e_shnum;
2145        shdr4extnum->sh_link = elf->e_shstrndx;
2146        shdr4extnum->sh_info = segs;
2147}
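/*
 * Editorial note - a worked example of extended numbering: with, say,
 * 70000 segments (more than PN_XNUM == 0xffff), e_phnum is set to
 * PN_XNUM and the real segment count travels in the sh_info field of
 * this single SHT_NULL section header.
 */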
2148
2149/*
2150 * Actual dumper
2151 *
2152 * This is a two-pass process; first we find the offsets of the bits,
2153 * and then they are actually written out.  If we run out of core limit
2154 * we just truncate.
2155 */
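/*
 * Editorial sketch of the resulting core file layout, assuming no
 * arch-specific extra phdrs or extra data:
 *
 *   ELF header
 *   program headers (the PT_NOTE entry first, then one PT_LOAD per VMA)
 *   note data
 *   padding up to ELF_EXEC_PAGESIZE
 *   memory contents of each dumped VMA, in phdr order
 *   extended-numbering section header (only if e_phnum == PN_XNUM)
 */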
2156static int elf_core_dump(struct coredump_params *cprm)
2157{
2158        int has_dumped = 0;
2159        int vma_count, segs, i;
2160        size_t vma_data_size;
2161        struct elfhdr elf;
2162        loff_t offset = 0, dataoff;
2163        struct elf_note_info info = { };
2164        struct elf_phdr *phdr4note = NULL;
2165        struct elf_shdr *shdr4extnum = NULL;
2166        Elf_Half e_phnum;
2167        elf_addr_t e_shoff;
2168        struct core_vma_metadata *vma_meta;
2169
2170        if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
2171                return 0;
2172
2173        /*
2174         * The number of segs is recorded in the ELF header as a 16-bit value.
2175         * Please check the DEFAULT_MAX_MAP_COUNT definition if you modify this.
2176         */
2177        segs = vma_count + elf_core_extra_phdrs();
2178
2179        /* for notes section */
2180        segs++;
2181
2182        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2183         * this, the kernel supports extended numbering. Have a look at
2184         * include/linux/elf.h for further information. */
2185        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2186
2187        /*
2188         * Collect all the non-memory information about the process for the
2189         * notes.  This also sets up the file header.
2190         */
2191        if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2192                goto end_coredump;
2193
2194        has_dumped = 1;
2195
2196        offset += sizeof(elf);                          /* Elf header */
2197        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2198
2199        /* Write notes phdr entry */
2200        {
2201                size_t sz = get_note_info_size(&info);
2202
2203                /* For cell spufs */
2204                sz += elf_coredump_extra_notes_size();
2205
2206                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2207                if (!phdr4note)
2208                        goto end_coredump;
2209
2210                fill_elf_note_phdr(phdr4note, sz, offset);
2211                offset += sz;
2212        }
2213
2214        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2215
2216        offset += vma_data_size;
2217        offset += elf_core_extra_data_size();
2218        e_shoff = offset;
2219
2220        if (e_phnum == PN_XNUM) {
2221                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2222                if (!shdr4extnum)
2223                        goto end_coredump;
2224                fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2225        }
2226
2227        offset = dataoff;
2228
2229        if (!dump_emit(cprm, &elf, sizeof(elf)))
2230                goto end_coredump;
2231
2232        if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2233                goto end_coredump;
2234
2235        /* Write program headers for segments dump */
2236        for (i = 0; i < vma_count; i++) {
2237                struct core_vma_metadata *meta = vma_meta + i;
2238                struct elf_phdr phdr;
2239
2240                phdr.p_type = PT_LOAD;
2241                phdr.p_offset = offset;
2242                phdr.p_vaddr = meta->start;
2243                phdr.p_paddr = 0;
2244                phdr.p_filesz = meta->dump_size;
2245                phdr.p_memsz = meta->end - meta->start;
2246                offset += phdr.p_filesz;
2247                phdr.p_flags = 0;
2248                if (meta->flags & VM_READ)
2249                        phdr.p_flags |= PF_R;
2250                if (meta->flags & VM_WRITE)
2251                        phdr.p_flags |= PF_W;
2252                if (meta->flags & VM_EXEC)
2253                        phdr.p_flags |= PF_X;
2254                phdr.p_align = ELF_EXEC_PAGESIZE;
2255
2256                if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2257                        goto end_coredump;
2258        }
2259
2260        if (!elf_core_write_extra_phdrs(cprm, offset))
2261                goto end_coredump;
2262
2263        /* write out the notes section */
2264        if (!write_note_info(&info, cprm))
2265                goto end_coredump;
2266
2267        /* For cell spufs */
2268        if (elf_coredump_extra_notes_write(cprm))
2269                goto end_coredump;
2270
2271        /* Align to page */
2272        dump_skip_to(cprm, dataoff);
2273
2274        for (i = 0; i < vma_count; i++) {
2275                struct core_vma_metadata *meta = vma_meta + i;
2276
2277                if (!dump_user_range(cprm, meta->start, meta->dump_size))
2278                        goto end_coredump;
2279        }
2280
2281        if (!elf_core_write_extra_data(cprm))
2282                goto end_coredump;
2283
2284        if (e_phnum == PN_XNUM) {
2285                if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2286                        goto end_coredump;
2287        }
2288
2289end_coredump:
2290        free_note_info(&info);
2291        kfree(shdr4extnum);
2292        kvfree(vma_meta);
2293        kfree(phdr4note);
2294        return has_dumped;
2295}
2296
2297#endif          /* CONFIG_ELF_CORE */
2298
2299static int __init init_elf_binfmt(void)
2300{
2301        register_binfmt(&elf_format);
2302        return 0;
2303}
2304
2305static void __exit exit_elf_binfmt(void)
2306{
2307        /* Remove the ELF loader. */
2308        unregister_binfmt(&elf_format);
2309}
2310
2311core_initcall(init_elf_binfmt);
2312module_exit(exit_elf_binfmt);
2313MODULE_LICENSE("GPL");
2314