linux/fs/binfmt_elf.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/sizes.h>
#include <linux/types.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef ELF_COMPAT
#define ELF_COMPAT 0
#endif

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

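/*
 * An address is "bad" if it cannot lie in the user mapping of this
 * task, i.e. it is at or above TASK_SIZE.
 */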
#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))

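/*
 * Extend the brk area to cover [start, end) with anonymous zero pages
 * and record the new program break in current->mm.  Both bounds are
 * first rounded up to ELF page granularity.
 */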
static int set_brk(unsigned long start, unsigned long end, int prot)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                /*
                 * Map the last of the bss segment.
                 * If the header is requesting these pages to be
                 * executable, honour that (ppc32 needs this).
                 */
                int error = vm_brk_flags(start, end - start,
                                prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        return error;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain the junk from the file that should not
   be in memory
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

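/*
 * For reference, the initial userspace stack that create_elf_tables()
 * builds looks like this (low to high addresses, for the usual
 * stack-grows-down case):
 *
 *      argc
 *      argv[0] ... argv[argc - 1], NULL
 *      envp[0] ... envp[envc - 1], NULL
 *      auxv: (AT_* id, value) pairs, terminated by (AT_NULL, 0)
 *      ... padding, random bytes, platform strings ...
 *      argument and environment strings
 */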
static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr,
                unsigned long e_entry)
{
        struct mm_struct *mm = current->mm;
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        elf_addr_t flags = 0;
        int ei_index;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                *elf_info++ = id; \
                *elf_info++ = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
                flags |= AT_FLAGS_PRESERVE_ARGV0;
        NEW_AUX_ENT(AT_FLAGS, flags);
        NEW_AUX_ENT(AT_ENTRY, e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->have_execfd) {
                NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(elf_info, 0, (char *)mm->saved_auxv +
                        sizeof(mm->saved_auxv) - (char *)elf_info);

        /* And advance past the AT_NULL entry.  */
        elf_info += 2;

        ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        if (mmap_read_lock_killable(mm))
                return -EINTR;
        vma = find_extend_vma(mm, bprm->p);
        mmap_read_unlock(mm);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (put_user(argc, sp++))
                return -EFAULT;

        /* Populate list of argv pointers back to argv strings. */
        p = mm->arg_end = mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (put_user(0, sp++))
                return -EFAULT;
        mm->arg_end = p;

        /* Populate list of envp pointers back to envp strings. */
        mm->env_end = mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (put_user(0, sp++))
                return -EFAULT;
        mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

static unsigned long elf_map(struct file *filep, unsigned long addr,
                const struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the whole ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image (since size < total_size).
         * So we first map the 'big' image and then unmap the remainder
         * at the end; the unmap is needed for ELF images with holes.
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        if ((type & MAP_FIXED_NOREPLACE) &&
            PTR_ERR((void *)map_addr) == -EEXIST)
                pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
                        task_pid_nr(current), current->comm, (void *)addr);

        return(map_addr);
}

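/*
 * Size of the whole loadable image: from the page-aligned start of the
 * first PT_LOAD segment to the end of the last one.  Returns 0 if the
 * program headers contain no PT_LOAD segment at all.
 */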
static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

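/*
 * Read exactly @len bytes from @file at offset @pos.  Returns 0 on
 * success or a negative error code; a short read is turned into -EIO.
 */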
static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
{
        ssize_t rv;

        rv = kernel_read(file, buf, len, &pos);
        if (unlikely(rv != len)) {
                return (rv < 0) ? rv : -EIO;
        }
        return 0;
}

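/*
 * Return the largest p_align of all PT_LOAD program headers, rounded
 * up to at least one ELF page.  Alignments that are not a power of two
 * are invalid and are skipped.
 */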
static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
{
        unsigned long alignment = 0;
        int i;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        unsigned long p_align = cmds[i].p_align;

                        /* skip non-power of two alignments as invalid */
                        if (!is_power_of_2(p_align))
                                continue;
                        alignment = max(alignment, p_align);
                }
        }

        /* ensure we align to at least one page */
        return ELF_PAGEALIGN(alignment);
}

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
                                       struct file *elf_file)
{
        struct elf_phdr *elf_phdata = NULL;
        int retval, err = -1;
        unsigned int size;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;

        /* Sanity check the number of program headers... */
        /* ...and their total size. */
        size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
        if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
                goto out;

        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        /* Read in the program headers */
        retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
        if (retval < 0) {
                err = retval;
                goto out;
        }

        /* Success! */
        err = 0;
out:
        if (err) {
                kfree(elf_phdata);
                elf_phdata = NULL;
        }
        return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:       The main ELF header
 * @phdr:       The program header to check
 * @elf:        The open ELF file
 * @is_interp:  True if the phdr is from the interpreter of the ELF being
 *              loaded, else false.
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
                                   struct elf_phdr *phdr,
                                   struct file *elf, bool is_interp,
                                   struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:       The main ELF header
 * @has_interp: True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
                                 struct elfhdr *interp_ehdr,
                                 struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

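/*
 * Map ELF segment flags (PF_R/PF_W/PF_X) to mmap protection bits, then
 * let the architecture adjust the result via arch_elf_adjust_prot()
 * (used e.g. by arm64 to apply BTI guarded-page protections).
 */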
static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
                            bool has_interp, bool is_interp)
{
        int prot = 0;

        if (p_flags & PF_R)
                prot |= PROT_READ;
        if (p_flags & PF_W)
                prot |= PROT_WRITE;
        if (p_flags & PF_X)
                prot |= PROT_EXEC;

        return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter,
                unsigned long no_base, struct elf_phdr *interp_elf_phdata,
                struct arch_elf_state *arch_state)
{
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        int bss_prot = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int i;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex) ||
            elf_check_fdpic(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        total_size = total_mapping_size(interp_elf_phdata,
                                        interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out;
        }

        eppnt = interp_elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = make_prot(eppnt->p_flags, arch_state,
                                                 true, true);
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED_NOREPLACE;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
                        if (k > last_bss) {
                                last_bss = k;
                                bss_prot = elf_prot;
                        }
                }
        }

        /*
         * Now fill out the bss section: first pad the last page from
         * the file up to the page boundary, and zero it from elf_bss
         * up to the end of the page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out;
        }
        /*
         * Next, align both the file and mem bss up to the page size,
         * since this is where elf_bss was just zeroed up to, and where
         * last_bss will end after the vm_brk_flags() below.
         */
        elf_bss = ELF_PAGEALIGN(elf_bss);
        last_bss = ELF_PAGEALIGN(last_bss);
        /* Finally, if there is still more bss to allocate, do it. */
        if (last_bss > elf_bss) {
                error = vm_brk_flags(elf_bss, last_bss - elf_bss,
                                bss_prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        goto out;
        }

        error = load_addr;
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

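/*
 * Parse the next property out of a NT_GNU_PROPERTY_TYPE_0 note
 * descriptor: validate the header and padding, enforce that properties
 * arrive sorted by pr_type, and hand the payload to the architecture
 * via arch_parse_elf_property().  *off tracks the current position in
 * the descriptor; -ENOENT signals that the end was reached cleanly.
 */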
static int parse_elf_property(const char *data, size_t *off, size_t datasz,
                              struct arch_elf_state *arch,
                              bool have_prev_type, u32 *prev_type)
{
        size_t o, step;
        const struct gnu_property *pr;
        int ret;

        if (*off == datasz)
                return -ENOENT;

        if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
                return -EIO;
        o = *off;
        datasz -= *off;

        if (datasz < sizeof(*pr))
                return -ENOEXEC;
        pr = (const struct gnu_property *)(data + o);
        o += sizeof(*pr);
        datasz -= sizeof(*pr);

        if (pr->pr_datasz > datasz)
                return -ENOEXEC;

        WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
        step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
        if (step > datasz)
                return -ENOEXEC;

        /* Properties are supposed to be unique and sorted on pr_type: */
        if (have_prev_type && pr->pr_type <= *prev_type)
                return -ENOEXEC;
        *prev_type = pr->pr_type;

        ret = arch_parse_elf_property(pr->pr_type, data + o,
                                      pr->pr_datasz, ELF_COMPAT, arch);
        if (ret)
                return ret;

        *off = o + step;
        return 0;
}

#define NOTE_DATA_SZ SZ_1K
#define GNU_PROPERTY_TYPE_0_NAME "GNU"
#define NOTE_NAME_SZ (sizeof(GNU_PROPERTY_TYPE_0_NAME))

static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
                                struct arch_elf_state *arch)
{
        union {
                struct elf_note nhdr;
                char data[NOTE_DATA_SZ];
        } note;
        loff_t pos;
        ssize_t n;
        size_t off, datasz;
        int ret;
        bool have_prev_type;
        u32 prev_type;

        if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
                return 0;

        /* load_elf_binary() shouldn't call us unless this is true... */
        if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
                return -ENOEXEC;

        /* If the properties are crazy large, that's too bad (for now): */
        if (phdr->p_filesz > sizeof(note))
                return -ENOEXEC;

        pos = phdr->p_offset;
        n = kernel_read(f, &note, phdr->p_filesz, &pos);

        BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
        if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
                return -EIO;

        if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
            note.nhdr.n_namesz != NOTE_NAME_SZ ||
            strncmp(note.data + sizeof(note.nhdr),
                    GNU_PROPERTY_TYPE_0_NAME, n - sizeof(note.nhdr)))
                return -ENOEXEC;

        off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
                       ELF_GNU_PROPERTY_ALIGN);
        if (off > n)
                return -ENOEXEC;

        if (note.nhdr.n_descsz > n - off)
                return -ENOEXEC;
        datasz = off + note.nhdr.n_descsz;

        have_prev_type = false;
        do {
                ret = parse_elf_property(note.data, &off, datasz, arch,
                                         have_prev_type, &prev_type);
                have_prev_type = true;
        } while (!ret);

        return ret == -ENOENT ? 0 : ret;
}

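/*
 * The main ELF loader.  In outline:
 *
 *   1. Sanity-check the ELF header and read the program headers.
 *   2. Find PT_INTERP (the dynamic linker, if any) and open it.
 *   3. Handle PT_GNU_STACK, arch-specific headers and GNU properties.
 *   4. begin_new_exec() - the point of no return: the old mm is gone.
 *   5. mmap every PT_LOAD segment, choosing a load bias for ET_DYN.
 *   6. Set up bss/brk, then map the interpreter if there is one.
 *   7. Build the stack (argv/envp/auxv) and start the new thread.
 */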
static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
        struct elf_phdr *elf_property_phdata = NULL;
        unsigned long elf_bss, elf_brk;
        int bss_prot = 0;
        int retval, i;
        unsigned long elf_entry;
        unsigned long e_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
        struct elfhdr *interp_elf_ex = NULL;
        struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
        struct mm_struct *mm;
        struct pt_regs *regs;

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(elf_ex))
                goto out;
        if (elf_check_fdpic(elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
        if (!elf_phdata)
                goto out;

        elf_ppnt = elf_phdata;
        for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
                char *elf_interpreter;

                if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
                        elf_property_phdata = elf_ppnt;
                        continue;
                }

                if (elf_ppnt->p_type != PT_INTERP)
                        continue;

                /*
                 * This is the program interpreter used for shared libraries -
                 * for now assume that this is an a.out format binary.
                 */
                retval = -ENOEXEC;
                if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
                        goto out_free_ph;

                retval = -ENOMEM;
                elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
                if (!elf_interpreter)
                        goto out_free_ph;

                retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
                                  elf_ppnt->p_offset);
                if (retval < 0)
                        goto out_free_interp;
                /* make sure path is NULL terminated */
                retval = -ENOEXEC;
                if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                        goto out_free_interp;

                interpreter = open_exec(elf_interpreter);
                kfree(elf_interpreter);
                retval = PTR_ERR(interpreter);
                if (IS_ERR(interpreter))
                        goto out_free_ph;

                /*
                 * If the binary is not readable then enforce mm->dumpable = 0
                 * regardless of the interpreter's permissions.
                 */
                would_dump(bprm, interpreter);

                interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
                if (!interp_elf_ex) {
                        retval = -ENOMEM;
                        goto out_free_ph;
                }

                /* Get the exec headers */
                retval = elf_read(interpreter, interp_elf_ex,
                                  sizeof(*interp_elf_ex), 0);
                if (retval < 0)
                        goto out_free_dentry;

                break;

out_free_interp:
                kfree(elf_interpreter);
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
                switch (elf_ppnt->p_type) {
                case PT_GNU_STACK:
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;

                case PT_LOPROC ... PT_HIPROC:
                        retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
                                                  bprm->file, false,
                                                  &arch_state);
                        if (retval)
                                goto out_free_dentry;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(interp_elf_ex) ||
                    elf_check_fdpic(interp_elf_ex))
                        goto out_free_dentry;

                /* Load the interpreter program headers */
                interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
                                                   interpreter);
                if (!interp_elf_phdata)
                        goto out_free_dentry;

                /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
                elf_property_phdata = NULL;
                elf_ppnt = interp_elf_phdata;
                for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
                        switch (elf_ppnt->p_type) {
                        case PT_GNU_PROPERTY:
                                elf_property_phdata = elf_ppnt;
                                break;

                        case PT_LOPROC ... PT_HIPROC:
                                retval = arch_elf_pt_proc(interp_elf_ex,
                                                          elf_ppnt, interpreter,
                                                          true, &arch_state);
                                if (retval)
                                        goto out_free_dentry;
                                break;
                        }
        }

        retval = parse_elf_properties(interpreter ?: bprm->file,
                                      elf_property_phdata, &arch_state);
        if (retval)
                goto out_free_dentry;

        /*
         * Allow arch code to reject the ELF at this point, whilst it's
         * still possible to return an error to the code that invoked
         * the exec syscall.
         */
        retval = arch_check_elf(elf_ex,
                                !!interpreter, interp_elf_ex,
                                &arch_state);
        if (retval)
                goto out_free_dentry;

        /* Flush all traces of the currently running executable */
        retval = begin_new_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY2(*elf_ex, &arch_state);
        if (elf_read_implies_exec(*elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0)
                goto out_free_dentry;

        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < elf_ex->e_phnum; i++, elf_ppnt++) {
                int elf_prot, elf_flags;
                unsigned long k, vaddr;
                unsigned long total_size = 0;
                unsigned long alignment;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias,
                                         bss_prot);
                        if (retval)
                                goto out_free_dentry;
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
                                     !!interpreter, false);

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                /*
                 * If we are loading ET_EXEC or we have already performed
                 * the ET_DYN load_addr calculations, proceed normally.
                 */
                if (elf_ex->e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (elf_ex->e_type == ET_DYN) {
                        /*
                         * This logic is run once for the first LOAD Program
                         * Header for ET_DYN binaries to calculate the
                         * randomization (load_bias) for all the LOAD
                         * Program Headers, and to calculate the entire
                         * size of the ELF mapping (total_size). (Note that
                         * load_addr_set is set to true later once the
                         * initial mapping is performed.)
                         *
                         * There are effectively two types of ET_DYN
                         * binaries: programs (i.e. PIE: ET_DYN with INTERP)
                         * and loaders (ET_DYN without INTERP, since they
                         * _are_ the ELF interpreter). The loaders must
                         * be loaded away from programs since the program
                         * may otherwise collide with the loader (especially
                         * for ET_EXEC which does not have a randomized
                         * position). For example to handle invocations of
                         * "./ld.so someprog" to test out a new version of
                         * the loader, the subsequent program that the
                         * loader loads must avoid the loader itself, so
                         * they cannot share the same load range. Sufficient
                         * room for the brk must be allocated with the
                         * loader as well, since brk must be available with
                         * the loader.
                         *
                         * Therefore, programs are loaded offset from
                         * ELF_ET_DYN_BASE and loaders are loaded into the
                         * independently randomized mmap region (0 load_bias
                         * without MAP_FIXED).
                         */
                        if (interpreter) {
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
                                alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
                                if (alignment)
                                        load_bias &= ~(alignment - 1);
                                elf_flags |= MAP_FIXED;
                        } else
                                load_bias = 0;

                        /*
                         * Since load_bias is used for all subsequent loading
                         * calculations, we must lower it by the first vaddr
                         * so that the remaining calculations based on the
                         * ELF vaddrs will be correctly offset. The result
                         * is then page aligned.
                         */
                        load_bias = ELF_PAGESTART(load_bias - vaddr);

                        total_size = total_mapping_size(elf_phdata,
                                                        elf_ex->e_phnum);
                        if (!total_size) {
                                retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (elf_ex->e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if ((elf_ppnt->p_flags & PF_X) && k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk) {
                        bss_prot = elf_prot;
                        elf_brk = k;
                }
        }

        e_entry = elf_ex->e_entry + load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk, bss_prot);
        if (retval)
                goto out_free_dentry;
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (interpreter) {
                elf_entry = load_elf_interp(interp_elf_ex,
                                            interpreter,
                                            load_bias, interp_elf_phdata,
                                            &arch_state);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += interp_elf_ex->e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);

                kfree(interp_elf_ex);
                kfree(interp_elf_phdata);
        } else {
                elf_entry = e_entry;
                if (BAD_ADDR(elf_entry)) {
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
        if (retval < 0)
                goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        retval = create_elf_tables(bprm, elf_ex,
                          load_addr, interp_load_addr, e_entry);
        if (retval < 0)
                goto out;

        mm = current->mm;
        mm->end_code = end_code;
        mm->start_code = start_code;
        mm->start_data = start_data;
        mm->end_data = end_data;
        mm->start_stack = bprm->p;

        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                /*
                 * For architectures with ELF randomization, when executing
                 * a loader directly (i.e. no interpreter listed in ELF
                 * headers), move the brk area out of the mmap region
                 * (since it grows up, and may collide early with the stack
                 * growing down), and into the unused ELF_ET_DYN_BASE region.
                 */
                if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
                    elf_ex->e_type == ET_DYN && !interpreter) {
                        mm->brk = mm->start_brk = ELF_ET_DYN_BASE;
                }

                mm->brk = mm->start_brk = arch_randomize_brk(mm);
#ifdef compat_brk_randomized
                current->brk_randomized = 1;
#endif
        }

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

        regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        finalize_exec(bprm);
        START_THREAD(elf_ex, regs, elf_entry, bprm->p);
        retval = 0;
out:
        return retval;

        /* error cleanup */
out_free_dentry:
        kfree(interp_elf_ex);
        kfree(interp_elf_phdata);
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = elf_read(file, &elf_ex, sizeof(elf_ex), 0);
        if (retval < 0)
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                goto out;
        if (elf_check_fdpic(&elf_ex))
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = elf_read(file, eppnt, j, elf_ex.e_phoff);
        if (retval < 0)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
        bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
        if (bss > len) {
                error = vm_brk(len, bss - len);
                if (error)
                        goto out_free_ph;
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

1432static int notesize(struct memelfnote *en)
1433{
1434        int sz;
1435
1436        sz = sizeof(struct elf_note);
1437        sz += roundup(strlen(en->name) + 1, 4);
1438        sz += roundup(en->datasz, 4);
1439
1440        return sz;
1441}
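/*
 * Concretely: a note record is the 12-byte header plus name and data,
 * each padded to 4 bytes.  For a "CORE"/NT_PRSTATUS note on x86-64,
 * for instance, that is 12 + roundup(5, 4) + roundup(336, 4) =
 * 12 + 8 + 336 = 356 bytes (336 being sizeof(struct elf_prstatus)
 * there; the exact data size is architecture-dependent).
 */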
1442
1443static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1444{
1445        struct elf_note en;
1446        en.n_namesz = strlen(men->name) + 1;
1447        en.n_descsz = men->datasz;
1448        en.n_type = men->type;
1449
1450        return dump_emit(cprm, &en, sizeof(en)) &&
1451            dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1452            dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1453}
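/*
 * Given the record layout emitted by writenote(), a standalone userspace
 * tool could walk a PT_NOTE segment as sketched below (illustrative only,
 * not kernel code; walk_notes() is a hypothetical name).
 */
#if 0
#include <elf.h>
#include <stdio.h>

static void walk_notes(const unsigned char *p, size_t len)
{
	while (len >= sizeof(Elf64_Nhdr)) {
		const Elf64_Nhdr *nh = (const Elf64_Nhdr *)p;
		size_t namesz = (nh->n_namesz + 3) & ~(size_t)3; /* 4-byte pad */
		size_t descsz = (nh->n_descsz + 3) & ~(size_t)3;
		size_t total = sizeof(*nh) + namesz + descsz;

		if (total > len)
			break;		/* truncated note */
		printf("%-8s type=%u descsz=%u\n",
		       (const char *)(nh + 1), nh->n_type, nh->n_descsz);
		p += total;
		len -= total;
	}
}
#endif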
1454
1455static void fill_elf_header(struct elfhdr *elf, int segs,
1456                            u16 machine, u32 flags)
1457{
1458        memset(elf, 0, sizeof(*elf));
1459
1460        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1461        elf->e_ident[EI_CLASS] = ELF_CLASS;
1462        elf->e_ident[EI_DATA] = ELF_DATA;
1463        elf->e_ident[EI_VERSION] = EV_CURRENT;
1464        elf->e_ident[EI_OSABI] = ELF_OSABI;
1465
1466        elf->e_type = ET_CORE;
1467        elf->e_machine = machine;
1468        elf->e_version = EV_CURRENT;
1469        elf->e_phoff = sizeof(struct elfhdr);
1470        elf->e_flags = flags;
1471        elf->e_ehsize = sizeof(struct elfhdr);
1472        elf->e_phentsize = sizeof(struct elf_phdr);
1473        elf->e_phnum = segs;
1474}
1475
1476static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1477{
1478        phdr->p_type = PT_NOTE;
1479        phdr->p_offset = offset;
1480        phdr->p_vaddr = 0;
1481        phdr->p_paddr = 0;
1482        phdr->p_filesz = sz;
1483        phdr->p_memsz = 0;
1484        phdr->p_flags = 0;
1485        phdr->p_align = 0;
1486}
1487
1488static void fill_note(struct memelfnote *note, const char *name, int type, 
1489                unsigned int sz, void *data)
1490{
1491        note->name = name;
1492        note->type = type;
1493        note->datasz = sz;
1494        note->data = data;
1495}
1496
1497/*
1498 * fill up all the fields in prstatus from the given task struct, except
1499 * registers which need to be filled up separately.
1500 */
1501static void fill_prstatus(struct elf_prstatus_common *prstatus,
1502                struct task_struct *p, long signr)
1503{
1504        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1505        prstatus->pr_sigpend = p->pending.signal.sig[0];
1506        prstatus->pr_sighold = p->blocked.sig[0];
1507        rcu_read_lock();
1508        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1509        rcu_read_unlock();
1510        prstatus->pr_pid = task_pid_vnr(p);
1511        prstatus->pr_pgrp = task_pgrp_vnr(p);
1512        prstatus->pr_sid = task_session_vnr(p);
1513        if (thread_group_leader(p)) {
1514                struct task_cputime cputime;
1515
1516                /*
1517                 * This is the record for the group leader.  It shows the
1518                 * group-wide total, not its individual thread total.
1519                 */
1520                thread_group_cputime(p, &cputime);
1521                prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
1522                prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
1523        } else {
1524                u64 utime, stime;
1525
1526                task_cputime(p, &utime, &stime);
1527                prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
1528                prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
1529        }
1530
1531        prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
1532        prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
1533}
1534
1535static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1536                       struct mm_struct *mm)
1537{
1538        const struct cred *cred;
1539        unsigned int i, len;
1540        
1541        /* first copy the parameters from user space */
1542        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1543
1544        len = mm->arg_end - mm->arg_start;
1545        if (len >= ELF_PRARGSZ)
1546                len = ELF_PRARGSZ-1;
1547        if (copy_from_user(&psinfo->pr_psargs,
1548                           (const char __user *)mm->arg_start, len))
1549                return -EFAULT;
1550        for (i = 0; i < len; i++)
1551                if (psinfo->pr_psargs[i] == 0)
1552                        psinfo->pr_psargs[i] = ' ';
1553        psinfo->pr_psargs[len] = 0;
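        /*
         * pr_psargs now holds the command line with the NUL separators
         * between arguments replaced by spaces, e.g. "cat\0/etc/motd\0"
         * becomes "cat /etc/motd".
         */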
1554
1555        rcu_read_lock();
1556        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1557        rcu_read_unlock();
1558        psinfo->pr_pid = task_pid_vnr(p);
1559        psinfo->pr_pgrp = task_pgrp_vnr(p);
1560        psinfo->pr_sid = task_session_vnr(p);
1561
1562        i = p->state ? ffz(~p->state) + 1 : 0;
1563        psinfo->pr_state = i;
1564        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1565        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1566        psinfo->pr_nice = task_nice(p);
1567        psinfo->pr_flag = p->flags;
1568        rcu_read_lock();
1569        cred = __task_cred(p);
1570        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1571        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1572        rcu_read_unlock();
1573        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1574        
1575        return 0;
1576}
1577
1578static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1579{
1580        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1581        int i = 0;
1582        do
1583                i += 2;
1584        while (auxv[i - 2] != AT_NULL);
1585        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1586}
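/*
 * saved_auxv is a flat array of (type, value) pairs terminated by an
 * AT_NULL entry, e.g. { AT_PHDR, <addr>, AT_ENTRY, <addr>, ...,
 * AT_NULL, 0 }, so the loop above advances two elf_addr_t at a time and
 * the whole vector, terminator included, becomes the NT_AUXV note body.
 */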
1587
1588static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1589                const kernel_siginfo_t *siginfo)
1590{
1591        copy_siginfo_to_external(csigdata, siginfo);
1592        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1593}
1594
1595#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1596/*
1597 * Format of NT_FILE note:
1598 *
1599 * long count     -- how many files are mapped
1600 * long page_size -- units for file_ofs
1601 * array of [COUNT] elements of
1602 *   long start
1603 *   long end
1604 *   long file_ofs
1605 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1606 */
1607static int fill_files_note(struct memelfnote *note)
1608{
1609        struct mm_struct *mm = current->mm;
1610        struct vm_area_struct *vma;
1611        unsigned count, size, names_ofs, remaining, n;
1612        user_long_t *data;
1613        user_long_t *start_end_ofs;
1614        char *name_base, *name_curpos;
1615
1616        /* *Estimated* file count and total data size needed */
1617        count = mm->map_count;
1618        if (count > UINT_MAX / 64)
1619                return -EINVAL;
1620        size = count * 64;
1621
1622        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1623 alloc:
1624        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1625                return -EINVAL;
1626        size = round_up(size, PAGE_SIZE);
1627        /*
1628         * "size" can legitimately be 0 here (no mapped files).
1629         * Let it -ENOMEM and omit the NT_FILE note, which would be empty anyway.
1630         */
1631        data = kvmalloc(size, GFP_KERNEL);
1632        if (ZERO_OR_NULL_PTR(data))
1633                return -ENOMEM;
1634
1635        start_end_ofs = data + 2;
1636        name_base = name_curpos = ((char *)data) + names_ofs;
1637        remaining = size - names_ofs;
1638        count = 0;
1639        for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
1640                struct file *file;
1641                const char *filename;
1642
1643                file = vma->vm_file;
1644                if (!file)
1645                        continue;
1646                filename = file_path(file, name_curpos, remaining);
1647                if (IS_ERR(filename)) {
1648                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1649                                kvfree(data);
1650                                size = size * 5 / 4;
1651                                goto alloc;
1652                        }
1653                        continue;
1654                }
1655
1656                /* file_path() fills at the end, move name down */
1657                /* n = strlen(filename) + 1: */
1658                n = (name_curpos + remaining) - filename;
1659                remaining = filename - name_curpos;
1660                memmove(name_curpos, filename, n);
1661                name_curpos += n;
1662
1663                *start_end_ofs++ = vma->vm_start;
1664                *start_end_ofs++ = vma->vm_end;
1665                *start_end_ofs++ = vma->vm_pgoff;
1666                count++;
1667        }
1668
1669        /* Now we know the exact count of files and can store it */
1670        data[0] = count;
1671        data[1] = PAGE_SIZE;
1672        /*
1673         * The final count is usually less than mm->map_count (not every
1674         * VMA is file-backed), so we need to move the filenames down.
1675         */
1676        n = mm->map_count - count;
1677        if (n != 0) {
1678                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1679                memmove(name_base - shift_bytes, name_base,
1680                        name_curpos - name_base);
1681                name_curpos -= shift_bytes;
1682        }
1683
1684        size = name_curpos - (char *)data;
1685        fill_note(note, "CORE", NT_FILE, size, data);
1686        return 0;
1687}
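/*
 * A userspace consumer of the NT_FILE note described above might decode
 * it as sketched here (illustrative only, not kernel code; assumes a
 * 64-bit core where "long" entries are 8 bytes, and print_nt_file() is
 * a hypothetical name).
 */
#if 0
#include <stdio.h>
#include <string.h>

static void print_nt_file(const unsigned long *data)
{
	unsigned long count = data[0];
	unsigned long page_size = data[1];
	const unsigned long *ent = data + 2;
	const char *name = (const char *)(ent + 3 * count);
	unsigned long i;

	for (i = 0; i < count; i++, ent += 3) {
		/* start, end, file offset in page_size units */
		printf("%#lx-%#lx @ %#lx: %s\n",
		       ent[0], ent[1], ent[2] * page_size, name);
		name += strlen(name) + 1;	/* names are NUL-separated */
	}
}
#endif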
1688
1689#ifdef CORE_DUMP_USE_REGSET
1690#include <linux/regset.h>
1691
1692struct elf_thread_core_info {
1693        struct elf_thread_core_info *next;
1694        struct task_struct *task;
1695        struct elf_prstatus prstatus;
1696        struct memelfnote notes[];
1697};
1698
1699struct elf_note_info {
1700        struct elf_thread_core_info *thread;
1701        struct memelfnote psinfo;
1702        struct memelfnote signote;
1703        struct memelfnote auxv;
1704        struct memelfnote files;
1705        user_siginfo_t csigdata;
1706        size_t size;
1707        int thread_notes;
1708};
1709
1710/*
1711 * When a regset has a writeback hook, we call it on each thread before
1712 * dumping user memory.  On register window machines, this makes sure the
1713 * user memory backing the register data is up to date before we read it.
1714 */
1715static void do_thread_regset_writeback(struct task_struct *task,
1716                                       const struct user_regset *regset)
1717{
1718        if (regset->writeback)
1719                regset->writeback(task, regset, 1);
1720}
1721
1722#ifndef PRSTATUS_SIZE
1723#define PRSTATUS_SIZE sizeof(struct elf_prstatus)
1724#endif
1725
1726#ifndef SET_PR_FPVALID
1727#define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
1728#endif
1729
1730static int fill_thread_core_info(struct elf_thread_core_info *t,
1731                                 const struct user_regset_view *view,
1732                                 long signr, size_t *total)
1733{
1734        unsigned int i;
1735
1736        /*
1737         * NT_PRSTATUS is the one special case, because the regset data
1738         * goes into the pr_reg field inside the note contents, rather
1739         * than being the whole note contents.  We fill the regset data in here.
1740         * We assume that regset 0 is NT_PRSTATUS.
1741         */
1742        fill_prstatus(&t->prstatus.common, t->task, signr);
1743        regset_get(t->task, &view->regsets[0],
1744                   sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);
1745
1746        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1747                  PRSTATUS_SIZE, &t->prstatus);
1748        *total += notesize(&t->notes[0]);
1749
1750        do_thread_regset_writeback(t->task, &view->regsets[0]);
1751
1752        /*
1753         * Each of the other regsets might generate a note too.  For each regset
1754         * that has no core_note_type or is inactive, we leave t->notes[i]
1755         * all zero and we'll know to skip writing it later.
1756         */
1757        for (i = 1; i < view->n; ++i) {
1758                const struct user_regset *regset = &view->regsets[i];
1759                int note_type = regset->core_note_type;
1760                bool is_fpreg = note_type == NT_PRFPREG;
1761                void *data;
1762                int ret;
1763
1764                do_thread_regset_writeback(t->task, regset);
1765                if (!note_type) // not for coredumps
1766                        continue;
1767                if (regset->active && regset->active(t->task, regset) <= 0)
1768                        continue;
1769
1770                ret = regset_get_alloc(t->task, regset, ~0U, &data);
1771                if (ret < 0)
1772                        continue;
1773
1774                if (is_fpreg)
1775                        SET_PR_FPVALID(&t->prstatus);
1776
1777                fill_note(&t->notes[i], is_fpreg ? "CORE" : "LINUX",
1778                          note_type, ret, data);
1779
1780                *total += notesize(&t->notes[i]);
1781        }
1782
1783        return 1;
1784}
1785
1786static int fill_note_info(struct elfhdr *elf, int phdrs,
1787                          struct elf_note_info *info,
1788                          const kernel_siginfo_t *siginfo, struct pt_regs *regs)
1789{
1790        struct task_struct *dump_task = current;
1791        const struct user_regset_view *view = task_user_regset_view(dump_task);
1792        struct elf_thread_core_info *t;
1793        struct elf_prpsinfo *psinfo;
1794        struct core_thread *ct;
1795        unsigned int i;
1796
1797        info->size = 0;
1798        info->thread = NULL;
1799
1800        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1801        if (psinfo == NULL) {
1802                info->psinfo.data = NULL; /* So we don't free this wrongly */
1803                return 0;
1804        }
1805
1806        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1807
1808        /*
1809         * Figure out how many notes we're going to need for each thread.
1810         */
1811        info->thread_notes = 0;
1812        for (i = 0; i < view->n; ++i)
1813                if (view->regsets[i].core_note_type != 0)
1814                        ++info->thread_notes;
1815
1816        /*
1817         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1818         * since it is our one special case.
1819         */
1820        if (unlikely(info->thread_notes == 0) ||
1821            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1822                WARN_ON(1);
1823                return 0;
1824        }
1825
1826        /*
1827         * Initialize the ELF file header.
1828         */
1829        fill_elf_header(elf, phdrs,
1830                        view->e_machine, view->e_flags);
1831
1832        /*
1833         * Allocate a structure for each thread.
1834         */
1835        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1836                t = kzalloc(offsetof(struct elf_thread_core_info,
1837                                     notes[info->thread_notes]),
1838                            GFP_KERNEL);
1839                if (unlikely(!t))
1840                        return 0;
1841
1842                t->task = ct->task;
1843                if (ct->task == dump_task || !info->thread) {
1844                        t->next = info->thread;
1845                        info->thread = t;
1846                } else {
1847                        /*
1848                         * Make sure to keep the original task at
1849                         * the head of the list.
1850                         */
1851                        t->next = info->thread->next;
1852                        info->thread->next = t;
1853                }
1854        }
1855
1856        /*
1857         * Now fill in each thread's information.
1858         */
1859        for (t = info->thread; t != NULL; t = t->next)
1860                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1861                        return 0;
1862
1863        /*
1864         * Fill in the two process-wide notes.
1865         */
1866        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1867        info->size += notesize(&info->psinfo);
1868
1869        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1870        info->size += notesize(&info->signote);
1871
1872        fill_auxv_note(&info->auxv, current->mm);
1873        info->size += notesize(&info->auxv);
1874
1875        if (fill_files_note(&info->files) == 0)
1876                info->size += notesize(&info->files);
1877
1878        return 1;
1879}
1880
1881static size_t get_note_info_size(struct elf_note_info *info)
1882{
1883        return info->size;
1884}
1885
1886/*
1887 * Write all the notes for each thread.  When writing the first thread, the
1888 * process-wide notes are interleaved after the first thread-specific note.
1889 */
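/*
 * Concretely, for a process with two threads T0 (the dumping task) and
 * T1, the emitted order is:
 *
 *   PRSTATUS(T0), PRPSINFO, SIGINFO, AUXV, [FILE,] <other T0 regset
 *   notes>, PRSTATUS(T1), <other T1 regset notes>
 *
 * i.e. the process-wide notes follow the first thread's NT_PRSTATUS.
 */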
1890static int write_note_info(struct elf_note_info *info,
1891                           struct coredump_params *cprm)
1892{
1893        bool first = true;
1894        struct elf_thread_core_info *t = info->thread;
1895
1896        do {
1897                int i;
1898
1899                if (!writenote(&t->notes[0], cprm))
1900                        return 0;
1901
1902                if (first && !writenote(&info->psinfo, cprm))
1903                        return 0;
1904                if (first && !writenote(&info->signote, cprm))
1905                        return 0;
1906                if (first && !writenote(&info->auxv, cprm))
1907                        return 0;
1908                if (first && info->files.data &&
1909                                !writenote(&info->files, cprm))
1910                        return 0;
1911
1912                for (i = 1; i < info->thread_notes; ++i)
1913                        if (t->notes[i].data &&
1914                            !writenote(&t->notes[i], cprm))
1915                                return 0;
1916
1917                first = false;
1918                t = t->next;
1919        } while (t);
1920
1921        return 1;
1922}
1923
1924static void free_note_info(struct elf_note_info *info)
1925{
1926        struct elf_thread_core_info *threads = info->thread;
1927        while (threads) {
1928                unsigned int i;
1929                struct elf_thread_core_info *t = threads;
1930                threads = t->next;
1931                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1932                for (i = 1; i < info->thread_notes; ++i)
1933                        kfree(t->notes[i].data);
1934                kfree(t);
1935        }
1936        kfree(info->psinfo.data);
1937        kvfree(info->files.data);
1938}
1939
1940#else
1941
1942/* Here is the structure in which status of each thread is captured. */
1943struct elf_thread_status
1944{
1945        struct list_head list;
1946        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1947        elf_fpregset_t fpu;             /* NT_PRFPREG */
1948        struct task_struct *thread;
1949        struct memelfnote notes[3];
1950        int num_notes;
1951};
1952
1953/*
1954 * In order to add the thread-specific information to the ELF core format,
1955 * we need to keep a linked list of every thread's pr_status and then create
1956 * a single section for them in the final core file.
1957 */
1958static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1959{
1960        int sz = 0;
1961        struct task_struct *p = t->thread;
1962        t->num_notes = 0;
1963
1964        fill_prstatus(&t->prstatus.common, p, signr);
1965        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1966        
1967        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1968                  &(t->prstatus));
1969        t->num_notes++;
1970        sz += notesize(&t->notes[0]);
1971
1972        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1973                                                                &t->fpu))) {
1974                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1975                          &(t->fpu));
1976                t->num_notes++;
1977                sz += notesize(&t->notes[1]);
1978        }
1979        return sz;
1980}
1981
1982struct elf_note_info {
1983        struct memelfnote *notes;
1984        struct memelfnote *notes_files;
1985        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1986        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1987        struct list_head thread_list;
1988        elf_fpregset_t *fpu;
1989        user_siginfo_t csigdata;
1990        int thread_status_size;
1991        int numnote;
1992};
1993
1994static int elf_note_info_init(struct elf_note_info *info)
1995{
1996        memset(info, 0, sizeof(*info));
1997        INIT_LIST_HEAD(&info->thread_list);
1998
1999        /* Allocate space for ELF notes */
2000        info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
2001        if (!info->notes)
2002                return 0;
2003        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
2004        if (!info->psinfo)
2005                return 0;
2006        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
2007        if (!info->prstatus)
2008                return 0;
2009        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2010        if (!info->fpu)
2011                return 0;
2012        return 1;
2013}
2014
2015static int fill_note_info(struct elfhdr *elf, int phdrs,
2016                          struct elf_note_info *info,
2017                          const kernel_siginfo_t *siginfo, struct pt_regs *regs)
2018{
2019        struct core_thread *ct;
2020        struct elf_thread_status *ets;
2021
2022        if (!elf_note_info_init(info))
2023                return 0;
2024
2025        for (ct = current->mm->core_state->dumper.next;
2026                                        ct; ct = ct->next) {
2027                ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2028                if (!ets)
2029                        return 0;
2030
2031                ets->thread = ct->task;
2032                list_add(&ets->list, &info->thread_list);
2033        }
2034
2035        list_for_each_entry(ets, &info->thread_list, list) {
2036                int sz;
2037
2038                sz = elf_dump_thread_status(siginfo->si_signo, ets);
2039                info->thread_status_size += sz;
2040        }
2041        /* now collect the dump for the current task */
2042        memset(info->prstatus, 0, sizeof(*info->prstatus));
2043        fill_prstatus(&info->prstatus->common, current, siginfo->si_signo);
2044        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2045
2046        /* Set up header */
2047        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2048
2049        /*
2050         * Set up the notes in similar form to SVR4 core dumps made
2051         * with info from their /proc.
2052         */
2053
2054        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2055                  sizeof(*info->prstatus), info->prstatus);
2056        fill_psinfo(info->psinfo, current->group_leader, current->mm);
2057        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2058                  sizeof(*info->psinfo), info->psinfo);
2059
2060        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2061        fill_auxv_note(info->notes + 3, current->mm);
2062        info->numnote = 4;
2063
2064        if (fill_files_note(info->notes + info->numnote) == 0) {
2065                info->notes_files = info->notes + info->numnote;
2066                info->numnote++;
2067        }
2068
2069        /* Try to dump the FPU. */
2070        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2071                                                               info->fpu);
2072        if (info->prstatus->pr_fpvalid)
2073                fill_note(info->notes + info->numnote++,
2074                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2075        return 1;
2076}
2077
2078static size_t get_note_info_size(struct elf_note_info *info)
2079{
2080        int sz = 0;
2081        int i;
2082
2083        for (i = 0; i < info->numnote; i++)
2084                sz += notesize(info->notes + i);
2085
2086        sz += info->thread_status_size;
2087
2088        return sz;
2089}
2090
2091static int write_note_info(struct elf_note_info *info,
2092                           struct coredump_params *cprm)
2093{
2094        struct elf_thread_status *ets;
2095        int i;
2096
2097        for (i = 0; i < info->numnote; i++)
2098                if (!writenote(info->notes + i, cprm))
2099                        return 0;
2100
2101        /* write out the thread status notes section */
2102        list_for_each_entry(ets, &info->thread_list, list) {
2103                for (i = 0; i < ets->num_notes; i++)
2104                        if (!writenote(&ets->notes[i], cprm))
2105                                return 0;
2106        }
2107
2108        return 1;
2109}
2110
2111static void free_note_info(struct elf_note_info *info)
2112{
2113        while (!list_empty(&info->thread_list)) {
2114                struct list_head *tmp = info->thread_list.next;
2115                list_del(tmp);
2116                kfree(list_entry(tmp, struct elf_thread_status, list));
2117        }
2118
2119        /* Free data possibly allocated by fill_files_note(): */
2120        if (info->notes_files)
2121                kvfree(info->notes_files->data);
2122
2123        kfree(info->prstatus);
2124        kfree(info->psinfo);
2125        kfree(info->notes);
2126        kfree(info->fpu);
2127}
2128
2129#endif
2130
2131static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2132                             elf_addr_t e_shoff, int segs)
2133{
2134        elf->e_shoff = e_shoff;
2135        elf->e_shentsize = sizeof(*shdr4extnum);
2136        elf->e_shnum = 1;
2137        elf->e_shstrndx = SHN_UNDEF;
2138
2139        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2140
2141        shdr4extnum->sh_type = SHT_NULL;
2142        shdr4extnum->sh_size = elf->e_shnum;
2143        shdr4extnum->sh_link = elf->e_shstrndx;
2144        shdr4extnum->sh_info = segs;
2145}
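/*
 * A reader that encounters e_phnum == PN_XNUM recovers the real segment
 * count from sh_info of section header 0, as filled in above.  A minimal
 * userspace sketch (illustrative only; real_phnum() is a hypothetical
 * name, assuming a 64-bit core image):
 */
#if 0
#include <elf.h>

static unsigned long real_phnum(const Elf64_Ehdr *ehdr)
{
	const Elf64_Shdr *shdr0;

	if (ehdr->e_phnum != PN_XNUM)
		return ehdr->e_phnum;
	/* extended numbering: the count lives in section header 0 */
	shdr0 = (const Elf64_Shdr *)((const char *)ehdr + ehdr->e_shoff);
	return shdr0->sh_info;
}
#endif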
2146
2147/*
2148 * Actual dumper
2149 *
2150 * This is a two-pass process; first we find the offsets of the bits,
2151 * and then they are actually written out.  If we hit the core file
2152 * size limit, we just truncate.
2153 */
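/*
 * The resulting file layout, in write order:
 *
 *   [ ELF header ]
 *   [ program headers: PT_NOTE, one PT_LOAD per VMA, arch extras ]
 *   [ note data, then any extra arch notes ]
 *   [ padding up to ELF_EXEC_PAGESIZE ]
 *   [ PT_LOAD segment contents, in VMA order ]
 *   [ extra arch data, then shdr4extnum if e_phnum == PN_XNUM ]
 */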
2154static int elf_core_dump(struct coredump_params *cprm)
2155{
2156        int has_dumped = 0;
2157        int vma_count, segs, i;
2158        size_t vma_data_size;
2159        struct elfhdr elf;
2160        loff_t offset = 0, dataoff;
2161        struct elf_note_info info = { };
2162        struct elf_phdr *phdr4note = NULL;
2163        struct elf_shdr *shdr4extnum = NULL;
2164        Elf_Half e_phnum;
2165        elf_addr_t e_shoff;
2166        struct core_vma_metadata *vma_meta;
2167
2168        if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
2169                return 0;
2170
2171        /*
2172         * The number of segs is recorded in the ELF header as a 16-bit value.
2173         * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
2174         */
2175        segs = vma_count + elf_core_extra_phdrs();
2176
2177        /* for notes section */
2178        segs++;
2179
2180        /* If segs > PN_XNUM (0xffff), then e_phnum overflows. To avoid
2181         * this, the kernel supports extended numbering. Have a look at
2182         * include/linux/elf.h for further information. */
2183        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2184
2185        /*
2186         * Collect all the non-memory information about the process for the
2187         * notes.  This also sets up the file header.
2188         */
2189        if (!fill_note_info(&elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2190                goto end_coredump;
2191
2192        has_dumped = 1;
2193
2194        offset += sizeof(elf);                          /* Elf header */
2195        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2196
2197        /* Write notes phdr entry */
2198        {
2199                size_t sz = get_note_info_size(&info);
2200
2201                /* For cell spufs */
2202                sz += elf_coredump_extra_notes_size();
2203
2204                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2205                if (!phdr4note)
2206                        goto end_coredump;
2207
2208                fill_elf_note_phdr(phdr4note, sz, offset);
2209                offset += sz;
2210        }
2211
2212        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2213
2214        offset += vma_data_size;
2215        offset += elf_core_extra_data_size();
2216        e_shoff = offset;
2217
2218        if (e_phnum == PN_XNUM) {
2219                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2220                if (!shdr4extnum)
2221                        goto end_coredump;
2222                fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
2223        }
2224
2225        offset = dataoff;
2226
2227        if (!dump_emit(cprm, &elf, sizeof(elf)))
2228                goto end_coredump;
2229
2230        if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2231                goto end_coredump;
2232
2233        /* Write program headers for segments dump */
2234        for (i = 0; i < vma_count; i++) {
2235                struct core_vma_metadata *meta = vma_meta + i;
2236                struct elf_phdr phdr;
2237
2238                phdr.p_type = PT_LOAD;
2239                phdr.p_offset = offset;
2240                phdr.p_vaddr = meta->start;
2241                phdr.p_paddr = 0;
2242                phdr.p_filesz = meta->dump_size;
2243                phdr.p_memsz = meta->end - meta->start;
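                /*
                 * p_filesz may be smaller than p_memsz (even zero) when
                 * the dump filter skipped part or all of the VMA; readers
                 * must treat the missing tail as if it were zero-filled.
                 */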
2244                offset += phdr.p_filesz;
2245                phdr.p_flags = 0;
2246                if (meta->flags & VM_READ)
2247                        phdr.p_flags |= PF_R;
2248                if (meta->flags & VM_WRITE)
2249                        phdr.p_flags |= PF_W;
2250                if (meta->flags & VM_EXEC)
2251                        phdr.p_flags |= PF_X;
2252                phdr.p_align = ELF_EXEC_PAGESIZE;
2253
2254                if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2255                        goto end_coredump;
2256        }
2257
2258        if (!elf_core_write_extra_phdrs(cprm, offset))
2259                goto end_coredump;
2260
2261        /* write out the notes section */
2262        if (!write_note_info(&info, cprm))
2263                goto end_coredump;
2264
2265        /* For cell spufs */
2266        if (elf_coredump_extra_notes_write(cprm))
2267                goto end_coredump;
2268
2269        /* Align to page */
2270        if (!dump_skip(cprm, dataoff - cprm->pos))
2271                goto end_coredump;
2272
2273        for (i = 0; i < vma_count; i++) {
2274                struct core_vma_metadata *meta = vma_meta + i;
2275
2276                if (!dump_user_range(cprm, meta->start, meta->dump_size))
2277                        goto end_coredump;
2278        }
2279        dump_truncate(cprm);
2280
2281        if (!elf_core_write_extra_data(cprm))
2282                goto end_coredump;
2283
2284        if (e_phnum == PN_XNUM) {
2285                if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2286                        goto end_coredump;
2287        }
2288
2289end_coredump:
2290        free_note_info(&info);
2291        kfree(shdr4extnum);
2292        kvfree(vma_meta);
2293        kfree(phdr4note);
2294        return has_dumped;
2295}
2296
2297#endif          /* CONFIG_ELF_CORE */
2298
2299static int __init init_elf_binfmt(void)
2300{
2301        register_binfmt(&elf_format);
2302        return 0;
2303}
2304
2305static void __exit exit_elf_binfmt(void)
2306{
2307        /* Remove the ELF loader. */
2308        unregister_binfmt(&elf_format);
2309}
2310
2311core_initcall(init_elf_binfmt);
2312module_exit(exit_elf_binfmt);
2313MODULE_LICENSE("GPL");
2314