linux/fs/coredump.c
<<
>>
Prefs
   1#include <linux/slab.h>
   2#include <linux/file.h>
   3#include <linux/fdtable.h>
   4#include <linux/mm.h>
   5#include <linux/stat.h>
   6#include <linux/fcntl.h>
   7#include <linux/swap.h>
   8#include <linux/string.h>
   9#include <linux/init.h>
  10#include <linux/pagemap.h>
  11#include <linux/perf_event.h>
  12#include <linux/highmem.h>
  13#include <linux/spinlock.h>
  14#include <linux/key.h>
  15#include <linux/personality.h>
  16#include <linux/binfmts.h>
  17#include <linux/coredump.h>
  18#include <linux/utsname.h>
  19#include <linux/pid_namespace.h>
  20#include <linux/module.h>
  21#include <linux/namei.h>
  22#include <linux/mount.h>
  23#include <linux/security.h>
  24#include <linux/syscalls.h>
  25#include <linux/tsacct_kern.h>
  26#include <linux/cn_proc.h>
  27#include <linux/audit.h>
  28#include <linux/tracehook.h>
  29#include <linux/kmod.h>
  30#include <linux/fsnotify.h>
  31#include <linux/fs_struct.h>
  32#include <linux/pipe_fs_i.h>
  33#include <linux/oom.h>
  34#include <linux/compat.h>
  35#include <linux/sched.h>
  36#include <linux/fs.h>
  37#include <linux/path.h>
  38#include <linux/timekeeping.h>
  39
  40#include <asm/uaccess.h>
  41#include <asm/mmu_context.h>
  42#include <asm/tlb.h>
  43#include <asm/exec.h>
  44
  45#include <trace/events/task.h>
  46#include "internal.h"
  47
  48#include <trace/events/sched.h>
  49
  50int core_uses_pid;
  51unsigned int core_pipe_limit;
  52char core_pattern[CORENAME_MAX_SIZE] = "core";
  53static int core_name_size = CORENAME_MAX_SIZE;
  54
  55struct core_name {
  56        char *corename;
  57        int used, size;
  58};
  59
  60/* The maximal length of core_pattern is also specified in sysctl.c */
  61
  62static int expand_corename(struct core_name *cn, int size)
  63{
  64        char *corename = krealloc(cn->corename, size, GFP_KERNEL);
  65
  66        if (!corename)
  67                return -ENOMEM;
  68
  69        if (size > core_name_size) /* racy but harmless */
  70                core_name_size = size;
  71
  72        cn->size = ksize(corename);
  73        cn->corename = corename;
  74        return 0;
  75}
  76
  77static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt,
  78                                     va_list arg)
  79{
  80        int free, need;
  81        va_list arg_copy;
  82
  83again:
  84        free = cn->size - cn->used;
  85
  86        va_copy(arg_copy, arg);
  87        need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy);
  88        va_end(arg_copy);
  89
  90        if (need < free) {
  91                cn->used += need;
  92                return 0;
  93        }
  94
  95        if (!expand_corename(cn, cn->size + need - free + 1))
  96                goto again;
  97
  98        return -ENOMEM;
  99}
 100
 101static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...)
 102{
 103        va_list arg;
 104        int ret;
 105
 106        va_start(arg, fmt);
 107        ret = cn_vprintf(cn, fmt, arg);
 108        va_end(arg);
 109
 110        return ret;
 111}
 112
 113static __printf(2, 3)
 114int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
 115{
 116        int cur = cn->used;
 117        va_list arg;
 118        int ret;
 119
 120        va_start(arg, fmt);
 121        ret = cn_vprintf(cn, fmt, arg);
 122        va_end(arg);
 123
 124        if (ret == 0) {
 125                /*
 126                 * Ensure that this coredump name component can't cause the
 127                 * resulting corefile path to consist of a ".." or ".".
 128                 */
 129                if ((cn->used - cur == 1 && cn->corename[cur] == '.') ||
 130                                (cn->used - cur == 2 && cn->corename[cur] == '.'
 131                                && cn->corename[cur+1] == '.'))
 132                        cn->corename[cur] = '!';
 133
 134                /*
 135                 * Empty names are fishy and could be used to create a "//" in a
 136                 * corefile name, causing the coredump to happen one directory
 137                 * level too high. Enforce that all components of the core
 138                 * pattern are at least one character long.
 139                 */
 140                if (cn->used == cur)
 141                        ret = cn_printf(cn, "!");
 142        }
 143
 144        for (; cur < cn->used; ++cur) {
 145                if (cn->corename[cur] == '/')
 146                        cn->corename[cur] = '!';
 147        }
 148        return ret;
 149}
 150
 151static int cn_print_exe_file(struct core_name *cn)
 152{
 153        struct file *exe_file;
 154        char *pathbuf, *path;
 155        int ret;
 156
 157        exe_file = get_mm_exe_file(current->mm);
 158        if (!exe_file)
 159                return cn_esc_printf(cn, "%s (path unknown)", current->comm);
 160
 161        pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
 162        if (!pathbuf) {
 163                ret = -ENOMEM;
 164                goto put_exe_file;
 165        }
 166
 167        path = file_path(exe_file, pathbuf, PATH_MAX);
 168        if (IS_ERR(path)) {
 169                ret = PTR_ERR(path);
 170                goto free_buf;
 171        }
 172
 173        ret = cn_esc_printf(cn, "%s", path);
 174
 175free_buf:
 176        kfree(pathbuf);
 177put_exe_file:
 178        fput(exe_file);
 179        return ret;
 180}
 181
 182/* format_corename will inspect the pattern parameter, and output a
 183 * name into corename, which must have space for at least
 184 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
 185 */
 186static int format_corename(struct core_name *cn, struct coredump_params *cprm)
 187{
 188        const struct cred *cred = current_cred();
 189        const char *pat_ptr = core_pattern;
 190        int ispipe = (*pat_ptr == '|');
 191        int pid_in_pattern = 0;
 192        int err = 0;
 193
 194        cn->used = 0;
 195        cn->corename = NULL;
 196        if (expand_corename(cn, core_name_size))
 197                return -ENOMEM;
 198        cn->corename[0] = '\0';
 199
 200        if (ispipe)
 201                ++pat_ptr;
 202
 203        /* Repeat as long as we have more pattern to process and more output
 204           space */
 205        while (*pat_ptr) {
 206                if (*pat_ptr != '%') {
 207                        err = cn_printf(cn, "%c", *pat_ptr++);
 208                } else {
 209                        switch (*++pat_ptr) {
 210                        /* single % at the end, drop that */
 211                        case 0:
 212                                goto out;
 213                        /* Double percent, output one percent */
 214                        case '%':
 215                                err = cn_printf(cn, "%c", '%');
 216                                break;
 217                        /* pid */
 218                        case 'p':
 219                                pid_in_pattern = 1;
 220                                err = cn_printf(cn, "%d",
 221                                              task_tgid_vnr(current));
 222                                break;
 223                        /* global pid */
 224                        case 'P':
 225                                err = cn_printf(cn, "%d",
 226                                              task_tgid_nr(current));
 227                                break;
 228                        case 'i':
 229                                err = cn_printf(cn, "%d",
 230                                              task_pid_vnr(current));
 231                                break;
 232                        case 'I':
 233                                err = cn_printf(cn, "%d",
 234                                              task_pid_nr(current));
 235                                break;
 236                        /* uid */
 237                        case 'u':
 238                                err = cn_printf(cn, "%u",
 239                                                from_kuid(&init_user_ns,
 240                                                          cred->uid));
 241                                break;
 242                        /* gid */
 243                        case 'g':
 244                                err = cn_printf(cn, "%u",
 245                                                from_kgid(&init_user_ns,
 246                                                          cred->gid));
 247                                break;
 248                        case 'd':
 249                                err = cn_printf(cn, "%d",
 250                                        __get_dumpable(cprm->mm_flags));
 251                                break;
 252                        /* signal that caused the coredump */
 253                        case 's':
 254                                err = cn_printf(cn, "%d",
 255                                                cprm->siginfo->si_signo);
 256                                break;
 257                        /* UNIX time of coredump */
 258                        case 't': {
 259                                time64_t time;
 260
 261                                time = ktime_get_real_seconds();
 262                                err = cn_printf(cn, "%lld", time);
 263                                break;
 264                        }
 265                        /* hostname */
 266                        case 'h':
 267                                down_read(&uts_sem);
 268                                err = cn_esc_printf(cn, "%s",
 269                                              utsname()->nodename);
 270                                up_read(&uts_sem);
 271                                break;
 272                        /* executable */
 273                        case 'e':
 274                                err = cn_esc_printf(cn, "%s", current->comm);
 275                                break;
 276                        case 'E':
 277                                err = cn_print_exe_file(cn);
 278                                break;
 279                        /* core limit size */
 280                        case 'c':
 281                                err = cn_printf(cn, "%lu",
 282                                              rlimit(RLIMIT_CORE));
 283                                break;
 284                        default:
 285                                break;
 286                        }
 287                        ++pat_ptr;
 288                }
 289
 290                if (err)
 291                        return err;
 292        }
 293
 294out:
 295        /* Backward compatibility with core_uses_pid:
 296         *
 297         * If core_pattern does not include a %p (as is the default)
 298         * and core_uses_pid is set, then .%pid will be appended to
 299         * the filename. Do not do this for piped commands. */
 300        if (!ispipe && !pid_in_pattern && core_uses_pid) {
 301                err = cn_printf(cn, ".%d", task_tgid_vnr(current));
 302                if (err)
 303                        return err;
 304        }
 305        return ispipe;
 306}
 307
 308static int zap_process(struct task_struct *start, int exit_code, int flags)
 309{
 310        struct task_struct *t;
 311        int nr = 0;
 312
 313        /* ignore all signals except SIGKILL, see prepare_signal() */
 314        start->signal->flags = SIGNAL_GROUP_COREDUMP | flags;
 315        start->signal->group_exit_code = exit_code;
 316        start->signal->group_stop_count = 0;
 317
 318        for_each_thread(start, t) {
 319                task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
 320                if (t != current && t->mm) {
 321                        sigaddset(&t->pending.signal, SIGKILL);
 322                        signal_wake_up(t, 1);
 323                        nr++;
 324                }
 325        }
 326
 327        return nr;
 328}
 329
 330static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 331                        struct core_state *core_state, int exit_code)
 332{
 333        struct task_struct *g, *p;
 334        unsigned long flags;
 335        int nr = -EAGAIN;
 336
 337        spin_lock_irq(&tsk->sighand->siglock);
 338        if (!signal_group_exit(tsk->signal)) {
 339                mm->core_state = core_state;
 340                tsk->signal->group_exit_task = tsk;
 341                nr = zap_process(tsk, exit_code, 0);
 342                clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
 343        }
 344        spin_unlock_irq(&tsk->sighand->siglock);
 345        if (unlikely(nr < 0))
 346                return nr;
 347
 348        tsk->flags |= PF_DUMPCORE;
 349        if (atomic_read(&mm->mm_users) == nr + 1)
 350                goto done;
 351        /*
 352         * We should find and kill all tasks which use this mm, and we should
 353         * count them correctly into ->nr_threads. We don't take tasklist
 354         * lock, but this is safe wrt:
 355         *
 356         * fork:
 357         *      None of sub-threads can fork after zap_process(leader). All
 358         *      processes which were created before this point should be
 359         *      visible to zap_threads() because copy_process() adds the new
 360         *      process to the tail of init_task.tasks list, and lock/unlock
 361         *      of ->siglock provides a memory barrier.
 362         *
 363         * do_exit:
 364         *      The caller holds mm->mmap_sem. This means that the task which
 365         *      uses this mm can't pass exit_mm(), so it can't exit or clear
 366         *      its ->mm.
 367         *
 368         * de_thread:
 369         *      It does list_replace_rcu(&leader->tasks, &current->tasks),
 370         *      we must see either old or new leader, this does not matter.
 371         *      However, it can change p->sighand, so lock_task_sighand(p)
 372         *      must be used. Since p->mm != NULL and we hold ->mmap_sem
 373         *      it can't fail.
 374         *
 375         *      Note also that "g" can be the old leader with ->mm == NULL
 376         *      and already unhashed and thus removed from ->thread_group.
 377         *      This is OK, __unhash_process()->list_del_rcu() does not
 378         *      clear the ->next pointer, we will find the new leader via
 379         *      next_thread().
 380         */
 381        rcu_read_lock();
 382        for_each_process(g) {
 383                if (g == tsk->group_leader)
 384                        continue;
 385                if (g->flags & PF_KTHREAD)
 386                        continue;
 387
 388                for_each_thread(g, p) {
 389                        if (unlikely(!p->mm))
 390                                continue;
 391                        if (unlikely(p->mm == mm)) {
 392                                lock_task_sighand(p, &flags);
 393                                nr += zap_process(p, exit_code,
 394                                                        SIGNAL_GROUP_EXIT);
 395                                unlock_task_sighand(p, &flags);
 396                        }
 397                        break;
 398                }
 399        }
 400        rcu_read_unlock();
 401done:
 402        atomic_set(&core_state->nr_threads, nr);
 403        return nr;
 404}
 405
 406static int coredump_wait(int exit_code, struct core_state *core_state)
 407{
 408        struct task_struct *tsk = current;
 409        struct mm_struct *mm = tsk->mm;
 410        int core_waiters = -EBUSY;
 411
 412        init_completion(&core_state->startup);
 413        core_state->dumper.task = tsk;
 414        core_state->dumper.next = NULL;
 415
 416        down_write(&mm->mmap_sem);
 417        if (!mm->core_state)
 418                core_waiters = zap_threads(tsk, mm, core_state, exit_code);
 419        up_write(&mm->mmap_sem);
 420
 421        if (core_waiters > 0) {
 422                struct core_thread *ptr;
 423
 424                wait_for_completion(&core_state->startup);
 425                /*
 426                 * Wait for all the threads to become inactive, so that
 427                 * all the thread context (extended register state, like
 428                 * fpu etc) gets copied to the memory.
 429                 */
 430                ptr = core_state->dumper.next;
 431                while (ptr != NULL) {
 432                        wait_task_inactive(ptr->task, 0);
 433                        ptr = ptr->next;
 434                }
 435        }
 436
 437        return core_waiters;
 438}
 439
 440static void coredump_finish(struct mm_struct *mm, bool core_dumped)
 441{
 442        struct core_thread *curr, *next;
 443        struct task_struct *task;
 444
 445        spin_lock_irq(&current->sighand->siglock);
 446        if (core_dumped && !__fatal_signal_pending(current))
 447                current->signal->group_exit_code |= 0x80;
 448        current->signal->group_exit_task = NULL;
 449        current->signal->flags = SIGNAL_GROUP_EXIT;
 450        spin_unlock_irq(&current->sighand->siglock);
 451
 452        next = mm->core_state->dumper.next;
 453        while ((curr = next) != NULL) {
 454                next = curr->next;
 455                task = curr->task;
 456                /*
 457                 * see exit_mm(), curr->task must not see
 458                 * ->task == NULL before we read ->next.
 459                 */
 460                smp_mb();
 461                curr->task = NULL;
 462                wake_up_process(task);
 463        }
 464
 465        mm->core_state = NULL;
 466}
 467
 468static bool dump_interrupted(void)
 469{
 470        /*
 471         * SIGKILL or freezing() interrupt the coredumping. Perhaps we
 472         * can do try_to_freeze() and check __fatal_signal_pending(),
 473         * but then we need to teach dump_write() to restart and clear
 474         * TIF_SIGPENDING.
 475         */
 476        return signal_pending(current);
 477}
 478
 479static void wait_for_dump_helpers(struct file *file)
 480{
 481        struct pipe_inode_info *pipe = file->private_data;
 482
 483        pipe_lock(pipe);
 484        pipe->readers++;
 485        pipe->writers--;
 486        wake_up_interruptible_sync(&pipe->wait);
 487        kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 488        pipe_unlock(pipe);
 489
 490        /*
 491         * We actually want wait_event_freezable() but then we need
 492         * to clear TIF_SIGPENDING and improve dump_interrupted().
 493         */
 494        wait_event_interruptible(pipe->wait, pipe->readers == 1);
 495
 496        pipe_lock(pipe);
 497        pipe->readers--;
 498        pipe->writers++;
 499        pipe_unlock(pipe);
 500}
 501
 502/*
 503 * umh_pipe_setup
 504 * helper function to customize the process used
 505 * to collect the core in userspace.  Specifically
 506 * it sets up a pipe and installs it as fd 0 (stdin)
 507 * for the process.  Returns 0 on success, or
 508 * PTR_ERR on failure.
 509 * Note that it also sets the core limit to 1.  This
 510 * is a special value that we use to trap recursive
 511 * core dumps
 512 */
 513static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 514{
 515        struct file *files[2];
 516        struct coredump_params *cp = (struct coredump_params *)info->data;
 517        int err = create_pipe_files(files, 0);
 518        if (err)
 519                return err;
 520
 521        cp->file = files[1];
 522
 523        err = replace_fd(0, files[0], 0);
 524        fput(files[0]);
 525        /* and disallow core files too */
 526        current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
 527
 528        return err;
 529}
 530
/*
 * do_coredump - produce a core dump of the current task.
 * @siginfo: signal information; si_signo is audited, handed to
 *           coredump_wait() as the exit code and made available to the
 *           name formatter through cprm.
 *
 * Nothing is dumped when the binary format has no ->core_dump method or
 * the mm is not dumpable. Otherwise the other users of the mm are stopped,
 * the core name is expanded from core_pattern, and the dump is written
 * either into a pipe feeding a usermode helper or into a regular file that
 * passes the checks below. The labels at the end unwind in reverse order
 * of acquisition.
 */
void do_coredump(const siginfo_t *siginfo)
{
        struct core_state core_state;
        struct core_name cn;
        struct mm_struct *mm = current->mm;
        struct linux_binfmt * binfmt;
        const struct cred *old_cred;
        struct cred *cred;
        int retval = 0;
        int ispipe;
        struct files_struct *displaced;
        /* require nonrelative corefile path and be extra careful */
        bool need_suid_safe = false;
        bool core_dumped = false;
        /* Number of pipe dumps currently in flight, across all tasks. */
        static atomic_t core_dump_count = ATOMIC_INIT(0);
        struct coredump_params cprm = {
                .siginfo = siginfo,
                .regs = signal_pt_regs(),
                .limit = rlimit(RLIMIT_CORE),
                /*
                 * We must use the same mm->flags while dumping core to avoid
                 * inconsistency of bit flags, since this flag is not protected
                 * by any locks.
                 */
                .mm_flags = mm->flags,
        };

        audit_core_dumps(siginfo->si_signo);

        binfmt = mm->binfmt;
        if (!binfmt || !binfmt->core_dump)
                goto fail;
        if (!__get_dumpable(cprm.mm_flags))
                goto fail;

        cred = prepare_creds();
        if (!cred)
                goto fail;
        /*
         * We cannot trust fsuid as being the "true" uid of the process
         * nor do we know its entire history. We only know it was tainted
         * so we dump it as root in mode 2, and only into a controlled
         * environment (pipe handler or fully qualified path).
         */
        if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
                /* Setuid core dump mode */
                cred->fsuid = GLOBAL_ROOT_UID;  /* Dump root private */
                need_suid_safe = true;
        }

        retval = coredump_wait(siginfo->si_signo, &core_state);
        if (retval < 0)
                goto fail_creds;

        old_cred = override_creds(cred);

        /* > 0: pipe pattern, 0: file pattern, < 0: formatting failed. */
        ispipe = format_corename(&cn, &cprm);

        if (ispipe) {
                int dump_count;
                char **helper_argv;
                struct subprocess_info *sub_info;

                if (ispipe < 0) {
                        printk(KERN_WARNING "format_corename failed\n");
                        printk(KERN_WARNING "Aborting core\n");
                        goto fail_unlock;
                }

                if (cprm.limit == 1) {
                        /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
                         *
                         * Normally core limits are irrelevant to pipes, since
                         * we're not writing to the file system, but we use
                         * cprm.limit of 1 here as a special value, this is a
                         * consistent way to catch recursive crashes.
                         * We can still crash if the core_pattern binary sets
                         * RLIM_CORE = !1, but it runs as root, and can do
                         * lots of stupid things.
                         *
                         * Note that we use task_tgid_vnr here to grab the pid
                         * of the process group leader.  That way we get the
                         * right pid if a thread in a multi-threaded
                         * core_pattern process dies.
                         */
                        printk(KERN_WARNING
                                "Process %d(%s) has RLIMIT_CORE set to 1\n",
                                task_tgid_vnr(current), current->comm);
                        printk(KERN_WARNING "Aborting core\n");
                        goto fail_unlock;
                }
                cprm.limit = RLIM_INFINITY;

                /*
                 * From here on the in-flight counter has been bumped, so
                 * every exit must pass through fail_dropcount.
                 */
                dump_count = atomic_inc_return(&core_dump_count);
                if (core_pipe_limit && (core_pipe_limit < dump_count)) {
                        printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
                               task_tgid_vnr(current), current->comm);
                        printk(KERN_WARNING "Skipping core dump\n");
                        goto fail_dropcount;
                }

                /* The expanded name is split into the helper's argv. */
                helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL);
                if (!helper_argv) {
                        printk(KERN_WARNING "%s failed to allocate memory\n",
                               __func__);
                        goto fail_dropcount;
                }

                /* Reported if call_usermodehelper_setup() returns NULL. */
                retval = -ENOMEM;
                sub_info = call_usermodehelper_setup(helper_argv[0],
                                                helper_argv, NULL, GFP_KERNEL,
                                                umh_pipe_setup, NULL, &cprm);
                if (sub_info)
                        retval = call_usermodehelper_exec(sub_info,
                                                          UMH_WAIT_EXEC);

                argv_free(helper_argv);
                if (retval) {
                        printk(KERN_INFO "Core dump to |%s pipe failed\n",
                               cn.corename);
                        goto close_fail;
                }
        } else {
                struct inode *inode;
                int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
                                 O_LARGEFILE | O_EXCL;

                if (cprm.limit < binfmt->min_coredump)
                        goto fail_unlock;

                if (need_suid_safe && cn.corename[0] != '/') {
                        printk(KERN_WARNING "Pid %d(%s) can only dump core "\
                                "to fully qualified path!\n",
                                task_tgid_vnr(current), current->comm);
                        printk(KERN_WARNING "Skipping core dump\n");
                        goto fail_unlock;
                }

                /*
                 * Unlink the file if it exists unless this is a SUID
                 * binary - in that case, we're running around with root
                 * privs and don't want to unlink another user's coredump.
                 */
                if (!need_suid_safe) {
                        mm_segment_t old_fs;

                        old_fs = get_fs();
                        set_fs(KERNEL_DS);
                        /*
                         * If it doesn't exist, that's fine. If there's some
                         * other problem, we'll catch it at the filp_open().
                         */
                        (void) sys_unlink((const char __user *)cn.corename);
                        set_fs(old_fs);
                }

                /*
                 * There is a race between unlinking and creating the
                 * file, but if that causes an EEXIST here, that's
                 * fine - another process raced with us while creating
                 * the corefile, and the other process won. To userspace,
                 * what matters is that at least one of the two processes
                 * writes its coredump successfully, not which one.
                 */
                if (need_suid_safe) {
                        /*
                         * Using user namespaces, normal user tasks can change
                         * their current->fs->root to point to arbitrary
                         * directories. Since the intention of the "only dump
                         * with a fully qualified path" rule is to control where
                         * coredumps may be placed using root privileges,
                         * current->fs->root must not be used. Instead, use the
                         * root directory of init_task.
                         */
                        struct path root;

                        task_lock(&init_task);
                        get_fs_root(init_task.fs, &root);
                        task_unlock(&init_task);
                        cprm.file = file_open_root(root.dentry, root.mnt,
                                cn.corename, open_flags, 0600);
                        path_put(&root);
                } else {
                        cprm.file = filp_open(cn.corename, open_flags, 0600);
                }
                /* An error pointer is not a file: skip the close step. */
                if (IS_ERR(cprm.file))
                        goto fail_unlock;

                inode = file_inode(cprm.file);
                /* Refuse files that have additional hard links. */
                if (inode->i_nlink > 1)
                        goto close_fail;
                if (d_unhashed(cprm.file->f_path.dentry))
                        goto close_fail;
                /*
                 * AK: actually i see no reason to not allow this for named
                 * pipes etc, but keep the previous behaviour for now.
                 */
                if (!S_ISREG(inode->i_mode))
                        goto close_fail;
                /*
                 * Don't dump core if the filesystem changed owner or mode
                 * of the file during file creation. This is an issue when
                 * a process dumps core while its cwd is e.g. on a vfat
                 * filesystem.
                 */
                if (!uid_eq(inode->i_uid, current_fsuid()))
                        goto close_fail;
                if ((inode->i_mode & 0677) != 0600)
                        goto close_fail;
                if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
                        goto close_fail;
                if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
                        goto close_fail;
        }

        /* get us an unshared descriptor table; almost always a no-op */
        retval = unshare_files(&displaced);
        if (retval)
                goto close_fail;
        if (displaced)
                put_files_struct(displaced);
        /* The dump itself is skipped when a signal is already pending. */
        if (!dump_interrupted()) {
                file_start_write(cprm.file);
                core_dumped = binfmt->core_dump(&cprm);
                file_end_write(cprm.file);
        }
        if (ispipe && core_pipe_limit)
                wait_for_dump_helpers(cprm.file);
close_fail:
        /* cprm.file is still NULL if the pipe helper never got set up. */
        if (cprm.file)
                filp_close(cprm.file, NULL);
fail_dropcount:
        if (ispipe)
                atomic_dec(&core_dump_count);
fail_unlock:
        kfree(cn.corename);
        /* Releases the threads parked by coredump_wait(). */
        coredump_finish(mm, core_dumped);
        revert_creds(old_cred);
fail_creds:
        put_cred(cred);
fail:
        return;
}
 774
 775/*
 776 * Core dumping helper functions.  These are the only things you should
 777 * do on a core-file: use only these functions to write out all the
 778 * necessary info.
 779 */
 780int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
 781{
 782        struct file *file = cprm->file;
 783        loff_t pos = file->f_pos;
 784        ssize_t n;
 785        if (cprm->written + nr > cprm->limit)
 786                return 0;
 787        while (nr) {
 788                if (dump_interrupted())
 789                        return 0;
 790                n = __kernel_write(file, addr, nr, &pos);
 791                if (n <= 0)
 792                        return 0;
 793                file->f_pos = pos;
 794                cprm->written += n;
 795                nr -= n;
 796        }
 797        return 1;
 798}
 799EXPORT_SYMBOL(dump_emit);
 800
 801int dump_skip(struct coredump_params *cprm, size_t nr)
 802{
 803        static char zeroes[PAGE_SIZE];
 804        struct file *file = cprm->file;
 805        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
 806                if (cprm->written + nr > cprm->limit)
 807                        return 0;
 808                if (dump_interrupted() ||
 809                    file->f_op->llseek(file, nr, SEEK_CUR) < 0)
 810                        return 0;
 811                cprm->written += nr;
 812                return 1;
 813        } else {
 814                while (nr > PAGE_SIZE) {
 815                        if (!dump_emit(cprm, zeroes, PAGE_SIZE))
 816                                return 0;
 817                        nr -= PAGE_SIZE;
 818                }
 819                return dump_emit(cprm, zeroes, nr);
 820        }
 821}
 822EXPORT_SYMBOL(dump_skip);
 823
 824int dump_align(struct coredump_params *cprm, int align)
 825{
 826        unsigned mod = cprm->written & (align - 1);
 827        if (align & (align - 1))
 828                return 0;
 829        return mod ? dump_skip(cprm, align - mod) : 1;
 830}
 831EXPORT_SYMBOL(dump_align);
 832