// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/kernel/ptrace.c
 *
 * (C) Copyright 1999 Linus Torvalds
 *
 * Common interfaces for "ptrace()" which we do not want
 * to continually duplicate across every architecture.
 */

#include <linux/capability.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/ptrace.h>
#include <linux/security.h>
#include <linux/signal.h>
#include <linux/uio.h>
#include <linux/audit.h>
#include <linux/pid_namespace.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/regset.h>
#include <linux/hw_breakpoint.h>
#include <linux/cn_proc.h>
#include <linux/compat.h>
#include <linux/sched/signal.h>

/*
 * Access another process' address space via ptrace.
 * Source/target buffer must be in kernel space.
 * Do not walk the page table directly; use get_user_pages().
 */
int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
		     void *buf, int len, unsigned int gup_flags)
{
	struct mm_struct *mm;
	int ret;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	if (!tsk->ptrace ||
	    (current != tsk->parent) ||
	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
	     !ptracer_capable(tsk, mm->user_ns))) {
		mmput(mm);
		return 0;
	}

	ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
	mmput(mm);

	return ret;
}

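/*
 * Link @child onto @new_parent's ->ptraced list and record the ptracer's
 * credentials.  Called with tasklist_lock held for writing.
 */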
void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
		   const struct cred *ptracer_cred)
{
	BUG_ON(!list_empty(&child->ptrace_entry));
	list_add(&child->ptrace_entry, &new_parent->ptraced);
	child->parent = new_parent;
	child->ptracer_cred = get_cred(ptracer_cred);
}

/*
 * ptrace a task: make the debugger its new parent and
 * move it to the ptrace list.
 *
 * Must be called with the tasklist lock write-held.
 */
static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
{
	__ptrace_link(child, new_parent, current_cred());
}

/**
 * __ptrace_unlink - unlink ptracee and restore its execution state
 * @child: ptracee to be unlinked
 *
 * Remove @child from the ptrace list, move it back to the original parent,
 * and restore the execution state so that it conforms to the group stop
 * state.
 *
 * Unlinking can happen via two paths - explicit PTRACE_DETACH or ptracer
 * exiting.  For PTRACE_DETACH, unless the ptracee has been killed between
 * ptrace_check_attach() and here, it's guaranteed to be in TASK_TRACED.
 * If the ptracer is exiting, the ptracee can be in any state.
 *
 * After detach, the ptracee should be in a state which conforms to the
 * group stop.  If the group is stopped or in the process of stopping, the
 * ptracee should be put into TASK_STOPPED; otherwise, it should be woken
 * up from TASK_TRACED.
 *
 * If the ptracee is in TASK_TRACED and needs to be moved to TASK_STOPPED,
 * it goes through TRACED -> RUNNING -> STOPPED transition which is similar
 * to but in the opposite direction of what happens while attaching to a
 * stopped task.  However, in this direction, the intermediate RUNNING
 * state is not hidden even from the current ptracer and if it immediately
 * re-attaches and performs a WNOHANG wait(2), it may fail.
 *
 * CONTEXT:
 * write_lock_irq(tasklist_lock)
 */
void __ptrace_unlink(struct task_struct *child)
{
	const struct cred *old_cred;
	BUG_ON(!child->ptrace);

	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);

	child->parent = child->real_parent;
	list_del_init(&child->ptrace_entry);
	old_cred = child->ptracer_cred;
	child->ptracer_cred = NULL;
	put_cred(old_cred);

	spin_lock(&child->sighand->siglock);
	child->ptrace = 0;
	/*
	 * Clear all pending traps and TRAPPING.  TRAPPING should be
	 * cleared regardless of JOBCTL_STOP_PENDING.  Do it explicitly.
	 */
	task_clear_jobctl_pending(child, JOBCTL_TRAP_MASK);
	task_clear_jobctl_trapping(child);

	/*
	 * Reinstate JOBCTL_STOP_PENDING if group stop is in effect and
	 * @child isn't dead.
	 */
	if (!(child->flags & PF_EXITING) &&
	    (child->signal->flags & SIGNAL_STOP_STOPPED ||
	     child->signal->group_stop_count)) {
		child->jobctl |= JOBCTL_STOP_PENDING;

		/*
		 * This is only possible if this thread was cloned by the
		 * traced task running in the stopped group, set the signal
		 * for the future reports.
		 * FIXME: we should change ptrace_init_task() to handle this
		 * case.
		 */
		if (!(child->jobctl & JOBCTL_STOP_SIGMASK))
			child->jobctl |= SIGSTOP;
	}

	/*
	 * If transition to TASK_STOPPED is pending or in TASK_TRACED, kick
	 * @child in the butt.  Note that @resume should be used iff @child
	 * is in TASK_TRACED; otherwise, we might unduly disrupt
	 * TASK_KILLABLE sleeps.
	 */
	if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child))
		ptrace_signal_wake_up(child, true);

	spin_unlock(&child->sighand->siglock);
}

/* Ensure that nothing can wake it up, even SIGKILL */
static bool ptrace_freeze_traced(struct task_struct *task)
{
	bool ret = false;

	/* Lockless, nobody but us can set this flag */
	if (task->jobctl & JOBCTL_LISTENING)
		return ret;

	spin_lock_irq(&task->sighand->siglock);
	if (task_is_traced(task) && !__fatal_signal_pending(task)) {
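		/*
		 * Drop TASK_WAKEKILL: __TASK_TRACED is TASK_TRACED minus
		 * the wakekill bit, so not even SIGKILL can wake the
		 * tracee until ptrace_unfreeze_traced() restores it.
		 */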
		task->state = __TASK_TRACED;
		ret = true;
	}
	spin_unlock_irq(&task->sighand->siglock);

	return ret;
}

static void ptrace_unfreeze_traced(struct task_struct *task)
{
	if (task->state != __TASK_TRACED)
		return;

	WARN_ON(!task->ptrace || task->parent != current);

	/*
	 * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely.
	 * Recheck state under the lock to close this race.
	 */
	spin_lock_irq(&task->sighand->siglock);
	if (task->state == __TASK_TRACED) {
		if (__fatal_signal_pending(task))
			wake_up_state(task, __TASK_TRACED);
		else
			task->state = TASK_TRACED;
	}
	spin_unlock_irq(&task->sighand->siglock);
}

/**
 * ptrace_check_attach - check whether ptracee is ready for ptrace operation
 * @child: ptracee to check for
 * @ignore_state: don't check whether @child is currently %TASK_TRACED
 *
 * Check whether @child is being ptraced by %current and ready for further
 * ptrace operations.  If @ignore_state is %false, @child also should be in
 * %TASK_TRACED state and on return the child is guaranteed to be traced
 * and not executing.  If @ignore_state is %true, @child can be in any
 * state.
 *
 * CONTEXT:
 * Grabs and releases tasklist_lock and @child->sighand->siglock.
 *
 * RETURNS:
 * 0 on success, -ESRCH if %child is not ready.
 */
static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
{
	int ret = -ESRCH;

	/*
	 * We take the read lock around doing both checks to close a
	 * possible race where someone else was tracing our child and
	 * detached between these two checks.  After this locked check,
	 * we are sure that this is our traced child and that can only
	 * be changed by us so it's not changing right after this.
	 */
	read_lock(&tasklist_lock);
	if (child->ptrace && child->parent == current) {
		WARN_ON(child->state == __TASK_TRACED);
		/*
		 * child->sighand can't be NULL, release_task()
		 * does ptrace_unlink() before __exit_signal().
		 */
		if (ignore_state || ptrace_freeze_traced(child))
			ret = 0;
	}
	read_unlock(&tasklist_lock);

	if (!ret && !ignore_state) {
		if (!wait_task_inactive(child, __TASK_TRACED)) {
			/*
			 * This can only happen if may_ptrace_stop() fails and
			 * ptrace_stop() changes ->state back to TASK_RUNNING,
			 * so we should not worry about leaking __TASK_TRACED.
			 */
			WARN_ON(child->state == __TASK_TRACED);
			ret = -ESRCH;
		}
	}

	return ret;
}

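/*
 * Check for CAP_SYS_PTRACE relative to @ns.  PTRACE_MODE_NOAUDIT callers
 * (access checks that should not leave an audit record) use the
 * non-auditing capability test.
 */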
static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
{
	if (mode & PTRACE_MODE_NOAUDIT)
		return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
	else
		return has_ns_capability(current, ns, CAP_SYS_PTRACE);
}

/* Returns 0 on success, -errno on denial. */
static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
{
	const struct cred *cred = current_cred(), *tcred;
	struct mm_struct *mm;
	kuid_t caller_uid;
	kgid_t caller_gid;

	if (!(mode & PTRACE_MODE_FSCREDS) == !(mode & PTRACE_MODE_REALCREDS)) {
		WARN(1, "denying ptrace access check without PTRACE_MODE_*CREDS\n");
		return -EPERM;
	}

	/* May we inspect the given task?
	 * This check is used both for attaching with ptrace
	 * and for allowing access to sensitive information in /proc.
	 *
	 * ptrace_attach denies several cases that /proc allows
	 * because setting up the necessary parent/child relationship
	 * or halting the specified task is impossible.
	 */

	/* Don't let security modules deny introspection */
	if (same_thread_group(task, current))
		return 0;
	rcu_read_lock();
	if (mode & PTRACE_MODE_FSCREDS) {
		caller_uid = cred->fsuid;
		caller_gid = cred->fsgid;
	} else {
		/*
		 * Using the euid would make more sense here, but something
		 * in userland might rely on the old behavior, and this
		 * shouldn't be a security problem since
		 * PTRACE_MODE_REALCREDS implies that the caller explicitly
		 * used a syscall that requests access to another process
		 * (and not a filesystem syscall to procfs).
		 */
		caller_uid = cred->uid;
		caller_gid = cred->gid;
	}
	tcred = __task_cred(task);
	if (uid_eq(caller_uid, tcred->euid) &&
	    uid_eq(caller_uid, tcred->suid) &&
	    uid_eq(caller_uid, tcred->uid)  &&
	    gid_eq(caller_gid, tcred->egid) &&
	    gid_eq(caller_gid, tcred->sgid) &&
	    gid_eq(caller_gid, tcred->gid))
		goto ok;
	if (ptrace_has_cap(tcred->user_ns, mode))
		goto ok;
	rcu_read_unlock();
	return -EPERM;
ok:
	rcu_read_unlock();
	/*
	 * If a task drops privileges and becomes nondumpable (through a syscall
	 * like setresuid()) while we are trying to access it, we must ensure
	 * that the dumpability is read after the credentials; otherwise,
	 * we may be able to attach to a task that we shouldn't be able to
	 * attach to (as if the task had dropped privileges without becoming
	 * nondumpable).
	 * Pairs with a write barrier in commit_creds().
	 */
	smp_rmb();
	mm = task->mm;
	if (mm &&
	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
	     !ptrace_has_cap(mm->user_ns, mode)))
	    return -EPERM;

	return security_ptrace_access_check(task, mode);
}

bool ptrace_may_access(struct task_struct *task, unsigned int mode)
{
	int err;
	task_lock(task);
	err = __ptrace_may_access(task, mode);
	task_unlock(task);
	return !err;
}

static int ptrace_attach(struct task_struct *task, long request,
			 unsigned long addr,
			 unsigned long flags)
{
	bool seize = (request == PTRACE_SEIZE);
	int retval;

	retval = -EIO;
	if (seize) {
		if (addr != 0)
			goto out;
		if (flags & ~(unsigned long)PTRACE_O_MASK)
			goto out;
		flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT);
	} else {
		flags = PT_PTRACED;
	}

	audit_ptrace(task);

	retval = -EPERM;
	if (unlikely(task->flags & PF_KTHREAD))
		goto out;
	if (same_thread_group(task, current))
		goto out;

	/*
	 * Protect exec's credential calculations against our interference;
	 * SUID, SGID and LSM creds get determined differently
	 * under ptrace.
	 */
	retval = -ERESTARTNOINTR;
	if (mutex_lock_interruptible(&task->signal->cred_guard_mutex))
		goto out;

	task_lock(task);
	retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
	task_unlock(task);
	if (retval)
		goto unlock_creds;

	write_lock_irq(&tasklist_lock);
	retval = -EPERM;
	if (unlikely(task->exit_state))
		goto unlock_tasklist;
	if (task->ptrace)
		goto unlock_tasklist;

	task->ptrace = flags;

	ptrace_link(task, current);

	/* SEIZE doesn't trap tracee on attach */
	if (!seize)
		send_sig_info(SIGSTOP, SEND_SIG_PRIV, task);

	spin_lock(&task->sighand->siglock);

	/*
	 * If the task is already STOPPED, set JOBCTL_TRAP_STOP and
	 * TRAPPING, and kick it so that it transits to TRACED.  TRAPPING
	 * will be cleared if the child completes the transition or any
	 * event which clears the group stop states happens.  We'll wait
	 * for the transition to complete before returning from this
	 * function.
	 *
	 * This hides STOPPED -> RUNNING -> TRACED transition from the
	 * attaching thread but a different thread in the same group can
	 * still observe the transient RUNNING state.  IOW, if another
	 * thread's WNOHANG wait(2) on the stopped tracee races against
	 * ATTACH, the wait(2) may fail due to the transient RUNNING.
	 *
	 * The following task_is_stopped() test is safe as both transitions
	 * in and out of STOPPED are protected by siglock.
	 */
	if (task_is_stopped(task) &&
	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
		signal_wake_up_state(task, __TASK_STOPPED);

	spin_unlock(&task->sighand->siglock);

	retval = 0;
unlock_tasklist:
	write_unlock_irq(&tasklist_lock);
unlock_creds:
	mutex_unlock(&task->signal->cred_guard_mutex);
out:
	if (!retval) {
		/*
		 * We do not bother to change retval or clear JOBCTL_TRAPPING
		 * if wait_on_bit() was interrupted by SIGKILL. The tracer will
		 * not return to user-mode, it will exit and clear this bit in
		 * __ptrace_unlink() if it wasn't already cleared by the tracee;
		 * and until then nobody can ptrace this task.
		 */
		wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE);
		proc_ptrace_connector(task, PTRACE_ATTACH);
	}

	return retval;
}
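
/*
 * Illustrative userspace view (not kernel code): a debugger typically
 * pairs PTRACE_ATTACH with a wait for the initial SIGSTOP, e.g.
 *
 *	ptrace(PTRACE_ATTACH, pid, NULL, NULL);
 *	waitpid(pid, &status, 0);	// tracee is now stopped and traced
 *
 * PTRACE_SEIZE attaches the same way but leaves the tracee running; the
 * tracer then uses PTRACE_INTERRUPT to stop it on demand.
 */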

/**
 * ptrace_traceme  --  helper for PTRACE_TRACEME
 *
 * Performs checks and sets PT_PTRACED.
 * Should be used by all ptrace implementations for PTRACE_TRACEME.
 */
static int ptrace_traceme(void)
{
	int ret = -EPERM;

	write_lock_irq(&tasklist_lock);
	/* Are we already being traced? */
	if (!current->ptrace) {
		ret = security_ptrace_traceme(current->parent);
		/*
		 * Check PF_EXITING to ensure ->real_parent has not passed
		 * exit_ptrace(). Otherwise we don't report the error but
		 * pretend ->real_parent untraces us right after return.
		 */
		if (!ret && !(current->real_parent->flags & PF_EXITING)) {
			current->ptrace = PT_PTRACED;
			ptrace_link(current, current->real_parent);
		}
	}
	write_unlock_irq(&tasklist_lock);

	return ret;
}

/*
 * Called with irqs disabled. Returns true if children should reap
 * themselves.
 */
static int ignoring_children(struct sighand_struct *sigh)
{
	int ret;
	spin_lock(&sigh->siglock);
	ret = (sigh->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) ||
	      (sigh->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT);
	spin_unlock(&sigh->siglock);
	return ret;
}

/*
 * Called with tasklist_lock held for writing.
 * Unlink a traced task, and clean it up if it was a traced zombie.
 * Return true if it needs to be reaped with release_task().
 * (We can't call release_task() here because we already hold tasklist_lock.)
 *
 * If it's a zombie, our attachedness prevented normal parent notification
 * or self-reaping.  Do notification now if it would have happened earlier.
 * If it should reap itself, return true.
 *
 * If it's our own child, there is no notification to do. But if our normal
 * children self-reap, then this child was prevented by ptrace and we must
 * reap it now, in that case we must also wake up sub-threads sleeping in
 * do_wait().
 */
static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
{
	bool dead;

	__ptrace_unlink(p);

	if (p->exit_state != EXIT_ZOMBIE)
		return false;

	dead = !thread_group_leader(p);

	if (!dead && thread_group_empty(p)) {
		if (!same_thread_group(p->real_parent, tracer))
			dead = do_notify_parent(p, p->exit_signal);
		else if (ignoring_children(tracer->sighand)) {
			__wake_up_parent(p, tracer);
			dead = true;
		}
	}
	/* Mark it as in the process of being reaped. */
	if (dead)
		p->exit_state = EXIT_DEAD;
	return dead;
}

static int ptrace_detach(struct task_struct *child, unsigned int data)
{
	if (!valid_signal(data))
		return -EIO;

	/* Architecture-specific hardware disable .. */
	ptrace_disable(child);

	write_lock_irq(&tasklist_lock);
	/*
	 * We rely on ptrace_freeze_traced(). It can't be killed and
	 * untraced by another thread, it can't be a zombie.
	 */
	WARN_ON(!child->ptrace || child->exit_state);
	/*
	 * tasklist_lock avoids the race with wait_task_stopped(), see
	 * the comment in ptrace_resume().
	 */
	child->exit_code = data;
	__ptrace_detach(current, child);
	write_unlock_irq(&tasklist_lock);

	proc_ptrace_connector(child, PTRACE_DETACH);

	return 0;
}

/*
 * Detach all tasks we were using ptrace on. Called with tasklist_lock held
 * for writing.
 */
void exit_ptrace(struct task_struct *tracer, struct list_head *dead)
{
	struct task_struct *p, *n;

	list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) {
		if (unlikely(p->ptrace & PT_EXITKILL))
			send_sig_info(SIGKILL, SEND_SIG_PRIV, p);

		if (__ptrace_detach(tracer, p))
			list_add(&p->ptrace_entry, dead);
	}
}

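/*
 * Copy @len bytes from @src in @tsk's address space into the @dst user
 * buffer, in chunks of up to 128 bytes.  Returns the number of bytes
 * copied, or -EIO/-EFAULT if nothing could be transferred.
 */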
int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
{
	int copied = 0;

	while (len > 0) {
		char buf[128];
		int this_len, retval;

		this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
		retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE);

		if (!retval) {
			if (copied)
				break;
			return -EIO;
		}
		if (copy_to_user(dst, buf, retval))
			return -EFAULT;
		copied += retval;
		src += retval;
		dst += retval;
		len -= retval;
	}
	return copied;
}

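/*
 * Mirror of ptrace_readdata(): copy @len bytes from the @src user buffer
 * into @dst in @tsk's address space.  Returns bytes written or an errno.
 */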
int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len)
{
	int copied = 0;

	while (len > 0) {
		char buf[128];
		int this_len, retval;

		this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
		if (copy_from_user(buf, src, this_len))
			return -EFAULT;
		retval = ptrace_access_vm(tsk, dst, buf, this_len,
				FOLL_FORCE | FOLL_WRITE);
		if (!retval) {
			if (copied)
				break;
			return -EIO;
		}
		copied += retval;
		src += retval;
		dst += retval;
		len -= retval;
	}
	return copied;
}

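/*
 * Update the PTRACE_O_* option bits, which are stored in ->ptrace shifted
 * up by PT_OPT_FLAG_SHIFT.  PTRACE_O_SUSPEND_SECCOMP is reserved for
 * privileged checkpoint/restore use.
 */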
static int ptrace_setoptions(struct task_struct *child, unsigned long data)
{
	unsigned flags;

	if (data & ~(unsigned long)PTRACE_O_MASK)
		return -EINVAL;

	if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
		if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
		    !IS_ENABLED(CONFIG_SECCOMP))
			return -EINVAL;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED ||
		    current->ptrace & PT_SUSPEND_SECCOMP)
			return -EPERM;
	}

	/* Avoid intermediate state when all opts are cleared */
	flags = child->ptrace;
	flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT);
	flags |= (data << PT_OPT_FLAG_SHIFT);
	child->ptrace = flags;

	return 0;
}

static int ptrace_getsiginfo(struct task_struct *child, kernel_siginfo_t *info)
{
	unsigned long flags;
	int error = -ESRCH;

	if (lock_task_sighand(child, &flags)) {
		error = -EINVAL;
		if (likely(child->last_siginfo != NULL)) {
			copy_siginfo(info, child->last_siginfo);
			error = 0;
		}
		unlock_task_sighand(child, &flags);
	}
	return error;
}

static int ptrace_setsiginfo(struct task_struct *child, const kernel_siginfo_t *info)
{
	unsigned long flags;
	int error = -ESRCH;

	if (lock_task_sighand(child, &flags)) {
		error = -EINVAL;
		if (likely(child->last_siginfo != NULL)) {
			copy_siginfo(child->last_siginfo, info);
			error = 0;
		}
		unlock_task_sighand(child, &flags);
	}
	return error;
}

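/*
 * PTRACE_PEEKSIGINFO: copy up to @arg.nr queued siginfo entries to
 * userspace, starting @arg.off entries into the tracee's per-task (or,
 * with PTRACE_PEEKSIGINFO_SHARED, shared) pending queue.
 */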
static int ptrace_peek_siginfo(struct task_struct *child,
				unsigned long addr,
				unsigned long data)
{
	struct ptrace_peeksiginfo_args arg;
	struct sigpending *pending;
	struct sigqueue *q;
	int ret, i;

	ret = copy_from_user(&arg, (void __user *) addr,
				sizeof(struct ptrace_peeksiginfo_args));
	if (ret)
		return -EFAULT;

	if (arg.flags & ~PTRACE_PEEKSIGINFO_SHARED)
		return -EINVAL; /* unknown flags */

	if (arg.nr < 0)
		return -EINVAL;

	/* Ensure arg.off fits in an unsigned long */
	if (arg.off > ULONG_MAX)
		return 0;

	if (arg.flags & PTRACE_PEEKSIGINFO_SHARED)
		pending = &child->signal->shared_pending;
	else
		pending = &child->pending;

	for (i = 0; i < arg.nr; ) {
		kernel_siginfo_t info;
		unsigned long off = arg.off + i;
		bool found = false;

		spin_lock_irq(&child->sighand->siglock);
		list_for_each_entry(q, &pending->list, list) {
			if (!off--) {
				found = true;
				copy_siginfo(&info, &q->info);
				break;
			}
		}
		spin_unlock_irq(&child->sighand->siglock);

		if (!found) /* beyond the end of the list */
			break;

#ifdef CONFIG_COMPAT
		if (unlikely(in_compat_syscall())) {
			compat_siginfo_t __user *uinfo = compat_ptr(data);

			if (copy_siginfo_to_user32(uinfo, &info)) {
				ret = -EFAULT;
				break;
			}

		} else
#endif
		{
			siginfo_t __user *uinfo = (siginfo_t __user *) data;

			if (copy_siginfo_to_user(uinfo, &info)) {
				ret = -EFAULT;
				break;
			}
		}

		data += sizeof(siginfo_t);
		i++;

		if (signal_pending(current))
			break;

		cond_resched();
	}

	if (i > 0)
		return i;

	return ret;
}

#ifdef PTRACE_SINGLESTEP
#define is_singlestep(request)		((request) == PTRACE_SINGLESTEP)
#else
#define is_singlestep(request)		0
#endif

#ifdef PTRACE_SINGLEBLOCK
#define is_singleblock(request)		((request) == PTRACE_SINGLEBLOCK)
#else
#define is_singleblock(request)		0
#endif

#ifdef PTRACE_SYSEMU
#define is_sysemu_singlestep(request)	((request) == PTRACE_SYSEMU_SINGLESTEP)
#else
#define is_sysemu_singlestep(request)	0
#endif

static int ptrace_resume(struct task_struct *child, long request,
			 unsigned long data)
{
	bool need_siglock;

	if (!valid_signal(data))
		return -EIO;

	if (request == PTRACE_SYSCALL)
		set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
	else
		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);

#ifdef TIF_SYSCALL_EMU
	if (request == PTRACE_SYSEMU || request == PTRACE_SYSEMU_SINGLESTEP)
		set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
	else
		clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif

	if (is_singleblock(request)) {
		if (unlikely(!arch_has_block_step()))
			return -EIO;
		user_enable_block_step(child);
	} else if (is_singlestep(request) || is_sysemu_singlestep(request)) {
		if (unlikely(!arch_has_single_step()))
			return -EIO;
		user_enable_single_step(child);
	} else {
		user_disable_single_step(child);
	}

	/*
	 * Change ->exit_code and ->state under siglock to avoid the race
	 * with wait_task_stopped() in between; a non-zero ->exit_code will
	 * wrongly look like another report from tracee.
	 *
	 * Note that we need siglock even if ->exit_code == data and/or this
	 * status was not reported yet, the new status must not be cleared by
	 * wait_task_stopped() after resume.
	 *
	 * If data == 0 we do not care if wait_task_stopped() reports the old
	 * status and clears the code too; this can't race with the tracee, it
	 * takes siglock after resume.
	 */
	need_siglock = data && !thread_group_empty(current);
	if (need_siglock)
		spin_lock_irq(&child->sighand->siglock);
	child->exit_code = data;
	wake_up_state(child, __TASK_TRACED);
	if (need_siglock)
		spin_unlock_irq(&child->sighand->siglock);

	return 0;
}

#ifdef CONFIG_HAVE_ARCH_TRACEHOOK

static const struct user_regset *
find_regset(const struct user_regset_view *view, unsigned int type)
{
	const struct user_regset *regset;
	int n;

	for (n = 0; n < view->n; ++n) {
		regset = view->regsets + n;
		if (regset->core_note_type == type)
			return regset;
	}

	return NULL;
}

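/*
 * Common PTRACE_GETREGSET/PTRACE_SETREGSET handler: look up the regset by
 * its NT_* @type, clamp the iovec length to the regset's size and copy the
 * registers to or from the user buffer.
 */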
static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
			 struct iovec *kiov)
{
	const struct user_regset_view *view = task_user_regset_view(task);
	const struct user_regset *regset = find_regset(view, type);
	int regset_no;

	if (!regset || (kiov->iov_len % regset->size) != 0)
		return -EINVAL;

	regset_no = regset - view->regsets;
	kiov->iov_len = min(kiov->iov_len,
			    (__kernel_size_t) (regset->n * regset->size));

	if (req == PTRACE_GETREGSET)
		return copy_regset_to_user(task, view, regset_no, 0,
					   kiov->iov_len, kiov->iov_base);
	else
		return copy_regset_from_user(task, view, regset_no, 0,
					     kiov->iov_len, kiov->iov_base);
}

/*
 * This is declared in linux/regset.h and defined in machine-dependent
 * code.  We put the export here, near the primary machine-neutral use,
 * to ensure no machine forgets it.
 */
EXPORT_SYMBOL_GPL(task_user_regset_view);
#endif

int ptrace_request(struct task_struct *child, long request,
		   unsigned long addr, unsigned long data)
{
	bool seized = child->ptrace & PT_SEIZED;
	int ret = -EIO;
	kernel_siginfo_t siginfo, *si;
	void __user *datavp = (void __user *) data;
	unsigned long __user *datalp = datavp;
	unsigned long flags;

	switch (request) {
	case PTRACE_PEEKTEXT:
	case PTRACE_PEEKDATA:
		return generic_ptrace_peekdata(child, addr, data);
	case PTRACE_POKETEXT:
	case PTRACE_POKEDATA:
		return generic_ptrace_pokedata(child, addr, data);

#ifdef PTRACE_OLDSETOPTIONS
	case PTRACE_OLDSETOPTIONS:
#endif
	case PTRACE_SETOPTIONS:
		ret = ptrace_setoptions(child, data);
		break;
	case PTRACE_GETEVENTMSG:
		ret = put_user(child->ptrace_message, datalp);
		break;

	case PTRACE_PEEKSIGINFO:
		ret = ptrace_peek_siginfo(child, addr, data);
		break;

	case PTRACE_GETSIGINFO:
		ret = ptrace_getsiginfo(child, &siginfo);
		if (!ret)
			ret = copy_siginfo_to_user(datavp, &siginfo);
		break;

	case PTRACE_SETSIGINFO:
		ret = copy_siginfo_from_user(&siginfo, datavp);
		if (!ret)
			ret = ptrace_setsiginfo(child, &siginfo);
		break;

	case PTRACE_GETSIGMASK: {
		sigset_t *mask;

		if (addr != sizeof(sigset_t)) {
			ret = -EINVAL;
			break;
		}

		if (test_tsk_restore_sigmask(child))
			mask = &child->saved_sigmask;
		else
			mask = &child->blocked;

		if (copy_to_user(datavp, mask, sizeof(sigset_t)))
			ret = -EFAULT;
		else
			ret = 0;

		break;
	}

	case PTRACE_SETSIGMASK: {
		sigset_t new_set;

		if (addr != sizeof(sigset_t)) {
			ret = -EINVAL;
			break;
		}

		if (copy_from_user(&new_set, datavp, sizeof(sigset_t))) {
			ret = -EFAULT;
			break;
		}

		sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP));

		/*
		 * Every thread does recalc_sigpending() after resume, so
		 * retarget_shared_pending() and recalc_sigpending() are not
		 * called here.
		 */
		spin_lock_irq(&child->sighand->siglock);
		child->blocked = new_set;
		spin_unlock_irq(&child->sighand->siglock);

		clear_tsk_restore_sigmask(child);

		ret = 0;
		break;
	}

	case PTRACE_INTERRUPT:
		/*
		 * Stop tracee without any side-effect on signal or job
		 * control.  At least one trap is guaranteed to happen
		 * after this request.  If @child is already trapped, the
		 * current trap is not disturbed and another trap will
		 * happen after the current trap is ended with PTRACE_CONT.
		 *
		 * The actual trap might not be PTRACE_EVENT_STOP trap but
		 * the pending condition is cleared regardless.
		 */
		if (unlikely(!seized || !lock_task_sighand(child, &flags)))
			break;

		/*
		 * INTERRUPT doesn't disturb an existing trap, with one
		 * exception: if the ptracer issued LISTEN for the current
		 * STOP, this INTERRUPT should clear LISTEN and re-trap
		 * the tracee into STOP.
		 */
		if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP)))
			ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);

		unlock_task_sighand(child, &flags);
		ret = 0;
		break;

	case PTRACE_LISTEN:
		/*
		 * Listen for events.  The tracee must be in STOP.  It's not
		 * resumed per se but is no longer considered to be in TRACED
		 * by wait(2) or ptrace(2).  If an async event (e.g. group
		 * stop state change) happens, the tracee will enter the STOP
		 * trap again.  Alternatively, the ptracer can issue INTERRUPT
		 * to finish listening and re-trap the tracee into STOP.
		 */
		if (unlikely(!seized || !lock_task_sighand(child, &flags)))
			break;

		si = child->last_siginfo;
		if (likely(si && (si->si_code >> 8) == PTRACE_EVENT_STOP)) {
			child->jobctl |= JOBCTL_LISTENING;
			/*
			 * If NOTIFY is set, it means event happened between
			 * start of this trap and now.  Trigger re-trap.
			 */
			if (child->jobctl & JOBCTL_TRAP_NOTIFY)
				ptrace_signal_wake_up(child, true);
			ret = 0;
		}
		unlock_task_sighand(child, &flags);
		break;

	case PTRACE_DETACH:	 /* detach a process that was attached. */
		ret = ptrace_detach(child, data);
		break;

#ifdef CONFIG_BINFMT_ELF_FDPIC
	case PTRACE_GETFDPIC: {
		struct mm_struct *mm = get_task_mm(child);
		unsigned long tmp = 0;

		ret = -ESRCH;
		if (!mm)
			break;

		switch (addr) {
		case PTRACE_GETFDPIC_EXEC:
			tmp = mm->context.exec_fdpic_loadmap;
			break;
		case PTRACE_GETFDPIC_INTERP:
			tmp = mm->context.interp_fdpic_loadmap;
			break;
		default:
			break;
		}
		mmput(mm);

		ret = put_user(tmp, datalp);
		break;
	}
#endif

#ifdef PTRACE_SINGLESTEP
	case PTRACE_SINGLESTEP:
#endif
#ifdef PTRACE_SINGLEBLOCK
	case PTRACE_SINGLEBLOCK:
#endif
#ifdef PTRACE_SYSEMU
	case PTRACE_SYSEMU:
	case PTRACE_SYSEMU_SINGLESTEP:
#endif
	case PTRACE_SYSCALL:
	case PTRACE_CONT:
		return ptrace_resume(child, request, data);

	case PTRACE_KILL:
		if (child->exit_state)	/* already dead */
			return 0;
		return ptrace_resume(child, request, SIGKILL);

#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
	case PTRACE_GETREGSET:
	case PTRACE_SETREGSET: {
		struct iovec kiov;
		struct iovec __user *uiov = datavp;

		if (!access_ok(uiov, sizeof(*uiov)))
			return -EFAULT;

		if (__get_user(kiov.iov_base, &uiov->iov_base) ||
		    __get_user(kiov.iov_len, &uiov->iov_len))
			return -EFAULT;

		ret = ptrace_regset(child, request, addr, &kiov);
		if (!ret)
			ret = __put_user(kiov.iov_len, &uiov->iov_len);
		break;
	}
#endif

	case PTRACE_SECCOMP_GET_FILTER:
		ret = seccomp_get_filter(child, addr, datavp);
		break;

	case PTRACE_SECCOMP_GET_METADATA:
		ret = seccomp_get_metadata(child, addr, datavp);
		break;

	default:
		break;
	}

	return ret;
}

#ifndef arch_ptrace_attach
#define arch_ptrace_attach(child)	do { } while (0)
#endif

SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
		unsigned long, data)
{
	struct task_struct *child;
	long ret;

	if (request == PTRACE_TRACEME) {
		ret = ptrace_traceme();
		if (!ret)
			arch_ptrace_attach(current);
		goto out;
	}

	child = find_get_task_by_vpid(pid);
	if (!child) {
		ret = -ESRCH;
		goto out;
	}

	if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
		ret = ptrace_attach(child, request, addr, data);
		/*
		 * Some architectures need to do book-keeping after
		 * a ptrace attach.
		 */
		if (!ret)
			arch_ptrace_attach(child);
		goto out_put_task_struct;
	}

	ret = ptrace_check_attach(child, request == PTRACE_KILL ||
				  request == PTRACE_INTERRUPT);
	if (ret < 0)
		goto out_put_task_struct;

	ret = arch_ptrace(child, request, addr, data);
	if (ret || request != PTRACE_DETACH)
		ptrace_unfreeze_traced(child);

 out_put_task_struct:
	put_task_struct(child);
 out:
	return ret;
}

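/*
 * Default PTRACE_PEEKDATA/PEEKTEXT implementation: read one word from the
 * tracee at @addr and store it at the user address in @data.
 */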
int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
			    unsigned long data)
{
	unsigned long tmp;
	int copied;

	copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
	if (copied != sizeof(tmp))
		return -EIO;
	return put_user(tmp, (unsigned long __user *)data);
}

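/*
 * Default PTRACE_POKEDATA/POKETEXT implementation: write the word passed
 * in @data into the tracee at @addr.
 */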
int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
			    unsigned long data)
{
	int copied;

	copied = ptrace_access_vm(tsk, addr, &data, sizeof(data),
			FOLL_FORCE | FOLL_WRITE);
	return (copied == sizeof(data)) ? 0 : -EIO;
}

#if defined CONFIG_COMPAT

int compat_ptrace_request(struct task_struct *child, compat_long_t request,
			  compat_ulong_t addr, compat_ulong_t data)
{
	compat_ulong_t __user *datap = compat_ptr(data);
	compat_ulong_t word;
	kernel_siginfo_t siginfo;
	int ret;

	switch (request) {
	case PTRACE_PEEKTEXT:
	case PTRACE_PEEKDATA:
		ret = ptrace_access_vm(child, addr, &word, sizeof(word),
				FOLL_FORCE);
		if (ret != sizeof(word))
			ret = -EIO;
		else
			ret = put_user(word, datap);
		break;

	case PTRACE_POKETEXT:
	case PTRACE_POKEDATA:
		ret = ptrace_access_vm(child, addr, &data, sizeof(data),
				FOLL_FORCE | FOLL_WRITE);
		ret = (ret != sizeof(data) ? -EIO : 0);
		break;

	case PTRACE_GETEVENTMSG:
		ret = put_user((compat_ulong_t) child->ptrace_message, datap);
		break;

	case PTRACE_GETSIGINFO:
		ret = ptrace_getsiginfo(child, &siginfo);
		if (!ret)
			ret = copy_siginfo_to_user32(
				(struct compat_siginfo __user *) datap,
				&siginfo);
		break;

	case PTRACE_SETSIGINFO:
		ret = copy_siginfo_from_user32(
			&siginfo, (struct compat_siginfo __user *) datap);
		if (!ret)
			ret = ptrace_setsiginfo(child, &siginfo);
		break;
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
	case PTRACE_GETREGSET:
	case PTRACE_SETREGSET:
	{
		struct iovec kiov;
		struct compat_iovec __user *uiov =
			(struct compat_iovec __user *) datap;
		compat_uptr_t ptr;
		compat_size_t len;

		if (!access_ok(uiov, sizeof(*uiov)))
			return -EFAULT;

		if (__get_user(ptr, &uiov->iov_base) ||
		    __get_user(len, &uiov->iov_len))
			return -EFAULT;

		kiov.iov_base = compat_ptr(ptr);
		kiov.iov_len = len;

		ret = ptrace_regset(child, request, addr, &kiov);
		if (!ret)
			ret = __put_user(kiov.iov_len, &uiov->iov_len);
		break;
	}
#endif

	default:
		ret = ptrace_request(child, request, addr, data);
	}

	return ret;
}

COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid,
		       compat_long_t, addr, compat_long_t, data)
{
	struct task_struct *child;
	long ret;

	if (request == PTRACE_TRACEME) {
		ret = ptrace_traceme();
		goto out;
	}

	child = find_get_task_by_vpid(pid);
	if (!child) {
		ret = -ESRCH;
		goto out;
	}

	if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
		ret = ptrace_attach(child, request, addr, data);
		/*
		 * Some architectures need to do book-keeping after
		 * a ptrace attach.
		 */
		if (!ret)
			arch_ptrace_attach(child);
		goto out_put_task_struct;
	}

	ret = ptrace_check_attach(child, request == PTRACE_KILL ||
				  request == PTRACE_INTERRUPT);
	if (!ret) {
		ret = compat_arch_ptrace(child, request, addr, data);
		if (ret || request != PTRACE_DETACH)
			ptrace_unfreeze_traced(child);
	}

 out_put_task_struct:
	put_task_struct(child);
 out:
	return ret;
}
#endif	/* CONFIG_COMPAT */