1
2
3
4
5
6
7#include <linux/mm.h>
8#include <linux/slab.h>
9#include <linux/sched/autogroup.h>
10#include <linux/sched/mm.h>
11#include <linux/sched/stat.h>
12#include <linux/sched/task.h>
13#include <linux/sched/task_stack.h>
14#include <linux/sched/cputime.h>
15#include <linux/interrupt.h>
16#include <linux/module.h>
17#include <linux/capability.h>
18#include <linux/completion.h>
19#include <linux/personality.h>
20#include <linux/tty.h>
21#include <linux/iocontext.h>
22#include <linux/key.h>
23#include <linux/cpu.h>
24#include <linux/acct.h>
25#include <linux/tsacct_kern.h>
26#include <linux/file.h>
27#include <linux/fdtable.h>
28#include <linux/freezer.h>
29#include <linux/binfmts.h>
30#include <linux/nsproxy.h>
31#include <linux/pid_namespace.h>
32#include <linux/ptrace.h>
33#include <linux/profile.h>
34#include <linux/mount.h>
35#include <linux/proc_fs.h>
36#include <linux/kthread.h>
37#include <linux/mempolicy.h>
38#include <linux/taskstats_kern.h>
39#include <linux/delayacct.h>
40#include <linux/cgroup.h>
41#include <linux/syscalls.h>
42#include <linux/signal.h>
43#include <linux/posix-timers.h>
44#include <linux/cn_proc.h>
45#include <linux/mutex.h>
46#include <linux/futex.h>
47#include <linux/pipe_fs_i.h>
48#include <linux/audit.h>
49#include <linux/resource.h>
50#include <linux/blkdev.h>
51#include <linux/task_io_accounting_ops.h>
52#include <linux/tracehook.h>
53#include <linux/fs_struct.h>
54#include <linux/init_task.h>
55#include <linux/perf_event.h>
56#include <trace/events/sched.h>
57#include <linux/hw_breakpoint.h>
58#include <linux/oom.h>
59#include <linux/writeback.h>
60#include <linux/shm.h>
61#include <linux/kcov.h>
62#include <linux/random.h>
63#include <linux/rcuwait.h>
64#include <linux/compat.h>
65
66#include <linux/uaccess.h>
67#include <asm/unistd.h>
68#include <asm/pgtable.h>
69#include <asm/mmu_context.h>
70
71static void __unhash_process(struct task_struct *p, bool group_dead)
72{
73 nr_threads--;
74 detach_pid(p, PIDTYPE_PID);
75 if (group_dead) {
76 detach_pid(p, PIDTYPE_PGID);
77 detach_pid(p, PIDTYPE_SID);
78
79 list_del_rcu(&p->tasks);
80 list_del_init(&p->sibling);
81 __this_cpu_dec(process_counts);
82 }
83 list_del_rcu(&p->thread_group);
84 list_del_rcu(&p->thread_node);
85}
86
87
88
89
/*
 * Detach @tsk from its signal handling state and fold its resource
 * accounting into the shared signal_struct.
 *
 * The body runs under tsk->sighand->siglock. The rcu_dereference_check()
 * below expects tasklist_lock to be held; release_task() calls this with
 * tasklist_lock write-locked.
 */
static void __exit_signal(struct task_struct *tsk)
{
	struct signal_struct *sig = tsk->signal;
	/* In this function "group dead" means: @tsk is the group leader. */
	bool group_dead = thread_group_leader(tsk);
	struct sighand_struct *sighand;
	/* Only assigned and consumed on the group_dead paths below. */
	struct tty_struct *uninitialized_var(tty);
	u64 utime, stime;

	sighand = rcu_dereference_check(tsk->sighand,
					lockdep_tasklist_lock_is_held());
	spin_lock(&sighand->siglock);

#ifdef CONFIG_POSIX_TIMERS
	posix_cpu_timers_exit(tsk);
	if (group_dead) {
		posix_cpu_timers_exit_group(tsk);
	} else {
		/*
		 * A non-leader that nevertheless carries the group leader's
		 * pid also has to tear down the group-wide CPU timers.
		 * NOTE(review): presumably only reachable via de_thread()
		 * during exec -- confirm.
		 */
		if (unlikely(has_group_leader_pid(tsk)))
			posix_cpu_timers_exit_group(tsk);
	}
#endif

	if (group_dead) {
		/* Take over the tty pointer; it is put after unlocking. */
		tty = sig->tty;
		sig->tty = NULL;
	} else {
		/*
		 * A positive notify_count is counted down by each released
		 * non-leader thread; the one that brings it to zero wakes
		 * group_exit_task.
		 */
		if (sig->notify_count > 0 && !--sig->notify_count)
			wake_up_process(sig->group_exit_task);

		/* Do not leave curr_target pointing at the departing thread. */
		if (tsk == sig->curr_target)
			sig->curr_target = next_thread(tsk);
	}

	/* Feed the thread's accumulated runtime to the entropy pool. */
	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
			      sizeof(unsigned long long));

	/*
	 * Accumulate the thread's counters into the signal_struct inside a
	 * stats_lock write section; the thread is unhashed within the same
	 * section.
	 */
	task_cputime(tsk, &utime, &stime);
	write_seqlock(&sig->stats_lock);
	sig->utime += utime;
	sig->stime += stime;
	sig->gtime += task_gtime(tsk);
	sig->min_flt += tsk->min_flt;
	sig->maj_flt += tsk->maj_flt;
	sig->nvcsw += tsk->nvcsw;
	sig->nivcsw += tsk->nivcsw;
	sig->inblock += task_io_get_inblock(tsk);
	sig->oublock += task_io_get_oublock(tsk);
	task_io_accounting_add(&sig->ioac, &tsk->ioac);
	sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
	sig->nr_threads--;
	__unhash_process(tsk, group_dead);
	write_sequnlock(&sig->stats_lock);

	/*
	 * Drop the private pending signals and clear ->sighand while the
	 * siglock is still held; task_rcu_dereference() in this file treats
	 * a NULL ->sighand as "task is gone".
	 */
	flush_sigqueue(&tsk->pending);
	tsk->sighand = NULL;
	spin_unlock(&sighand->siglock);

	__cleanup_sighand(sighand);
	clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
	if (group_dead) {
		/* Group-wide leftovers are released outside the siglock. */
		flush_sigqueue(&sig->shared_pending);
		tty_kref_put(tty);
	}
}
173
174static void delayed_put_task_struct(struct rcu_head *rhp)
175{
176 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
177
178 perf_event_delayed_put(tsk);
179 trace_sched_process_free(tsk);
180 put_task_struct(tsk);
181}
182
183
/*
 * Release the remaining bookkeeping of an exited task @p: drop its
 * per-user process count, call proc_flush_task(), unhash it under
 * tasklist_lock and queue the final put_task_struct() via call_rcu().
 *
 * If @p is a non-leader, the rest of the group is empty and the leader is
 * a zombie, the leader's parent is notified here. When
 * do_notify_parent() returns non-zero the leader is marked EXIT_DEAD and
 * released too, by looping back to "repeat" with p = leader.
 */
void release_task(struct task_struct *p)
{
	struct task_struct *leader;
	int zap_leader;
repeat:
	/* __task_cred() is dereferenced under rcu_read_lock(). */
	rcu_read_lock();
	atomic_dec(&__task_cred(p)->user->processes);
	rcu_read_unlock();

	proc_flush_task(p);

	write_lock_irq(&tasklist_lock);
	ptrace_release_task(p);
	__exit_signal(p);

	/*
	 * With @p unhashed, check whether the group leader has become the
	 * only remaining member and is waiting as a zombie; if so its parent
	 * has not been notified yet (exit_notify() only notifies for a
	 * leader once thread_group_empty() holds).
	 */
	zap_leader = 0;
	leader = p->group_leader;
	if (leader != p && thread_group_empty(leader)
			&& leader->exit_state == EXIT_ZOMBIE) {
		/*
		 * A non-zero return means the leader should be reaped right
		 * away; mark it dead while tasklist_lock is still held.
		 */
		zap_leader = do_notify_parent(leader, leader->exit_signal);
		if (zap_leader)
			leader->exit_state = EXIT_DEAD;
	}

	write_unlock_irq(&tasklist_lock);
	release_thread(p);
	/* The final reference is dropped after an RCU grace period. */
	call_rcu(&p->rcu, delayed_put_task_struct);

	p = leader;
	if (unlikely(zap_leader))
		goto repeat;
}
228
229
230
231
232
/*
 * Load the task pointer stored at @ptask with rcu_dereference() and
 * return it only if the task still had a non-NULL ->sighand.
 *
 * Returns NULL when *@ptask is NULL, or when ->sighand was observed as
 * NULL (__exit_signal() clears ->sighand when a task is released).
 * NOTE(review): presumably the caller must be inside an RCU read-side
 * critical section -- confirm against callers.
 */
struct task_struct *task_rcu_dereference(struct task_struct **ptask)
{
	struct sighand_struct *sighand;
	struct task_struct *task;

retry:
	task = rcu_dereference(*ptask);
	if (!task)
		return NULL;

	/*
	 * ->sighand is read through probe_kernel_address() rather than by a
	 * plain dereference.
	 * NOTE(review): presumably because the memory behind `task` may
	 * already have been freed at this point -- confirm.
	 */
	probe_kernel_address(&task->sighand, sighand);

	/*
	 * Order the ->sighand load before re-reading *ptask. If the pointer
	 * changed in between, the value read above cannot be trusted, so
	 * start over.
	 */
	smp_rmb();
	if (unlikely(task != READ_ONCE(*ptask)))
		goto retry;

	/* A NULL ->sighand is reported to the caller as "no task". */
	if (!sighand)
		return NULL;

	return task;
}
291
292void rcuwait_wake_up(struct rcuwait *w)
293{
294 struct task_struct *task;
295
296 rcu_read_lock();
297
298
299
300
301
302
303
304
305
306
307
308
309 smp_rmb();
310
311
312
313
314
315 task = rcu_dereference(w->task);
316 if (task)
317 wake_up_process(task);
318 rcu_read_unlock();
319}
320
321
322
323
324
325
326
327
328
329static int will_become_orphaned_pgrp(struct pid *pgrp,
330 struct task_struct *ignored_task)
331{
332 struct task_struct *p;
333
334 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
335 if ((p == ignored_task) ||
336 (p->exit_state && thread_group_empty(p)) ||
337 is_global_init(p->real_parent))
338 continue;
339
340 if (task_pgrp(p->real_parent) != pgrp &&
341 task_session(p->real_parent) == task_session(p))
342 return 0;
343 } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
344
345 return 1;
346}
347
348int is_current_pgrp_orphaned(void)
349{
350 int retval;
351
352 read_lock(&tasklist_lock);
353 retval = will_become_orphaned_pgrp(task_pgrp(current), NULL);
354 read_unlock(&tasklist_lock);
355
356 return retval;
357}
358
359static bool has_stopped_jobs(struct pid *pgrp)
360{
361 struct task_struct *p;
362
363 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
364 if (p->signal->flags & SIGNAL_STOP_STOPPED)
365 return true;
366 } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
367
368 return false;
369}
370
371
372
373
374
375
376static void
377kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
378{
379 struct pid *pgrp = task_pgrp(tsk);
380 struct task_struct *ignored_task = tsk;
381
382 if (!parent)
383
384
385
386 parent = tsk->real_parent;
387 else
388
389
390
391 ignored_task = NULL;
392
393 if (task_pgrp(parent) != pgrp &&
394 task_session(parent) == task_session(tsk) &&
395 will_become_orphaned_pgrp(pgrp, ignored_task) &&
396 has_stopped_jobs(pgrp)) {
397 __kill_pgrp_info(SIGHUP, SEND_SIG_PRIV, pgrp);
398 __kill_pgrp_info(SIGCONT, SEND_SIG_PRIV, pgrp);
399 }
400}
401
402#ifdef CONFIG_MEMCG
403
404
405
/*
 * Called for the current task when it may still be recorded as
 * @mm->owner: hand ownership to another task that uses @mm, or clear
 * mm->owner when no such task is found.
 */
void mm_update_next_owner(struct mm_struct *mm)
{
	struct task_struct *c, *g, *p = current;

retry:
	/* Nothing to do unless current is the recorded owner. */
	if (mm->owner != p)
		return;

	/*
	 * With at most one user left there is nobody to hand the mm to;
	 * just drop the owner pointer.
	 */
	if (atomic_read(&mm->mm_users) <= 1) {
		mm->owner = NULL;
		return;
	}

	read_lock(&tasklist_lock);

	/* First candidates: current's own children. */
	list_for_each_entry(c, &p->children, sibling) {
		if (c->mm == mm)
			goto assign_new_owner;
	}

	/* Next: the children of current's real parent, i.e. its siblings. */
	list_for_each_entry(c, &p->real_parent->children, sibling) {
		if (c->mm == mm)
			goto assign_new_owner;
	}

	/* Last resort: scan every process, skipping kernel threads. */
	for_each_process(g) {
		if (g->flags & PF_KTHREAD)
			continue;
		for_each_thread(g, c) {
			if (c->mm == mm)
				goto assign_new_owner;
			/* A thread with some other mm ends the scan of this process. */
			if (c->mm)
				break;
		}
	}
	read_unlock(&tasklist_lock);

	/* No task using @mm was found. */
	mm->owner = NULL;
	return;

assign_new_owner:
	BUG_ON(c == p);
	/* Pin the candidate so it stays valid once tasklist_lock is dropped. */
	get_task_struct(c);

	/* task_lock() is taken before tasklist_lock is released. */
	task_lock(c);

	read_unlock(&tasklist_lock);
	/*
	 * Re-check under task_lock(): if the candidate no longer uses @mm,
	 * undo and search again.
	 */
	if (c->mm != mm) {
		task_unlock(c);
		put_task_struct(c);
		goto retry;
	}
	mm->owner = c;
	task_unlock(c);
	put_task_struct(c);
}
488#endif
489
490
491
492
493
/*
 * Detach the current task from its user address space.
 *
 * If a core dump is in progress on the mm (mm->core_state is set), the
 * task first registers itself on the dumper's thread list and sleeps
 * until its entry's ->task field is cleared. It then clears current->mm
 * and drops its mm_users reference.
 */
static void exit_mm(void)
{
	struct mm_struct *mm = current->mm;
	struct core_state *core_state;

	/* Note: mm_release() is called even when mm is NULL. */
	mm_release(current, mm);
	if (!mm)
		return;
	sync_mm_rss(mm);

	/* mm->core_state is sampled with mmap_sem held for reading. */
	down_read(&mm->mmap_sem);
	core_state = mm->core_state;
	if (core_state) {
		struct core_thread self;

		up_read(&mm->mmap_sem);

		/* Push ourselves onto the dumper's list with an atomic xchg(). */
		self.task = current;
		self.next = xchg(&core_state->dumper.next, &self);

		/*
		 * The thread that drops nr_threads to zero completes
		 * ->startup.
		 */
		if (atomic_dec_and_test(&core_state->nr_threads))
			complete(&core_state->startup);

		/* Sleep until self.task has been cleared. */
		for (;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (!self.task)
				break;
			freezable_schedule();
		}
		__set_current_state(TASK_RUNNING);
		down_read(&mm->mmap_sem);
	}
	/* Extra mm reference taken before current->mm is cleared. */
	mmgrab(mm);
	BUG_ON(mm != current->active_mm);

	/* current->mm is cleared under task_lock(). */
	task_lock(current);
	current->mm = NULL;
	up_read(&mm->mmap_sem);
	enter_lazy_tlb(mm, current);
	task_unlock(current);
	mm_update_next_owner(mm);
	mmput(mm);
	if (test_thread_flag(TIF_MEMDIE))
		exit_oom_victim();
}
548
549static struct task_struct *find_alive_thread(struct task_struct *p)
550{
551 struct task_struct *t;
552
553 for_each_thread(p, t) {
554 if (!(t->flags & PF_EXITING))
555 return t;
556 }
557 return NULL;
558}
559
/*
 * Return the child reaper of @father's active pid namespace.
 *
 * If @father itself is the reaper, a live thread of its group takes over
 * the role. If none is left, tasklist_lock is dropped: for the initial
 * pid namespace this panics, otherwise the namespace's processes are
 * zapped, the lock is re-taken and @father is returned.
 *
 * Called with tasklist_lock write-locked; may release and re-acquire it
 * (see the sparse annotations).
 */
static struct task_struct *find_child_reaper(struct task_struct *father)
	__releases(&tasklist_lock)
	__acquires(&tasklist_lock)
{
	struct pid_namespace *pid_ns = task_active_pid_ns(father);
	struct task_struct *reaper = pid_ns->child_reaper;

	/* Common case: somebody else is the namespace's reaper. */
	if (likely(reaper != father))
		return reaper;

	/* Hand the reaper role to a surviving thread of the same group. */
	reaper = find_alive_thread(father);
	if (reaper) {
		pid_ns->child_reaper = reaper;
		return reaper;
	}

	/* No live thread left; the slow path runs without tasklist_lock. */
	write_unlock_irq(&tasklist_lock);
	if (unlikely(pid_ns == &init_pid_ns)) {
		panic("Attempted to kill init! exitcode=0x%08x\n",
			father->signal->group_exit_code ?: father->exit_code);
	}
	zap_pid_ns_processes(pid_ns);
	write_lock_irq(&tasklist_lock);

	return father;
}
586
587
588
589
590
591
592
593
594static struct task_struct *find_new_reaper(struct task_struct *father,
595 struct task_struct *child_reaper)
596{
597 struct task_struct *thread, *reaper;
598
599 thread = find_alive_thread(father);
600 if (thread)
601 return thread;
602
603 if (father->signal->has_child_subreaper) {
604 unsigned int ns_level = task_pid(father)->level;
605
606
607
608
609
610
611
612
613 for (reaper = father->real_parent;
614 task_pid(reaper)->level == ns_level;
615 reaper = reaper->real_parent) {
616 if (reaper == &init_task)
617 break;
618 if (!reaper->signal->is_child_subreaper)
619 continue;
620 thread = find_alive_thread(reaper);
621 if (thread)
622 return thread;
623 }
624 }
625
626 return child_reaper;
627}
628
629
630
631
632static void reparent_leader(struct task_struct *father, struct task_struct *p,
633 struct list_head *dead)
634{
635 if (unlikely(p->exit_state == EXIT_DEAD))
636 return;
637
638
639 p->exit_signal = SIGCHLD;
640
641
642 if (!p->ptrace &&
643 p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
644 if (do_notify_parent(p, p->exit_signal)) {
645 p->exit_state = EXIT_DEAD;
646 list_add(&p->ptrace_entry, dead);
647 }
648 }
649
650 kill_orphaned_pgrp(p, father);
651}
652
653
654
655
656
657
658
659
660
/*
 * Give all children of the exiting task @father a new parent.
 *
 * Tasks that have to be released once tasklist_lock is dropped are
 * collected on @dead (by exit_ptrace() and reparent_leader()).
 * The caller in this file, exit_notify(), holds tasklist_lock for
 * writing; find_child_reaper() may drop and re-take it.
 */
static void forget_original_parent(struct task_struct *father,
					struct list_head *dead)
{
	struct task_struct *p, *t, *reaper;

	/* Detach whatever @father was still tracing. */
	if (unlikely(!list_empty(&father->ptraced)))
		exit_ptrace(father, dead);

	/*
	 * find_child_reaper() is called before the emptiness check, so it
	 * runs even when @father has no children.
	 */
	reaper = find_child_reaper(father);
	if (list_empty(&father->children))
		return;

	reaper = find_new_reaper(father, reaper);
	list_for_each_entry(p, &father->children, sibling) {
		for_each_thread(p, t) {
			t->real_parent = reaper;
			/* An untraced thread must have ->parent == father, a traced one must not. */
			BUG_ON((!t->ptrace) != (t->parent == father));
			if (likely(!t->ptrace))
				t->parent = t->real_parent;
			/* Deliver the parent-death signal the thread asked for. */
			if (t->pdeath_signal)
				group_send_sig_info(t->pdeath_signal,
						    SEND_SIG_NOINFO, t);
		}
		/*
		 * The leader-level fixups are skipped when the new reaper is
		 * just another thread of @father's own group.
		 */
		if (!same_thread_group(reaper, father))
			reparent_leader(father, p, dead);
	}
	/* Move the whole child list over to the reaper in one go. */
	list_splice_tail_init(&father->children, &reaper->children);
}
694
695
696
697
698
/*
 * Notify interested parties about the exit of @tsk and set its final
 * exit_state.
 *
 * Under tasklist_lock (write): children are reparented, the orphaned
 * process group check is run when the whole group is dead, and the
 * parent/tracer is notified. @tsk becomes EXIT_DEAD when it is to be
 * auto-reaped, EXIT_ZOMBIE otherwise. Everything collected on the local
 * "dead" list is released after the lock is dropped.
 */
static void exit_notify(struct task_struct *tsk, int group_dead)
{
	bool autoreap;
	struct task_struct *p, *n;
	LIST_HEAD(dead);

	write_lock_irq(&tasklist_lock);
	forget_original_parent(tsk, &dead);

	if (group_dead)
		kill_orphaned_pgrp(tsk->group_leader, NULL);

	if (unlikely(tsk->ptrace)) {
		/*
		 * Traced task: the real exit_signal is used only for a lone
		 * group leader that was not reparented by ptrace; every
		 * other case reports SIGCHLD.
		 */
		int sig = thread_group_leader(tsk) &&
				thread_group_empty(tsk) &&
				!ptrace_reparented(tsk) ?
			tsk->exit_signal : SIGCHLD;
		autoreap = do_notify_parent(tsk, sig);
	} else if (thread_group_leader(tsk)) {
		/*
		 * A leader with live sub-threads is not notified for yet and
		 * stays a zombie (autoreap is false); release_task() does
		 * the notification when the last sub-thread is released.
		 */
		autoreap = thread_group_empty(tsk) &&
			do_notify_parent(tsk, tsk->exit_signal);
	} else {
		/* Untraced non-leader threads are always reaped here. */
		autoreap = true;
	}

	tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;
	if (tsk->exit_state == EXIT_DEAD)
		list_add(&tsk->ptrace_entry, &dead);

	/* A negative notify_count: wake group_exit_task. */
	if (unlikely(tsk->signal->notify_count < 0))
		wake_up_process(tsk->signal->group_exit_task);
	write_unlock_irq(&tasklist_lock);

	/* release_task() takes tasklist_lock itself, so reap outside it. */
	list_for_each_entry_safe(p, n, &dead, ptrace_entry) {
		list_del_init(&p->ptrace_entry);
		release_task(p);
	}
}
738
#ifdef CONFIG_DEBUG_STACK_USAGE
/*
 * Track the smallest amount of unused stack seen for any exiting task
 * and log a message whenever the current task sets a new low-water
 * mark. The unlocked comparison is only a fast-path filter; it is
 * repeated under low_water_lock before the record is updated.
 */
static void check_stack_usage(void)
{
	static DEFINE_SPINLOCK(low_water_lock);
	static int lowest_to_date = THREAD_SIZE;
	unsigned long remaining = stack_not_used(current);

	if (remaining < lowest_to_date) {
		spin_lock(&low_water_lock);
		if (remaining < lowest_to_date) {
			pr_info("%s (%d) used greatest stack depth: %lu bytes left\n",
				current->comm, task_pid_nr(current), remaining);
			lowest_to_date = remaining;
		}
		spin_unlock(&low_water_lock);
	}
}
#else
/* Stack usage tracking is compiled out. */
static inline void check_stack_usage(void) {}
#endif
762
/*
 * Terminate the current task with exit code @code. Never returns.
 *
 * Tears the task's resources down step by step, notifies the
 * parent/tracer through exit_notify() and finally calls do_task_dead().
 * The order of the steps below is significant; do not reorder them
 * without checking every callee.
 */
void __noreturn do_exit(long code)
{
	struct task_struct *tsk = current;
	int group_dead;

	profile_task_exit(tsk);
	kcov_task_exit(tsk);

	WARN_ON(blk_needs_flush_plug(tsk));

	/* Exiting from interrupt context or as the idle task (pid 0) is fatal. */
	if (unlikely(in_interrupt()))
		panic("Aiee, killing interrupt handler!");
	if (unlikely(!tsk->pid))
		panic("Attempted to kill the idle task!");

	/*
	 * Reset the address limit to USER_DS.
	 * NOTE(review): presumably guards against exiting (e.g. after an
	 * oops) while set_fs(KERNEL_DS) was in effect -- confirm.
	 */
	set_fs(USER_DS);

	ptrace_event(PTRACE_EVENT_EXIT, code);

	validate_creds_for_do_exit(tsk);

	/*
	 * PF_EXITING already set means do_exit() was entered a second time
	 * for this task. Do not repeat the teardown: flag PF_EXITPIDONE and
	 * block in TASK_UNINTERRUPTIBLE.
	 */
	if (unlikely(tsk->flags & PF_EXITING)) {
		pr_alert("Fixing recursive fault but reboot is needed!\n");
		tsk->flags |= PF_EXITPIDONE;
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule();
	}

	exit_signals(tsk);
	/* Full barrier after exit_signals(), before the pi_lock round trip. */
	smp_mb();
	/*
	 * Lock and immediately unlock pi_lock: waits for any current holder
	 * of tsk->pi_lock to finish.
	 */
	raw_spin_lock_irq(&tsk->pi_lock);
	raw_spin_unlock_irq(&tsk->pi_lock);

	/* Exiting in atomic context is reported and the preempt count reset. */
	if (unlikely(in_atomic())) {
		pr_info("note: %s[%d] exited with preempt_count %d\n",
			current->comm, task_pid_nr(current),
			preempt_count());
		preempt_count_set(PREEMPT_ENABLED);
	}

	/* Flush cached RSS counters before accounting is collected. */
	if (tsk->mm)
		sync_mm_rss(tsk->mm);
	acct_update_integrals(tsk);
	/* The thread that drops signal->live to zero is the last one. */
	group_dead = atomic_dec_and_test(&tsk->signal->live);
	if (group_dead) {
#ifdef CONFIG_POSIX_TIMERS
		hrtimer_cancel(&tsk->signal->real_timer);
		exit_itimers(tsk->signal);
#endif
		if (tsk->mm)
			setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
	}
	acct_collect(code, group_dead);
	if (group_dead)
		tty_audit_exit();
	audit_free(tsk);

	tsk->exit_code = code;
	taskstats_exit(tsk, group_dead);

	exit_mm();

	if (group_dead)
		acct_process();
	trace_sched_process_exit(tsk);

	exit_sem(tsk);
	exit_shm(tsk);
	exit_files(tsk);
	exit_fs(tsk);
	if (group_dead)
		disassociate_ctty(1);
	exit_task_namespaces(tsk);
	exit_task_work(tsk);
	exit_thread(tsk);

	/* Perf events are flushed after the resource teardown above. */
	perf_event_exit_task(tsk);

	sched_autogroup_exit_task(tsk);
	cgroup_exit(tsk);

	flush_ptrace_hw_breakpoint(tsk);

	/* Pairs with exit_tasks_rcu_finish() further down. */
	exit_tasks_rcu_start();
	exit_notify(tsk, group_dead);
	proc_exit_connector(tsk);
	mpol_put_task_policy(tsk);
#ifdef CONFIG_FUTEX
	if (unlikely(current->pi_state_cache))
		kfree(current->pi_state_cache);
#endif
	/* No locks may be held from here on. */
	debug_check_no_locks_held();
	/* The same flag is set on the recursive-fault path above. */
	tsk->flags |= PF_EXITPIDONE;

	if (tsk->io_context)
		exit_io_context(tsk);

	if (tsk->splice_pipe)
		free_pipe_info(tsk->splice_pipe);

	if (tsk->task_frag.page)
		put_page(tsk->task_frag.page);

	validate_creds_for_do_exit(tsk);

	check_stack_usage();
	/* Preemption stays disabled until do_task_dead(). */
	preempt_disable();
	if (tsk->nr_dirtied)
		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
	exit_rcu();
	exit_tasks_rcu_finish();

	lockdep_free_task(tsk);
	do_task_dead();
}
EXPORT_SYMBOL_GPL(do_exit);
925
926void complete_and_exit(struct completion *comp, long code)
927{
928 if (comp)
929 complete(comp);
930
931 do_exit(code);
932}
933EXPORT_SYMBOL(complete_and_exit);
934
935SYSCALL_DEFINE1(exit, int, error_code)
936{
937 do_exit((error_code&0xff)<<8);
938}
939
940
941
942
943
944void
945do_group_exit(int exit_code)
946{
947 struct signal_struct *sig = current->signal;
948
949 BUG_ON(exit_code & 0x80);
950
951 if (signal_group_exit(sig))
952 exit_code = sig->group_exit_code;
953 else if (!thread_group_empty(current)) {
954 struct sighand_struct *const sighand = current->sighand;
955
956 spin_lock_irq(&sighand->siglock);
957 if (signal_group_exit(sig))
958
959 exit_code = sig->group_exit_code;
960 else {
961 sig->group_exit_code = exit_code;
962 sig->flags = SIGNAL_GROUP_EXIT;
963 zap_other_threads(current);
964 }
965 spin_unlock_irq(&sighand->siglock);
966 }
967
968 do_exit(exit_code);
969
970}
971
972
973
974
975
976
/*
 * exit_group(2): terminate every thread of the calling thread group.
 * The low 8 bits of @error_code are placed in bits 8..15 of the status
 * passed to do_group_exit().
 */
SYSCALL_DEFINE1(exit_group, int, error_code)
{
	do_group_exit((error_code & 0xff) << 8);
	/* do_group_exit() ends in do_exit(), which is declared __noreturn. */
	return 0;
}
983
/*
 * Result of a successful wait; filled in by the wait_task_*() helpers
 * and copied into the user's siginfo by the waitid() entry points.
 */
struct waitid_info {
	pid_t pid;	/* task_pid_vnr() of the reported child */
	uid_t uid;	/* child's uid as seen from the waiter's user namespace */
	int status;	/* exit status or signal number, depending on ->cause */
	int cause;	/* CLD_* code describing the state change */
};
990
/* Parameters and results of one do_wait() invocation. */
struct wait_opts {
	enum pid_type		wo_type;	/* which kind of pid to match; PIDTYPE_MAX matches any child */
	int			wo_flags;	/* W* option bits from the caller */
	struct pid		*wo_pid;	/* pid to match when wo_type < PIDTYPE_MAX */

	struct waitid_info	*wo_info;	/* optional waitid()-style result */
	int			wo_stat;	/* wait4()-style status word */
	struct rusage		*wo_rusage;	/* optional resource usage output */

	wait_queue_entry_t		child_wait;	/* entry queued on signal->wait_chldexit */
	int			notask_error;	/* result when nothing is reported: -ECHILD, or 0 if a candidate exists */
};
1003
1004static inline
1005struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
1006{
1007 if (type != PIDTYPE_PID)
1008 task = task->group_leader;
1009 return task->pids[type].pid;
1010}
1011
1012static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
1013{
1014 return wo->wo_type == PIDTYPE_MAX ||
1015 task_pid_type(p, wo->wo_type) == wo->wo_pid;
1016}
1017
1018static int
1019eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p)
1020{
1021 if (!eligible_pid(wo, p))
1022 return 0;
1023
1024
1025
1026
1027
1028 if (ptrace || (wo->wo_flags & __WALL))
1029 return 1;
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039 if ((p->exit_signal != SIGCHLD) ^ !!(wo->wo_flags & __WCLONE))
1040 return 0;
1041
1042 return 1;
1043}
1044
1045
1046
1047
1048
1049
1050
1051static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
1052{
1053 int state, status;
1054 pid_t pid = task_pid_vnr(p);
1055 uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
1056 struct waitid_info *infop;
1057
1058 if (!likely(wo->wo_flags & WEXITED))
1059 return 0;
1060
1061 if (unlikely(wo->wo_flags & WNOWAIT)) {
1062 status = p->exit_code;
1063 get_task_struct(p);
1064 read_unlock(&tasklist_lock);
1065 sched_annotate_sleep();
1066 if (wo->wo_rusage)
1067 getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
1068 put_task_struct(p);
1069 goto out_info;
1070 }
1071
1072
1073
1074 state = (ptrace_reparented(p) && thread_group_leader(p)) ?
1075 EXIT_TRACE : EXIT_DEAD;
1076 if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
1077 return 0;
1078
1079
1080
1081 read_unlock(&tasklist_lock);
1082 sched_annotate_sleep();
1083
1084
1085
1086
1087 if (state == EXIT_DEAD && thread_group_leader(p)) {
1088 struct signal_struct *sig = p->signal;
1089 struct signal_struct *psig = current->signal;
1090 unsigned long maxrss;
1091 u64 tgutime, tgstime;
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113 thread_group_cputime_adjusted(p, &tgutime, &tgstime);
1114 spin_lock_irq(¤t->sighand->siglock);
1115 write_seqlock(&psig->stats_lock);
1116 psig->cutime += tgutime + sig->cutime;
1117 psig->cstime += tgstime + sig->cstime;
1118 psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
1119 psig->cmin_flt +=
1120 p->min_flt + sig->min_flt + sig->cmin_flt;
1121 psig->cmaj_flt +=
1122 p->maj_flt + sig->maj_flt + sig->cmaj_flt;
1123 psig->cnvcsw +=
1124 p->nvcsw + sig->nvcsw + sig->cnvcsw;
1125 psig->cnivcsw +=
1126 p->nivcsw + sig->nivcsw + sig->cnivcsw;
1127 psig->cinblock +=
1128 task_io_get_inblock(p) +
1129 sig->inblock + sig->cinblock;
1130 psig->coublock +=
1131 task_io_get_oublock(p) +
1132 sig->oublock + sig->coublock;
1133 maxrss = max(sig->maxrss, sig->cmaxrss);
1134 if (psig->cmaxrss < maxrss)
1135 psig->cmaxrss = maxrss;
1136 task_io_accounting_add(&psig->ioac, &p->ioac);
1137 task_io_accounting_add(&psig->ioac, &sig->ioac);
1138 write_sequnlock(&psig->stats_lock);
1139 spin_unlock_irq(¤t->sighand->siglock);
1140 }
1141
1142 if (wo->wo_rusage)
1143 getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
1144 status = (p->signal->flags & SIGNAL_GROUP_EXIT)
1145 ? p->signal->group_exit_code : p->exit_code;
1146 wo->wo_stat = status;
1147
1148 if (state == EXIT_TRACE) {
1149 write_lock_irq(&tasklist_lock);
1150
1151 ptrace_unlink(p);
1152
1153
1154 state = EXIT_ZOMBIE;
1155 if (do_notify_parent(p, p->exit_signal))
1156 state = EXIT_DEAD;
1157 p->exit_state = state;
1158 write_unlock_irq(&tasklist_lock);
1159 }
1160 if (state == EXIT_DEAD)
1161 release_task(p);
1162
1163out_info:
1164 infop = wo->wo_info;
1165 if (infop) {
1166 if ((status & 0x7f) == 0) {
1167 infop->cause = CLD_EXITED;
1168 infop->status = status >> 8;
1169 } else {
1170 infop->cause = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
1171 infop->status = status & 0x7f;
1172 }
1173 infop->pid = pid;
1174 infop->uid = uid;
1175 }
1176
1177 return pid;
1178}
1179
1180static int *task_stopped_code(struct task_struct *p, bool ptrace)
1181{
1182 if (ptrace) {
1183 if (task_is_traced(p) && !(p->jobctl & JOBCTL_LISTENING))
1184 return &p->exit_code;
1185 } else {
1186 if (p->signal->flags & SIGNAL_STOP_STOPPED)
1187 return &p->signal->group_exit_code;
1188 }
1189 return NULL;
1190}
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
/*
 * Handle a wait request for a stopped (or, for ptrace, trapped) child @p.
 *
 * Entered with tasklist_lock held for reading. Returns 0 with the lock
 * still held when there is nothing to report; otherwise returns @p's pid
 * with the lock RELEASED and the result stored through @wo.
 */
static int wait_task_stopped(struct wait_opts *wo,
				int ptrace, struct task_struct *p)
{
	struct waitid_info *infop;
	int exit_code, *p_code, why;
	uid_t uid = 0;
	pid_t pid;

	/* Group stops are reported to normal waiters only with WUNTRACED. */
	if (!ptrace && !(wo->wo_flags & WUNTRACED))
		return 0;

	/* Cheap unlocked check first; repeated under the siglock below. */
	if (!task_stopped_code(p, ptrace))
		return 0;

	exit_code = 0;
	spin_lock_irq(&p->sighand->siglock);

	p_code = task_stopped_code(p, ptrace);
	if (unlikely(!p_code))
		goto unlock_sig;

	/* A zero code means there is nothing (left) to report. */
	exit_code = *p_code;
	if (!exit_code)
		goto unlock_sig;

	/* Consume the stop code unless the caller is only peeking. */
	if (!unlikely(wo->wo_flags & WNOWAIT))
		*p_code = 0;

	uid = from_kuid_munged(current_user_ns(), task_uid(p));
unlock_sig:
	spin_unlock_irq(&p->sighand->siglock);
	if (!exit_code)
		return 0;

	/*
	 * Pin @p and sample everything needed while tasklist_lock is still
	 * held; getrusage() is called after the lock has been dropped.
	 */
	get_task_struct(p);
	pid = task_pid_vnr(p);
	why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
	read_unlock(&tasklist_lock);
	sched_annotate_sleep();
	if (wo->wo_rusage)
		getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
	put_task_struct(p);

	/* Status word: stop signal in bits 8 and up, 0x7f in the low byte. */
	if (likely(!(wo->wo_flags & WNOWAIT)))
		wo->wo_stat = (exit_code << 8) | 0x7f;

	infop = wo->wo_info;
	if (infop) {
		infop->cause = why;
		infop->status = exit_code;
		infop->pid = pid;
		infop->uid = uid;
	}
	return pid;
}
1275
1276
1277
1278
1279
1280
1281
/*
 * Handle a wait request for a child @p that was resumed (its signal
 * flags carry SIGNAL_STOP_CONTINUED).
 *
 * Entered with tasklist_lock held for reading. Returns 0 with the lock
 * still held when there is nothing to report; otherwise returns @p's pid
 * with the lock RELEASED and the result stored through @wo.
 */
static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
{
	struct waitid_info *infop;
	pid_t pid;
	uid_t uid;

	if (!unlikely(wo->wo_flags & WCONTINUED))
		return 0;

	/* Unlocked fast-path check ... */
	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
		return 0;

	spin_lock_irq(&p->sighand->siglock);
	/* ... repeated under the siglock, since the flag may be gone by now. */
	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
		spin_unlock_irq(&p->sighand->siglock);
		return 0;
	}
	/* Consume the event unless the caller is only peeking. */
	if (!unlikely(wo->wo_flags & WNOWAIT))
		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
	uid = from_kuid_munged(current_user_ns(), task_uid(p));
	spin_unlock_irq(&p->sighand->siglock);

	/* Pin @p so getrusage() can be called after dropping tasklist_lock. */
	pid = task_pid_vnr(p);
	get_task_struct(p);
	read_unlock(&tasklist_lock);
	sched_annotate_sleep();
	if (wo->wo_rusage)
		getrusage(p, RUSAGE_BOTH, wo->wo_rusage);
	put_task_struct(p);

	infop = wo->wo_info;
	if (!infop) {
		/* wait4()-style status word for "continued". */
		wo->wo_stat = 0xffff;
	} else {
		infop->cause = CLD_CONTINUED;
		infop->pid = pid;
		infop->uid = uid;
		infop->status = SIGCONT;
	}
	return pid;
}
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
/*
 * Consider task @p for the wait described by @wo.
 *
 * @ptrace is non-zero when @p was found on the waiter's ->ptraced list
 * rather than its ->children list.
 *
 * Entered with tasklist_lock held for reading. Returns a non-zero value
 * (the reported pid) with the lock RELEASED when a wait_task_*() helper
 * reported something, or 0 with the lock still held. wo->notask_error
 * is cleared whenever @p is a child that might become reportable later,
 * so that do_wait() sleeps instead of failing with -ECHILD.
 */
static int wait_consider_task(struct wait_opts *wo, int ptrace,
				struct task_struct *p)
{
	/*
	 * Sample exit_state exactly once: it can change concurrently (see
	 * the cmpxchg() in wait_task_zombie()), and the checks below must
	 * all act on the same value.
	 */
	int exit_state = ACCESS_ONCE(p->exit_state);
	int ret;

	if (unlikely(exit_state == EXIT_DEAD))
		return 0;

	ret = eligible_child(wo, ptrace, p);
	if (!ret)
		return ret;

	if (unlikely(exit_state == EXIT_TRACE)) {
		/*
		 * A tracer has claimed this zombie (wait_task_zombie() moves
		 * it to EXIT_TRACE and later back to EXIT_ZOMBIE or to
		 * EXIT_DEAD). Report nothing now, but a natural parent must
		 * not see -ECHILD.
		 */
		if (likely(!ptrace))
			wo->notask_error = 0;
		return 0;
	}

	if (likely(!ptrace) && unlikely(p->ptrace)) {
		/*
		 * @p was reached via the children list but is being traced.
		 * If ptrace did not reparent it, the tracer is the waiter's
		 * own side, so handle @p like a ptrace wait.
		 */
		if (!ptrace_reparented(p))
			ptrace = 1;
	}

	/* Zombie handling. */
	if (exit_state == EXIT_ZOMBIE) {
		/* A group leader is not reaped while delay_group_leader() holds. */
		if (!delay_group_leader(p)) {
			/*
			 * Reap if we are waiting as the tracer, or if nobody
			 * is tracing @p.
			 */
			if (unlikely(ptrace) || likely(!p->ptrace))
				return wait_task_zombie(wo, p);
		}

		/*
		 * Not reapable right now. Keep the waiter from getting
		 * -ECHILD if @p is a plain child, or if the tracer asked for
		 * events (WCONTINUED/WEXITED) that @p may still produce.
		 */
		if (likely(!ptrace) || (wo->wo_flags & (WCONTINUED | WEXITED)))
			wo->notask_error = 0;
	} else {
		/* @p is still alive: there is something to wait for. */
		wo->notask_error = 0;
	}

	/* Stopped/trapped state is checked first ... */
	ret = wait_task_stopped(wo, ptrace, p);
	if (ret)
		return ret;

	/* ... then a pending "continued" notification. */
	return wait_task_continued(wo, p);
}
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
1447{
1448 struct task_struct *p;
1449
1450 list_for_each_entry(p, &tsk->children, sibling) {
1451 int ret = wait_consider_task(wo, 0, p);
1452
1453 if (ret)
1454 return ret;
1455 }
1456
1457 return 0;
1458}
1459
1460static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
1461{
1462 struct task_struct *p;
1463
1464 list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
1465 int ret = wait_consider_task(wo, 1, p);
1466
1467 if (ret)
1468 return ret;
1469 }
1470
1471 return 0;
1472}
1473
1474static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode,
1475 int sync, void *key)
1476{
1477 struct wait_opts *wo = container_of(wait, struct wait_opts,
1478 child_wait);
1479 struct task_struct *p = key;
1480
1481 if (!eligible_pid(wo, p))
1482 return 0;
1483
1484 if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)
1485 return 0;
1486
1487 return default_wake_function(wait, mode, sync, key);
1488}
1489
/*
 * Wake TASK_INTERRUPTIBLE sleepers on @parent's wait_chldexit queue,
 * passing @p as the wake-up key (child_wait_callback() uses the key to
 * filter wake-ups).
 */
void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
{
	__wake_up_sync_key(&parent->signal->wait_chldexit,
				TASK_INTERRUPTIBLE, 1, p);
}
1495
1496static long do_wait(struct wait_opts *wo)
1497{
1498 struct task_struct *tsk;
1499 int retval;
1500
1501 trace_sched_process_wait(wo->wo_pid);
1502
1503 init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
1504 wo->child_wait.private = current;
1505 add_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait);
1506repeat:
1507
1508
1509
1510
1511
1512
1513 wo->notask_error = -ECHILD;
1514 if ((wo->wo_type < PIDTYPE_MAX) &&
1515 (!wo->wo_pid || hlist_empty(&wo->wo_pid->tasks[wo->wo_type])))
1516 goto notask;
1517
1518 set_current_state(TASK_INTERRUPTIBLE);
1519 read_lock(&tasklist_lock);
1520 tsk = current;
1521 do {
1522 retval = do_wait_thread(wo, tsk);
1523 if (retval)
1524 goto end;
1525
1526 retval = ptrace_do_wait(wo, tsk);
1527 if (retval)
1528 goto end;
1529
1530 if (wo->wo_flags & __WNOTHREAD)
1531 break;
1532 } while_each_thread(current, tsk);
1533 read_unlock(&tasklist_lock);
1534
1535notask:
1536 retval = wo->notask_error;
1537 if (!retval && !(wo->wo_flags & WNOHANG)) {
1538 retval = -ERESTARTSYS;
1539 if (!signal_pending(current)) {
1540 schedule();
1541 goto repeat;
1542 }
1543 }
1544end:
1545 __set_current_state(TASK_RUNNING);
1546 remove_wait_queue(¤t->signal->wait_chldexit, &wo->child_wait);
1547 return retval;
1548}
1549
1550static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
1551 int options, struct rusage *ru)
1552{
1553 struct wait_opts wo;
1554 struct pid *pid = NULL;
1555 enum pid_type type;
1556 long ret;
1557
1558 if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED|
1559 __WNOTHREAD|__WCLONE|__WALL))
1560 return -EINVAL;
1561 if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
1562 return -EINVAL;
1563
1564 switch (which) {
1565 case P_ALL:
1566 type = PIDTYPE_MAX;
1567 break;
1568 case P_PID:
1569 type = PIDTYPE_PID;
1570 if (upid <= 0)
1571 return -EINVAL;
1572 break;
1573 case P_PGID:
1574 type = PIDTYPE_PGID;
1575 if (upid <= 0)
1576 return -EINVAL;
1577 break;
1578 default:
1579 return -EINVAL;
1580 }
1581
1582 if (type < PIDTYPE_MAX)
1583 pid = find_get_pid(upid);
1584
1585 wo.wo_type = type;
1586 wo.wo_pid = pid;
1587 wo.wo_flags = options;
1588 wo.wo_info = infop;
1589 wo.wo_rusage = ru;
1590 ret = do_wait(&wo);
1591
1592 put_pid(pid);
1593 return ret;
1594}
1595
/*
 * waitid(2) system call.
 *
 * Runs kernel_waitid() and copies the results to user space. Returns 0
 * on success (also when nothing was reported), a negative errno from
 * kernel_waitid(), or -EFAULT if @ru or @infop cannot be written.
 */
SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
		infop, int, options, struct rusage __user *, ru)
{
	struct rusage r;
	/* Zero-initialised so unreported fields are copied out as 0. */
	struct waitid_info info = {.status = 0};
	long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
	int signo = 0;

	if (err > 0) {
		/* A child was reported: the pid result becomes 0 / SIGCHLD. */
		signo = SIGCHLD;
		err = 0;
		if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
			return -EFAULT;
	}
	if (!infop)
		return err;

	/*
	 * The unsafe_put_user() calls below do no range checking of their
	 * own, so the whole siginfo must be validated first.
	 */
	if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
		return -EFAULT;

	user_access_begin();
	unsafe_put_user(signo, &infop->si_signo, Efault);
	unsafe_put_user(0, &infop->si_errno, Efault);
	unsafe_put_user(info.cause, &infop->si_code, Efault);
	unsafe_put_user(info.pid, &infop->si_pid, Efault);
	unsafe_put_user(info.uid, &infop->si_uid, Efault);
	unsafe_put_user(info.status, &infop->si_status, Efault);
	user_access_end();
	return err;
Efault:
	/* A failed store jumps here; user access must still be closed. */
	user_access_end();
	return -EFAULT;
}
1629
1630long kernel_wait4(pid_t upid, int __user *stat_addr, int options,
1631 struct rusage *ru)
1632{
1633 struct wait_opts wo;
1634 struct pid *pid = NULL;
1635 enum pid_type type;
1636 long ret;
1637
1638 if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|
1639 __WNOTHREAD|__WCLONE|__WALL))
1640 return -EINVAL;
1641
1642
1643 if (upid == INT_MIN)
1644 return -ESRCH;
1645
1646 if (upid == -1)
1647 type = PIDTYPE_MAX;
1648 else if (upid < 0) {
1649 type = PIDTYPE_PGID;
1650 pid = find_get_pid(-upid);
1651 } else if (upid == 0) {
1652 type = PIDTYPE_PGID;
1653 pid = get_task_pid(current, PIDTYPE_PGID);
1654 } else {
1655 type = PIDTYPE_PID;
1656 pid = find_get_pid(upid);
1657 }
1658
1659 wo.wo_type = type;
1660 wo.wo_pid = pid;
1661 wo.wo_flags = options | WEXITED;
1662 wo.wo_info = NULL;
1663 wo.wo_stat = 0;
1664 wo.wo_rusage = ru;
1665 ret = do_wait(&wo);
1666 put_pid(pid);
1667 if (ret > 0 && stat_addr && put_user(wo.wo_stat, stat_addr))
1668 ret = -EFAULT;
1669
1670 return ret;
1671}
1672
1673SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
1674 int, options, struct rusage __user *, ru)
1675{
1676 struct rusage r;
1677 long err = kernel_wait4(upid, stat_addr, options, ru ? &r : NULL);
1678
1679 if (err > 0) {
1680 if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
1681 return -EFAULT;
1682 }
1683 return err;
1684}
1685
1686#ifdef __ARCH_WANT_SYS_WAITPID
1687
1688
1689
1690
1691
/*
 * waitpid(2) system call, built only when the architecture defines
 * __ARCH_WANT_SYS_WAITPID. Forwards to sys_wait4() with no rusage
 * buffer.
 */
SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)
{
	return sys_wait4(pid, stat_addr, options, NULL);
}
1696
1697#endif
1698
1699#ifdef CONFIG_COMPAT
1700COMPAT_SYSCALL_DEFINE4(wait4,
1701 compat_pid_t, pid,
1702 compat_uint_t __user *, stat_addr,
1703 int, options,
1704 struct compat_rusage __user *, ru)
1705{
1706 struct rusage r;
1707 long err = kernel_wait4(pid, stat_addr, options, ru ? &r : NULL);
1708 if (err > 0) {
1709 if (ru && put_compat_rusage(&r, ru))
1710 return -EFAULT;
1711 }
1712 return err;
1713}
1714
/*
 * Compat waitid(2).
 *
 * Runs kernel_waitid() and copies the results to user space in the
 * compat layouts. Returns 0 on success (also when nothing was
 * reported), a negative errno from kernel_waitid(), or -EFAULT if @uru
 * or @infop cannot be written.
 */
COMPAT_SYSCALL_DEFINE5(waitid,
		int, which, compat_pid_t, pid,
		struct compat_siginfo __user *, infop, int, options,
		struct compat_rusage __user *, uru)
{
	struct rusage ru;
	/* Zero-initialised so unreported fields are copied out as 0. */
	struct waitid_info info = {.status = 0};
	long err = kernel_waitid(which, pid, &info, options, uru ? &ru : NULL);
	int signo = 0;
	if (err > 0) {
		/* A child was reported: the pid result becomes 0 / SIGCHLD. */
		signo = SIGCHLD;
		err = 0;
		if (uru) {
			/*
			 * With COMPAT_USE_64BIT_TIME the native struct is
			 * copied out unchanged; otherwise it is converted to
			 * the compat layout.
			 */
			if (COMPAT_USE_64BIT_TIME)
				err = copy_to_user(uru, &ru, sizeof(ru));
			else
				err = put_compat_rusage(&ru, uru);
			if (err)
				return -EFAULT;
		}
	}

	if (!infop)
		return err;

	/*
	 * The unsafe_put_user() calls below do no range checking of their
	 * own, so the whole siginfo must be validated first.
	 */
	if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
		return -EFAULT;

	user_access_begin();
	unsafe_put_user(signo, &infop->si_signo, Efault);
	unsafe_put_user(0, &infop->si_errno, Efault);
	unsafe_put_user(info.cause, &infop->si_code, Efault);
	unsafe_put_user(info.pid, &infop->si_pid, Efault);
	unsafe_put_user(info.uid, &infop->si_uid, Efault);
	unsafe_put_user(info.status, &infop->si_status, Efault);
	user_access_end();
	return err;
Efault:
	/* A failed store jumps here; user access must still be closed. */
	user_access_end();
	return -EFAULT;
}
1757#endif
1758