/*
 *  linux/mm/oom_kill.c
 *
 *  The routines in this file are used to kill a process when we are
 *  seriously out of memory.  This gets called from the page allocator
 *  (mm/page_alloc.c) when a failing allocation leaves no other option.
 */
#include <linux/oom.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/swap.h>
#include <linux/timex.h>
#include <linux/jiffies.h>
#include <linux/cpuset.h>
#include <linux/export.h>
#include <linux/notifier.h>
#include <linux/memcontrol.h>
#include <linux/mempolicy.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/freezer.h>
#include <linux/ftrace.h>
#include <linux/ratelimit.h>

#define CREATE_TRACE_POINTS
#include <trace/events/oom.h>

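/* OOM sysctls, exposed to userspace under /proc/sys/vm/ */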
int sysctl_panic_on_oom;
int sysctl_oom_kill_allocating_task;
int sysctl_oom_dump_tasks = 1;
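/* serializes OOM handling: protects the ZONE_OOM_LOCKED flags set below */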
static DEFINE_SPINLOCK(zone_scan_lock);

/**
 * compare_swap_oom_score_adj() - compare and swap current's oom_score_adj
 * @old_val: old oom_score_adj value to compare against
 * @new_val: new oom_score_adj value to store
 *
 * Sets current's oom_score_adj to @new_val only if its present value is
 * @old_val, so a value temporarily installed by test_set_oom_score_adj() can
 * be restored without clobbering a concurrent update from userspace.
 */
void compare_swap_oom_score_adj(int old_val, int new_val)
{
	struct sighand_struct *sighand = current->sighand;

	spin_lock_irq(&sighand->siglock);
	if (current->signal->oom_score_adj == old_val)
		current->signal->oom_score_adj = new_val;
	trace_oom_score_adj_update(current);
	spin_unlock_irq(&sighand->siglock);
}

/**
 * test_set_oom_score_adj() - set current's oom_score_adj and return old value
 * @new_val: new oom_score_adj value
 *
 * Sets the oom_score_adj value for current to @new_val under siglock and
 * returns the old value.  Usually used to temporarily set a value, save the
 * old value in the caller, and then reinstate it later.
 */
int test_set_oom_score_adj(int new_val)
{
	struct sighand_struct *sighand = current->sighand;
	int old_val;

	spin_lock_irq(&sighand->siglock);
	old_val = current->signal->oom_score_adj;
	current->signal->oom_score_adj = new_val;
	trace_oom_score_adj_update(current);
	spin_unlock_irq(&sighand->siglock);

	return old_val;
}
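/*
 * Illustrative usage (not code in this file): a caller that must adjust its
 * own OOM priority around a critical region can do
 *
 *	int old = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
 *	...
 *	compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, old);
 *
 * so that a value written by userspace in the meantime is not overwritten
 * when the old value is restored.
 */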

#ifdef CONFIG_NUMA
/**
 * has_intersects_mems_allowed() - check task eligibility for kill
 * @tsk: task struct of which task to consider
 * @mask: nodemask passed to the page allocator for mempolicy ooms
 *
 * Task eligibility is determined by whether or not a candidate task, @tsk,
 * shares the same mempolicy nodes as current if it is bound by such a policy
 * and whether or not it has the same set of allowed cpuset nodes.
 */
static bool has_intersects_mems_allowed(struct task_struct *tsk,
					const nodemask_t *mask)
{
	struct task_struct *start = tsk;

	do {
		if (mask) {
			/*
			 * If this is a mempolicy constrained oom, tsk's
			 * cpuset is irrelevant.  Only return true if its
			 * mempolicy intersects current, otherwise it may be
			 * needlessly killed.
			 */
			if (mempolicy_nodemask_intersects(tsk, mask))
				return true;
		} else {
			/*
			 * This is not a mempolicy constrained oom, so only
			 * check the mems of tsk's cpuset.
			 */
			if (cpuset_mems_allowed_intersects(current, tsk))
				return true;
		}
	} while_each_thread(start, tsk);

	return false;
}
#else
static bool has_intersects_mems_allowed(struct task_struct *tsk,
					const nodemask_t *mask)
{
	return true;
}
#endif /* CONFIG_NUMA */

/*
 * The process p may have detached its own ->mm while exiting or through
 * use_mm(), but one or more of its subthreads may still have a valid
 * pointer.  Return p, or any of its subthreads with a valid ->mm, with
 * task_lock() held.
 */
struct task_struct *find_lock_task_mm(struct task_struct *p)
{
	struct task_struct *t = p;

	do {
		task_lock(t);
		if (likely(t->mm))
			return t;
		task_unlock(t);
	} while_each_thread(p, t);

	return NULL;
}

/* return true if the task is not adequate as a candidate victim task. */
static bool oom_unkillable_task(struct task_struct *p,
		const struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
	if (is_global_init(p))
		return true;
	if (p->flags & PF_KTHREAD)
		return true;

	/* for memcg OOMs, only tasks in the memcg are eligible */
	if (memcg && !task_in_mem_cgroup(p, memcg))
		return true;

	/* p may not have freeable memory in nodemask */
	if (!has_intersects_mems_allowed(p, nodemask))
		return true;

	return false;
}

/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task struct of which task we should calculate
 * @memcg: the memory controller constraining the OOM, if any
 * @nodemask: nodemask passed to the page allocator for mempolicy ooms
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible.  The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 */
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
			  const nodemask_t *nodemask, unsigned long totalpages)
{
	long points;
	long adj;

	if (oom_unkillable_task(p, memcg, nodemask))
		return 0;

	p = find_lock_task_mm(p);
	if (!p)
		return 0;

	adj = p->signal->oom_score_adj;
	if (adj == OOM_SCORE_ADJ_MIN) {
		task_unlock(p);
		return 0;
	}

	/*
	 * The baseline for the badness score is the proportion of RAM that
	 * each task's rss, pagetable and swap space use.
	 */
	points = get_mm_rss(p->mm) + p->mm->nr_ptes +
		 get_mm_counter(p->mm, MM_SWAPENTS);
	task_unlock(p);

	/*
	 * Root processes get 3% bonus, just like the __vm_enough_memory()
	 * implementation used by LSMs.
	 */
	if (has_capability_noaudit(p, CAP_SYS_ADMIN))
		adj -= 30;

	/* Normalize to oom_score_adj units */
	adj *= totalpages / 1000;
	points += adj;

	/*
	 * Never return 0 for an eligible task regardless of the root bonus and
	 * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
	 */
	return points > 0 ? points : 1;
}
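/*
 * Worked example (illustrative numbers only): with totalpages = 1,000,000
 * pages of RAM + swap, a root-owned task with oom_score_adj = 0 that uses
 * 100,000 pages of rss + page tables + swap scores
 * 100,000 - 30 * (1,000,000 / 1000) = 70,000, i.e. roughly 7% of memory once
 * the 3% root bonus is subtracted; select_bad_process() later rescales this
 * to the familiar 0..1000 oom_score range.
 */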

/*
 * Determine the type of allocation constraint.
 */
#ifdef CONFIG_NUMA
static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
				gfp_t gfp_mask, nodemask_t *nodemask,
				unsigned long *totalpages)
{
	struct zone *zone;
	struct zoneref *z;
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	bool cpuset_limited = false;
	int nid;

	/* Default to all available memory */
	*totalpages = totalram_pages + total_swap_pages;

	if (!zonelist)
		return CONSTRAINT_NONE;

	/*
	 * __GFP_THISNODE allocations normally do not reach the OOM killer;
	 * if one gets here anyway (e.g. via __GFP_NOFAIL), treat it as
	 * unconstrained rather than restricting the kill to a single node.
	 */
	if (gfp_mask & __GFP_THISNODE)
		return CONSTRAINT_NONE;

	/*
	 * This is not a __GFP_THISNODE allocation, so a truncated nodemask in
	 * the page allocator means a mempolicy is in effect.  Cpuset policy
	 * is enforced in get_page_from_freelist().
	 */
	if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask)) {
		*totalpages = total_swap_pages;
		for_each_node_mask(nid, *nodemask)
			*totalpages += node_spanned_pages(nid);
		return CONSTRAINT_MEMORY_POLICY;
	}

	/* Check if this allocation failure was caused by a cpuset's wall */
	for_each_zone_zonelist_nodemask(zone, z, zonelist,
			high_zoneidx, nodemask)
		if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
			cpuset_limited = true;

	if (cpuset_limited) {
		*totalpages = total_swap_pages;
		for_each_node_mask(nid, cpuset_current_mems_allowed)
			*totalpages += node_spanned_pages(nid);
		return CONSTRAINT_CPUSET;
	}
	return CONSTRAINT_NONE;
}
#else
static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
				gfp_t gfp_mask, nodemask_t *nodemask,
				unsigned long *totalpages)
{
	*totalpages = totalram_pages + total_swap_pages;
	return CONSTRAINT_NONE;
}
#endif /* CONFIG_NUMA */

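/*
 * Decide how select_bad_process() should treat one candidate thread:
 * OOM_SCAN_SELECT picks the task outright, OOM_SCAN_CONTINUE skips it,
 * OOM_SCAN_ABORT stops the scan entirely because a task is already on its
 * way to freeing memory, and OOM_SCAN_OK means score it with oom_badness().
 */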
enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
		unsigned long totalpages, const nodemask_t *nodemask,
		bool force_kill)
{
	if (task->exit_state)
		return OOM_SCAN_CONTINUE;
	if (oom_unkillable_task(task, NULL, nodemask))
		return OOM_SCAN_CONTINUE;

	/*
	 * This task already has access to memory reserves and is being killed.
	 * Don't allow any other task to have access to the reserves.
	 */
	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
		if (unlikely(frozen(task)))
			__thaw_task(task);
		if (!force_kill)
			return OOM_SCAN_ABORT;
	}
	if (!task->mm)
		return OOM_SCAN_CONTINUE;

	if (task->flags & PF_EXITING) {
		/*
		 * The task is already exiting and should soon release its
		 * memory.  If it is current, select it so it gains access to
		 * memory reserves and can finish quickly; otherwise prefer to
		 * wait for it instead of killing another task unnecessarily.
		 */
		if (task == current)
			return OOM_SCAN_SELECT;
		else if (!force_kill) {
			/*
			 * If this task is not being ptraced on exit, then wait
			 * for it to finish before killing some other task
			 * unnecessarily.
			 */
			if (!(task->group_leader->ptrace & PT_TRACE_EXIT))
				return OOM_SCAN_ABORT;
		}
	}
	return OOM_SCAN_OK;
}

/*
 * Simple selection loop.  We choose the process with the highest badness
 * score.  Returns ERR_PTR(-1UL) if the scan was aborted, e.g. because a task
 * is already exiting or already has access to memory reserves.
 */
static struct task_struct *select_bad_process(unsigned int *ppoints,
		unsigned long totalpages, const nodemask_t *nodemask,
		bool force_kill)
{
	struct task_struct *g, *p;
	struct task_struct *chosen = NULL;
	unsigned long chosen_points = 0;

	rcu_read_lock();
	do_each_thread(g, p) {
		unsigned int points;

		switch (oom_scan_process_thread(p, totalpages, nodemask,
						force_kill)) {
		case OOM_SCAN_SELECT:
			chosen = p;
			chosen_points = ULONG_MAX;
			/* fall through */
		case OOM_SCAN_CONTINUE:
			continue;
		case OOM_SCAN_ABORT:
			rcu_read_unlock();
			return ERR_PTR(-1UL);
		case OOM_SCAN_OK:
			break;
		}
		points = oom_badness(p, NULL, nodemask, totalpages);
		if (points > chosen_points) {
			chosen = p;
			chosen_points = points;
		}
	} while_each_thread(g, p);
	if (chosen)
		get_task_struct(chosen);
	rcu_read_unlock();

	/* normalise to the 0..1000 range used when reporting the score */
	*ppoints = chosen_points * 1000 / totalpages;
	return chosen;
}

/**
 * dump_tasks - dump current memory state of all system tasks
 * @memcg: current's memory controller, if constrained
 * @nodemask: nodemask passed to page allocator for mempolicy ooms
 *
 * Dumps the current memory state of all eligible tasks.  Tasks not in the same
 * memcg, not in the same cpuset, or bound to a disjoint nodemask (mempolicy)
 * are not shown.
 * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes,
 * swapents, oom_score_adj value, and name.
 */
static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
	struct task_struct *p;
	struct task_struct *task;

	pr_info("[ pid ] uid tgid total_vm rss nr_ptes swapents oom_score_adj name\n");
	rcu_read_lock();
	for_each_process(p) {
		if (oom_unkillable_task(p, memcg, nodemask))
			continue;

		task = find_lock_task_mm(p);
		if (!task) {
			/*
			 * This is a kthread or all of p's threads have already
			 * detached their mm's.  There's no need to report
			 * them; they can't be oom killed anyway.
			 */
			continue;
		}

		pr_info("[%5d] %5d %5d %8lu %8lu %7lu %8lu %5d %s\n",
			task->pid, from_kuid(&init_user_ns, task_uid(task)),
			task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
			task->mm->nr_ptes,
			get_mm_counter(task->mm, MM_SWAPENTS),
			task->signal->oom_score_adj, task->comm);
		task_unlock(task);
	}
	rcu_read_unlock();
}

static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
			struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
	task_lock(current);
	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
		"oom_score_adj=%d\n",
		current->comm, gfp_mask, order,
		current->signal->oom_score_adj);
	cpuset_print_task_mems_allowed(current);
	task_unlock(current);
	dump_stack();
	mem_cgroup_print_oom_info(memcg, p);
	show_mem(SHOW_MEM_FILTER_NODES);
	if (sysctl_oom_dump_tasks)
		dump_tasks(memcg, nodemask);
}

#define K(x) ((x) << (PAGE_SHIFT-10))	/* pages to KiB, for the kill messages below */

/*
 * Must be called while holding a reference to p, which will be released upon
 * returning.
 */
void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
		      unsigned int points, unsigned long totalpages,
		      struct mem_cgroup *memcg, nodemask_t *nodemask,
		      const char *message)
{
	struct task_struct *victim = p;
	struct task_struct *child;
	struct task_struct *t = p;
	struct mm_struct *mm;
	unsigned int victim_points = 0;
	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	/*
	 * If the task is already exiting, don't alarm the sysadmin or kill
	 * its children or threads, just set TIF_MEMDIE so it can die quickly.
	 */
	if (p->flags & PF_EXITING) {
		set_tsk_thread_flag(p, TIF_MEMDIE);
		put_task_struct(p);
		return;
	}

	if (__ratelimit(&oom_rs))
		dump_header(p, gfp_mask, order, memcg, nodemask);

	task_lock(p);
	pr_err("%s: Kill process %d (%s) score %d or sacrifice child\n",
		message, task_pid_nr(p), p->comm, points);
	task_unlock(p);

	/*
	 * If any of p's children has a different mm and is eligible for kill,
	 * the one with the highest oom_badness() score is sacrificed for its
	 * parent.  This attempts to lose the minimal amount of work done while
	 * still freeing memory.
	 */
	read_lock(&tasklist_lock);
	do {
		list_for_each_entry(child, &t->children, sibling) {
			unsigned int child_points;

			if (child->mm == p->mm)
				continue;
			/*
			 * oom_badness() returns 0 if the thread is unkillable
			 */
			child_points = oom_badness(child, memcg, nodemask,
								totalpages);
			if (child_points > victim_points) {
				put_task_struct(victim);
				victim = child;
				victim_points = child_points;
				get_task_struct(victim);
			}
		}
	} while_each_thread(p, t);
	read_unlock(&tasklist_lock);

	rcu_read_lock();
	p = find_lock_task_mm(victim);
	if (!p) {
		rcu_read_unlock();
		put_task_struct(victim);
		return;
	} else if (victim != p) {
		get_task_struct(p);
		put_task_struct(victim);
		victim = p;
	}

	/* mm cannot safely be dereferenced after task_unlock(victim) */
	mm = victim->mm;
	pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
		task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
		K(get_mm_counter(victim->mm, MM_ANONPAGES)),
		K(get_mm_counter(victim->mm, MM_FILEPAGES)));
	task_unlock(victim);

	/*
	 * Kill all user processes sharing victim->mm in other thread groups,
	 * if any.  They don't get access to memory reserves, though, to avoid
	 * depletion of all memory.  This prevents mm->mmap_sem livelock when
	 * an oom killed thread cannot exit because it requires the semaphore
	 * and it is contended by another thread trying to allocate memory
	 * itself.  That thread will now get access to memory reserves since
	 * it has a pending fatal signal.
	 */
	for_each_process(p)
		if (p->mm == mm && !same_thread_group(p, victim) &&
		    !(p->flags & PF_KTHREAD)) {
			/* tasks protected with OOM_SCORE_ADJ_MIN are never signalled */
			if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
				continue;

			task_lock(p);	/* protect ->comm from prctl() */
			pr_err("Kill process %d (%s) sharing same memory\n",
				task_pid_nr(p), p->comm);
			task_unlock(p);
			do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
		}
	rcu_read_unlock();

	set_tsk_thread_flag(victim, TIF_MEMDIE);
	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
	put_task_struct(victim);
}
#undef K

/*
 * Determines whether the kernel must panic because of the panic_on_oom sysctl.
 */
void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
			int order, const nodemask_t *nodemask)
{
	if (likely(!sysctl_panic_on_oom))
		return;
	if (sysctl_panic_on_oom != 2) {
		/*
		 * panic_on_oom == 1 only affects CONSTRAINT_NONE; the kernel
		 * does not panic for cpuset, mempolicy, or memcg allocation
		 * failures.
		 */
		if (constraint != CONSTRAINT_NONE)
			return;
	}
	dump_header(NULL, gfp_mask, order, NULL, nodemask);
	panic("Out of memory: %s panic_on_oom is enabled\n",
		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
}

static BLOCKING_NOTIFIER_HEAD(oom_notify_list);

int register_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_oom_notifier);

int unregister_oom_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
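/*
 * Illustrative use of the notifier chain (the callback and its cache-shrink
 * helper are hypothetical, not part of this file): a driver may register a
 * callback that releases private caches before a victim is chosen.  The
 * third argument is the &freed counter from out_of_memory(); reporting a
 * non-zero number of freed pages causes the kill to be skipped for this
 * invocation.
 *
 *	static int my_oom_notify(struct notifier_block *nb,
 *				 unsigned long unused, void *parm)
 *	{
 *		unsigned long *freed = parm;
 *
 *		*freed += my_shrink_private_cache();
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_oom_nb = {
 *		.notifier_call	= my_oom_notify,
 *	};
 *
 *	register_oom_notifier(&my_oom_nb);
 */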

/*
 * Try to acquire the OOM killer lock for the zones in zonelist.  Returns zero
 * if a parallel OOM killing is already taking place that includes a zone in
 * the zonelist.  Otherwise, locks all zones in the zonelist and returns 1.
 */
int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
	struct zoneref *z;
	struct zone *zone;
	int ret = 1;

	spin_lock(&zone_scan_lock);
	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
		if (zone_is_oom_locked(zone)) {
			ret = 0;
			goto out;
		}
	}

	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
		/*
		 * Lock each zone in the zonelist under zone_scan_lock so a
		 * parallel invocation of try_set_zonelist_oom() doesn't succeed
		 * when it shouldn't.
		 */
		zone_set_flag(zone, ZONE_OOM_LOCKED);
	}

out:
	spin_unlock(&zone_scan_lock);
	return ret;
}

/*
 * Clears the ZONE_OOM_LOCKED flag for all zones in the zonelist so that failed
 * allocation attempts with zonelists containing them may now recall the OOM
 * killer, if necessary.
 */
void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
	struct zoneref *z;
	struct zone *zone;

	spin_lock(&zone_scan_lock);
	for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
		zone_clear_flag(zone, ZONE_OOM_LOCKED);
	}
	spin_unlock(&zone_scan_lock);
}

/*
 * Try to acquire the OOM killer lock for all system zones.  Returns zero if a
 * parallel OOM killing is already taking place, otherwise locks all populated
 * zones and returns non-zero.
 */
static int try_set_system_oom(void)
{
	struct zone *zone;
	int ret = 1;

	spin_lock(&zone_scan_lock);
	for_each_populated_zone(zone)
		if (zone_is_oom_locked(zone)) {
			ret = 0;
			goto out;
		}
	for_each_populated_zone(zone)
		zone_set_flag(zone, ZONE_OOM_LOCKED);
out:
	spin_unlock(&zone_scan_lock);
	return ret;
}

/*
 * Clears ZONE_OOM_LOCKED for all system zones so that failed allocation
 * attempts may invoke the OOM killer again, if necessary.
 */
static void clear_system_oom(void)
{
	struct zone *zone;

	spin_lock(&zone_scan_lock);
	for_each_populated_zone(zone)
		zone_clear_flag(zone, ZONE_OOM_LOCKED);
	spin_unlock(&zone_scan_lock);
}

/**
 * out_of_memory - kill the "best" process when we run out of memory
 * @zonelist: zonelist pointer
 * @gfp_mask: memory allocation flags
 * @order: amount of memory being requested as a power of 2
 * @nodemask: nodemask passed to page allocator
 * @force_kill: true if a task must be killed, even if others are exiting
 *
 * If we run out of memory, we have the choice between either
 * killing a random task (bad), letting the system crash (worse)
 * OR try to be smart about which process to kill.  Note that we
 * don't have to be perfect here, we just have to be good.
 */
void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
		int order, nodemask_t *nodemask, bool force_kill)
{
	const nodemask_t *mpol_mask;
	struct task_struct *p;
	unsigned long totalpages;
	unsigned long freed = 0;
	unsigned int uninitialized_var(points);
	enum oom_constraint constraint = CONSTRAINT_NONE;
	int killed = 0;

	blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
	if (freed > 0)
		/* Got some memory back from the OOM notifiers. */
		return;

	/*
	 * If current has a pending SIGKILL, then automatically select it.  The
	 * goal is to allow it to allocate so that it may quickly exit and free
	 * its memory.
	 */
	if (fatal_signal_pending(current)) {
		set_thread_flag(TIF_MEMDIE);
		return;
	}

	/*
	 * Check if there were limitations on the allocation (only relevant for
	 * NUMA) that may require different handling.
	 */
	constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
						&totalpages);
	mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
	check_panic_on_oom(constraint, gfp_mask, order, mpol_mask);

	if (sysctl_oom_kill_allocating_task && current->mm &&
	    !oom_unkillable_task(current, NULL, nodemask) &&
	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
		get_task_struct(current);
		oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL,
				 nodemask,
				 "Out of memory (oom_kill_allocating_task)");
		goto out;
	}

	p = select_bad_process(&points, totalpages, mpol_mask, force_kill);
	/* Found nothing?!?! Either we hang forever, or we panic. */
	if (!p) {
		dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
		panic("Out of memory and no killable processes...\n");
	}
	if (PTR_ERR(p) != -1UL) {
		oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
				 nodemask, "Out of memory");
		killed = 1;
	}
out:
	/*
	 * Give the killed threads a good chance of exiting before trying to
	 * allocate memory again.
	 */
	if (killed)
		schedule_timeout_killable(1);
}

/*
 * The pagefault handler calls here because it is out of memory, so kill a
 * memory-hogging task.  If a populated zone has ZONE_OOM_LOCKED set, a
 * parallel OOM killing is already in progress so do nothing.
 */
void pagefault_out_of_memory(void)
{
	if (try_set_system_oom()) {
		out_of_memory(NULL, 0, 0, NULL, false);
		clear_system_oom();
	}
	schedule_timeout_killable(1);
}