/*
 * Copyright (C) 2008, 2009 Intel Corporation
 * Authors: Andi Kleen, Fengguang Wu
 *
 * This software may be redistributed and/or modified under the terms of
 * the GNU General Public License ("GPL") version 2 only as published by the
 * Free Software Foundation.
 *
 * High level machine check handler. Handles pages reported by the
 * hardware as being corrupted, usually due to a multi-bit ECC memory or
 * cache failure.
 *
 * Handles page cache pages in various states. The tricky part here is
 * that we can access any page asynchronously with respect to other VM
 * users, because memory failures can happen at any time and anywhere,
 * so locking has to be extremely careful.
 *
 * Several operations here have non-linear complexity because of
 * unsuitable VM data structures: for example, mapping back from RMAP
 * chains to processes requires walking the complete process list.
 * Since memory corruptions are rare, we hope to get away with this.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/kernel-page-flags.h>
#include <linux/sched.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/backing-dev.h>
#include <linux/migrate.h>
#include <linux/page-isolation.h>
#include <linux/suspend.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/hugetlb.h>
#include <linux/memory_hotplug.h>
#include "internal.h"
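
/*
 * Tunables exposed under /proc/sys/vm/: memory_failure_early_kill selects
 * whether processes mapping a corrupted page are killed early (at error
 * time) or late (when they actually touch the bad page);
 * memory_failure_recovery enables recovery at all (when it is 0, any
 * uncorrected memory error panics the machine).
 */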
int sysctl_memory_failure_early_kill __read_mostly = 0;

int sysctl_memory_failure_recovery __read_mostly = 1;

atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0);

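/*
 * The hwpoison filters below are built only together with the hwpoison
 * injector. They restrict error injection, e.g. to a single device or to
 * pages in a particular state; the values are normally written through
 * the injector's debugfs files (typically under /sys/kernel/debug/hwpoison/).
 */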
#if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)

u32 hwpoison_filter_enable = 0;
u32 hwpoison_filter_dev_major = ~0U;
u32 hwpoison_filter_dev_minor = ~0U;
u64 hwpoison_filter_flags_mask;
u64 hwpoison_filter_flags_value;
EXPORT_SYMBOL_GPL(hwpoison_filter_enable);
EXPORT_SYMBOL_GPL(hwpoison_filter_dev_major);
EXPORT_SYMBOL_GPL(hwpoison_filter_dev_minor);
EXPORT_SYMBOL_GPL(hwpoison_filter_flags_mask);
EXPORT_SYMBOL_GPL(hwpoison_filter_flags_value);

static int hwpoison_filter_dev(struct page *p)
{
	struct address_space *mapping;
	dev_t dev;

	if (hwpoison_filter_dev_major == ~0U &&
	    hwpoison_filter_dev_minor == ~0U)
		return 0;

	/*
	 * page_mapping() does not accept slab pages.
	 */
	if (PageSlab(p))
		return -EINVAL;

	mapping = page_mapping(p);
	if (mapping == NULL || mapping->host == NULL)
		return -EINVAL;

	dev = mapping->host->i_sb->s_dev;
	if (hwpoison_filter_dev_major != ~0U &&
	    hwpoison_filter_dev_major != MAJOR(dev))
		return -EINVAL;
	if (hwpoison_filter_dev_minor != ~0U &&
	    hwpoison_filter_dev_minor != MINOR(dev))
		return -EINVAL;

	return 0;
}

static int hwpoison_filter_flags(struct page *p)
{
	if (!hwpoison_filter_flags_mask)
		return 0;

	if ((stable_page_flags(p) & hwpoison_filter_flags_mask) ==
				    hwpoison_filter_flags_value)
		return 0;
	else
		return -EINVAL;
}

/*
 * This allows stress testing of hwpoison with a selected memory cgroup:
 * when hwpoison_filter_memcg is set to the inode number of a cgroup,
 * only pages charged to that memcg are poisoned.
 */
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
u64 hwpoison_filter_memcg;
EXPORT_SYMBOL_GPL(hwpoison_filter_memcg);
static int hwpoison_filter_task(struct page *p)
{
	struct mem_cgroup *mem;
	struct cgroup_subsys_state *css;
	unsigned long ino;

	if (!hwpoison_filter_memcg)
		return 0;

	mem = try_get_mem_cgroup_from_page(p);
	if (!mem)
		return -EINVAL;

	css = mem_cgroup_css(mem);
	/* root_mem_cgroup has NULL dentries */
	if (!css->cgroup->dentry) {
		css_put(css);
		return -EINVAL;
	}

	ino = css->cgroup->dentry->d_inode->i_ino;
	css_put(css);

	if (ino != hwpoison_filter_memcg)
		return -EINVAL;

	return 0;
}
#else
static int hwpoison_filter_task(struct page *p) { return 0; }
#endif

int hwpoison_filter(struct page *p)
{
	if (!hwpoison_filter_enable)
		return 0;

	if (hwpoison_filter_dev(p))
		return -EINVAL;

	if (hwpoison_filter_flags(p))
		return -EINVAL;

	if (hwpoison_filter_task(p))
		return -EINVAL;

	return 0;
}
#else
int hwpoison_filter(struct page *p)
{
	return 0;
}
#endif

EXPORT_SYMBOL_GPL(hwpoison_filter);

/*
 * Send all the processes who have the page mapped an ``action optional''
 * signal.
 */
static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
			unsigned long pfn, struct page *page)
{
	struct siginfo si;
	int ret;

	printk(KERN_ERR
		"MCE %#lx: Killing %s:%d early due to hardware memory corruption\n",
		pfn, t->comm, t->pid);
	si.si_signo = SIGBUS;
	si.si_errno = 0;
	si.si_code = BUS_MCEERR_AO;
	si.si_addr = (void *)addr;
#ifdef __ARCH_SI_TRAPNO
	si.si_trapno = trapno;
#endif
	si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT;

	/*
	 * Don't use force here, it's convenient if the signal
	 * can be temporarily blocked.
	 * This could cause a loop when the user sets SIGBUS
	 * to SIG_IGN, but hopefully no one will do that?
	 */
	ret = send_sig_info(SIGBUS, &si, t);
	if (ret < 0)
		printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
		       t->comm, t->pid, ret);
	return ret;
}

/*
 * When an unknown page type is encountered drain as many buffers as possible
 * in the hope to turn the page into a LRU or free page, which we can handle.
 */
void shake_page(struct page *p, int access)
{
	if (!PageSlab(p)) {
		lru_add_drain_all();
		if (PageLRU(p))
			return;
		drain_all_pages();
		if (PageLRU(p) || is_free_buddy_page(p))
			return;
	}

	/*
	 * Only call shrink_slab here (which would also shrink other caches)
	 * if access is not potentially fatal.
	 */
	if (access) {
		int nr;
		do {
			nr = shrink_slab(1000, GFP_KERNEL, 1000);
			if (page_count(p) == 1)
				break;
		} while (nr > 10);
	}
}
EXPORT_SYMBOL_GPL(shake_page);

/*
 * Kill all processes that have a poisoned page mapped and then isolate
 * the page.
 *
 * General strategy:
 * Find all processes having the page mapped and kill them.
 * But we keep a page reference around so that the page is not
 * actually freed yet.
 * Then stash the page away.
 *
 * There's no convenient way to get back to mapped processes
 * from the VMAs. So do a brute-force search over all
 * running processes.
 *
 * Remember that machine checks are not common (or rather
 * if they are common you have other problems), so this shouldn't
 * be a performance issue.
 *
 * Also there are some races possible while we get from the
 * error detection to actually handling it.
 */
struct to_kill {
	struct list_head nd;
	struct task_struct *tsk;
	unsigned long addr;
	char addr_valid;
};

/*
 * Failure handling: if we can't find or can't kill a process there's
 * not much we can do. We just print a message and ignore otherwise.
 */

/*
 * Schedule a process for later kill.
 * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
 * TBD would GFP_NOIO be enough?
 */
static void add_to_kill(struct task_struct *tsk, struct page *p,
		       struct vm_area_struct *vma,
		       struct list_head *to_kill,
		       struct to_kill **tkc)
{
	struct to_kill *tk;

	if (*tkc) {
		tk = *tkc;
		*tkc = NULL;
	} else {
		tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
		if (!tk) {
			printk(KERN_ERR
		"MCE: Out of memory during machine check handling\n");
			return;
		}
	}
	tk->addr = page_address_in_vma(p, vma);
	tk->addr_valid = 1;

	/*
	 * In theory we don't have to kill when the page was
	 * munmaped. But it could be also a mremap. Since that's
	 * likely very rare kill anyways just out of paranoia, but use
	 * a SIGKILL because the error is not contained anymore.
	 */
	if (tk->addr == -EFAULT) {
		pr_info("MCE: Unable to find user space address %lx in %s\n",
			page_to_pfn(p), tsk->comm);
		tk->addr_valid = 0;
	}
	get_task_struct(tsk);
	tk->tsk = tsk;
	list_add_tail(&tk->nd, to_kill);
}

/*
 * Kill the processes that have been collected earlier.
 *
 * Only do anything when DOIT is set, otherwise just free the list
 * (this is used for clean pages which do not need killing).
 * Also when FAIL is set do a force kill because something went
 * wrong earlier.
 */
static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
			  int fail, struct page *page, unsigned long pfn)
{
	struct to_kill *tk, *next;

	list_for_each_entry_safe (tk, next, to_kill, nd) {
		if (doit) {
			/*
			 * In case something went wrong with munmapping
			 * make sure the process doesn't catch the
			 * signal and then access the memory. Just kill it.
			 */
			if (fail || tk->addr_valid == 0) {
				printk(KERN_ERR
		"MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
					pfn, tk->tsk->comm, tk->tsk->pid);
				force_sig(SIGKILL, tk->tsk);
			}

			/*
			 * In theory the process could have mapped
			 * something else on the address in-between. We could
			 * check for that, but we need to tell the
			 * process anyways.
			 */
			else if (kill_proc_ao(tk->tsk, tk->addr, trapno,
					      pfn, page) < 0)
				printk(KERN_ERR
		"MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
					pfn, tk->tsk->comm, tk->tsk->pid);
		}
		put_task_struct(tk->tsk);
		kfree(tk);
	}
}

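/*
 * Decide whether a task should be killed early (at error time) or only
 * when it actually touches the corrupted page. A per-process setting
 * made via prctl(PR_MCE_KILL) takes precedence over the global sysctl;
 * for example, a process can opt in to early kill with:
 *
 *	prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
 */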
static int task_early_kill(struct task_struct *tsk)
{
	if (!tsk->mm)
		return 0;
	if (tsk->flags & PF_MCE_PROCESS)
		return !!(tsk->flags & PF_MCE_EARLY);
	return sysctl_memory_failure_early_kill;
}

/*
 * Collect processes when the error hit an anonymous page.
 */
static void collect_procs_anon(struct page *page, struct list_head *to_kill,
			      struct to_kill **tkc)
{
	struct vm_area_struct *vma;
	struct task_struct *tsk;
	struct anon_vma *av;

	read_lock(&tasklist_lock);
	av = page_lock_anon_vma(page);
	if (av == NULL)	/* Not actually mapped anymore */
		goto out;
	for_each_process (tsk) {
		struct anon_vma_chain *vmac;

		if (!task_early_kill(tsk))
			continue;
		list_for_each_entry(vmac, &av->head, same_anon_vma) {
			vma = vmac->vma;
			if (!page_mapped_in_vma(page, vma))
				continue;
			if (vma->vm_mm == tsk->mm)
				add_to_kill(tsk, page, vma, to_kill, tkc);
		}
	}
	page_unlock_anon_vma(av);
out:
	read_unlock(&tasklist_lock);
}

/*
 * Collect processes when the error hit a file mapped page.
 */
static void collect_procs_file(struct page *page, struct list_head *to_kill,
			      struct to_kill **tkc)
{
	struct vm_area_struct *vma;
	struct task_struct *tsk;
	struct prio_tree_iter iter;
	struct address_space *mapping = page->mapping;

	/*
	 * Both locks are needed here: tasklist_lock keeps the process
	 * list stable while we walk it, and i_mmap_lock keeps the prio
	 * tree of VMAs mapping this file stable.
	 */
	read_lock(&tasklist_lock);
	spin_lock(&mapping->i_mmap_lock);
	for_each_process(tsk) {
		pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);

		if (!task_early_kill(tsk))
			continue;

		vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff,
				      pgoff) {
			/*
			 * Send early kill signal to tasks where a vma covers
			 * the page but the corrupted page is not necessarily
			 * mapped by its pte.
			 * Assume applications who requested early kill want
			 * to be informed of all such data corruptions.
			 */
			if (vma->vm_mm == tsk->mm)
				add_to_kill(tsk, page, vma, to_kill, tkc);
		}
	}
	spin_unlock(&mapping->i_mmap_lock);
	read_unlock(&tasklist_lock);
}

/*
 * Collect the processes who have the corrupted page mapped to kill.
 * This is done in two steps for locking reasons.
 * First preallocate one tokill structure outside the spin locks,
 * so that we can kill at least one process reasonably reliably.
 */
static void collect_procs(struct page *page, struct list_head *tokill)
{
	struct to_kill *tk;

	if (!page->mapping)
		return;

	tk = kmalloc(sizeof(struct to_kill), GFP_NOIO);
	if (!tk)
		return;
	if (PageAnon(page))
		collect_procs_anon(page, tokill, &tk);
	else
		collect_procs_file(page, tokill, &tk);
	kfree(tk);
}

/*
 * Error handlers for various types of pages.
 */

enum outcome {
	IGNORED,	/* Error: cannot be handled */
	FAILED,		/* Error: handling failed */
	DELAYED,	/* Will be handled later */
	RECOVERED,	/* Successfully recovered */
};

static const char *action_name[] = {
	[IGNORED] = "Ignored",
	[FAILED] = "Failed",
	[DELAYED] = "Delayed",
	[RECOVERED] = "Recovered",
};

/*
 * XXX: It is possible that a page is isolated from LRU cache,
 * and then kept in swap cache or failed to remove from page cache.
 * The page count will stop it from being freed by unpoison.
 * Stress tests should be aware of this memory leak problem.
 */
static int delete_from_lru_cache(struct page *p)
{
	if (!isolate_lru_page(p)) {
		/*
		 * Clear sensible page flags, so that the buddy system won't
		 * complain when the page is unpoison-and-freed.
		 */
		ClearPageActive(p);
		ClearPageUnevictable(p);
		/*
		 * drop the page count elevated by isolate_lru_page()
		 */
		page_cache_release(p);
		return 0;
	}
	return -EIO;
}

/*
 * Error hit a kernel page.
 * There is not much we can do here; just try to be lucky and hope the
 * kernel never touches the corrupted data again.
 */
static int me_kernel(struct page *p, unsigned long pfn)
{
	return IGNORED;
}

/*
 * Page in unknown state. Do nothing.
 */
static int me_unknown(struct page *p, unsigned long pfn)
{
	printk(KERN_ERR "MCE %#lx: Unknown page state\n", pfn);
	return FAILED;
}

/*
 * Clean (or cleaned) page cache page.
 */
static int me_pagecache_clean(struct page *p, unsigned long pfn)
{
	int err;
	int ret = FAILED;
	struct address_space *mapping;

	delete_from_lru_cache(p);

	/*
	 * For anonymous pages we're done; the only reference left
	 * should be the one the memory failure handler holds.
	 */
	if (PageAnon(p))
		return RECOVERED;

	/*
	 * Now truncate the page in the page cache. This is really
	 * more like a "temporary hole punch".
	 * Don't do this indiscriminately as some filesystems
	 * are not prepared for it, so prefer the per-mapping
	 * error_remove_page operation when one is provided.
	 */
	mapping = page_mapping(p);
	if (!mapping) {
		/*
		 * Page has been torn down in the meanwhile.
		 */
		return FAILED;
	}

	/*
	 * Truncation is a bit tricky. Enable it per file system for now.
	 *
	 * Open: to take i_mutex or not to take i_mutex?
	 */
	if (mapping->a_ops->error_remove_page) {
		err = mapping->a_ops->error_remove_page(mapping, p);
		if (err != 0) {
			printk(KERN_INFO "MCE %#lx: Failed to punch page: %d\n",
					pfn, err);
		} else if (page_has_private(p) &&
				!try_to_release_page(p, GFP_NOIO)) {
			pr_info("MCE %#lx: failed to release buffers\n", pfn);
		} else {
			ret = RECOVERED;
		}
	} else {
		/*
		 * If the file system doesn't support it just invalidate.
		 * This fails on dirty or anything with private pages.
		 */
		if (invalidate_inode_page(p))
			ret = RECOVERED;
		else
			printk(KERN_INFO "MCE %#lx: Failed to invalidate\n",
				pfn);
	}
	return ret;
}

/*
 * Dirty pagecache page.
 * Issues: when the error hit a hole page the error is not properly
 * propagated.
 */
static int me_pagecache_dirty(struct page *p, unsigned long pfn)
{
	struct address_space *mapping = page_mapping(p);

	SetPageError(p);
	/* TBD: print more information about the file. */
	if (mapping) {
		/*
		 * IO error will be reported by write(), fsync(), etc.
		 * who check the mapping.
		 * This way the application knows that something went
		 * wrong with its dirty file data.
		 *
		 * There's one open issue:
		 *
		 * The EIO will be only reported on the next IO
		 * operation and then cleared through the IO map.
		 * Normally Linux has two mechanisms to pass IO error,
		 * first through the AS_EIO flag in the address space
		 * and then through the PageError flag in the page.
		 * Since we drop pages on memory failure handling the
		 * only mechanism open to use is through AS_EIO.
		 *
		 * This has the disadvantage that it gets cleared on
		 * the first operation that returns an error, while
		 * the PageError bit is more sticky and only cleared
		 * when the page is reread or dropped. If an
		 * application assumes it will always get an error on
		 * fsync, but does other operations on the fd first
		 * and the page is dropped in between, then the error
		 * will not be properly reported.
		 *
		 * This can already happen even without hwpoisoned
		 * pages: first on metadata IO errors (which only
		 * report through AS_EIO) or when the page is dropped
		 * at the wrong time.
		 *
		 * So right now we assume that the application DTRT on
		 * the first EIO, but we're not worse than other parts
		 * of the kernel.
		 */
		mapping_set_error(mapping, EIO);
	}

	return me_pagecache_clean(p, pfn);
}

/*
 * Dirty swap cache page is tricky to handle. The page could live both in page
 * cache and swap cache (i.e. the page is freshly swapped in). So it could be
 * referenced concurrently by 2 types of PTEs:
 * normal PTEs and swap PTEs. We try to handle them consistently by calling
 * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs,
 * and then
 *	- clear dirty bit to prevent IO
 *	- remove from LRU
 *	- but keep in the swap cache, so that when we return to it on
 *	  a later page fault, we know the application is accessing
 *	  corrupted data and shall be killed (we installed simple
 *	  interception code in do_swap_page to catch it).
 *
 * Clean swap cache pages can be directly isolated. A later page fault will
 * bring in the known good data from disk.
 */
static int me_swapcache_dirty(struct page *p, unsigned long pfn)
{
	ClearPageDirty(p);
	/* Trigger EIO in shmem: */
	ClearPageUptodate(p);

	if (!delete_from_lru_cache(p))
		return DELAYED;
	else
		return FAILED;
}

static int me_swapcache_clean(struct page *p, unsigned long pfn)
{
	delete_from_swap_cache(p);

	if (!delete_from_lru_cache(p))
		return RECOVERED;
	else
		return FAILED;
}

/*
 * Huge pages. Needs work.
 * Issues:
 * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
 *   To narrow down kill region to one page, we need to break up pmd.
 */
static int me_huge_page(struct page *p, unsigned long pfn)
{
	int res = 0;
	struct page *hpage = compound_head(p);

	/*
	 * We can safely recover from error on free or reserved (i.e.
	 * not in-use) hugepage by dequeuing it from freelist.
	 * To check whether a hugepage is in-use or not, we can't use
	 * page->lru because it can be used in other hugepage operations,
	 * such as __unmap_hugepage_range() and gather_surplus_pages().
	 * So instead we use page_mapping() and PageAnon().
	 * We assume that this function is called with page lock held,
	 * so there is no race between isolation and mapping/unmapping.
	 */
	if (!(page_mapping(hpage) || PageAnon(hpage))) {
		res = dequeue_hwpoisoned_huge_page(hpage);
		if (!res)
			return RECOVERED;
	}
	return DELAYED;
}

/*
 * Various page states we can handle.
 *
 * A page state is defined by its current page->flags bits.
 * The table matches them in order and calls the right handler.
 *
 * This is quite tricky because we can access page at any time
 * in its live cycle, so all accesses have to be extremely careful.
 *
 * This is not complete. More states could be added.
 * For any missing state don't attempt recovery.
 */
#define dirty		(1UL << PG_dirty)
#define sc		(1UL << PG_swapcache)
#define unevict		(1UL << PG_unevictable)
#define mlock		(1UL << PG_mlocked)
#define writeback	(1UL << PG_writeback)
#define lru		(1UL << PG_lru)
#define swapbacked	(1UL << PG_swapbacked)
#define head		(1UL << PG_head)
#define tail		(1UL << PG_tail)
#define compound	(1UL << PG_compound)
#define slab		(1UL << PG_slab)
#define reserved	(1UL << PG_reserved)

static struct page_state {
	unsigned long mask;
	unsigned long res;
	char *msg;
	int (*action)(struct page *p, unsigned long pfn);
} error_states[] = {
	{ reserved,	reserved,	"reserved kernel",	me_kernel },
	/*
	 * free pages are specially detected outside this table:
	 * PG_buddy pages only make a small fraction of all free pages.
	 */

	/*
	 * Could in theory check if the slab page is free or if we can drop
	 * currently unused objects without touching them. But just
	 * treat it as standard kernel for now.
	 */
	{ slab,		slab,		"kernel slab",	me_kernel },

#ifdef CONFIG_PAGEFLAGS_EXTENDED
	{ head,		head,		"huge",		me_huge_page },
	{ tail,		tail,		"huge",		me_huge_page },
#else
	{ compound,	compound,	"huge",		me_huge_page },
#endif

	{ sc|dirty,	sc|dirty,	"swapcache",	me_swapcache_dirty },
	{ sc|dirty,	sc,		"swapcache",	me_swapcache_clean },

	{ unevict|dirty, unevict|dirty,	"unevictable LRU", me_pagecache_dirty},
	{ unevict,	unevict,	"unevictable LRU", me_pagecache_clean},

	{ mlock|dirty,	mlock|dirty,	"mlocked LRU",	me_pagecache_dirty },
	{ mlock,	mlock,		"mlocked LRU",	me_pagecache_clean },

	{ lru|dirty,	lru|dirty,	"LRU",		me_pagecache_dirty },
	{ lru|dirty,	lru,		"clean LRU",	me_pagecache_clean },

	/*
	 * Catchall entry: must be at end.
	 */
	{ 0,		0,		"unknown page state",	me_unknown },
};

#undef dirty
#undef sc
#undef unevict
#undef mlock
#undef writeback
#undef lru
#undef swapbacked
#undef head
#undef tail
#undef compound
#undef slab
#undef reserved

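/*
 * Report the outcome of handling one corrupted page.
 */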
static void action_result(unsigned long pfn, char *msg, int result)
{
	struct page *page = pfn_to_page(pfn);

	printk(KERN_ERR "MCE %#lx: %s%s page recovery: %s\n",
		pfn,
		PageDirty(page) ? "dirty " : "",
		msg, action_name[result]);
}

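/*
 * Run the handler that matches the page state and check that the page
 * is no longer referenced afterwards; any leftover reference (beyond
 * the ones the handler is expected to keep) means containment failed.
 */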
static int page_action(struct page_state *ps, struct page *p,
			unsigned long pfn)
{
	int result;
	int count;

	result = ps->action(p, pfn);
	action_result(pfn, ps->msg, result);

	count = page_count(p) - 1;
	if (ps->action == me_swapcache_dirty && result == DELAYED)
		count--;
	if (count != 0) {
		printk(KERN_ERR
		       "MCE %#lx: %s page still referenced by %d users\n",
		       pfn, ps->msg, count);
		result = FAILED;
	}

	/* Could do more checks here if page looks ok */
	/*
	 * Could adjust zone counters here to correct for the missing page.
	 */

	return (result == RECOVERED || result == DELAYED) ? 0 : -EBUSY;
}

/*
 * Do all that is necessary to remove user space mappings. Unmap
 * the pages and send SIGBUS to the processes if the data was dirty.
 */
static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
				  int trapno)
{
	enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
	struct address_space *mapping;
	LIST_HEAD(tokill);
	int ret;
	int kill = 1;
	struct page *hpage = compound_head(p);
	struct page *ppage;

	if (PageReserved(p) || PageSlab(p))
		return SWAP_SUCCESS;

	/*
	 * This check implies we don't kill processes if their pages
	 * are in the swap cache early. Those are always late kills.
	 */
	if (!page_mapped(hpage))
		return SWAP_SUCCESS;

	if (PageKsm(p))
		return SWAP_FAIL;

	if (PageSwapCache(p)) {
		printk(KERN_ERR
		       "MCE %#lx: keeping poisoned page in swap cache\n", pfn);
		ttu |= TTU_IGNORE_HWPOISON;
	}

	/*
	 * Propagate the dirty bit from PTEs to struct page first, because we
	 * need this to decide if we should kill or just drop the page.
	 * XXX: the dirty test could be racy: set_page_dirty() may not always
	 * be called inside page lock (it's recommended but not enforced).
	 */
	mapping = page_mapping(hpage);
	if (!PageDirty(hpage) && mapping &&
	    mapping_cap_writeback_dirty(mapping)) {
		if (page_mkclean(hpage)) {
			SetPageDirty(hpage);
		} else {
			kill = 0;
			ttu |= TTU_IGNORE_HWPOISON;
			printk(KERN_INFO
	"MCE %#lx: corrupted page was clean: dropped without side effects\n",
				pfn);
		}
	}

	/*
	 * ppage: poisoned page
	 *   if p is a regular page (4k page)
	 *	ppage == real poisoned page;
	 *   else p is hugetlb or THP, ppage == head page.
	 */
	ppage = hpage;

	if (PageTransHuge(hpage)) {
		/*
		 * Verify that this isn't a hugetlbfs head page. The PageAnon
		 * check is only there to avoid tripping an internal debug
		 * check in split_huge_page(), which refuses to deal with
		 * anything that isn't an anon page. PageAnon can't go away
		 * from under us because we hold a refcount on the hpage.
		 */
		if (!PageHuge(hpage) && PageAnon(hpage)) {
			if (unlikely(split_huge_page(hpage))) {
				/*
				 * FIXME: if splitting THP failed, it would be
				 * better to stop the following operation
				 * rather than risk a panic by unmapping.
				 * The system might survive if the page is
				 * freed later.
				 */
				printk(KERN_INFO
					"MCE %#lx: failed to split THP\n", pfn);

				BUG_ON(!PageHWPoison(p));
				return SWAP_FAIL;
			}
			/* THP is split, so ppage should be the real poisoned page. */
			ppage = p;
		}
	}

	/*
	 * First collect all the processes that have the page
	 * mapped in dirty form.  This has to be done before try_to_unmap,
	 * because ttu takes the rmap data structures down.
	 *
	 * Error handling: We ignore errors here because
	 * there's nothing that can be done.
	 */
	if (kill)
		collect_procs(ppage, &tokill);

	if (hpage != ppage)
		lock_page_nosync(ppage);

	ret = try_to_unmap(ppage, ttu);
	if (ret != SWAP_SUCCESS)
		printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
				pfn, page_mapcount(ppage));

	if (hpage != ppage)
		unlock_page(ppage);

	/*
	 * Now that the dirty bit has been propagated to the
	 * struct page and all unmaps are done we can decide if
	 * killing is needed or not.  Only kill when the page
	 * was dirty, otherwise the tokill list is merely
	 * freed.  When there was a problem unmapping earlier
	 * use a more forceful uncatchable kill to prevent
	 * any accesses to the poisoned memory.
	 */
	kill_procs_ao(&tokill, !!PageDirty(ppage), trapno,
		      ret != SWAP_SUCCESS, p, pfn);

	return ret;
}

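/*
 * PG_hwpoison is kept on every subpage of a poisoned hugepage because
 * containment is currently done in hugepage units.
 */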
static void set_page_hwpoison_huge_page(struct page *hpage)
{
	int i;
	int nr_pages = 1 << compound_trans_order(hpage);
	for (i = 0; i < nr_pages; i++)
		SetPageHWPoison(hpage + i);
}

static void clear_page_hwpoison_huge_page(struct page *hpage)
{
	int i;
	int nr_pages = 1 << compound_trans_order(hpage);
	for (i = 0; i < nr_pages; i++)
		ClearPageHWPoison(hpage + i);
}

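/*
 * Core of the recovery path: mark the page hwpoisoned, unmap it from
 * all processes and run the handler that matches the page state.
 * memory_failure() and the error injectors are thin wrappers around
 * this function.
 */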
int __memory_failure(unsigned long pfn, int trapno, int flags)
{
	struct page_state *ps;
	struct page *p;
	struct page *hpage;
	int res;
	unsigned int nr_pages;

	if (!sysctl_memory_failure_recovery)
		panic("Memory failure from trap %d on page %lx", trapno, pfn);

	if (!pfn_valid(pfn)) {
		printk(KERN_ERR
		       "MCE %#lx: memory outside kernel control\n",
		       pfn);
		return -ENXIO;
	}

	p = pfn_to_page(pfn);
	hpage = compound_head(p);
	if (TestSetPageHWPoison(p)) {
		printk(KERN_ERR "MCE %#lx: already hardware poisoned\n", pfn);
		return 0;
	}

	nr_pages = 1 << compound_trans_order(hpage);
	atomic_long_add(nr_pages, &mce_bad_pages);

	/*
	 * We need/can do nothing about count=0 pages.
	 * 1) it's a free page, and therefore in safe hands:
	 *    prep_new_page() will be the gate keeper.
	 * 2) it's a free hugepage, which is also safe:
	 *    an affected hugepage will be dequeued from the hugepage
	 *    freelist, so there's no concern about reusing it ever after.
	 * 3) it's part of a non-compound high order page.
	 *    Implies some kernel user: cannot stop them from
	 *    R/W the page; let's pray that the page has been
	 *    used and will be freed some time later.
	 * In fact it's dangerous to directly bump up the page count from 0,
	 * that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
	 */
	if (!(flags & MF_COUNT_INCREASED) &&
		!get_page_unless_zero(hpage)) {
		if (is_free_buddy_page(p)) {
			action_result(pfn, "free buddy", DELAYED);
			return 0;
		} else if (PageHuge(hpage)) {
			/*
			 * Check "just unpoisoned", "filter hit" and
			 * "race with other subpage" under the page lock.
			 */
			lock_page_nosync(hpage);
			if (!PageHWPoison(hpage)
			    || (hwpoison_filter(p) && TestClearPageHWPoison(p))
			    || (p != hpage && TestSetPageHWPoison(hpage))) {
				atomic_long_sub(nr_pages, &mce_bad_pages);
				unlock_page(hpage);
				return 0;
			}
			set_page_hwpoison_huge_page(hpage);
			res = dequeue_hwpoisoned_huge_page(hpage);
			action_result(pfn, "free huge",
				      res ? IGNORED : DELAYED);
			unlock_page(hpage);
			return res;
		} else {
			action_result(pfn, "high order kernel", IGNORED);
			return -EBUSY;
		}
	}

	/*
	 * We ignore non-LRU pages for good reasons.
	 * - PG_lru is only valid under an elevated refcount
	 * - to avoid races with __set_page_locked()
	 * - to avoid races with __SetPageSlab*() (and more non-atomic ops)
	 * The check (unnecessarily) ignores LRU pages being isolated and
	 * walked by the page reclaim code, however that's not a big loss.
	 */
	if (!PageHuge(p) && !PageTransCompound(p)) {
		if (!PageLRU(p))
			shake_page(p, 0);
		if (!PageLRU(p)) {
			/*
			 * shake_page could have turned it free.
			 */
			if (is_free_buddy_page(p)) {
				action_result(pfn, "free buddy, 2nd try",
						DELAYED);
				return 0;
			}
			action_result(pfn, "non LRU", IGNORED);
			put_page(p);
			return -EBUSY;
		}
	}

	/*
	 * Lock the page and wait for writeback to finish.
	 * It's very difficult to mess with pages currently under IO
	 * and in many cases impossible, so we just avoid it here.
	 */
	lock_page_nosync(hpage);

	/*
	 * unpoison always clears PG_hwpoison inside the page lock
	 */
	if (!PageHWPoison(p)) {
		printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
		res = 0;
		goto out;
	}
	if (hwpoison_filter(p)) {
		if (TestClearPageHWPoison(p))
			atomic_long_sub(nr_pages, &mce_bad_pages);
		unlock_page(hpage);
		put_page(hpage);
		return 0;
	}

	/*
	 * For an error on a tail page, we should set PG_hwpoison
	 * on the head page to show that the hugepage is hwpoisoned
	 */
	if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
		action_result(pfn, "hugepage already hardware poisoned",
				IGNORED);
		unlock_page(hpage);
		put_page(hpage);
		return 0;
	}

	/*
	 * Set PG_hwpoison on all pages in an error hugepage,
	 * because containment is done in hugepage units for now.
	 * Since we have done TestSetPageHWPoison() for the head page with
	 * page lock held, we can safely set PG_hwpoison bits on tail pages.
	 */
	if (PageHuge(p))
		set_page_hwpoison_huge_page(hpage);

	wait_on_page_writeback(p);

	/*
	 * Now take care of user space mappings.
	 * Abort on fail: __remove_from_page_cache() assumes unmapped page.
	 */
	if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) {
		printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
		res = -EBUSY;
		goto out;
	}

	/*
	 * Torn down by someone else?
	 */
	if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
		action_result(pfn, "already truncated LRU", IGNORED);
		res = -EBUSY;
		goto out;
	}

	res = -EBUSY;
	for (ps = error_states;; ps++) {
		if ((p->flags & ps->mask) == ps->res) {
			res = page_action(ps, p, pfn);
			break;
		}
	}
out:
	unlock_page(hpage);
	return res;
}
EXPORT_SYMBOL_GPL(__memory_failure);

/**
 * memory_failure - Handle memory failure of a page.
 * @pfn: Page Number of the corrupted page
 * @trapno: Trap number reported in the signal to user space.
 *
 * This function is called by the low level machine check code
 * of an architecture when it detects hardware memory corruption
 * of a page. It tries its best to recover, which includes
 * dropping pages, killing processes etc.
 *
 * The function is primarily of use for corruptions that
 * happen outside the current execution context (e.g. when
 * detected by a background scrubber).
 *
 * Must run in process context (e.g. a work queue) with interrupts
 * enabled and no spinlocks held.
 */
void memory_failure(unsigned long pfn, int trapno)
{
	__memory_failure(pfn, trapno, 0);
}

/**
 * unpoison_memory - Unpoison a previously poisoned page
 * @pfn: Page number of the to be unpoisoned page
 *
 * Software-unpoison a page that has been poisoned by
 * memory_failure() earlier.
 *
 * This is only done on the software level, so it only works
 * for Linux injected failures, not real hardware failures.
 *
 * Returns 0 for success, otherwise -errno.
 */
int unpoison_memory(unsigned long pfn)
{
	struct page *page;
	struct page *p;
	int freeit = 0;
	unsigned int nr_pages;

	if (!pfn_valid(pfn))
		return -ENXIO;

	p = pfn_to_page(pfn);
	page = compound_head(p);

	if (!PageHWPoison(p)) {
		pr_info("MCE: Page was already unpoisoned %#lx\n", pfn);
		return 0;
	}

	nr_pages = 1 << compound_trans_order(page);

	if (!get_page_unless_zero(page)) {
		/*
		 * Since a HWPoisoned hugepage should have a non-zero
		 * refcount, a race between memory failure and unpoison
		 * seems to have happened. In that case unpoison fails
		 * and memory failure runs to the end.
		 */
		if (PageHuge(page)) {
			pr_debug("MCE: Memory failure is now running on free hugepage %#lx\n", pfn);
			return 0;
		}
		if (TestClearPageHWPoison(p))
			atomic_long_sub(nr_pages, &mce_bad_pages);
		pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn);
		return 0;
	}
	lock_page_nosync(page);
	/*
	 * This test is racy because PG_hwpoison is set outside of page lock.
	 * That's acceptable because that won't trigger kernel panic. Instead,
	 * the PG_hwpoison page will be caught and isolated on the entrance to
	 * the free buddy page pool.
	 */
	if (TestClearPageHWPoison(page)) {
		pr_info("MCE: Software-unpoisoned page %#lx\n", pfn);
		atomic_long_sub(nr_pages, &mce_bad_pages);
		freeit = 1;
		if (PageHuge(page))
			clear_page_hwpoison_huge_page(page);
	}
	unlock_page(page);

	put_page(page);
	if (freeit)
		put_page(page);

	return 0;
}
EXPORT_SYMBOL(unpoison_memory);

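/*
 * Allocate the migration target for soft offline: a new page on the
 * same node as the bad page.
 */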
static struct page *new_page(struct page *p, unsigned long private, int **x)
{
	int nid = page_to_nid(p);
	if (PageHuge(p))
		return alloc_huge_page_node(page_hstate(compound_head(p)),
						   nid);
	else
		return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
}

/*
 * Safely get reference count of an arbitrary page.
 * Returns 0 for a free page, -EIO for a zero refcount page
 * that is not free, and 1 for any other page type.
 * For 1 the page is returned with increased page count, otherwise not.
 */
static int get_any_page(struct page *p, unsigned long pfn, int flags)
{
	int ret;

	if (flags & MF_COUNT_INCREASED)
		return 1;

	/*
	 * lock_memory_hotplug prevents a race with memory hotplug.
	 * This is a big hammer; something finer grained would be nicer.
	 */
	lock_memory_hotplug();

	/*
	 * Isolate the page, so that it doesn't get reallocated if it
	 * was free.
	 */
	set_migratetype_isolate(p);
	/*
	 * When the target page is a free hugepage, just remove it
	 * from the free hugepage list.
	 */
	if (!get_page_unless_zero(compound_head(p))) {
		if (PageHuge(p)) {
			pr_info("get_any_page: %#lx free huge page\n", pfn);
			ret = dequeue_hwpoisoned_huge_page(compound_head(p));
		} else if (is_free_buddy_page(p)) {
			pr_info("get_any_page: %#lx free buddy page\n", pfn);
			/* Set hwpoison bit while page is still isolated */
			SetPageHWPoison(p);
			ret = 0;
		} else {
			pr_info("get_any_page: %#lx: unknown zero refcount page type %lx\n",
				pfn, p->flags);
			ret = -EIO;
		}
	} else {
		/* Not a free page */
		ret = 1;
	}
	unset_migratetype_isolate(p);
	unlock_memory_hotplug();
	return ret;
}

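/*
 * Soft offline a hugepage: migrate its contents to a fresh hugepage
 * and then poison the original, without killing anything.
 */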
static int soft_offline_huge_page(struct page *page, int flags)
{
	int ret;
	unsigned long pfn = page_to_pfn(page);
	struct page *hpage = compound_head(page);
	LIST_HEAD(pagelist);

	ret = get_any_page(page, pfn, flags);
	if (ret < 0)
		return ret;
	if (ret == 0)
		goto done;

	if (PageHWPoison(hpage)) {
		put_page(hpage);
		pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn);
		return -EBUSY;
	}

	/* Keep page count to indicate a given hugepage is isolated. */

	list_add(&hpage->lru, &pagelist);
	ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0,
				true);
	if (ret) {
		struct page *page1, *page2;
		list_for_each_entry_safe(page1, page2, &pagelist, lru)
			put_page(page1);

		pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
			 pfn, ret, page->flags);
		if (ret > 0)
			ret = -EIO;
		return ret;
	}
done:
	if (!PageHWPoison(hpage))
		atomic_long_add(1 << compound_trans_order(hpage), &mce_bad_pages);
	set_page_hwpoison_huge_page(hpage);
	dequeue_hwpoisoned_huge_page(hpage);
	/* keep elevated page count for bad page */
	return ret;
}

/**
 * soft_offline_page - Soft offline a page.
 * @page: page to offline
 * @flags: flags. Same as memory_failure().
 *
 * Returns 0 on success, otherwise negated errno.
 *
 * Soft offline a page, by migration or invalidation,
 * without killing anything. This is for the case when
 * a page is not corrupted yet (so it's still valid to access),
 * but has had a number of corrected errors and is better taken
 * out.
 *
 * The actual policy on when to do that is maintained by
 * user space.
 *
 * This should never impact any application or cause data loss,
 * however it might take some time.
 *
 * This is not a 100% solution for all memory, but tries to be
 * ``good enough'' for the majority of memory.
 */
int soft_offline_page(struct page *page, int flags)
{
	int ret;
	unsigned long pfn = page_to_pfn(page);

	if (PageHuge(page))
		return soft_offline_huge_page(page, flags);

	ret = get_any_page(page, pfn, flags);
	if (ret < 0)
		return ret;
	if (ret == 0)
		goto done;

	/*
	 * Page cache page we can handle?
	 */
	if (!PageLRU(page)) {
		/*
		 * Try to free it.
		 */
		put_page(page);
		shake_page(page, 1);

		/*
		 * Did it turn free?
		 */
		ret = get_any_page(page, pfn, 0);
		if (ret < 0)
			return ret;
		if (ret == 0)
			goto done;
	}
	if (!PageLRU(page)) {
		pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
			pfn, page->flags);
		return -EIO;
	}

	lock_page(page);
	wait_on_page_writeback(page);

	/*
	 * Synchronized using the page lock with memory_failure()
	 */
	if (PageHWPoison(page)) {
		unlock_page(page);
		put_page(page);
		pr_info("soft offline: %#lx page already poisoned\n", pfn);
		return -EBUSY;
	}

	/*
	 * Try to invalidate first. This should work for
	 * non dirty unmapped page cache pages.
	 */
	ret = invalidate_inode_page(page);
	unlock_page(page);

	/*
	 * RED-PEN would be better to keep it isolated here, but we
	 * would need to fix isolation locking first.
	 */
	put_page(page);
	if (ret == 1) {
		ret = 0;
		pr_info("soft_offline: %#lx: invalidated\n", pfn);
		goto done;
	}

	/*
	 * Simple invalidation didn't work.
	 * Try to migrate to a new page instead. migrate.c
	 * handles a large number of cases for us.
	 */
	ret = isolate_lru_page(page);
	if (!ret) {
		LIST_HEAD(pagelist);

		list_add(&page->lru, &pagelist);
		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
								0, true);
		if (ret) {
			putback_lru_pages(&pagelist);
			pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
				pfn, ret, page->flags);
			if (ret > 0)
				ret = -EIO;
		}
	} else {
		pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
			pfn, ret, page_count(page), page->flags);
	}
	if (ret)
		return ret;

done:
	atomic_long_add(1, &mce_bad_pages);
	SetPageHWPoison(page);
	/* keep elevated page count for bad page */
	return ret;
}

/*
 * The caller must hold current->mm->mmap_sem in read mode.
 */
int is_hwpoison_address(unsigned long addr)
{
	pgd_t *pgdp;
	pud_t pud, *pudp;
	pmd_t pmd, *pmdp;
	pte_t pte, *ptep;
	swp_entry_t entry;

	pgdp = pgd_offset(current->mm, addr);
	if (!pgd_present(*pgdp))
		return 0;
	pudp = pud_offset(pgdp, addr);
	pud = *pudp;
	if (!pud_present(pud) || pud_large(pud))
		return 0;
	pmdp = pmd_offset(pudp, addr);
	pmd = *pmdp;
	if (!pmd_present(pmd) || pmd_large(pmd))
		return 0;
	ptep = pte_offset_map(pmdp, addr);
	pte = *ptep;
	pte_unmap(ptep);
	if (!is_swap_pte(pte))
		return 0;
	entry = pte_to_swp_entry(pte);
	return is_hwpoison_entry(entry);
}
EXPORT_SYMBOL_GPL(is_hwpoison_address);