// SPDX-License-Identifier: GPL-2.0
/*
 * High level machine check handler. Handles pages reported by the
 * hardware as being corrupted, usually due to a multi-bit ECC memory
 * or cache failure.  Pages are marked hwpoison, unmapped from all
 * processes, and depending on their state either recovered, isolated,
 * or the affected processes are killed.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/kernel-page-flags.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/export.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/backing-dev.h>
#include <linux/migrate.h>
#include <linux/suspend.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/hugetlb.h>
#include <linux/memory_hotplug.h>
#include <linux/mm_inline.h>
#include <linux/memremap.h>
#include <linux/kfifo.h>
#include <linux/ratelimit.h>
#include <linux/page-isolation.h>
#include "internal.h"
#include "ras/ras_event.h"

int sysctl_memory_failure_early_kill __read_mostly = 0;

int sysctl_memory_failure_recovery __read_mostly = 1;

atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);

static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
{
	if (hugepage_or_freepage) {
		/*
		 * Doing this check for free pages is also fine since
		 * dissolve_free_huge_page() returns 0 for non-hugetlb
		 * pages as well.
		 */
		if (dissolve_free_huge_page(page) || !take_page_off_buddy(page))
			/*
			 * We could fail to take the target page off the
			 * buddy list, for example due to a racy page
			 * allocation, but that's acceptable because a
			 * soft-offlined page is not actually broken.
			 */
			return false;
	}

	SetPageHWPoison(page);
	if (release)
		put_page(page);
	page_ref_inc(page);
	num_poisoned_pages_inc();

	return true;
}

#if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)

u32 hwpoison_filter_enable = 0;
u32 hwpoison_filter_dev_major = ~0U;
u32 hwpoison_filter_dev_minor = ~0U;
u64 hwpoison_filter_flags_mask;
u64 hwpoison_filter_flags_value;
EXPORT_SYMBOL_GPL(hwpoison_filter_enable);
EXPORT_SYMBOL_GPL(hwpoison_filter_dev_major);
EXPORT_SYMBOL_GPL(hwpoison_filter_dev_minor);
EXPORT_SYMBOL_GPL(hwpoison_filter_flags_mask);
EXPORT_SYMBOL_GPL(hwpoison_filter_flags_value);

static int hwpoison_filter_dev(struct page *p)
{
	struct address_space *mapping;
	dev_t dev;

	if (hwpoison_filter_dev_major == ~0U &&
	    hwpoison_filter_dev_minor == ~0U)
		return 0;

	/*
	 * page_mapping() does not accept slab pages.
	 */
	if (PageSlab(p))
		return -EINVAL;

	mapping = page_mapping(p);
	if (mapping == NULL || mapping->host == NULL)
		return -EINVAL;

	dev = mapping->host->i_sb->s_dev;
	if (hwpoison_filter_dev_major != ~0U &&
	    hwpoison_filter_dev_major != MAJOR(dev))
		return -EINVAL;
	if (hwpoison_filter_dev_minor != ~0U &&
	    hwpoison_filter_dev_minor != MINOR(dev))
		return -EINVAL;

	return 0;
}

static int hwpoison_filter_flags(struct page *p)
{
	if (!hwpoison_filter_flags_mask)
		return 0;

	if ((stable_page_flags(p) & hwpoison_filter_flags_mask) ==
				    hwpoison_filter_flags_value)
		return 0;
	else
		return -EINVAL;
}

/*
 * This allows stress tests to limit test scope to a collection of tasks
 * by putting them under some memcg. This prevents killing unrelated/important
 * processes such as /sbin/init. Note that the target task may share clean
 * pages with init (e.g. libc text), which is harmless. If the target task
 * shares _dirty_ pages with another task B, the test scheme must make sure B
 * is also included in the memcg. Due to race conditions this filter can only
 * guarantee that the page either belongs to the memcg tasks or is a freed
 * page.
 */
#ifdef CONFIG_MEMCG
u64 hwpoison_filter_memcg;
EXPORT_SYMBOL_GPL(hwpoison_filter_memcg);
static int hwpoison_filter_task(struct page *p)
{
	if (!hwpoison_filter_memcg)
		return 0;

	if (page_cgroup_ino(p) != hwpoison_filter_memcg)
		return -EINVAL;

	return 0;
}
#else
static int hwpoison_filter_task(struct page *p) { return 0; }
#endif

int hwpoison_filter(struct page *p)
{
	if (!hwpoison_filter_enable)
		return 0;

	if (hwpoison_filter_dev(p))
		return -EINVAL;

	if (hwpoison_filter_flags(p))
		return -EINVAL;

	if (hwpoison_filter_task(p))
		return -EINVAL;

	return 0;
}
#else
int hwpoison_filter(struct page *p)
{
	return 0;
}
#endif

EXPORT_SYMBOL_GPL(hwpoison_filter);
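
/*
 * Illustrative usage note (assumes the hwpoison-inject module built from
 * CONFIG_HWPOISON_INJECT is loaded): the filter knobs above are normally
 * driven from user space through debugfs before injecting an error, e.g.:
 *
 *   echo 1    > /sys/kernel/debug/hwpoison/corrupt-filter-enable
 *   echo 8    > /sys/kernel/debug/hwpoison/corrupt-filter-dev-major
 *   echo 0    > /sys/kernel/debug/hwpoison/corrupt-filter-dev-minor
 *   echo $pfn > /sys/kernel/debug/hwpoison/corrupt-pfn
 *
 * so that only pages passing hwpoison_filter() above are poisoned.
 */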

/*
 * Kill all processes that have a poisoned page mapped and then isolate
 * the page.
 *
 * General strategy:
 * Find all processes having the page mapped and kill them.
 * But we keep a page reference around so that the page is not
 * actually freed yet.
 * Then stash the page away.
 *
 * There's no convenient way to get back to mapped processes
 * from the VMAs, so do a brute-force search over all
 * running processes.
 */

struct to_kill {
	struct list_head nd;
	struct task_struct *tsk;
	unsigned long addr;
	short size_shift;
};

/*
 * Send all the processes who have the page mapped a signal.
 * ``action optional'' if they are not immediately affected by the error
 * ``action required'' if error happened in current execution context
 */
static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
{
	struct task_struct *t = tk->tsk;
	short addr_lsb = tk->size_shift;
	int ret = 0;

	pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
			pfn, t->comm, t->pid);

	if (flags & MF_ACTION_REQUIRED) {
		if (t == current)
			ret = force_sig_mceerr(BUS_MCEERR_AR,
					(void __user *)tk->addr, addr_lsb);
		else
			/*
			 * Signal other processes sharing the page if they
			 * have PF_MCE_EARLY set.
			 */
			ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
				addr_lsb, t);
	} else {
		/*
		 * Don't use force here, it's convenient if the signal
		 * can be temporarily blocked.
		 * This could cause a loop when the user sets SIGBUS
		 * to SIG_IGN, but hopefully no one will do that?
		 */
		ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
				      addr_lsb, t);
	}
	if (ret < 0)
		pr_info("Memory failure: Error sending signal to %s:%d: %d\n",
			t->comm, t->pid, ret);
	return ret;
}
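
/*
 * Illustrative example (user space, not part of this file): a process that
 * opted in to early kill with prctl(PR_MCE_KILL, PR_MCE_KILL_SET,
 * PR_MCE_KILL_EARLY, 0, 0) can decode the SIGBUS sent above:
 *
 *   static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
 *   {
 *       if (si->si_code == BUS_MCEERR_AO || si->si_code == BUS_MCEERR_AR) {
 *           void *bad = si->si_addr;               // start of poisoned range
 *           size_t len = 1UL << si->si_addr_lsb;   // granularity, from tk->size_shift
 *           // abandon or remap [bad, bad + len) here
 *       }
 *   }
 *
 * installed with sigaction() and SA_SIGINFO.
 */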

/*
 * Unknown page type encountered. Try to check whether it can turn PageLRU
 * by lru_add_drain_all(), or into a free page by reclaiming slabs when
 * possible.
 */
void shake_page(struct page *p, int access)
{
	if (PageHuge(p))
		return;

	if (!PageSlab(p)) {
		lru_add_drain_all();
		if (PageLRU(p) || is_free_buddy_page(p))
			return;
	}

	/*
	 * Only call shrink_node_slabs here (which would also shrink
	 * other caches) if access is not potentially fatal.
	 */
	if (access)
		drop_slab_node(page_to_nid(p));
}
EXPORT_SYMBOL_GPL(shake_page);

/*
 * Walk the page tables of @vma to find how the device page is mapped at
 * @page's address: returns PUD_SHIFT, PMD_SHIFT or PAGE_SHIFT for a devmap
 * entry of the corresponding level, or 0 if no mapping is present.
 */
static unsigned long dev_pagemap_mapping_shift(struct page *page,
				struct vm_area_struct *vma)
{
	unsigned long address = vma_address(page, vma);
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset(vma->vm_mm, address);
	if (!pgd_present(*pgd))
		return 0;
	p4d = p4d_offset(pgd, address);
	if (!p4d_present(*p4d))
		return 0;
	pud = pud_offset(p4d, address);
	if (!pud_present(*pud))
		return 0;
	if (pud_devmap(*pud))
		return PUD_SHIFT;
	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		return 0;
	if (pmd_devmap(*pmd))
		return PMD_SHIFT;
	pte = pte_offset_map(pmd, address);
	if (!pte_present(*pte))
		return 0;
	if (pte_devmap(*pte))
		return PAGE_SHIFT;
	return 0;
}

/*
 * Schedule a process for later kill.
 * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
 */
static void add_to_kill(struct task_struct *tsk, struct page *p,
		       struct vm_area_struct *vma,
		       struct list_head *to_kill)
{
	struct to_kill *tk;

	tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
	if (!tk) {
		pr_err("Memory failure: Out of memory while machine check handling\n");
		return;
	}

	tk->addr = page_address_in_vma(p, vma);
	if (is_zone_device_page(p))
		tk->size_shift = dev_pagemap_mapping_shift(p, vma);
	else
		tk->size_shift = page_shift(compound_head(p));

	/*
	 * Send SIGKILL if "tk->addr == -EFAULT". Also, as
	 * "tk->size_shift" is always non-zero for !is_zone_device_page(),
	 * "tk->size_shift == 0" effectively checks no mapping on
	 * ZONE_DEVICE. Indeed, when a devdax page is mmapped N times
	 * to a process' address space, it's possible not all N VMAs
	 * contain mappings for the page, but at least one VMA does.
	 * Only deliver SIGBUS with the correct address in that case.
	 */
	if (tk->addr == -EFAULT) {
		pr_info("Memory failure: Unable to find user space address %lx in %s\n",
			page_to_pfn(p), tsk->comm);
	} else if (tk->size_shift == 0) {
		kfree(tk);
		return;
	}

	get_task_struct(tsk);
	tk->tsk = tsk;
	list_add_tail(&tk->nd, to_kill);
}

/*
 * Kill the processes that have been collected earlier.
 *
 * Only do anything when FORCEKILL is set, otherwise just free the
 * list (this is used for clean pages which do not need killing).
 * Also when FAIL is set do a force kill because something went
 * wrong earlier.
 */
static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
		unsigned long pfn, int flags)
{
	struct to_kill *tk, *next;

	list_for_each_entry_safe(tk, next, to_kill, nd) {
		if (forcekill) {
			/*
			 * In case something went wrong with munmapping
			 * make sure the process doesn't catch the
			 * signal and then access the memory. Just kill it.
			 */
			if (fail || tk->addr == -EFAULT) {
				pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
				       pfn, tk->tsk->comm, tk->tsk->pid);
				do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
						 tk->tsk, PIDTYPE_PID);
			}

			/*
			 * In theory the process could have mapped
			 * something else on the address in-between. We could
			 * check for that, but we need to tell the
			 * process anyway.
			 */
			else if (kill_proc(tk, pfn, flags) < 0)
				pr_err("Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n",
				       pfn, tk->tsk->comm, tk->tsk->pid);
		}
		put_task_struct(tk->tsk);
		kfree(tk);
	}
}

/*
 * Find a dedicated thread which is supposed to handle SIGBUS(BUS_MCEERR_AO)
 * on behalf of the thread group. Return task_struct of the (first found)
 * dedicated thread if found, and return NULL otherwise.
 *
 * We already hold read_lock(&tasklist_lock) in the caller, so we don't
 * have to call rcu_read_lock/unlock() in this function.
 */
static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
{
	struct task_struct *t;

	for_each_thread(tsk, t) {
		if (t->flags & PF_MCE_PROCESS) {
			if (t->flags & PF_MCE_EARLY)
				return t;
		} else {
			if (sysctl_memory_failure_early_kill)
				return t;
		}
	}
	return NULL;
}

/*
 * Determine whether a given process is an "early kill" process which expects
 * to be signaled when some page under the process is hwpoisoned.
 * Return task_struct of the dedicated thread (main thread unless explicitly
 * specified) if the process is "early kill", and otherwise return NULL.
 *
 * Note that the above is true for the Action Optional case. For Action
 * Required, only the current thread needs the SIGBUS; for other, non-current
 * processes sharing the same error page the error is Action Optional and the
 * dedicated thread is returned for them as well.
 */
static struct task_struct *task_early_kill(struct task_struct *tsk,
					   int force_early)
{
	if (!tsk->mm)
		return NULL;
	/*
	 * Comparing ->mm here because current task might represent
	 * a subthread, while tsk always points to the main thread.
	 */
	if (force_early && tsk->mm == current->mm)
		return current;

	return find_early_kill_thread(tsk);
}

/*
 * Collect processes when the error hit an anonymous page.
 */
static void collect_procs_anon(struct page *page, struct list_head *to_kill,
				int force_early)
{
	struct vm_area_struct *vma;
	struct task_struct *tsk;
	struct anon_vma *av;
	pgoff_t pgoff;

	av = page_lock_anon_vma_read(page);
	if (av == NULL)	/* Not actually mapped anymore */
		return;

	pgoff = page_to_pgoff(page);
	read_lock(&tasklist_lock);
	for_each_process(tsk) {
		struct anon_vma_chain *vmac;
		struct task_struct *t = task_early_kill(tsk, force_early);

		if (!t)
			continue;
		anon_vma_interval_tree_foreach(vmac, &av->rb_root,
					       pgoff, pgoff) {
			vma = vmac->vma;
			if (!page_mapped_in_vma(page, vma))
				continue;
			if (vma->vm_mm == t->mm)
				add_to_kill(t, page, vma, to_kill);
		}
	}
	read_unlock(&tasklist_lock);
	page_unlock_anon_vma_read(av);
}

/*
 * Collect processes when the error hit a file mapped page.
 */
static void collect_procs_file(struct page *page, struct list_head *to_kill,
				int force_early)
{
	struct vm_area_struct *vma;
	struct task_struct *tsk;
	struct address_space *mapping = page->mapping;
	pgoff_t pgoff;

	i_mmap_lock_read(mapping);
	read_lock(&tasklist_lock);
	pgoff = page_to_pgoff(page);
	for_each_process(tsk) {
		struct task_struct *t = task_early_kill(tsk, force_early);

		if (!t)
			continue;
		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
				      pgoff) {
			/*
			 * Send early kill signal to tasks where a vma covers
			 * the page but the corrupted page is not necessarily
			 * mapped in its pte.
			 * Assume applications who requested early kill want
			 * to be informed of all such data corruptions.
			 */
			if (vma->vm_mm == t->mm)
				add_to_kill(t, page, vma, to_kill);
		}
	}
	read_unlock(&tasklist_lock);
	i_mmap_unlock_read(mapping);
}

/*
 * Collect the processes who have the corrupted page mapped to kill.
 */
static void collect_procs(struct page *page, struct list_head *tokill,
				int force_early)
{
	if (!page->mapping)
		return;

	if (PageAnon(page))
		collect_procs_anon(page, tokill, force_early);
	else
		collect_procs_file(page, tokill, force_early);
}

static const char *action_name[] = {
	[MF_IGNORED] = "Ignored",
	[MF_FAILED] = "Failed",
	[MF_DELAYED] = "Delayed",
	[MF_RECOVERED] = "Recovered",
};

static const char * const action_page_types[] = {
	[MF_MSG_KERNEL]			= "reserved kernel page",
	[MF_MSG_KERNEL_HIGH_ORDER]	= "high-order kernel page",
	[MF_MSG_SLAB]			= "kernel slab page",
	[MF_MSG_DIFFERENT_COMPOUND]	= "different compound page after locking",
	[MF_MSG_POISONED_HUGE]		= "huge page already hardware poisoned",
	[MF_MSG_HUGE]			= "huge page",
	[MF_MSG_FREE_HUGE]		= "free huge page",
	[MF_MSG_NON_PMD_HUGE]		= "non-pmd-sized huge page",
	[MF_MSG_UNMAP_FAILED]		= "unmapping failed page",
	[MF_MSG_DIRTY_SWAPCACHE]	= "dirty swapcache page",
	[MF_MSG_CLEAN_SWAPCACHE]	= "clean swapcache page",
	[MF_MSG_DIRTY_MLOCKED_LRU]	= "dirty mlocked LRU page",
	[MF_MSG_CLEAN_MLOCKED_LRU]	= "clean mlocked LRU page",
	[MF_MSG_DIRTY_UNEVICTABLE_LRU]	= "dirty unevictable LRU page",
	[MF_MSG_CLEAN_UNEVICTABLE_LRU]	= "clean unevictable LRU page",
	[MF_MSG_DIRTY_LRU]		= "dirty LRU page",
	[MF_MSG_CLEAN_LRU]		= "clean LRU page",
	[MF_MSG_TRUNCATED_LRU]		= "already truncated LRU page",
	[MF_MSG_BUDDY]			= "free buddy page",
	[MF_MSG_BUDDY_2ND]		= "free buddy page (2nd try)",
	[MF_MSG_DAX]			= "dax page",
	[MF_MSG_UNSPLIT_THP]		= "unsplit thp",
	[MF_MSG_UNKNOWN]		= "unknown page",
};

/*
 * XXX: It is possible that a page is isolated from LRU cache,
 * and then kept in swap cache or failed to remove from page cache.
 * The page count will stop it from being freed by unpoison.
 * Stress tests should be aware of this memory leak problem.
 */
static int delete_from_lru_cache(struct page *p)
{
	if (!isolate_lru_page(p)) {
		/*
		 * Clear sensible page flags, so that the buddy system won't
		 * complain when the page is unpoison-and-freed.
		 */
		ClearPageActive(p);
		ClearPageUnevictable(p);

		/*
		 * Poisoned page might never drop its ref count to 0 so we have
		 * to uncharge it manually from its memcg.
		 */
		mem_cgroup_uncharge(p);

		/*
		 * drop the page count elevated by isolate_lru_page()
		 */
		put_page(p);
		return 0;
	}
	return -EIO;
}

static int truncate_error_page(struct page *p, unsigned long pfn,
				struct address_space *mapping)
{
	int ret = MF_FAILED;

	if (mapping->a_ops->error_remove_page) {
		int err = mapping->a_ops->error_remove_page(mapping, p);

		if (err != 0) {
			pr_info("Memory failure: %#lx: Failed to punch page: %d\n",
				pfn, err);
		} else if (page_has_private(p) &&
			   !try_to_release_page(p, GFP_NOIO)) {
			pr_info("Memory failure: %#lx: failed to release buffers\n",
				pfn);
		} else {
			ret = MF_RECOVERED;
		}
	} else {
		/*
		 * If the file system doesn't support it just invalidate.
		 * This fails on dirty pages or anything with private pages.
		 */
		if (invalidate_inode_page(p))
			ret = MF_RECOVERED;
		else
			pr_info("Memory failure: %#lx: Failed to invalidate\n",
				pfn);
	}

	return ret;
}

/*
 * Error hit kernel page.
 * Do nothing, try to be lucky and not touch this instead. For a few cases we
 * could be better off instead of doing nothing.
 */
static int me_kernel(struct page *p, unsigned long pfn)
{
	unlock_page(p);
	return MF_IGNORED;
}

/*
 * Page in unknown state. Do nothing.
 */
static int me_unknown(struct page *p, unsigned long pfn)
{
	pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
	unlock_page(p);
	return MF_FAILED;
}

/*
 * Clean (or cleaned) page cache page.
 */
static int me_pagecache_clean(struct page *p, unsigned long pfn)
{
	int ret;
	struct address_space *mapping;

	delete_from_lru_cache(p);

	/*
	 * For anonymous pages we're done: the only reference left
	 * should be the one memory_failure() holds.
	 */
	if (PageAnon(p)) {
		ret = MF_RECOVERED;
		goto out;
	}

	/*
	 * Now truncate the page in the page cache. This is really
	 * more like a "temporary hole punch".
	 * Don't do this for block devices when someone else
	 * has a reference, because it could be file system metadata
	 * and that's not safe to truncate.
	 */
	mapping = page_mapping(p);
	if (!mapping) {
		/*
		 * Page has been torn down in the meanwhile.
		 */
		ret = MF_FAILED;
		goto out;
	}

	/*
	 * Truncation is a bit tricky. Enable it per file system for now.
	 *
	 * Open: to take i_rwsem or not for this? Right now we don't.
	 */
	ret = truncate_error_page(p, pfn, mapping);
out:
	unlock_page(p);
	return ret;
}

/*
 * Dirty pagecache page.
 * Issues: when the error hit a hole page the error is not properly
 * propagated.
 */
static int me_pagecache_dirty(struct page *p, unsigned long pfn)
{
	struct address_space *mapping = page_mapping(p);

	SetPageError(p);
	/* TBD: print more information about the file. */
	if (mapping) {
		/*
		 * IO error will be reported by write(), fsync(), etc.
		 * who check the mapping. This way the application knows
		 * that something went wrong with its dirty file data.
		 *
		 * There's one open issue:
		 *
		 * Normally Linux has two mechanisms to pass an IO error:
		 * the AS_EIO flag in the address space and the PageError
		 * flag in the page. Since we drop pages on memory failure
		 * handling, the only mechanism left is AS_EIO, which is
		 * cleared by the first operation that returns an error,
		 * while PageError is stickier. If an application assumes
		 * it will always get an error on fsync but does other
		 * operations on the fd first, the error may not be
		 * properly reported.
		 *
		 * This can already happen even without hwpoisoned pages,
		 * e.g. on metadata IO errors (which only report through
		 * AS_EIO), so we assume the application does the right
		 * thing on the first EIO; we're no worse than other parts
		 * of the kernel here.
		 */
		mapping_set_error(mapping, -EIO);
	}

	return me_pagecache_clean(p, pfn);
}

/*
 * Dirty swap cache page is tricky to handle. The page could live both in page
 * cache and swap cache (i.e. the page was freshly swapped in). So it could be
 * referenced concurrently by 2 types of PTEs:
 * normal PTEs and swap PTEs. We try to handle them consistently by calling
 * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs,
 * and then
 *      - clear dirty bit to prevent IO
 *      - remove from LRU
 *      - but keep in the swap cache, so that when we return to it on
 *        a later page fault, we know the application is accessing
 *        corrupted data and shall be killed (we installed simple
 *        interception code in do_swap_page to catch it).
 *
 * Clean swap cache pages can be directly isolated. A later page fault will
 * bring in the known good data from disk.
 */
static int me_swapcache_dirty(struct page *p, unsigned long pfn)
{
	int ret;

	ClearPageDirty(p);
	/* Trigger EIO in shmem: */
	ClearPageUptodate(p);

	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
	unlock_page(p);
	return ret;
}

static int me_swapcache_clean(struct page *p, unsigned long pfn)
{
	int ret;

	delete_from_swap_cache(p);

	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
	unlock_page(p);
	return ret;
}

/*
 * Huge pages. Needs work.
 * Issues:
 * - Error on hugepage is contained in hugepage unit (not in raw page unit).
 *   To narrow down the kill region to one page, we need to break up the pmd.
 */
static int me_huge_page(struct page *p, unsigned long pfn)
{
	int res;
	struct page *hpage = compound_head(p);
	struct address_space *mapping;

	if (!PageHuge(hpage))
		return MF_DELAYED;

	mapping = page_mapping(hpage);
	if (mapping) {
		res = truncate_error_page(hpage, pfn, mapping);
		unlock_page(hpage);
	} else {
		res = MF_FAILED;
		unlock_page(hpage);
		/*
		 * migration entry prevents later access on error anonymous
		 * hugepage, so we can free and dissolve it into buddy to
		 * save healthy subpages.
		 */
		if (PageAnon(hpage))
			put_page(hpage);
		if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) {
			page_ref_inc(p);
			res = MF_RECOVERED;
		}
	}

	return res;
}

/*
 * Various page states we can handle.
 *
 * A page state is defined by its current page->flags bits.
 * The table matches them in order and calls the right handler.
 *
 * This is quite tricky because we can access the page at any time
 * in its live cycle, so all accesses have to be extremely careful.
 *
 * This is not complete. More states could be added.
 * For any missing state don't attempt recovery.
 */

#define dirty		(1UL << PG_dirty)
#define sc		((1UL << PG_swapcache) | (1UL << PG_swapbacked))
#define unevict		(1UL << PG_unevictable)
#define mlock		(1UL << PG_mlocked)
#define lru		(1UL << PG_lru)
#define head		(1UL << PG_head)
#define slab		(1UL << PG_slab)
#define reserved	(1UL << PG_reserved)

static struct page_state {
	unsigned long mask;
	unsigned long res;
	enum mf_action_page_type type;

	/* Callback ->action() has to unlock the relevant page inside it. */
	int (*action)(struct page *p, unsigned long pfn);
} error_states[] = {
	{ reserved,	reserved,	MF_MSG_KERNEL,	me_kernel },
	/*
	 * free pages are specially detected outside this table:
	 * PG_buddy pages only make a small fraction of all free pages.
	 */

	/*
	 * Could in theory check if the slab page is free or if we can drop
	 * currently unused objects without touching them. But just
	 * treat it as standard kernel for now.
	 */
	{ slab,		slab,		MF_MSG_SLAB,	me_kernel },

	{ head,		head,		MF_MSG_HUGE,		me_huge_page },

	{ sc|dirty,	sc|dirty,	MF_MSG_DIRTY_SWAPCACHE,	me_swapcache_dirty },
	{ sc|dirty,	sc,		MF_MSG_CLEAN_SWAPCACHE,	me_swapcache_clean },

	{ mlock|dirty,	mlock|dirty,	MF_MSG_DIRTY_MLOCKED_LRU,	me_pagecache_dirty },
	{ mlock|dirty,	mlock,		MF_MSG_CLEAN_MLOCKED_LRU,	me_pagecache_clean },

	{ unevict|dirty, unevict|dirty,	MF_MSG_DIRTY_UNEVICTABLE_LRU,	me_pagecache_dirty },
	{ unevict|dirty, unevict,	MF_MSG_CLEAN_UNEVICTABLE_LRU,	me_pagecache_clean },

	{ lru|dirty,	lru|dirty,	MF_MSG_DIRTY_LRU,	me_pagecache_dirty },
	{ lru|dirty,	lru,		MF_MSG_CLEAN_LRU,	me_pagecache_clean },

	/*
	 * Catchall entry: must be at end.
	 */
	{ 0,		0,		MF_MSG_UNKNOWN,	me_unknown },
};

#undef dirty
#undef sc
#undef unevict
#undef mlock
#undef lru
#undef head
#undef slab
#undef reserved
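
/*
 * Worked example of the table matching above (illustrative): a dirty
 * file-backed page on the LRU has PG_lru and PG_dirty set but none of the
 * reserved, slab, head, swapcache, mlocked or unevictable bits, so it falls
 * through to { lru|dirty, lru|dirty, ... } and is handled by
 * me_pagecache_dirty(). The same page once cleaned satisfies
 * (flags & (lru|dirty)) == lru and matches the next row,
 * me_pagecache_clean(), instead.
 */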

/*
 * "Dirty/Clean" indication is not 100% accurate due to the possibility of
 * setting PG_dirty outside page lock. See also comment above set_page_dirty().
 */
static void action_result(unsigned long pfn, enum mf_action_page_type type,
			  enum mf_result result)
{
	trace_memory_failure_event(pfn, type, result);

	pr_err("Memory failure: %#lx: recovery action for %s: %s\n",
		pfn, action_page_types[type], action_name[result]);
}

static int page_action(struct page_state *ps, struct page *p,
			unsigned long pfn)
{
	int result;
	int count;

	/* page p should be unlocked after returning from ps->action(). */
	result = ps->action(p, pfn);

	count = page_count(p) - 1;
	if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
		count--;
	if (count > 0) {
		pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
		       pfn, action_page_types[ps->type], count);
		result = MF_FAILED;
	}
	action_result(pfn, ps->type, result);

	/* Could do more checks here if page looks ok */
	/*
	 * Could adjust zone counters here to correct for the missing page.
	 */
	return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
}

/*
 * Return true if the page type of a given page is supported by the hwpoison
 * mechanism (while handling could still fail), otherwise false.  This
 * function does not return true for hugetlb or device memory pages, so it's
 * assumed to be called only in a context where we never have such pages.
 */
static inline bool HWPoisonHandlable(struct page *page)
{
	return PageLRU(page) || __PageMovable(page);
}

/*
 * Try to grab a reference on the page to be error-handled. Returns 1 on
 * success, 0 if the page cannot be handled or its refcount was already
 * zero. Hugetlb pages are delegated to get_hwpoison_huge_page().
 */
static int __get_hwpoison_page(struct page *page)
{
	struct page *head = compound_head(page);
	int ret = 0;
	bool hugetlb = false;

	ret = get_hwpoison_huge_page(head, &hugetlb);
	if (hugetlb)
		return ret;

	/*
	 * This check prevents from calling get_page_unless_zero() for any
	 * unsupported type of page in order to reduce the risk of unexpected
	 * races caused by taking a page refcount.
	 */
	if (!HWPoisonHandlable(head))
		return 0;

	if (PageTransHuge(head)) {
		/*
		 * Non anonymous thp exists only in allocation/free time. We
		 * can't handle such a case correctly, so let's give it up.
		 * This should be better than triggering BUG_ON when kernel
		 * tries to touch the "partially handled" page.
		 */
		if (!PageAnon(head)) {
			pr_err("Memory failure: %#lx: non anonymous thp\n",
				page_to_pfn(page));
			return 0;
		}
	}

	if (get_page_unless_zero(head)) {
		if (head == compound_head(page))
			return 1;

		pr_info("Memory failure: %#lx cannot catch tail\n",
			page_to_pfn(page));
		put_page(head);
	}

	return 0;
}

/*
 * Take a refcount on a page that is to be error-handled, retrying a few
 * times when racing with allocation or freeing.  Returns 1 when the page
 * was pinned in a handlable state, 0 for a free page, and -EBUSY or -EIO
 * when the page cannot be handled.
 */
static int get_any_page(struct page *p, unsigned long flags)
{
	int ret = 0, pass = 0;
	bool count_increased = false;

	if (flags & MF_COUNT_INCREASED)
		count_increased = true;

try_again:
	if (!count_increased && !__get_hwpoison_page(p)) {
		if (page_count(p)) {
			/* We raced with an allocation, retry. */
			if (pass++ < 3)
				goto try_again;
			ret = -EBUSY;
		} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
			/* We raced with put_page, retry. */
			if (pass++ < 3)
				goto try_again;
			ret = -EIO;
		}
	} else {
		if (PageHuge(p) || HWPoisonHandlable(p)) {
			ret = 1;
		} else {
			/*
			 * A page we cannot handle. Check whether we can turn
			 * it into something we can handle.
			 */
			if (pass++ < 3) {
				put_page(p);
				shake_page(p, 1);
				count_increased = false;
				goto try_again;
			}
			put_page(p);
			ret = -EIO;
		}
	}

	return ret;
}

static int get_hwpoison_page(struct page *p, unsigned long flags,
			     enum mf_flags ctxt)
{
	int ret;

	zone_pcp_disable(page_zone(p));
	if (ctxt == MF_SOFT_OFFLINE)
		ret = get_any_page(p, flags);
	else
		ret = __get_hwpoison_page(p);
	zone_pcp_enable(page_zone(p));

	return ret;
}

/*
 * Do all that is necessary to remove user space mappings. Unmap
 * the pages and send SIGBUS to the processes if the data was dirty.
 */
static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
				  int flags, struct page **hpagep)
{
	enum ttu_flags ttu = TTU_IGNORE_MLOCK;
	struct address_space *mapping;
	LIST_HEAD(tokill);
	bool unmap_success = true;
	int kill = 1, forcekill;
	struct page *hpage = *hpagep;
	bool mlocked = PageMlocked(hpage);

	/*
	 * Here we are interested only in user-mapped pages, so skip any
	 * other types of pages.
	 */
	if (PageReserved(p) || PageSlab(p))
		return true;
	if (!(PageLRU(hpage) || PageHuge(p)))
		return true;

	/*
	 * This check implies we don't kill processes if their pages
	 * are in the swap cache early. Those are always late kills.
	 */
	if (!page_mapped(hpage))
		return true;

	if (PageKsm(p)) {
		pr_err("Memory failure: %#lx: can't handle KSM pages.\n", pfn);
		return false;
	}

	if (PageSwapCache(p)) {
		pr_err("Memory failure: %#lx: keeping poisoned page in swap cache\n",
			pfn);
		ttu |= TTU_IGNORE_HWPOISON;
	}

	/*
	 * Propagate the dirty bit from PTEs to struct page first, because we
	 * need this to decide if we should kill or just drop the page.
	 * XXX: the dirty test could be racy: set_page_dirty() may not always
	 * be called inside page lock (it's recommended but not enforced).
	 */
	mapping = page_mapping(hpage);
	if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping &&
	    mapping_can_writeback(mapping)) {
		if (page_mkclean(hpage)) {
			SetPageDirty(hpage);
		} else {
			kill = 0;
			ttu |= TTU_IGNORE_HWPOISON;
			pr_info("Memory failure: %#lx: corrupted page was clean: dropped without side effects\n",
				pfn);
		}
	}

	/*
	 * First collect all the processes that have the page
	 * mapped in dirty form.  This has to be done before try_to_unmap,
	 * because ttu takes the rmap data structures down.
	 *
	 * Error handling: We ignore errors here because
	 * there's nothing that can be done.
	 */
	if (kill)
		collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);

	if (!PageHuge(hpage)) {
		unmap_success = try_to_unmap(hpage, ttu);
	} else {
		if (!PageAnon(hpage)) {
			/*
			 * For hugetlb pages in shared mappings, try_to_unmap
			 * could potentially call huge_pmd_unshare.  Because of
			 * this, take semaphore in write mode here and set
			 * TTU_RMAP_LOCKED to indicate we have taken the lock
			 * at this higher level.
			 */
			mapping = hugetlb_page_mapping_lock_write(hpage);
			if (mapping) {
				unmap_success = try_to_unmap(hpage,
						     ttu|TTU_RMAP_LOCKED);
				i_mmap_unlock_write(mapping);
			} else {
				pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
				unmap_success = false;
			}
		} else {
			unmap_success = try_to_unmap(hpage, ttu);
		}
	}
	if (!unmap_success)
		pr_err("Memory failure: %#lx: failed to unmap page (mapcount=%d)\n",
		       pfn, page_mapcount(hpage));

	/*
	 * try_to_unmap() might put mlocked page in lru cache, so call
	 * shake_page() again to ensure that it's flushed.
	 */
	if (mlocked)
		shake_page(hpage, 0);

	/*
	 * Now that the dirty bit has been propagated to the
	 * struct page and all unmaps are done we can decide if
	 * killing is needed or not.  Only kill when the page
	 * was dirty or the process is not restartable,
	 * otherwise the tokill list is merely
	 * freed.  When there was a problem unmapping earlier
	 * use a more force-full uncatchable kill to prevent
	 * any accesses to the poisoned memory.
	 */
	forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL);
	kill_procs(&tokill, forcekill, !unmap_success, pfn, flags);

	return unmap_success;
}

static int identify_page_state(unsigned long pfn, struct page *p,
				unsigned long page_flags)
{
	struct page_state *ps;

	/*
	 * The first check uses the current page flags which may not have any
	 * relevant information. The second check with the saved page flags is
	 * carried out only if the first check can't determine the page status.
	 */
	for (ps = error_states;; ps++)
		if ((p->flags & ps->mask) == ps->res)
			break;

	page_flags |= (p->flags & (1UL << PG_dirty));

	if (!ps->mask)
		for (ps = error_states;; ps++)
			if ((page_flags & ps->mask) == ps->res)
				break;
	return page_action(ps, p, pfn);
}

static int try_to_split_thp_page(struct page *page, const char *msg)
{
	lock_page(page);
	if (!PageAnon(page) || unlikely(split_huge_page(page))) {
		unsigned long pfn = page_to_pfn(page);

		unlock_page(page);
		if (!PageAnon(page))
			pr_info("%s: %#lx: non anonymous thp\n", msg, pfn);
		else
			pr_info("%s: %#lx: thp split failed\n", msg, pfn);
		put_page(page);
		return -EBUSY;
	}
	unlock_page(page);

	return 0;
}

static int memory_failure_hugetlb(unsigned long pfn, int flags)
{
	struct page *p = pfn_to_page(pfn);
	struct page *head = compound_head(p);
	int res;
	unsigned long page_flags;

	if (TestSetPageHWPoison(head)) {
		pr_err("Memory failure: %#lx: already hardware poisoned\n",
		       pfn);
		return -EHWPOISON;
	}

	num_poisoned_pages_inc();

	if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
		/*
		 * Check "filter hit" and "race with other subpage."
		 */
		lock_page(head);
		if (PageHWPoison(head)) {
			if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
			    || (p != head && TestSetPageHWPoison(head))) {
				num_poisoned_pages_dec();
				unlock_page(head);
				return 0;
			}
		}
		unlock_page(head);
		res = MF_FAILED;
		if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) {
			page_ref_inc(p);
			res = MF_RECOVERED;
		}
		action_result(pfn, MF_MSG_FREE_HUGE, res);
		return res == MF_RECOVERED ? 0 : -EBUSY;
	}

	lock_page(head);
	page_flags = head->flags;

	if (!PageHWPoison(head)) {
		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
		num_poisoned_pages_dec();
		unlock_page(head);
		put_page(head);
		return 0;
	}

	/*
	 * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
	 * simply disable it. In order to make it work properly, we need
	 * to make sure that:
	 *  - conversion of a pud that maps an error hugetlb into hwpoison
	 *    entry properly works, and
	 *  - other mm code walking over page table is aware of pud-aligned
	 *    hwpoison entries.
	 */
	if (huge_page_size(page_hstate(head)) > PMD_SIZE) {
		action_result(pfn, MF_MSG_NON_PMD_HUGE, MF_IGNORED);
		res = -EBUSY;
		goto out;
	}

	if (!hwpoison_user_mappings(p, pfn, flags, &head)) {
		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
		res = -EBUSY;
		goto out;
	}

	return identify_page_state(pfn, p, page_flags);
out:
	unlock_page(head);
	return res;
}

static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
		struct dev_pagemap *pgmap)
{
	struct page *page = pfn_to_page(pfn);
	const bool unmap_success = true;
	unsigned long size = 0;
	struct to_kill *tk;
	LIST_HEAD(tokill);
	int rc = -EBUSY;
	loff_t start;
	dax_entry_t cookie;

	if (flags & MF_COUNT_INCREASED)
		/*
		 * Drop the extra refcount in case we come from madvise().
		 */
		put_page(page);

	/* device metadata space is not recoverable */
	if (!pgmap_pfn_valid(pgmap, pfn)) {
		rc = -ENXIO;
		goto out;
	}

	/*
	 * Prevent the inode from being freed while we are interrogating
	 * the address_space, typically this would be handled by
	 * lock_page(), but dax pages do not use the page lock. This
	 * also prevents changes to the mapping of this pfn until
	 * poison signaling is complete.
	 */
	cookie = dax_lock_page(page);
	if (!cookie)
		goto out;

	if (hwpoison_filter(page)) {
		rc = 0;
		goto unlock;
	}

	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
		/*
		 * TODO: Handle HMM pages which may need coordination
		 * with device-side memory.
		 */
		goto unlock;
	}

	/*
	 * Use this flag as an indication that the dax page has been
	 * remapped UC to prevent speculative consumption of poison.
	 */
	SetPageHWPoison(page);

	/*
	 * Unlike System-RAM there is no possibility to swap in a
	 * different physical page at a given virtual address, so all
	 * userspace consumption of ZONE_DEVICE memory necessitates
	 * SIGBUS (i.e. MF_MUST_KILL)
	 */
	flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
	collect_procs(page, &tokill, flags & MF_ACTION_REQUIRED);

	list_for_each_entry(tk, &tokill, nd)
		if (tk->size_shift)
			size = max(size, 1UL << tk->size_shift);
	if (size) {
		/*
		 * Unmap the largest mapping to avoid breaking up
		 * device-dax mappings which are constant size. The
		 * actual size of the mapping being torn down is
		 * communicated in siginfo, see kill_proc()
		 */
		start = (page->index << PAGE_SHIFT) & ~(size - 1);
		unmap_mapping_range(page->mapping, start, size, 0);
	}
	kill_procs(&tokill, flags & MF_MUST_KILL, !unmap_success, pfn, flags);
	rc = 0;
unlock:
	dax_unlock_page(page, cookie);
out:
	/* drop pgmap ref acquired in caller */
	put_dev_pagemap(pgmap);
	action_result(pfn, MF_MSG_DAX, rc ? MF_FAILED : MF_RECOVERED);
	return rc;
}

/**
 * memory_failure - Handle memory failure of a page.
 * @pfn: Page Number of the corrupted page
 * @flags: fine tune action taken
 *
 * This function is called by the low level machine check code
 * of an architecture when it detects hardware memory corruption
 * of a page. It tries its best to recover, which includes
 * dropping pages, killing processes etc.
 *
 * The function is primarily of use for corruptions that
 * happen outside the current execution context (e.g. when
 * detected by a background scrubber)
 *
 * Must run in process context (e.g. a work queue) with interrupts
 * enabled and no spinlocks held.
 */
int memory_failure(unsigned long pfn, int flags)
{
	struct page *p;
	struct page *hpage;
	struct page *orig_head;
	struct dev_pagemap *pgmap;
	int res = 0;
	unsigned long page_flags;
	bool retry = true;
	static DEFINE_MUTEX(mf_mutex);

	if (!sysctl_memory_failure_recovery)
		panic("Memory failure on page %lx", pfn);

	p = pfn_to_online_page(pfn);
	if (!p) {
		if (pfn_valid(pfn)) {
			pgmap = get_dev_pagemap(pfn, NULL);
			if (pgmap)
				return memory_failure_dev_pagemap(pfn, flags,
								  pgmap);
		}
		pr_err("Memory failure: %#lx: memory outside kernel control\n",
			pfn);
		return -ENXIO;
	}

	mutex_lock(&mf_mutex);

try_again:
	if (PageHuge(p)) {
		res = memory_failure_hugetlb(pfn, flags);
		goto unlock_mutex;
	}

	if (TestSetPageHWPoison(p)) {
		pr_err("Memory failure: %#lx: already hardware poisoned\n",
			pfn);
		res = -EHWPOISON;
		goto unlock_mutex;
	}

	orig_head = hpage = compound_head(p);
	num_poisoned_pages_inc();

	/*
	 * We need/can do nothing about count=0 pages.
	 * 1) it's a free page, and therefore in safe hand:
	 *    prep_new_page() will be the gate keeper.
	 * 2) it's part of a non-compound high order page.
	 *    Implies some kernel user: cannot stop them from
	 *    R/W the page; let's pray that the page has been
	 *    used and will be freed some time later.
	 * In fact it's dangerous to directly bump up page count from 0,
	 * that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
	 */
	if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
		if (is_free_buddy_page(p)) {
			if (take_page_off_buddy(p)) {
				page_ref_inc(p);
				res = MF_RECOVERED;
			} else {
				/* We lost the race, try again */
				if (retry) {
					ClearPageHWPoison(p);
					num_poisoned_pages_dec();
					retry = false;
					goto try_again;
				}
				res = MF_FAILED;
			}
			action_result(pfn, MF_MSG_BUDDY, res);
			res = res == MF_RECOVERED ? 0 : -EBUSY;
		} else {
			action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
			res = -EBUSY;
		}
		goto unlock_mutex;
	}

	if (PageTransHuge(hpage)) {
		if (try_to_split_thp_page(p, "Memory Failure") < 0) {
			action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
			res = -EBUSY;
			goto unlock_mutex;
		}
		VM_BUG_ON_PAGE(!page_count(p), p);
	}

	/*
	 * We ignore non-LRU pages for good reasons.
	 * - PG_locked is only well defined for LRU pages and a few others
	 * - to avoid races with __SetPageLocked()
	 * - to avoid races with __SetPageSlab*() (and more non-atomic ops)
	 * The check (unnecessarily) ignores LRU pages being isolated and
	 * walked by the page reclaim code, however that's acceptable.
	 */
	shake_page(p, 0);

	lock_page(p);

	/*
	 * The page could have changed compound pages during the locking.
	 * If this happens just bail out.
	 */
	if (PageCompound(p) && compound_head(p) != orig_head) {
		action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
		res = -EBUSY;
		goto unlock_page;
	}

	/*
	 * We use page flags to determine what action should be taken, but
	 * the flags can be modified by the error containment action.  One
	 * example is an mlocked page, where PG_mlocked is cleared by
	 * page_remove_rmap() in try_to_unmap_one(). So to determine page
	 * status correctly, we save a copy of the page flags at this time.
	 */
	page_flags = p->flags;

	/*
	 * unpoison always clears PG_hwpoison inside page lock
	 */
	if (!PageHWPoison(p)) {
		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
		num_poisoned_pages_dec();
		unlock_page(p);
		put_page(p);
		goto unlock_mutex;
	}
	if (hwpoison_filter(p)) {
		if (TestClearPageHWPoison(p))
			num_poisoned_pages_dec();
		unlock_page(p);
		put_page(p);
		goto unlock_mutex;
	}

	/*
	 * __munlock_pagevec may clear a writeback page's LRU flag without
	 * the page lock. We need to wait for writeback completion for this
	 * page or it may trigger a vfs BUG while evicting the inode.
	 */
	if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p))
		goto identify_page_state;

	/*
	 * It's very difficult to mess with pages currently under IO
	 * and in many cases impossible, so we just avoid it here.
	 */
	wait_on_page_writeback(p);

	/*
	 * Now take care of user space mappings.
	 * Abort on fail: __delete_from_page_cache() assumes unmapped page.
	 */
	if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
		res = -EBUSY;
		goto unlock_page;
	}

	/*
	 * Torn down by someone else?
	 */
	if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
		action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
		res = -EBUSY;
		goto unlock_page;
	}

identify_page_state:
	res = identify_page_state(pfn, p, page_flags);
	mutex_unlock(&mf_mutex);
	return res;
unlock_page:
	unlock_page(p);
unlock_mutex:
	mutex_unlock(&mf_mutex);
	return res;
}
EXPORT_SYMBOL_GPL(memory_failure);
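
/*
 * Illustrative example (user space, requires CAP_SYS_ADMIN and
 * CONFIG_MEMORY_FAILURE): a test program can reach memory_failure()
 * through madvise(MADV_HWPOISON), which poisons the page backing one of
 * its own addresses:
 *
 *   #include <sys/mman.h>
 *
 *   int main(void)
 *   {
 *       size_t len = 4096;
 *       void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *       if (p == MAP_FAILED)
 *           return 1;
 *       *(volatile char *)p = 1;    // fault the page in first
 *       return madvise(p, len, MADV_HWPOISON);
 *   }
 *
 * Later access to the poisoned range delivers SIGBUS as described above.
 */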

#define MEMORY_FAILURE_FIFO_ORDER	4
#define MEMORY_FAILURE_FIFO_SIZE	(1 << MEMORY_FAILURE_FIFO_ORDER)

struct memory_failure_entry {
	unsigned long pfn;
	int flags;
};

struct memory_failure_cpu {
	DECLARE_KFIFO(fifo, struct memory_failure_entry,
		      MEMORY_FAILURE_FIFO_SIZE);
	spinlock_t lock;
	struct work_struct work;
};

static DEFINE_PER_CPU(struct memory_failure_cpu, memory_failure_cpu);

/**
 * memory_failure_queue - Schedule handling memory failure of a page.
 * @pfn: Page Number of the corrupted page
 * @flags: Flags for memory failure handling
 *
 * This function is called by the low level hardware error handler
 * when it detects hardware memory corruption of a page. It schedules
 * the recovering of the error page, including dropping pages, killing
 * processes etc.
 *
 * The function is primarily of use for corruptions that
 * happen outside the current execution context (e.g. when
 * detected by a background scrubber)
 *
 * Can run in IRQ context.
 */
void memory_failure_queue(unsigned long pfn, int flags)
{
	struct memory_failure_cpu *mf_cpu;
	unsigned long proc_flags;
	struct memory_failure_entry entry = {
		.pfn = pfn,
		.flags = flags,
	};

	mf_cpu = &get_cpu_var(memory_failure_cpu);
	spin_lock_irqsave(&mf_cpu->lock, proc_flags);
	if (kfifo_put(&mf_cpu->fifo, entry))
		schedule_work_on(smp_processor_id(), &mf_cpu->work);
	else
		pr_err("Memory failure: buffer overflow when queuing memory failure at %#lx\n",
		       pfn);
	spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
	put_cpu_var(memory_failure_cpu);
}
EXPORT_SYMBOL_GPL(memory_failure_queue);
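
/*
 * Illustrative example: a low level error handler running in interrupt
 * context (e.g. an MCE or APEI/GHES handler) cannot call memory_failure()
 * directly, because memory_failure() can sleep. Instead it defers the work:
 *
 *   // in an architecture's hard error interrupt path
 *   memory_failure_queue(pfn, MF_ACTION_REQUIRED);
 *
 * The queued entry is then picked up by memory_failure_work_func() below,
 * running in process context on the same CPU.
 */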

static void memory_failure_work_func(struct work_struct *work)
{
	struct memory_failure_cpu *mf_cpu;
	struct memory_failure_entry entry = { 0, };
	unsigned long proc_flags;
	int gotten;

	mf_cpu = container_of(work, struct memory_failure_cpu, work);
	for (;;) {
		spin_lock_irqsave(&mf_cpu->lock, proc_flags);
		gotten = kfifo_get(&mf_cpu->fifo, &entry);
		spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
		if (!gotten)
			break;
		if (entry.flags & MF_SOFT_OFFLINE)
			soft_offline_page(entry.pfn, entry.flags);
		else
			memory_failure(entry.pfn, entry.flags);
	}
}

/*
 * Process memory_failure work queued on the specified CPU.
 * Used to avoid return-to-userspace racing with the memory_failure workqueue.
 */
void memory_failure_queue_kick(int cpu)
{
	struct memory_failure_cpu *mf_cpu;

	mf_cpu = &per_cpu(memory_failure_cpu, cpu);
	cancel_work_sync(&mf_cpu->work);
	memory_failure_work_func(&mf_cpu->work);
}

static int __init memory_failure_init(void)
{
	struct memory_failure_cpu *mf_cpu;
	int cpu;

	for_each_possible_cpu(cpu) {
		mf_cpu = &per_cpu(memory_failure_cpu, cpu);
		spin_lock_init(&mf_cpu->lock);
		INIT_KFIFO(mf_cpu->fifo);
		INIT_WORK(&mf_cpu->work, memory_failure_work_func);
	}

	return 0;
}
core_initcall(memory_failure_init);

#define unpoison_pr_info(fmt, pfn, rs)			\
({							\
	if (__ratelimit(rs))				\
		pr_info(fmt, pfn);			\
})

/**
 * unpoison_memory - Unpoison a previously poisoned page
 * @pfn: Page number of the to be unpoisoned page
 *
 * Software-unpoison a page that has been poisoned by
 * memory_failure() earlier.
 *
 * This is only done on the software level, so it only works
 * for linux injected failures, not real hardware failures.
 *
 * Returns 0 for success, otherwise -errno.
 */
int unpoison_memory(unsigned long pfn)
{
	struct page *page;
	struct page *p;
	int freeit = 0;
	unsigned long flags = 0;
	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
					DEFAULT_RATELIMIT_BURST);

	if (!pfn_valid(pfn))
		return -ENXIO;

	p = pfn_to_page(pfn);
	page = compound_head(p);

	if (!PageHWPoison(p)) {
		unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
				 pfn, &unpoison_rs);
		return 0;
	}

	if (page_count(page) > 1) {
		unpoison_pr_info("Unpoison: Someone grabs the hwpoison page %#lx\n",
				 pfn, &unpoison_rs);
		return 0;
	}

	if (page_mapped(page)) {
		unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
				 pfn, &unpoison_rs);
		return 0;
	}

	if (page_mapping(page)) {
		unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n",
				 pfn, &unpoison_rs);
		return 0;
	}

	/*
	 * unpoison_memory() can encounter thp only when the thp is being
	 * worked by memory_failure() and the page lock is not held yet.
	 * In such case, we yield to memory_failure() and make unpoison fail.
	 */
	if (!PageHuge(page) && PageTransHuge(page)) {
		unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n",
				 pfn, &unpoison_rs);
		return 0;
	}

	if (!get_hwpoison_page(p, flags, 0)) {
		if (TestClearPageHWPoison(p))
			num_poisoned_pages_dec();
		unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
				 pfn, &unpoison_rs);
		return 0;
	}

	lock_page(page);
	/*
	 * This test is racy because PG_hwpoison is set outside of page lock.
	 * That's acceptable because that won't trigger kernel panic. Instead,
	 * the PG_hwpoison page will be caught and isolated on the entrance to
	 * the free buddy page pool.
	 */
	if (TestClearPageHWPoison(page)) {
		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
				 pfn, &unpoison_rs);
		num_poisoned_pages_dec();
		freeit = 1;
	}
	unlock_page(page);

	put_page(page);
	if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))
		put_page(page);

	return 0;
}
EXPORT_SYMBOL(unpoison_memory);
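
/*
 * Illustrative usage note: with the hwpoison-inject module loaded, a test
 * can undo a software-injected error from user space via debugfs, e.g.:
 *
 *   echo $pfn > /sys/kernel/debug/hwpoison/unpoison-pfn
 *
 * which ends up calling unpoison_memory() above. Real hardware errors
 * cannot be unpoisoned this way.
 */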

static bool isolate_page(struct page *page, struct list_head *pagelist)
{
	bool isolated = false;
	bool lru = PageLRU(page);

	if (PageHuge(page)) {
		isolated = isolate_huge_page(page, pagelist);
	} else {
		if (lru)
			isolated = !isolate_lru_page(page);
		else
			isolated = !isolate_movable_page(page, ISOLATE_UNEVICTABLE);

		if (isolated)
			list_add(&page->lru, pagelist);
	}

	if (isolated && lru)
		inc_node_page_state(page, NR_ISOLATED_ANON +
				    page_is_file_lru(page));

	/*
	 * If we succeed to isolate the page, we grabbed another refcount on
	 * the page, so we can safely drop the one we got from get_any_page().
	 * If we failed to isolate the page, it means that we cannot go further
	 * and we will return an error, so drop the reference we got from
	 * get_any_page() as well.
	 */
	put_page(page);
	return isolated;
}

/*
 * __soft_offline_page handles hugetlb-pages and non-hugetlb pages.
 * If the page is a non-dirty unmapped page-cache page, it simply invalidates.
 * If the page is mapped, it migrates the contents over.
 */
static int __soft_offline_page(struct page *page)
{
	int ret = 0;
	unsigned long pfn = page_to_pfn(page);
	struct page *hpage = compound_head(page);
	char const *msg_page[] = {"page", "hugepage"};
	bool huge = PageHuge(page);
	LIST_HEAD(pagelist);
	struct migration_target_control mtc = {
		.nid = NUMA_NO_NODE,
		.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
	};

	/*
	 * Check PageHWPoison again inside page lock because PageHWPoison
	 * is set by memory_failure() outside page lock. Note that
	 * memory_failure() also double-checks PageHWPoison inside page lock,
	 * so there's no race between soft_offline_page() and memory_failure().
	 */
	lock_page(page);
	if (!PageHuge(page))
		wait_on_page_writeback(page);
	if (PageHWPoison(page)) {
		unlock_page(page);
		put_page(page);
		pr_info("soft offline: %#lx page already poisoned\n", pfn);
		return 0;
	}

	if (!PageHuge(page))
		/*
		 * Try to invalidate first. This should work for
		 * non dirty unmapped page cache pages.
		 */
		ret = invalidate_inode_page(page);
	unlock_page(page);

	/*
	 * RED-PEN would be better to keep it isolated here, but we
	 * would need to fix isolation locking first.
	 */
	if (ret) {
		pr_info("soft_offline: %#lx: invalidated\n", pfn);
		page_handle_poison(page, false, true);
		return 0;
	}

	if (isolate_page(hpage, &pagelist)) {
		ret = migrate_pages(&pagelist, alloc_migration_target, NULL,
			(unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_FAILURE);
		if (!ret) {
			bool release = !huge;

			if (!page_handle_poison(page, huge, release))
				ret = -EBUSY;
		} else {
			if (!list_empty(&pagelist))
				putback_movable_pages(&pagelist);

			pr_info("soft offline: %#lx: %s migration failed %d, type %lx (%pGp)\n",
				pfn, msg_page[huge], ret, page->flags, &page->flags);
			if (ret > 0)
				ret = -EBUSY;
		}
	} else {
		pr_info("soft offline: %#lx: %s isolation failed, page count %d, type %lx (%pGp)\n",
			pfn, msg_page[huge], page_count(page), page->flags, &page->flags);
		ret = -EBUSY;
	}
	return ret;
}

static int soft_offline_in_use_page(struct page *page)
{
	struct page *hpage = compound_head(page);

	if (!PageHuge(page) && PageTransHuge(hpage))
		if (try_to_split_thp_page(page, "soft offline") < 0)
			return -EBUSY;
	return __soft_offline_page(page);
}

static int soft_offline_free_page(struct page *page)
{
	int rc = 0;

	if (!page_handle_poison(page, true, false))
		rc = -EBUSY;

	return rc;
}

static void put_ref_page(struct page *page)
{
	if (page)
		put_page(page);
}

/**
 * soft_offline_page - Soft offline a page.
 * @pfn: pfn to soft-offline
 * @flags: flags. Same as memory_failure().
 *
 * Returns 0 on success, otherwise negated errno.
 *
 * Soft offline a page, by migration or invalidation,
 * without killing anything. This is for the case when
 * a page is not corrupted yet (so it's still valid to access),
 * but has had a number of corrected errors and is better taken
 * out.
 *
 * The actual policy on when to do that is maintained by
 * user space.
 *
 * This should never impact any application or cause data loss,
 * however it might take some time.
 *
 * This is not a 100% solution for all memory, but tries to be
 * ``good enough'' for the majority of memory.
 */
int soft_offline_page(unsigned long pfn, int flags)
{
	int ret;
	bool try_again = true;
	struct page *page, *ref_page = NULL;

	WARN_ON_ONCE(!pfn_valid(pfn) && (flags & MF_COUNT_INCREASED));

	if (!pfn_valid(pfn))
		return -ENXIO;
	if (flags & MF_COUNT_INCREASED)
		ref_page = pfn_to_page(pfn);

	/* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */
	page = pfn_to_online_page(pfn);
	if (!page) {
		put_ref_page(ref_page);
		return -EIO;
	}

	if (PageHWPoison(page)) {
		pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
		put_ref_page(ref_page);
		return 0;
	}

retry:
	get_online_mems();
	ret = get_hwpoison_page(page, flags, MF_SOFT_OFFLINE);
	put_online_mems();

	if (ret > 0) {
		ret = soft_offline_in_use_page(page);
	} else if (ret == 0) {
		if (soft_offline_free_page(page) && try_again) {
			try_again = false;
			goto retry;
		}
	} else if (ret == -EIO) {
		pr_info("%s: %#lx: unknown page type: %lx (%pGp)\n",
			 __func__, pfn, page->flags, &page->flags);
	}

	return ret;
}
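
/*
 * Illustrative usage note: user space normally reaches soft_offline_page()
 * either through madvise(addr, len, MADV_SOFT_OFFLINE) on a mapped page, or
 * by writing the physical address of the page to sysfs, e.g.:
 *
 *   echo 0x200000000 > /sys/devices/system/memory/soft_offline_page
 *
 * Both paths are typically driven by a RAS daemon reacting to corrected
 * error counts.
 */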