// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/mm/page_alloc.c
 *
 *  Manages the free list, the system allocates free pages here.
 *  Note that kmalloc() lives in slab.c
 */
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/kasan.h>
#include <linux/kmsan.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/ratelimit.h>
#include <linux/oom.h>
#include <linux/topology.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/pagevec.h>
#include <linux/memory_hotplug.h>
#include <linux/nodemask.h>
#include <linux/vmstat.h>
#include <linux/fault-inject.h>
#include <linux/compaction.h>
#include <trace/events/kmem.h>
#include <trace/events/oom.h>
#include <linux/prefetch.h>
#include <linux/mm_inline.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/sched/mm.h>
#include <linux/page_owner.h>
#include <linux/page_table_check.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
#include <linux/lockdep.h>
#include <linux/psi.h>
#include <linux/khugepaged.h>
#include <linux/delayacct.h>
#include <linux/cacheinfo.h>
#include <linux/pgalloc_tag.h>
#include <asm/div64.h>
#include "internal.h"
#include "shuffle.h"
#include "page_reporting.h"

/* Free Page Internal flags: modifiers for the buddy freeing paths below. */
typedef int __bitwise fpi_t;

/* No special request */
#define FPI_NONE		((__force fpi_t)0)

/*
 * Skip free page reporting notification for the (possibly merged) page.
 * This does not hinder free page reporting from grabbing the page later;
 * it only suppresses the immediate notification when the page is placed
 * on the free lists.
 */
#define FPI_SKIP_REPORT_NOTIFY	((__force fpi_t)BIT(0))

/*
 * Place the (possibly merged) page at the tail of the relevant free list
 * instead of the head, making it less likely to be handed out again soon.
 * Used e.g. when exposing fresh memory so it is given out in physical order.
 */
#define FPI_TO_TAIL		((__force fpi_t)BIT(1))

/* Only trylock zone->lock when freeing; defer the page if it is contended. */
#define FPI_TRYLOCK		((__force fpi_t)BIT(2))

/* Prevent concurrent updaters of the per-CPU pageset ->high and ->batch fields. */
static DEFINE_MUTEX(pcp_batch_high_lock);
#define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8)

#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/*
 * On SMP, spin_trylock is sufficient protection.
 * On PREEMPT_RT, spin_trylock is equivalent on both SMP and UP.
 */
#define pcp_trylock_prepare(flags)	do { } while (0)
#define pcp_trylock_finish(flag)	do { } while (0)
#else

/* UP spin_trylock always succeeds so disable IRQs to prevent re-entrancy. */
#define pcp_trylock_prepare(flags)	local_irq_save(flags)
#define pcp_trylock_finish(flags)	local_irq_restore(flags)
#endif

/*
 * Locking a pcp requires a PCP lookup followed by a spinlock.  To avoid the
 * task migrating between the lookup and the lock acquisition, pin the task
 * to the CPU: preempt_disable() suffices on !PREEMPT_RT, while PREEMPT_RT
 * uses migrate_disable() because its spinlocks may sleep.
 */
#ifndef CONFIG_PREEMPT_RT
#define pcpu_task_pin()		preempt_disable()
#define pcpu_task_unpin()	preempt_enable()
#else
#define pcpu_task_pin()		migrate_disable()
#define pcpu_task_unpin()	migrate_enable()
#endif

/*
 * Generic helpers to look up this CPU's instance of a structure and lock the
 * spinlock embedded in it while the task stays pinned to the CPU.  The
 * trylock variant unpins and returns NULL if the lock is contended.
 */
#define pcpu_spin_lock(type, member, ptr)				\
({									\
	type *_ret;							\
	pcpu_task_pin();						\
	_ret = this_cpu_ptr(ptr);					\
	spin_lock(&_ret->member);					\
	_ret;								\
})

#define pcpu_spin_trylock(type, member, ptr)				\
({									\
	type *_ret;							\
	pcpu_task_pin();						\
	_ret = this_cpu_ptr(ptr);					\
	if (!spin_trylock(&_ret->member)) {				\
		pcpu_task_unpin();					\
		_ret = NULL;						\
	}								\
	_ret;								\
})

#define pcpu_spin_unlock(member, ptr)					\
({									\
	spin_unlock(&ptr->member);					\
	pcpu_task_unpin();						\
})

/* struct per_cpu_pages specific helpers. */
#define pcp_spin_lock(ptr)						\
	pcpu_spin_lock(struct per_cpu_pages, lock, ptr)

#define pcp_spin_trylock(ptr)						\
	pcpu_spin_trylock(struct per_cpu_pages, lock, ptr)

#define pcp_spin_unlock(ptr)						\
	pcpu_spin_unlock(lock, ptr)

#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DEFINE_PER_CPU(int, numa_node);
EXPORT_PER_CPU_SYMBOL(numa_node);
#endif

DEFINE_STATIC_KEY_TRUE(vm_numa_stat_key);

#ifdef CONFIG_HAVE_MEMORYLESS_NODES
/*
 * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly.
 * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined.
 * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem()
 * defined in <linux/topology.h>.
 */
DEFINE_PER_CPU(int, _numa_mem_);	/* Kernel "local memory" node */
EXPORT_PER_CPU_SYMBOL(_numa_mem_);
#endif

static DEFINE_MUTEX(pcpu_drain_mutex);

#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
volatile unsigned long latent_entropy __latent_entropy;
EXPORT_SYMBOL(latent_entropy);
#endif

/*
 * Array of node states.
 */
nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
	[N_POSSIBLE] = NODE_MASK_ALL,
	[N_ONLINE] = { { [0] = 1UL } },
#ifndef CONFIG_NUMA
	[N_NORMAL_MEMORY] = { { [0] = 1UL } },
#ifdef CONFIG_HIGHMEM
	[N_HIGH_MEMORY] = { { [0] = 1UL } },
#endif
	[N_MEMORY] = { { [0] = 1UL } },
	[N_CPU] = { { [0] = 1UL } },
#endif
};
EXPORT_SYMBOL(node_states);

gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;

#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
unsigned int pageblock_order __read_mostly;
#endif

static void __free_pages_ok(struct page *page, unsigned int order,
			    fpi_t fpi_flags);

/*
 * Ratio of reserved lowmem per higher zone, used to size each zone's
 * lowmem_reserve[] protection so that allocations which may fall back to
 * lower zones (e.g. GFP_KERNEL falling back from highmem, or GFP_DMA)
 * cannot consume all of a lower zone.  See setup_per_zone_lowmem_reserve().
 */
231static int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES] = {
232#ifdef CONFIG_ZONE_DMA
233 [ZONE_DMA] = 256,
234#endif
235#ifdef CONFIG_ZONE_DMA32
236 [ZONE_DMA32] = 256,
237#endif
238 [ZONE_NORMAL] = 32,
239#ifdef CONFIG_HIGHMEM
240 [ZONE_HIGHMEM] = 0,
241#endif
242 [ZONE_MOVABLE] = 0,
243};
244
245char * const zone_names[MAX_NR_ZONES] = {
246#ifdef CONFIG_ZONE_DMA
247 "DMA",
248#endif
249#ifdef CONFIG_ZONE_DMA32
250 "DMA32",
251#endif
252 "Normal",
253#ifdef CONFIG_HIGHMEM
254 "HighMem",
255#endif
256 "Movable",
257#ifdef CONFIG_ZONE_DEVICE
258 "Device",
259#endif
260};
261
262const char * const migratetype_names[MIGRATE_TYPES] = {
263 "Unmovable",
264 "Movable",
265 "Reclaimable",
266 "HighAtomic",
267#ifdef CONFIG_CMA
268 "CMA",
269#endif
270#ifdef CONFIG_MEMORY_ISOLATION
271 "Isolate",
272#endif
273};
274
275int min_free_kbytes = 1024;
276int user_min_free_kbytes = -1;
277static int watermark_boost_factor __read_mostly = 15000;
278static int watermark_scale_factor = 10;
279int defrag_mode;
280
281
282int movable_zone;
283EXPORT_SYMBOL(movable_zone);
284
285#if MAX_NUMNODES > 1
286unsigned int nr_node_ids __read_mostly = MAX_NUMNODES;
287unsigned int nr_online_nodes __read_mostly = 1;
288EXPORT_SYMBOL(nr_node_ids);
289EXPORT_SYMBOL(nr_online_nodes);
290#endif
291
292static bool page_contains_unaccepted(struct page *page, unsigned int order);
293static bool cond_accept_memory(struct zone *zone, unsigned int order,
294 int alloc_flags);
295static bool __free_unaccepted(struct page *page);
296
297int page_group_by_mobility_disabled __read_mostly;
298
299#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
 * During boot we initialize deferred pages on-demand, as needed, but once
 * page_alloc_init_late() has finished, the deferred pages are all initialized,
 * and we can permanently disable that path.
 */
305DEFINE_STATIC_KEY_TRUE(deferred_pages);
306
307static inline bool deferred_pages_enabled(void)
308{
309 return static_branch_unlikely(&deferred_pages);
310}

/*
 * deferred_grow_zone() is __init, but it is called from
 * get_page_from_freelist() during early boot until deferred_pages permanently
 * disables this call. This is why we have refdata wrapper to avoid warning,
 * and to ensure that the function body gets unloaded.
 */
318static bool __ref
319_deferred_grow_zone(struct zone *zone, unsigned int order)
320{
321 return deferred_grow_zone(zone, order);
322}
323#else
324static inline bool deferred_pages_enabled(void)
325{
326 return false;
327}
328
329static inline bool _deferred_grow_zone(struct zone *zone, unsigned int order)
330{
331 return false;
332}
333#endif
334
335
336static inline unsigned long *get_pageblock_bitmap(const struct page *page,
337 unsigned long pfn)
338{
339#ifdef CONFIG_SPARSEMEM
340 return section_to_usemap(__pfn_to_section(pfn));
341#else
342 return page_zone(page)->pageblock_flags;
343#endif
344}
345
346static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn)
347{
348#ifdef CONFIG_SPARSEMEM
349 pfn &= (PAGES_PER_SECTION-1);
350#else
351 pfn = pfn - pageblock_start_pfn(page_zone(page)->zone_start_pfn);
352#endif
353 return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
354}
355
356static __always_inline bool is_standalone_pb_bit(enum pageblock_bits pb_bit)
357{
358 return pb_bit > PB_migrate_end && pb_bit < __NR_PAGEBLOCK_BITS;
359}
360
361static __always_inline void
362get_pfnblock_bitmap_bitidx(const struct page *page, unsigned long pfn,
363 unsigned long **bitmap_word, unsigned long *bitidx)
364{
365 unsigned long *bitmap;
366 unsigned long word_bitidx;
367
368#ifdef CONFIG_MEMORY_ISOLATION
369 BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 8);
370#else
371 BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
372#endif
373 BUILD_BUG_ON(__MIGRATE_TYPE_END >= (1 << PB_migratetype_bits));
374 VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page);
375
376 bitmap = get_pageblock_bitmap(page, pfn);
377 *bitidx = pfn_to_bitidx(page, pfn);
378 word_bitidx = *bitidx / BITS_PER_LONG;
379 *bitidx &= (BITS_PER_LONG - 1);
380 *bitmap_word = &bitmap[word_bitidx];
381}

/**
 * __get_pfnblock_flags_mask - Return the requested group of flags for
 * a pageblock_nr_pages block of pages
 * @page: The page within the block of interest
 * @pfn: The target page frame number
 * @mask: mask of bits that the caller is interested in
 *
 * Return: pageblock_bits flags
 */
393static unsigned long __get_pfnblock_flags_mask(const struct page *page,
394 unsigned long pfn,
395 unsigned long mask)
396{
397 unsigned long *bitmap_word;
398 unsigned long bitidx;
399 unsigned long word;
400
401 get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
402
403
404
405
406
407 word = READ_ONCE(*bitmap_word);
408 return (word >> bitidx) & mask;
409}

/**
 * get_pfnblock_bit - Check if a standalone bit of a pageblock is set
 * @page: The page within the block of interest
 * @pfn: The target page frame number
 * @pb_bit: pageblock bit to check
 *
 * Return: true if the bit is set, otherwise false
 */
419bool get_pfnblock_bit(const struct page *page, unsigned long pfn,
420 enum pageblock_bits pb_bit)
421{
422 unsigned long *bitmap_word;
423 unsigned long bitidx;
424
425 if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit)))
426 return false;
427
428 get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
429
430 return test_bit(bitidx + pb_bit, bitmap_word);
431}

/**
 * get_pfnblock_migratetype - Return the migratetype of a pageblock
 * @page: The page within the block of interest
 * @pfn: The target page frame number
 *
 * Return: The migratetype of the pageblock
 *
 * Use get_pfnblock_migratetype() if the caller already has both @page and
 * @pfn, to save a call to page_to_pfn().
 */
443__always_inline enum migratetype
444get_pfnblock_migratetype(const struct page *page, unsigned long pfn)
445{
446 unsigned long mask = MIGRATETYPE_AND_ISO_MASK;
447 unsigned long flags;
448
449 flags = __get_pfnblock_flags_mask(page, pfn, mask);
450
451#ifdef CONFIG_MEMORY_ISOLATION
452 if (flags & BIT(PB_migrate_isolate))
453 return MIGRATE_ISOLATE;
454#endif
455 return flags & MIGRATETYPE_MASK;
456}
457
458
459
460
461
462
463
464
465
466static void __set_pfnblock_flags_mask(struct page *page, unsigned long pfn,
467 unsigned long flags, unsigned long mask)
468{
469 unsigned long *bitmap_word;
470 unsigned long bitidx;
471 unsigned long word;
472
473 get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
474
475 mask <<= bitidx;
476 flags <<= bitidx;
477
478 word = READ_ONCE(*bitmap_word);
479 do {
480 } while (!try_cmpxchg(bitmap_word, &word, (word & ~mask) | flags));
481}
482
483
484
485
486
487
488
489void set_pfnblock_bit(const struct page *page, unsigned long pfn,
490 enum pageblock_bits pb_bit)
491{
492 unsigned long *bitmap_word;
493 unsigned long bitidx;
494
495 if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit)))
496 return;
497
498 get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
499
500 set_bit(bitidx + pb_bit, bitmap_word);
501}
502
503
504
505
506
507
508
509void clear_pfnblock_bit(const struct page *page, unsigned long pfn,
510 enum pageblock_bits pb_bit)
511{
512 unsigned long *bitmap_word;
513 unsigned long bitidx;
514
515 if (WARN_ON_ONCE(!is_standalone_pb_bit(pb_bit)))
516 return;
517
518 get_pfnblock_bitmap_bitidx(page, pfn, &bitmap_word, &bitidx);
519
520 clear_bit(bitidx + pb_bit, bitmap_word);
521}

/**
 * set_pageblock_migratetype - Set the migratetype of a pageblock
 * @page: The page within the block of interest
 * @migratetype: migratetype to set
 */
528static void set_pageblock_migratetype(struct page *page,
529 enum migratetype migratetype)
530{
531 if (unlikely(page_group_by_mobility_disabled &&
532 migratetype < MIGRATE_PCPTYPES))
533 migratetype = MIGRATE_UNMOVABLE;
534
535#ifdef CONFIG_MEMORY_ISOLATION
536 if (migratetype == MIGRATE_ISOLATE) {
537 VM_WARN_ONCE(1,
538 "Use set_pageblock_isolate() for pageblock isolation");
539 return;
540 }
541 VM_WARN_ONCE(get_pfnblock_bit(page, page_to_pfn(page),
542 PB_migrate_isolate),
543 "Use clear_pageblock_isolate() to unisolate pageblock");
544
545#endif
546 __set_pfnblock_flags_mask(page, page_to_pfn(page),
547 (unsigned long)migratetype,
548 MIGRATETYPE_AND_ISO_MASK);
549}
550
551void __meminit init_pageblock_migratetype(struct page *page,
552 enum migratetype migratetype,
553 bool isolate)
554{
555 unsigned long flags;
556
557 if (unlikely(page_group_by_mobility_disabled &&
558 migratetype < MIGRATE_PCPTYPES))
559 migratetype = MIGRATE_UNMOVABLE;
560
561 flags = migratetype;
562
563#ifdef CONFIG_MEMORY_ISOLATION
564 if (migratetype == MIGRATE_ISOLATE) {
565 VM_WARN_ONCE(
566 1,
567 "Set isolate=true to isolate pageblock with a migratetype");
568 return;
569 }
570 if (isolate)
571 flags |= BIT(PB_migrate_isolate);
572#endif
573 __set_pfnblock_flags_mask(page, page_to_pfn(page), flags,
574 MIGRATETYPE_AND_ISO_MASK);
575}
576
577#ifdef CONFIG_DEBUG_VM
578static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
579{
580 int ret;
581 unsigned seq;
582 unsigned long pfn = page_to_pfn(page);
583 unsigned long sp, start_pfn;
584
585 do {
586 seq = zone_span_seqbegin(zone);
587 start_pfn = zone->zone_start_pfn;
588 sp = zone->spanned_pages;
589 ret = !zone_spans_pfn(zone, pfn);
590 } while (zone_span_seqretry(zone, seq));
591
592 if (ret)
593 pr_err("page 0x%lx outside node %d zone %s [ 0x%lx - 0x%lx ]\n",
594 pfn, zone_to_nid(zone), zone->name,
595 start_pfn, start_pfn + sp);
596
597 return ret;
598}
599
600
601
602
603static bool __maybe_unused bad_range(struct zone *zone, struct page *page)
604{
605 if (page_outside_zone_boundaries(zone, page))
606 return true;
607 if (zone != page_zone(page))
608 return true;
609
610 return false;
611}
612#else
613static inline bool __maybe_unused bad_range(struct zone *zone, struct page *page)
614{
615 return false;
616}
617#endif
618
619static void bad_page(struct page *page, const char *reason)
620{
621 static unsigned long resume;
622 static unsigned long nr_shown;
623 static unsigned long nr_unshown;

	/*
	 * Allow a burst of 60 reports, then keep quiet for that minute;
	 * or allow a steady drip of one report per second.
	 */
629 if (nr_shown == 60) {
630 if (time_before(jiffies, resume)) {
631 nr_unshown++;
632 goto out;
633 }
634 if (nr_unshown) {
635 pr_alert(
636 "BUG: Bad page state: %lu messages suppressed\n",
637 nr_unshown);
638 nr_unshown = 0;
639 }
640 nr_shown = 0;
641 }
642 if (nr_shown++ == 0)
643 resume = jiffies + 60 * HZ;
644
645 pr_alert("BUG: Bad page state in process %s pfn:%05lx\n",
646 current->comm, page_to_pfn(page));
647 dump_page(page, reason);
648
649 print_modules();
650 dump_stack();
651out:
652
653 if (PageBuddy(page))
654 __ClearPageBuddy(page);
655 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
656}
657
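/* Map a (migratetype, order) pair to an index into the per-CPU pcp->lists array. */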
658static inline unsigned int order_to_pindex(int migratetype, int order)
659{
660
661#ifdef CONFIG_TRANSPARENT_HUGEPAGE
662 bool movable;
663 if (order > PAGE_ALLOC_COSTLY_ORDER) {
664 VM_BUG_ON(order != HPAGE_PMD_ORDER);
665
666 movable = migratetype == MIGRATE_MOVABLE;
667
668 return NR_LOWORDER_PCP_LISTS + movable;
669 }
670#else
671 VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
672#endif
673
674 return (MIGRATE_PCPTYPES * order) + migratetype;
675}
676
677static inline int pindex_to_order(unsigned int pindex)
678{
679 int order = pindex / MIGRATE_PCPTYPES;
680
681#ifdef CONFIG_TRANSPARENT_HUGEPAGE
682 if (pindex >= NR_LOWORDER_PCP_LISTS)
683 order = HPAGE_PMD_ORDER;
684#else
685 VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
686#endif
687
688 return order;
689}
690
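/*
 * Orders that may be cached on the per-CPU page lists: everything up to
 * PAGE_ALLOC_COSTLY_ORDER, plus PMD-sized THPs when those are enabled.
 */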
691static inline bool pcp_allowed_order(unsigned int order)
692{
693 if (order <= PAGE_ALLOC_COSTLY_ORDER)
694 return true;
695#ifdef CONFIG_TRANSPARENT_HUGEPAGE
696 if (order == HPAGE_PMD_ORDER)
697 return true;
698#endif
699 return false;
700}
701

/*
 * Higher-order pages are called "compound pages".  They are structured thusly:
 *
 * The first PAGE_SIZE page is called the "head page" and has PG_head set.
 *
 * The remaining PAGE_SIZE pages are called "tail pages". PageTail() is encoded
 * in bit 0 of page->compound_head.
 *
 * The first tail page's ->compound_order holds the order of allocation.
 * This usage means that zero-order pages may not be compound.
 */
714void prep_compound_page(struct page *page, unsigned int order)
715{
716 int i;
717 int nr_pages = 1 << order;
718
719 __SetPageHead(page);
720 for (i = 1; i < nr_pages; i++)
721 prep_compound_tail(page, i);
722
723 prep_compound_head(page, order);
724}
725
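/* Record the buddy order in page->private and mark the page as a buddy. */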
726static inline void set_buddy_order(struct page *page, unsigned int order)
727{
728 set_page_private(page, order);
729 __SetPageBuddy(page);
730}
731
732#ifdef CONFIG_COMPACTION
733static inline struct capture_control *task_capc(struct zone *zone)
734{
735 struct capture_control *capc = current->capture_control;
736
737 return unlikely(capc) &&
738 !(current->flags & PF_KTHREAD) &&
739 !capc->page &&
740 capc->cc->zone == zone ? capc : NULL;
741}
742
743static inline bool
744compaction_capture(struct capture_control *capc, struct page *page,
745 int order, int migratetype)
746{
747 if (!capc || order != capc->cc->order)
748 return false;
749
750
751 if (is_migrate_cma(migratetype) ||
752 is_migrate_isolate(migratetype))
753 return false;
754
755
756
757
758
759
760
761
762 if (order < pageblock_order && migratetype == MIGRATE_MOVABLE &&
763 capc->cc->migratetype != MIGRATE_MOVABLE)
764 return false;
765
766 if (migratetype != capc->cc->migratetype)
767 trace_mm_page_alloc_extfrag(page, capc->cc->order, order,
768 capc->cc->migratetype, migratetype);
769
770 capc->page = page;
771 return true;
772}
773
774#else
775static inline struct capture_control *task_capc(struct zone *zone)
776{
777 return NULL;
778}
779
780static inline bool
781compaction_capture(struct capture_control *capc, struct page *page,
782 int order, int migratetype)
783{
784 return false;
785}
786#endif
787
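/* Update the zone's free-page counters; isolated pageblocks are not accounted. */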
788static inline void account_freepages(struct zone *zone, int nr_pages,
789 int migratetype)
790{
791 lockdep_assert_held(&zone->lock);
792
793 if (is_migrate_isolate(migratetype))
794 return;
795
796 __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
797
798 if (is_migrate_cma(migratetype))
799 __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
800 else if (is_migrate_highatomic(migratetype))
801 WRITE_ONCE(zone->nr_free_highatomic,
802 zone->nr_free_highatomic + nr_pages);
803}
804
805
806static inline void __add_to_free_list(struct page *page, struct zone *zone,
807 unsigned int order, int migratetype,
808 bool tail)
809{
810 struct free_area *area = &zone->free_area[order];
811 int nr_pages = 1 << order;
812
813 VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
814 "page type is %d, passed migratetype is %d (nr=%d)\n",
815 get_pageblock_migratetype(page), migratetype, nr_pages);
816
817 if (tail)
818 list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
819 else
820 list_add(&page->buddy_list, &area->free_list[migratetype]);
821 area->nr_free++;
822
823 if (order >= pageblock_order && !is_migrate_isolate(migratetype))
824 __mod_zone_page_state(zone, NR_FREE_PAGES_BLOCKS, nr_pages);
825}
826
827
828
829
830
831
832static inline void move_to_free_list(struct page *page, struct zone *zone,
833 unsigned int order, int old_mt, int new_mt)
834{
835 struct free_area *area = &zone->free_area[order];
836 int nr_pages = 1 << order;
837
838
839 VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt,
840 "page type is %d, passed migratetype is %d (nr=%d)\n",
841 get_pageblock_migratetype(page), old_mt, nr_pages);
842
843 list_move_tail(&page->buddy_list, &area->free_list[new_mt]);
844
845 account_freepages(zone, -nr_pages, old_mt);
846 account_freepages(zone, nr_pages, new_mt);
847
848 if (order >= pageblock_order &&
849 is_migrate_isolate(old_mt) != is_migrate_isolate(new_mt)) {
850 if (!is_migrate_isolate(old_mt))
851 nr_pages = -nr_pages;
852 __mod_zone_page_state(zone, NR_FREE_PAGES_BLOCKS, nr_pages);
853 }
854}
855
856static inline void __del_page_from_free_list(struct page *page, struct zone *zone,
857 unsigned int order, int migratetype)
858{
859 int nr_pages = 1 << order;
860
861 VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
862 "page type is %d, passed migratetype is %d (nr=%d)\n",
863 get_pageblock_migratetype(page), migratetype, nr_pages);
864
865
866 if (page_reported(page))
867 __ClearPageReported(page);
868
869 list_del(&page->buddy_list);
870 __ClearPageBuddy(page);
871 set_page_private(page, 0);
872 zone->free_area[order].nr_free--;
873
874 if (order >= pageblock_order && !is_migrate_isolate(migratetype))
875 __mod_zone_page_state(zone, NR_FREE_PAGES_BLOCKS, -nr_pages);
876}
877
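/* Remove a page from its free list and update the free-page accounting. */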
878static inline void del_page_from_free_list(struct page *page, struct zone *zone,
879 unsigned int order, int migratetype)
880{
881 __del_page_from_free_list(page, zone, order, migratetype);
882 account_freepages(zone, -(1 << order), migratetype);
883}
884
885static inline struct page *get_page_from_free_area(struct free_area *area,
886 int migratetype)
887{
888 return list_first_entry_or_null(&area->free_list[migratetype],
889 struct page, buddy_list);
890}
891

/*
 * If this is less than the 2nd largest possible page, check if the buddy
 * of the next-higher order is free.  If it is, it's possible that pages
 * are being freed that will coalesce soon.  In that case, add the free
 * page to the tail of the list so it's less likely to be used soon and
 * more likely to be merged as a higher-order page.
 */
900static inline bool
901buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn,
902 struct page *page, unsigned int order)
903{
904 unsigned long higher_page_pfn;
905 struct page *higher_page;
906
907 if (order >= MAX_PAGE_ORDER - 1)
908 return false;
909
910 higher_page_pfn = buddy_pfn & pfn;
911 higher_page = page + (higher_page_pfn - pfn);
912
913 return find_buddy_page_pfn(higher_page, higher_page_pfn, order + 1,
914 NULL) != NULL;
915}

/*
 * Freeing function for a buddy system allocator.
 *
 * The concept of a buddy system is to maintain direct-mapped table
 * (containing bit values) for memory blocks of various "orders".
 * The bottom level table contains the map for the smallest allocatable
 * units of memory (here, pages), and each level above it describes
 * pairs of units from the levels below, hence, "buddies".
 * At a high level, all that happens here is marking the table entry
 * at the bottom level available, and propagating the changes upward
 * as necessary, plus some accounting needed to play nicely with other
 * parts of the VM system.
 * At each level, we keep a list of pages, which are heads of continuous
 * free pages of length of (1 << order) and marked with PageBuddy.
 * Page's order is recorded in page_private(page) field.
 * So when we are allocating or freeing one, we can derive the state of the
 * other.  That is, if we allocate a small block, and both were
 * free, the remainder of the region must be split into blocks.
 * If a block is freed, and its buddy is also free, then this
 * triggers coalescing into a block of larger size.
 *
 * -- nyc
 */
941static inline void __free_one_page(struct page *page,
942 unsigned long pfn,
943 struct zone *zone, unsigned int order,
944 int migratetype, fpi_t fpi_flags)
945{
946 struct capture_control *capc = task_capc(zone);
947 unsigned long buddy_pfn = 0;
948 unsigned long combined_pfn;
949 struct page *buddy;
950 bool to_tail;
951
952 VM_BUG_ON(!zone_is_initialized(zone));
953 VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
954
955 VM_BUG_ON(migratetype == -1);
956 VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
957 VM_BUG_ON_PAGE(bad_range(zone, page), page);
958
959 account_freepages(zone, 1 << order, migratetype);
960
961 while (order < MAX_PAGE_ORDER) {
962 int buddy_mt = migratetype;
963
964 if (compaction_capture(capc, page, order, migratetype)) {
965 account_freepages(zone, -(1 << order), migratetype);
966 return;
967 }
968
969 buddy = find_buddy_page_pfn(page, pfn, order, &buddy_pfn);
970 if (!buddy)
971 goto done_merging;
972
973 if (unlikely(order >= pageblock_order)) {
974
975
976
977
978
979
980 buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn);
981
982 if (migratetype != buddy_mt &&
983 (!migratetype_is_mergeable(migratetype) ||
984 !migratetype_is_mergeable(buddy_mt)))
985 goto done_merging;
986 }
987
988
989
990
991
992 if (page_is_guard(buddy))
993 clear_page_guard(zone, buddy, order);
994 else
995 __del_page_from_free_list(buddy, zone, order, buddy_mt);
996
997 if (unlikely(buddy_mt != migratetype)) {
998
999
1000
1001
1002
1003 set_pageblock_migratetype(buddy, migratetype);
1004 }
1005
1006 combined_pfn = buddy_pfn & pfn;
1007 page = page + (combined_pfn - pfn);
1008 pfn = combined_pfn;
1009 order++;
1010 }
1011
1012done_merging:
1013 set_buddy_order(page, order);
1014
1015 if (fpi_flags & FPI_TO_TAIL)
1016 to_tail = true;
1017 else if (is_shuffle_order(order))
1018 to_tail = shuffle_pick_tail();
1019 else
1020 to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
1021
1022 __add_to_free_list(page, zone, order, migratetype, to_tail);
1023
1024
1025 if (!(fpi_flags & FPI_SKIP_REPORT_NOTIFY))
1026 page_reporting_notify_free(order);
1027}

/*
 * A bad page could be due to a number of fields. Instead of multiple branches,
 * try and check multiple fields with one check. The caller must do a detailed
 * check if necessary.
 */
1034static inline bool page_expected_state(struct page *page,
1035 unsigned long check_flags)
1036{
1037 if (unlikely(atomic_read(&page->_mapcount) != -1))
1038 return false;
1039
1040 if (unlikely((unsigned long)page->mapping |
1041 page_ref_count(page) |
1042#ifdef CONFIG_MEMCG
1043 page->memcg_data |
1044#endif
1045 page_pool_page_is_pp(page) |
1046 (page->flags & check_flags)))
1047 return false;
1048
1049 return true;
1050}
1051
1052static const char *page_bad_reason(struct page *page, unsigned long flags)
1053{
1054 const char *bad_reason = NULL;
1055
1056 if (unlikely(atomic_read(&page->_mapcount) != -1))
1057 bad_reason = "nonzero mapcount";
1058 if (unlikely(page->mapping != NULL))
1059 bad_reason = "non-NULL mapping";
1060 if (unlikely(page_ref_count(page) != 0))
1061 bad_reason = "nonzero _refcount";
1062 if (unlikely(page->flags & flags)) {
1063 if (flags == PAGE_FLAGS_CHECK_AT_PREP)
1064 bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag(s) set";
1065 else
1066 bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set";
1067 }
1068#ifdef CONFIG_MEMCG
1069 if (unlikely(page->memcg_data))
1070 bad_reason = "page still charged to cgroup";
1071#endif
1072 if (unlikely(page_pool_page_is_pp(page)))
1073 bad_reason = "page_pool leak";
1074 return bad_reason;
1075}
1076
1077static inline bool free_page_is_bad(struct page *page)
1078{
1079 if (likely(page_expected_state(page, PAGE_FLAGS_CHECK_AT_FREE)))
1080 return false;
1081
1082
1083 bad_page(page, page_bad_reason(page, PAGE_FLAGS_CHECK_AT_FREE));
1084 return true;
1085}
1086
1087static inline bool is_check_pages_enabled(void)
1088{
1089 return static_branch_unlikely(&check_pages_enabled);
1090}
1091
1092static int free_tail_page_prepare(struct page *head_page, struct page *page)
1093{
1094 struct folio *folio = (struct folio *)head_page;
1095 int ret = 1;
1096
1097
1098
1099
1100
1101 BUILD_BUG_ON((unsigned long)LIST_POISON1 & 1);
1102
1103 if (!is_check_pages_enabled()) {
1104 ret = 0;
1105 goto out;
1106 }
1107 switch (page - head_page) {
1108 case 1:
1109
1110 if (unlikely(folio_large_mapcount(folio))) {
1111 bad_page(page, "nonzero large_mapcount");
1112 goto out;
1113 }
1114 if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT) &&
1115 unlikely(atomic_read(&folio->_nr_pages_mapped))) {
1116 bad_page(page, "nonzero nr_pages_mapped");
1117 goto out;
1118 }
1119 if (IS_ENABLED(CONFIG_MM_ID)) {
1120 if (unlikely(folio->_mm_id_mapcount[0] != -1)) {
1121 bad_page(page, "nonzero mm mapcount 0");
1122 goto out;
1123 }
1124 if (unlikely(folio->_mm_id_mapcount[1] != -1)) {
1125 bad_page(page, "nonzero mm mapcount 1");
1126 goto out;
1127 }
1128 }
1129 if (IS_ENABLED(CONFIG_64BIT)) {
1130 if (unlikely(atomic_read(&folio->_entire_mapcount) + 1)) {
1131 bad_page(page, "nonzero entire_mapcount");
1132 goto out;
1133 }
1134 if (unlikely(atomic_read(&folio->_pincount))) {
1135 bad_page(page, "nonzero pincount");
1136 goto out;
1137 }
1138 }
1139 break;
1140 case 2:
1141
1142 if (unlikely(!list_empty(&folio->_deferred_list))) {
1143 bad_page(page, "on deferred list");
1144 goto out;
1145 }
1146 if (!IS_ENABLED(CONFIG_64BIT)) {
1147 if (unlikely(atomic_read(&folio->_entire_mapcount) + 1)) {
1148 bad_page(page, "nonzero entire_mapcount");
1149 goto out;
1150 }
1151 if (unlikely(atomic_read(&folio->_pincount))) {
1152 bad_page(page, "nonzero pincount");
1153 goto out;
1154 }
1155 }
1156 break;
1157 case 3:
1158
1159 if (IS_ENABLED(CONFIG_HUGETLB_PAGE))
1160 break;
1161 fallthrough;
1162 default:
1163 if (page->mapping != TAIL_MAPPING) {
1164 bad_page(page, "corrupted mapping in tail page");
1165 goto out;
1166 }
1167 break;
1168 }
1169 if (unlikely(!PageTail(page))) {
1170 bad_page(page, "PageTail not set");
1171 goto out;
1172 }
1173 if (unlikely(compound_head(page) != head_page)) {
1174 bad_page(page, "compound_head not consistent");
1175 goto out;
1176 }
1177 ret = 0;
1178out:
1179 page->mapping = NULL;
1180 clear_compound_head(page);
1181 return ret;
1182}

/*
 * Skip KASAN memory poisoning when either:
 *
 * 1. For generic KASAN: deferred memory initialization has not yet completed.
 *    Poisoning pages during deferred memory init would greatly lengthen the
 *    process and cause problems in large memory systems as the deferred pages
 *    initialization is done with interrupts disabled.
 *
 * 2. For tag-based KASAN modes: the page has a match-all KASAN tag, which
 *    indicates that error detection is disabled for accesses via the page
 *    address, e.g. because the allocation was not unpoisoned due to
 *    __GFP_SKIP_KASAN.
 *
 * Newly initialized pages have the poison inserted at allocation time, so
 * skipping the poisoning on free does not affect KASAN tracking.
 */
1213static inline bool should_skip_kasan_poison(struct page *page)
1214{
1215 if (IS_ENABLED(CONFIG_KASAN_GENERIC))
1216 return deferred_pages_enabled();
1217
1218 return page_kasan_tag(page) == KASAN_TAG_KERNEL;
1219}
1220
1221static void kernel_init_pages(struct page *page, int numpages)
1222{
1223 int i;
1224
1225
1226 kasan_disable_current();
1227 for (i = 0; i < numpages; i++)
1228 clear_highpage_kasan_tagged(page + i);
1229 kasan_enable_current();
1230}
1231
1232#ifdef CONFIG_MEM_ALLOC_PROFILING
1233
1234
1235void __clear_page_tag_ref(struct page *page)
1236{
1237 union pgtag_ref_handle handle;
1238 union codetag_ref ref;
1239
1240 if (get_page_tag_ref(page, &ref, &handle)) {
1241 set_codetag_empty(&ref);
1242 update_page_tag_ref(handle, &ref);
1243 put_page_tag_ref(handle);
1244 }
1245}
1246
1247
1248static noinline
1249void __pgalloc_tag_add(struct page *page, struct task_struct *task,
1250 unsigned int nr)
1251{
1252 union pgtag_ref_handle handle;
1253 union codetag_ref ref;
1254
1255 if (get_page_tag_ref(page, &ref, &handle)) {
1256 alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr);
1257 update_page_tag_ref(handle, &ref);
1258 put_page_tag_ref(handle);
1259 }
1260}
1261
1262static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
1263 unsigned int nr)
1264{
1265 if (mem_alloc_profiling_enabled())
1266 __pgalloc_tag_add(page, task, nr);
1267}
1268
1269
1270static noinline
1271void __pgalloc_tag_sub(struct page *page, unsigned int nr)
1272{
1273 union pgtag_ref_handle handle;
1274 union codetag_ref ref;
1275
1276 if (get_page_tag_ref(page, &ref, &handle)) {
1277 alloc_tag_sub(&ref, PAGE_SIZE * nr);
1278 update_page_tag_ref(handle, &ref);
1279 put_page_tag_ref(handle);
1280 }
1281}
1282
1283static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
1284{
1285 if (mem_alloc_profiling_enabled())
1286 __pgalloc_tag_sub(page, nr);
1287}
1288
1289
1290static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
1291{
1292 if (tag)
1293 this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
1294}
1295
1296#else
1297
1298static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
1299 unsigned int nr) {}
1300static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
1301static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
1302
1303#endif
1304
1305__always_inline bool free_pages_prepare(struct page *page,
1306 unsigned int order)
1307{
1308 int bad = 0;
1309 bool skip_kasan_poison = should_skip_kasan_poison(page);
1310 bool init = want_init_on_free();
1311 bool compound = PageCompound(page);
1312 struct folio *folio = page_folio(page);
1313
1314 VM_BUG_ON_PAGE(PageTail(page), page);
1315
1316 trace_mm_page_free(page, order);
1317 kmsan_free_page(page, order);
1318
1319 if (memcg_kmem_online() && PageMemcgKmem(page))
1320 __memcg_kmem_uncharge_page(page, order);
1321
1322
1323
1324
1325
1326
1327
1328 if (unlikely(folio_test_mlocked(folio))) {
1329 long nr_pages = folio_nr_pages(folio);
1330
1331 __folio_clear_mlocked(folio);
1332 zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages);
1333 count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
1334 }
1335
1336 if (unlikely(PageHWPoison(page)) && !order) {
1337
1338 reset_page_owner(page, order);
1339 page_table_check_free(page, order);
1340 pgalloc_tag_sub(page, 1 << order);
1341
1342
1343
1344
1345
1346
1347 clear_page_tag_ref(page);
1348 return false;
1349 }
1350
1351 VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
1352
1353
1354
1355
1356
1357 if (unlikely(order)) {
1358 int i;
1359
1360 if (compound) {
1361 page[1].flags &= ~PAGE_FLAGS_SECOND;
1362#ifdef NR_PAGES_IN_LARGE_FOLIO
1363 folio->_nr_pages = 0;
1364#endif
1365 }
1366 for (i = 1; i < (1 << order); i++) {
1367 if (compound)
1368 bad += free_tail_page_prepare(page, page + i);
1369 if (is_check_pages_enabled()) {
1370 if (free_page_is_bad(page + i)) {
1371 bad++;
1372 continue;
1373 }
1374 }
1375 (page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
1376 }
1377 }
1378 if (folio_test_anon(folio)) {
1379 mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
1380 folio->mapping = NULL;
1381 }
1382 if (unlikely(page_has_type(page)))
1383
1384 page->page_type = UINT_MAX;
1385
1386 if (is_check_pages_enabled()) {
1387 if (free_page_is_bad(page))
1388 bad++;
1389 if (bad)
1390 return false;
1391 }
1392
1393 page_cpupid_reset_last(page);
1394 page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
1395 reset_page_owner(page, order);
1396 page_table_check_free(page, order);
1397 pgalloc_tag_sub(page, 1 << order);
1398
1399 if (!PageHighMem(page)) {
1400 debug_check_no_locks_freed(page_address(page),
1401 PAGE_SIZE << order);
1402 debug_check_no_obj_freed(page_address(page),
1403 PAGE_SIZE << order);
1404 }
1405
1406 kernel_poison_pages(page, 1 << order);
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416 if (!skip_kasan_poison) {
1417 kasan_poison_pages(page, order, init);
1418
1419
1420 if (kasan_has_integrated_init())
1421 init = false;
1422 }
1423 if (init)
1424 kernel_init_pages(page, 1 << order);
1425
1426
1427
1428
1429
1430
1431 arch_free_page(page, order);
1432
1433 debug_pagealloc_unmap_pages(page, 1 << order);
1434
1435 return true;
1436}

/*
 * Frees a number of pages from the PCP lists.
 * Assumes all pages on list are in same zone.
 * count is the number of pages to free.
 */
1443static void free_pcppages_bulk(struct zone *zone, int count,
1444 struct per_cpu_pages *pcp,
1445 int pindex)
1446{
1447 unsigned long flags;
1448 unsigned int order;
1449 struct page *page;
1450

	/*
	 * Ensure proper count is passed which otherwise would stuck in the
	 * below while (list_empty(list)) loop.
	 */
1455 count = min(pcp->count, count);
1456
1457
1458 pindex = pindex - 1;
1459
1460 spin_lock_irqsave(&zone->lock, flags);
1461
1462 while (count > 0) {
1463 struct list_head *list;
1464 int nr_pages;
1465
1466
1467 do {
1468 if (++pindex > NR_PCP_LISTS - 1)
1469 pindex = 0;
1470 list = &pcp->lists[pindex];
1471 } while (list_empty(list));
1472
1473 order = pindex_to_order(pindex);
1474 nr_pages = 1 << order;
1475 do {
1476 unsigned long pfn;
1477 int mt;
1478
1479 page = list_last_entry(list, struct page, pcp_list);
1480 pfn = page_to_pfn(page);
1481 mt = get_pfnblock_migratetype(page, pfn);
1482
1483
1484 list_del(&page->pcp_list);
1485 count -= nr_pages;
1486 pcp->count -= nr_pages;
1487
1488 __free_one_page(page, pfn, zone, order, mt, FPI_NONE);
1489 trace_mm_page_pcpu_drain(page, order, mt);
1490 } while (count > 0 && !list_empty(list));
1491 }
1492
1493 spin_unlock_irqrestore(&zone->lock, flags);
1494}

/* Split a multi-block free page into its individual pageblocks. */
1497static void split_large_buddy(struct zone *zone, struct page *page,
1498 unsigned long pfn, int order, fpi_t fpi)
1499{
1500 unsigned long end = pfn + (1 << order);
1501
1502 VM_WARN_ON_ONCE(!IS_ALIGNED(pfn, 1 << order));
1503
1504 VM_WARN_ON_ONCE(PageBuddy(page));
1505
1506 if (order > pageblock_order)
1507 order = pageblock_order;
1508
1509 do {
1510 int mt = get_pfnblock_migratetype(page, pfn);
1511
1512 __free_one_page(page, pfn, zone, order, mt, fpi);
1513 pfn += 1 << order;
1514 if (pfn == end)
1515 break;
1516 page = pfn_to_page(pfn);
1517 } while (1);
1518}
1519
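/*
 * Defer a free that could not take zone->lock (FPI_TRYLOCK) onto a per-zone
 * lockless list; it is flushed later by a free that does hold zone->lock.
 */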
1520static void add_page_to_zone_llist(struct zone *zone, struct page *page,
1521 unsigned int order)
1522{
1523
1524 page->order = order;
1525
1526 llist_add(&page->pcp_llist, &zone->trylock_free_pages);
1527}
1528
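/* Free a page directly to the buddy allocator, flushing any deferred trylock frees first. */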
1529static void free_one_page(struct zone *zone, struct page *page,
1530 unsigned long pfn, unsigned int order,
1531 fpi_t fpi_flags)
1532{
1533 struct llist_head *llhead;
1534 unsigned long flags;
1535
1536 if (unlikely(fpi_flags & FPI_TRYLOCK)) {
1537 if (!spin_trylock_irqsave(&zone->lock, flags)) {
1538 add_page_to_zone_llist(zone, page, order);
1539 return;
1540 }
1541 } else {
1542 spin_lock_irqsave(&zone->lock, flags);
1543 }
1544
1545
1546 llhead = &zone->trylock_free_pages;
1547 if (unlikely(!llist_empty(llhead) && !(fpi_flags & FPI_TRYLOCK))) {
1548 struct llist_node *llnode;
1549 struct page *p, *tmp;
1550
1551 llnode = llist_del_all(llhead);
1552 llist_for_each_entry_safe(p, tmp, llnode, pcp_llist) {
1553 unsigned int p_order = p->order;
1554
1555 split_large_buddy(zone, p, page_to_pfn(p), p_order, fpi_flags);
1556 __count_vm_events(PGFREE, 1 << p_order);
1557 }
1558 }
1559 split_large_buddy(zone, page, pfn, order, fpi_flags);
1560 spin_unlock_irqrestore(&zone->lock, flags);
1561
1562 __count_vm_events(PGFREE, 1 << order);
1563}
1564
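/* Common exit path: run the free-time checks, then hand the page to the buddy allocator. */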
1565static void __free_pages_ok(struct page *page, unsigned int order,
1566 fpi_t fpi_flags)
1567{
1568 unsigned long pfn = page_to_pfn(page);
1569 struct zone *zone = page_zone(page);
1570
1571 if (free_pages_prepare(page, order))
1572 free_one_page(zone, page, pfn, order, fpi_flags);
1573}
1574
1575void __meminit __free_pages_core(struct page *page, unsigned int order,
1576 enum meminit_context context)
1577{
1578 unsigned int nr_pages = 1 << order;
1579 struct page *p = page;
1580 unsigned int loop;
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590 if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG) &&
1591 unlikely(context == MEMINIT_HOTPLUG)) {
1592 for (loop = 0; loop < nr_pages; loop++, p++) {
1593 VM_WARN_ON_ONCE(PageReserved(p));
1594 __ClearPageOffline(p);
1595 set_page_count(p, 0);
1596 }
1597
1598 adjust_managed_page_count(page, nr_pages);
1599 } else {
1600 for (loop = 0; loop < nr_pages; loop++, p++) {
1601 __ClearPageReserved(p);
1602 set_page_count(p, 0);
1603 }
1604
1605
1606 atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
1607 }
1608
1609 if (page_contains_unaccepted(page, order)) {
1610 if (order == MAX_PAGE_ORDER && __free_unaccepted(page))
1611 return;
1612
1613 accept_memory(page_to_phys(page), PAGE_SIZE << order);
1614 }
1615
1616
1617
1618
1619
1620 __free_pages_ok(page, order, FPI_TO_TAIL);
1621}

/*
 * Check that the whole (or subset of) a pageblock given by the interval of
 * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
 * with the migration of free compaction scanner.
 *
 * Return struct page pointer of start_pfn, or NULL if checks were not passed.
 *
 * It's possible on some configurations to have a setup like node0 node1 node0
 * i.e. it's possible that all pages within a zone's range of pages do not
 * belong to a single zone. We assume that a border between node0 and node1
 * can occur within a single pageblock, but not a node0 node1 node0
 * interleaving within a single pageblock. It is therefore sufficient to check
 * the first and last page of a pageblock and avoid checking each individual
 * page in a pageblock.
 *
 * Note: the function may return non-NULL struct page even for a page block
 * which contains a memory hole (i.e. there is no physical page backing all of
 * the pfns within the range).
 */
1647struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
1648 unsigned long end_pfn, struct zone *zone)
1649{
1650 struct page *start_page;
1651 struct page *end_page;
1652
1653
1654 end_pfn--;
1655
1656 if (!pfn_valid(end_pfn))
1657 return NULL;
1658
1659 start_page = pfn_to_online_page(start_pfn);
1660 if (!start_page)
1661 return NULL;
1662
1663 if (page_zone(start_page) != zone)
1664 return NULL;
1665
1666 end_page = pfn_to_page(end_pfn);
1667
1668
1669 if (page_zone_id(start_page) != page_zone_id(end_page))
1670 return NULL;
1671
1672 return start_page;
1673}

/*
 * The order of subdivision here is critical for the IO subsystem.
 * Please do not alter this order without good reasons and regression
 * testing. Specifically, as large blocks of memory are subdivided,
 * the order in which smaller blocks are delivered depends on the order
 * they're subdivided in this function. This is the primary factor
 * influencing the order in which pages are delivered to the IO
 * subsystem according to empirical testing, and this is also justified
 * by considering the behavior of a buddy system containing a single
 * large block of memory acted on by a series of small allocations.
 * This behavior is a critical factor in sglist merging's success.
 *
 * -- nyc
 */
1689static inline unsigned int expand(struct zone *zone, struct page *page, int low,
1690 int high, int migratetype)
1691{
1692 unsigned int size = 1 << high;
1693 unsigned int nr_added = 0;
1694
1695 while (high > low) {
1696 high--;
1697 size >>= 1;
1698 VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
1699
1700
1701
1702
1703
1704
1705
1706 if (set_page_guard(zone, &page[size], high))
1707 continue;
1708
1709 __add_to_free_list(&page[size], zone, high, migratetype, false);
1710 set_buddy_order(&page[size], high);
1711 nr_added += size;
1712 }
1713
1714 return nr_added;
1715}
1716
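/*
 * Take a high-order page off its free list and return the untouched remainder
 * of the split to the free lists via expand().
 */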
1717static __always_inline void page_del_and_expand(struct zone *zone,
1718 struct page *page, int low,
1719 int high, int migratetype)
1720{
1721 int nr_pages = 1 << high;
1722
1723 __del_page_from_free_list(page, zone, high, migratetype);
1724 nr_pages -= expand(zone, page, low, high, migratetype);
1725 account_freepages(zone, -nr_pages, migratetype);
1726}
1727
1728static void check_new_page_bad(struct page *page)
1729{
1730 if (unlikely(PageHWPoison(page))) {
1731
1732 if (PageBuddy(page))
1733 __ClearPageBuddy(page);
1734 return;
1735 }
1736
1737 bad_page(page,
1738 page_bad_reason(page, PAGE_FLAGS_CHECK_AT_PREP));
1739}
1740
1741
1742
1743
1744static bool check_new_page(struct page *page)
1745{
1746 if (likely(page_expected_state(page,
1747 PAGE_FLAGS_CHECK_AT_PREP|__PG_HWPOISON)))
1748 return false;
1749
1750 check_new_page_bad(page);
1751 return true;
1752}
1753
1754static inline bool check_new_pages(struct page *page, unsigned int order)
1755{
1756 if (is_check_pages_enabled()) {
1757 for (int i = 0; i < (1 << order); i++) {
1758 struct page *p = page + i;
1759
1760 if (check_new_page(p))
1761 return true;
1762 }
1763 }
1764
1765 return false;
1766}
1767
1768static inline bool should_skip_kasan_unpoison(gfp_t flags)
1769{
1770
1771 if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
1772 IS_ENABLED(CONFIG_KASAN_SW_TAGS))
1773 return false;
1774
1775
1776 if (!kasan_hw_tags_enabled())
1777 return true;
1778
1779
1780
1781
1782
1783 return flags & __GFP_SKIP_KASAN;
1784}
1785
1786static inline bool should_skip_init(gfp_t flags)
1787{
1788
1789 if (!kasan_hw_tags_enabled())
1790 return false;
1791
1792
1793 return (flags & __GFP_SKIP_ZERO);
1794}
1795
1796inline void post_alloc_hook(struct page *page, unsigned int order,
1797 gfp_t gfp_flags)
1798{
1799 bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) &&
1800 !should_skip_init(gfp_flags);
1801 bool zero_tags = init && (gfp_flags & __GFP_ZEROTAGS);
1802 int i;
1803
1804 set_page_private(page, 0);
1805
1806 arch_alloc_page(page, order);
1807 debug_pagealloc_map_pages(page, 1 << order);
1808
1809
1810
1811
1812
1813
1814 kernel_unpoison_pages(page, 1 << order);
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826 if (zero_tags) {
1827
1828 for (i = 0; i != 1 << order; ++i)
1829 tag_clear_highpage(page + i);
1830
1831
1832 init = false;
1833 }
1834 if (!should_skip_kasan_unpoison(gfp_flags) &&
1835 kasan_unpoison_pages(page, order, init)) {
1836
1837 if (kasan_has_integrated_init())
1838 init = false;
1839 } else {
1840
1841
1842
1843
1844 for (i = 0; i != 1 << order; ++i)
1845 page_kasan_tag_reset(page + i);
1846 }
1847
1848 if (init)
1849 kernel_init_pages(page, 1 << order);
1850
1851 set_page_owner(page, order, gfp_flags);
1852 page_table_check_alloc(page, order);
1853 pgalloc_tag_add(page, current, 1 << order);
1854}
1855
1856static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
1857 unsigned int alloc_flags)
1858{
1859 post_alloc_hook(page, order, gfp_flags);
1860
1861 if (order && (gfp_flags & __GFP_COMP))
1862 prep_compound_page(page, order);
1863
1864
1865
1866
1867
1868
1869
1870 if (alloc_flags & ALLOC_NO_WATERMARKS)
1871 set_page_pfmemalloc(page);
1872 else
1873 clear_page_pfmemalloc(page);
1874}

/*
 * Go through the free lists for the given migratetype and remove
 * the smallest available page from the freelists
 */
1880static __always_inline
1881struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
1882 int migratetype)
1883{
1884 unsigned int current_order;
1885 struct free_area *area;
1886 struct page *page;
1887
1888
1889 for (current_order = order; current_order < NR_PAGE_ORDERS; ++current_order) {
1890 area = &(zone->free_area[current_order]);
1891 page = get_page_from_free_area(area, migratetype);
1892 if (!page)
1893 continue;
1894
1895 page_del_and_expand(zone, page, order, current_order,
1896 migratetype);
1897 trace_mm_page_alloc_zone_locked(page, order, migratetype,
1898 pcp_allowed_order(order) &&
1899 migratetype < MIGRATE_PCPTYPES);
1900 return page;
1901 }
1902
1903 return NULL;
1904}
1905

/*
 * This array describes the order lists are fallen back to when
 * the free lists for the desirable migrate type are depleted
 *
 * The other migratetypes do not have fallbacks.
 */
1913static int fallbacks[MIGRATE_PCPTYPES][MIGRATE_PCPTYPES - 1] = {
1914 [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE },
1915 [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE },
1916 [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE },
1917};
1918
1919#ifdef CONFIG_CMA
1920static __always_inline struct page *__rmqueue_cma_fallback(struct zone *zone,
1921 unsigned int order)
1922{
1923 return __rmqueue_smallest(zone, order, MIGRATE_CMA);
1924}
1925#else
1926static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
1927 unsigned int order) { return NULL; }
1928#endif

/*
 * Move all free pages of a pageblock from the old_mt freelist to the
 * new_mt freelist.  The pageblock itself is retagged by the callers.
 */
1934static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
1935 int old_mt, int new_mt)
1936{
1937 struct page *page;
1938 unsigned long pfn, end_pfn;
1939 unsigned int order;
1940 int pages_moved = 0;
1941
1942 VM_WARN_ON(start_pfn & (pageblock_nr_pages - 1));
1943 end_pfn = pageblock_end_pfn(start_pfn);
1944
1945 for (pfn = start_pfn; pfn < end_pfn;) {
1946 page = pfn_to_page(pfn);
1947 if (!PageBuddy(page)) {
1948 pfn++;
1949 continue;
1950 }
1951
1952
1953 VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
1954 VM_BUG_ON_PAGE(page_zone(page) != zone, page);
1955
1956 order = buddy_order(page);
1957
1958 move_to_free_list(page, zone, order, old_mt, new_mt);
1959
1960 pfn += 1 << order;
1961 pages_moved += 1 << order;
1962 }
1963
1964 return pages_moved;
1965}
1966
1967static bool prep_move_freepages_block(struct zone *zone, struct page *page,
1968 unsigned long *start_pfn,
1969 int *num_free, int *num_movable)
1970{
1971 unsigned long pfn, start, end;
1972
1973 pfn = page_to_pfn(page);
1974 start = pageblock_start_pfn(pfn);
1975 end = pageblock_end_pfn(pfn);
1976
1977
1978
1979
1980
1981
1982
1983
1984 if (!zone_spans_pfn(zone, start))
1985 return false;
1986 if (!zone_spans_pfn(zone, end - 1))
1987 return false;
1988
1989 *start_pfn = start;
1990
1991 if (num_free) {
1992 *num_free = 0;
1993 *num_movable = 0;
1994 for (pfn = start; pfn < end;) {
1995 page = pfn_to_page(pfn);
1996 if (PageBuddy(page)) {
1997 int nr = 1 << buddy_order(page);
1998
1999 *num_free += nr;
2000 pfn += nr;
2001 continue;
2002 }
2003
2004
2005
2006
2007
2008 if (PageLRU(page) || page_has_movable_ops(page))
2009 (*num_movable)++;
2010 pfn++;
2011 }
2012 }
2013
2014 return true;
2015}
2016
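/*
 * Move the free pages of a whole pageblock to the new_mt freelist and retag
 * the block.  Returns the number of pages moved, or -1 if the pageblock
 * straddles a zone boundary.
 */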
2017static int move_freepages_block(struct zone *zone, struct page *page,
2018 int old_mt, int new_mt)
2019{
2020 unsigned long start_pfn;
2021 int res;
2022
2023 if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
2024 return -1;
2025
2026 res = __move_freepages_block(zone, start_pfn, old_mt, new_mt);
2027 set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
2028
2029 return res;
2030
2031}
2032
2033#ifdef CONFIG_MEMORY_ISOLATION
2034
2035static unsigned long find_large_buddy(unsigned long start_pfn)
2036{
2037 int order = 0;
2038 struct page *page;
2039 unsigned long pfn = start_pfn;
2040
2041 while (!PageBuddy(page = pfn_to_page(pfn))) {
2042
2043 if (++order > MAX_PAGE_ORDER)
2044 return start_pfn;
2045 pfn &= ~0UL << order;
2046 }
2047
2048
2049
2050
2051 if (pfn + (1 << buddy_order(page)) > start_pfn)
2052 return pfn;
2053
2054
2055 return start_pfn;
2056}
2057
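/* Set or clear the standalone PB_migrate_isolate bit for a pageblock. */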
2058static inline void toggle_pageblock_isolate(struct page *page, bool isolate)
2059{
2060 if (isolate)
2061 set_pfnblock_bit(page, page_to_pfn(page), PB_migrate_isolate);
2062 else
2063 clear_pfnblock_bit(page, page_to_pfn(page), PB_migrate_isolate);
2064}

/**
 * __move_freepages_block_isolate - move free pages in block for page isolation
 * @zone: the zone
 * @page: the pageblock page
 * @isolate: to isolate the given pageblock or unisolate it
 *
 * This is similar to move_freepages_block(), but handles the special
 * case encountered in page isolation, where the block of interest
 * might be part of a larger buddy spanning multiple pageblocks.
 *
 * Unlike the regular page allocator path, which moves pages while
 * stealing buddies off the freelist, page isolation is interested in
 * arbitrary pfn ranges that may have overlapping buddies on both ends.
 *
 * This function handles that. Straddling buddies are split into
 * individual pageblocks. Only the block of interest is moved.
 *
 * Returns %true if pages could be moved, %false otherwise.
 */
2085static bool __move_freepages_block_isolate(struct zone *zone,
2086 struct page *page, bool isolate)
2087{
2088 unsigned long start_pfn, pfn;
2089 int from_mt;
2090 int to_mt;
2091
2092 if (isolate == get_pageblock_isolate(page)) {
2093 VM_WARN_ONCE(1, "%s a pageblock that is already in that state",
2094 isolate ? "Isolate" : "Unisolate");
2095 return false;
2096 }
2097
2098 if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
2099 return false;
2100
2101
2102 if (pageblock_order == MAX_PAGE_ORDER)
2103 goto move;
2104
2105
2106 pfn = find_large_buddy(start_pfn);
2107 if (pfn != start_pfn) {
2108 struct page *buddy = pfn_to_page(pfn);
2109 int order = buddy_order(buddy);
2110
2111 del_page_from_free_list(buddy, zone, order,
2112 get_pfnblock_migratetype(buddy, pfn));
2113 toggle_pageblock_isolate(page, isolate);
2114 split_large_buddy(zone, buddy, pfn, order, FPI_NONE);
2115 return true;
2116 }
2117
2118
2119 if (PageBuddy(page) && buddy_order(page) > pageblock_order) {
2120 int order = buddy_order(page);
2121
2122 del_page_from_free_list(page, zone, order,
2123 get_pfnblock_migratetype(page, pfn));
2124 toggle_pageblock_isolate(page, isolate);
2125 split_large_buddy(zone, page, pfn, order, FPI_NONE);
2126 return true;
2127 }
2128move:
2129
2130 if (isolate) {
2131 from_mt = __get_pfnblock_flags_mask(page, page_to_pfn(page),
2132 MIGRATETYPE_MASK);
2133 to_mt = MIGRATE_ISOLATE;
2134 } else {
2135 from_mt = MIGRATE_ISOLATE;
2136 to_mt = __get_pfnblock_flags_mask(page, page_to_pfn(page),
2137 MIGRATETYPE_MASK);
2138 }
2139
2140 __move_freepages_block(zone, start_pfn, from_mt, to_mt);
2141 toggle_pageblock_isolate(pfn_to_page(start_pfn), isolate);
2142
2143 return true;
2144}
2145
2146bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page)
2147{
2148 return __move_freepages_block_isolate(zone, page, true);
2149}
2150
2151bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page)
2152{
2153 return __move_freepages_block_isolate(zone, page, false);
2154}
2155
2156#endif
2157
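/* Apply a migratetype to every pageblock covered by a high-order page. */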
2158static void change_pageblock_range(struct page *pageblock_page,
2159 int start_order, int migratetype)
2160{
2161 int nr_pageblocks = 1 << (start_order - pageblock_order);
2162
2163 while (nr_pageblocks--) {
2164 set_pageblock_migratetype(pageblock_page, migratetype);
2165 pageblock_page += pageblock_nr_pages;
2166 }
2167}
2168
2169static inline bool boost_watermark(struct zone *zone)
2170{
2171 unsigned long max_boost;
2172
2173 if (!watermark_boost_factor)
2174 return false;
2175
2176
2177
2178
2179
2180
2181 if ((pageblock_nr_pages * 4) > zone_managed_pages(zone))
2182 return false;
2183
2184 max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
2185 watermark_boost_factor, 10000);
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195 if (!max_boost)
2196 return false;
2197
2198 max_boost = max(pageblock_nr_pages, max_boost);
2199
2200 zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages,
2201 max_boost);
2202
2203 return true;
2204}

/*
 * When we are falling back to another migratetype during allocation, should we
 * try to claim an entire block to satisfy further allocations, instead of
 * polluting multiple pageblocks?
 */
2211static bool should_try_claim_block(unsigned int order, int start_mt)
2212{
2213
2214
2215
2216
2217
2218
2219
2220 if (order >= pageblock_order)
2221 return true;
2222
2223
2224
2225
2226
2227 if (order >= pageblock_order / 2)
2228 return true;
2229
2230
2231
2232
2233
2234
2235
2236
2237 if (start_mt == MIGRATE_RECLAIMABLE || start_mt == MIGRATE_UNMOVABLE)
2238 return true;
2239
2240 if (page_group_by_mobility_disabled)
2241 return true;
2242
2243
2244
2245
2246
2247
2248
2249
2250 return false;
2251}

/*
 * Check whether there is a suitable fallback freepage with requested order.
 * If claimable is true, this function returns fallback_mt only if
 * we would do this whole-block claiming. This would help to reduce
 * fragmentation due to mixed migratetype pages in one pageblock.
 */
2259int find_suitable_fallback(struct free_area *area, unsigned int order,
2260 int migratetype, bool claimable)
2261{
2262 int i;
2263
2264 if (claimable && !should_try_claim_block(order, migratetype))
2265 return -2;
2266
2267 if (area->nr_free == 0)
2268 return -1;
2269
2270 for (i = 0; i < MIGRATE_PCPTYPES - 1 ; i++) {
2271 int fallback_mt = fallbacks[migratetype][i];
2272
2273 if (!free_area_empty(area, fallback_mt))
2274 return fallback_mt;
2275 }
2276
2277 return -1;
2278}

/*
 * This function implements actual block claiming behaviour. If order is large
 * enough, we can claim the whole pageblock for the requested migratetype. If
 * not, we check the pageblock for constituent pages; if at least half of the
 * pages are free or compatible, we can still claim the whole block, so pages
 * freed in the future will be put on the correct free list.
 */
2287static struct page *
2288try_to_claim_block(struct zone *zone, struct page *page,
2289 int current_order, int order, int start_type,
2290 int block_type, unsigned int alloc_flags)
2291{
2292 int free_pages, movable_pages, alike_pages;
2293 unsigned long start_pfn;
2294
2295
2296 if (current_order >= pageblock_order) {
2297 unsigned int nr_added;
2298
2299 del_page_from_free_list(page, zone, current_order, block_type);
2300 change_pageblock_range(page, current_order, start_type);
2301 nr_added = expand(zone, page, order, current_order, start_type);
2302 account_freepages(zone, nr_added, start_type);
2303 return page;
2304 }
2305
2306
2307
2308
2309
2310
2311 if (boost_watermark(zone) && (alloc_flags & ALLOC_KSWAPD))
2312 set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
2313
2314
2315 if (!prep_move_freepages_block(zone, page, &start_pfn, &free_pages,
2316 &movable_pages))
2317 return NULL;
2318
2319
2320
2321
2322
2323
2324 if (start_type == MIGRATE_MOVABLE) {
2325 alike_pages = movable_pages;
2326 } else {
2327
2328
2329
2330
2331
2332
2333
2334 if (block_type == MIGRATE_MOVABLE)
2335 alike_pages = pageblock_nr_pages
2336 - (free_pages + movable_pages);
2337 else
2338 alike_pages = 0;
2339 }
2340
2341
2342
2343
2344 if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
2345 page_group_by_mobility_disabled) {
2346 __move_freepages_block(zone, start_pfn, block_type, start_type);
2347 set_pageblock_migratetype(pfn_to_page(start_pfn), start_type);
2348 return __rmqueue_smallest(zone, order, start_type);
2349 }
2350
2351 return NULL;
2352}

/*
 * Try finding a free buddy page on the fallback list.
 *
 * This will attempt to claim a whole pageblock for the requested type
 * to ensure grouping of such requests in the future.
 *
 * Return NULL if no whole block could be claimed.
 */
2362static __always_inline struct page *
2363__rmqueue_claim(struct zone *zone, int order, int start_migratetype,
2364 unsigned int alloc_flags)
2365{
2366 struct free_area *area;
2367 int current_order;
2368 int min_order = order;
2369 struct page *page;
2370 int fallback_mt;
2371
2372
2373
2374
2375
2376
2377 if (order < pageblock_order && alloc_flags & ALLOC_NOFRAGMENT)
2378 min_order = pageblock_order;
2379
2380
2381
2382
2383
2384
2385 for (current_order = MAX_PAGE_ORDER; current_order >= min_order;
2386 --current_order) {
2387 area = &(zone->free_area[current_order]);
2388 fallback_mt = find_suitable_fallback(area, current_order,
2389 start_migratetype, true);
2390
2391
2392 if (fallback_mt == -1)
2393 continue;
2394
2395
2396 if (fallback_mt == -2)
2397 break;
2398
2399 page = get_page_from_free_area(area, fallback_mt);
2400 page = try_to_claim_block(zone, page, current_order, order,
2401 start_migratetype, fallback_mt,
2402 alloc_flags);
2403 if (page) {
2404 trace_mm_page_alloc_extfrag(page, order, current_order,
2405 start_migratetype, fallback_mt);
2406 return page;
2407 }
2408 }
2409
2410 return NULL;
2411}

/*
 * Try to steal a single page from a foreign pageblock, leaving the rest of
 * the block on its current freelist.
 */
2417static __always_inline struct page *
2418__rmqueue_steal(struct zone *zone, int order, int start_migratetype)
2419{
2420 struct free_area *area;
2421 int current_order;
2422 struct page *page;
2423 int fallback_mt;
2424
2425 for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) {
2426 area = &(zone->free_area[current_order]);
2427 fallback_mt = find_suitable_fallback(area, current_order,
2428 start_migratetype, false);
2429 if (fallback_mt == -1)
2430 continue;
2431
2432 page = get_page_from_free_area(area, fallback_mt);
2433 page_del_and_expand(zone, page, order, current_order, fallback_mt);
2434 trace_mm_page_alloc_extfrag(page, order, current_order,
2435 start_migratetype, fallback_mt);
2436 return page;
2437 }
2438
2439 return NULL;
2440}
2441
2442enum rmqueue_mode {
2443 RMQUEUE_NORMAL,
2444 RMQUEUE_CMA,
2445 RMQUEUE_CLAIM,
2446 RMQUEUE_STEAL,
2447};

/*
 * Do the hard work of removing an element from the buddy allocator.
 * Call me with the zone->lock already held.
 */
2453static __always_inline struct page *
2454__rmqueue(struct zone *zone, unsigned int order, int migratetype,
2455 unsigned int alloc_flags, enum rmqueue_mode *mode)
2456{
2457 struct page *page;
2458
2459 if (IS_ENABLED(CONFIG_CMA)) {
2460
2461
2462
2463
2464
2465 if (alloc_flags & ALLOC_CMA &&
2466 zone_page_state(zone, NR_FREE_CMA_PAGES) >
2467 zone_page_state(zone, NR_FREE_PAGES) / 2) {
2468 page = __rmqueue_cma_fallback(zone, order);
2469 if (page)
2470 return page;
2471 }
2472 }
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483 switch (*mode) {
2484 case RMQUEUE_NORMAL:
2485 page = __rmqueue_smallest(zone, order, migratetype);
2486 if (page)
2487 return page;
2488 fallthrough;
2489 case RMQUEUE_CMA:
2490 if (alloc_flags & ALLOC_CMA) {
2491 page = __rmqueue_cma_fallback(zone, order);
2492 if (page) {
2493 *mode = RMQUEUE_CMA;
2494 return page;
2495 }
2496 }
2497 fallthrough;
2498 case RMQUEUE_CLAIM:
2499 page = __rmqueue_claim(zone, order, migratetype, alloc_flags);
2500 if (page) {
2501
2502 *mode = RMQUEUE_NORMAL;
2503 return page;
2504 }
2505 fallthrough;
2506 case RMQUEUE_STEAL:
2507 if (!(alloc_flags & ALLOC_NOFRAGMENT)) {
2508 page = __rmqueue_steal(zone, order, migratetype);
2509 if (page) {
2510 *mode = RMQUEUE_STEAL;
2511 return page;
2512 }
2513 }
2514 }
2515 return NULL;
2516}

/*
 * Obtain a specified number of elements from the buddy allocator, all under
 * a single hold of the lock, for efficiency.  Add them to the supplied list.
 * Returns the number of new pages which were placed at *list.
 */
2523static int rmqueue_bulk(struct zone *zone, unsigned int order,
2524 unsigned long count, struct list_head *list,
2525 int migratetype, unsigned int alloc_flags)
2526{
2527 enum rmqueue_mode rmqm = RMQUEUE_NORMAL;
2528 unsigned long flags;
2529 int i;
2530
2531 if (unlikely(alloc_flags & ALLOC_TRYLOCK)) {
2532 if (!spin_trylock_irqsave(&zone->lock, flags))
2533 return 0;
2534 } else {
2535 spin_lock_irqsave(&zone->lock, flags);
2536 }
2537 for (i = 0; i < count; ++i) {
2538 struct page *page = __rmqueue(zone, order, migratetype,
2539 alloc_flags, &rmqm);
2540 if (unlikely(page == NULL))
2541 break;
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553 list_add_tail(&page->pcp_list, list);
2554 }
2555 spin_unlock_irqrestore(&zone->lock, flags);
2556
2557 return i;
2558}

/*
 * Called from the vmstat counter updater to decay the PCP high.
 * Return whether there is additional work to do.
 */
2564int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
2565{
2566 int high_min, to_drain, batch;
2567 int todo = 0;
2568
2569 high_min = READ_ONCE(pcp->high_min);
2570 batch = READ_ONCE(pcp->batch);
2571
2572
2573
2574
2575
2576 if (pcp->high > high_min) {
2577 pcp->high = max3(pcp->count - (batch << CONFIG_PCP_BATCH_SCALE_MAX),
2578 pcp->high - (pcp->high >> 3), high_min);
2579 if (pcp->high > high_min)
2580 todo++;
2581 }
2582
2583 to_drain = pcp->count - pcp->high;
2584 if (to_drain > 0) {
2585 spin_lock(&pcp->lock);
2586 free_pcppages_bulk(zone, to_drain, pcp, 0);
2587 spin_unlock(&pcp->lock);
2588 todo++;
2589 }
2590
2591 return todo;
2592}
2593
2594#ifdef CONFIG_NUMA
/*
 * Called from the vmstat counter updater to drain pagesets of this
 * currently executing processor on remote nodes after they have
 * expired.
 */
2600void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
2601{
2602 int to_drain, batch;
2603
2604 batch = READ_ONCE(pcp->batch);
2605 to_drain = min(pcp->count, batch);
2606 if (to_drain > 0) {
2607 spin_lock(&pcp->lock);
2608 free_pcppages_bulk(zone, to_drain, pcp, 0);
2609 spin_unlock(&pcp->lock);
2610 }
2611}
2612#endif

/*
 * Drain pcplists of the indicated processor and zone.
 */
2617static void drain_pages_zone(unsigned int cpu, struct zone *zone)
2618{
2619 struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
2620 int count;
2621
2622 do {
2623 spin_lock(&pcp->lock);
2624 count = pcp->count;
2625 if (count) {
2626 int to_drain = min(count,
2627 pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX);
2628
2629 free_pcppages_bulk(zone, to_drain, pcp, 0);
2630 count -= to_drain;
2631 }
2632 spin_unlock(&pcp->lock);
2633 } while (count);
2634}
2635

/*
 * Drain pcplists of all zones on the indicated processor.
 */
2639static void drain_pages(unsigned int cpu)
2640{
2641 struct zone *zone;
2642
2643 for_each_populated_zone(zone) {
2644 drain_pages_zone(cpu, zone);
2645 }
2646}
2647
/*
 * Spill all of this CPU's per-cpu pages back into the buddy allocator.
 */
2651void drain_local_pages(struct zone *zone)
2652{
2653 int cpu = smp_processor_id();
2654
2655 if (zone)
2656 drain_pages_zone(cpu, zone);
2657 else
2658 drain_pages(cpu);
2659}
2660
/*
 * The implementation of drain_all_pages(), exposing an extra parameter to
 * drain on all cpus.
 *
 * drain_all_pages() is optimized to only execute on cpus where pcplists are
 * not empty. The check for non-emptiness can however race with a free to
 * pcplist that has not yet increased the pcp->count from 0 to 1. Callers
 * that need the guarantee that every CPU has drained can disable the
 * optimizing racy check.
 */
2671static void __drain_all_pages(struct zone *zone, bool force_all_cpus)
2672{
2673 int cpu;
2674
	/*
	 * Allocate in the BSS so we won't require allocation in
	 * direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
	 */
2679 static cpumask_t cpus_with_pcps;
2680
	/*
	 * Do not drain if one is already in progress unless it's specific to
	 * a zone. Such callers are primarily CMA and memory hotplug and need
	 * the drain to be complete when the call returns.
	 */
2686 if (unlikely(!mutex_trylock(&pcpu_drain_mutex))) {
2687 if (!zone)
2688 return;
2689 mutex_lock(&pcpu_drain_mutex);
2690 }
2691
2692
2693
2694
2695
2696
2697
2698 for_each_online_cpu(cpu) {
2699 struct per_cpu_pages *pcp;
2700 struct zone *z;
2701 bool has_pcps = false;
2702
			/*
			 * The pcp.count check is racy, some callers
			 * need a guarantee that no cpu is missed.
			 */
2708 has_pcps = true;
2709 } else if (zone) {
2710 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
2711 if (pcp->count)
2712 has_pcps = true;
2713 } else {
2714 for_each_populated_zone(z) {
2715 pcp = per_cpu_ptr(z->per_cpu_pageset, cpu);
2716 if (pcp->count) {
2717 has_pcps = true;
2718 break;
2719 }
2720 }
2721 }
2722
2723 if (has_pcps)
2724 cpumask_set_cpu(cpu, &cpus_with_pcps);
2725 else
2726 cpumask_clear_cpu(cpu, &cpus_with_pcps);
2727 }
2728
2729 for_each_cpu(cpu, &cpus_with_pcps) {
2730 if (zone)
2731 drain_pages_zone(cpu, zone);
2732 else
2733 drain_pages(cpu);
2734 }
2735
2736 mutex_unlock(&pcpu_drain_mutex);
2737}
2738
/*
 * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
 *
 * When zone parameter is non-NULL, spill just the single zone's pages.
 */
2744void drain_all_pages(struct zone *zone)
2745{
2746 __drain_all_pages(zone, false);
2747}
2748
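/*
 * How many pages should be moved from the pcp list back to the buddy
 * allocator once pcp->count has exceeded the high limit, scaled by how
 * aggressively this CPU has been freeing recently (pcp->free_count).
 */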
2749static int nr_pcp_free(struct per_cpu_pages *pcp, int batch, int high, bool free_high)
2750{
2751 int min_nr_free, max_nr_free;
2752
2753
2754 if (unlikely(free_high))
2755 return min(pcp->count, batch << CONFIG_PCP_BATCH_SCALE_MAX);
2756
2757
2758 if (unlikely(high < batch))
2759 return 1;
2760
2761
2762 min_nr_free = batch;
2763 max_nr_free = high - batch;
2764
2765
2766
2767
2768
2769 batch = clamp_t(int, pcp->free_count, min_nr_free, max_nr_free);
2770
2771 return batch;
2772}
2773
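/*
 * Work out the pcp->high limit to trim against for this free operation:
 * shrink it while the zone is actively being reclaimed or sits below its
 * high watermark, and let it grow again when the pcp list keeps refilling.
 */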
2774static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
2775 int batch, bool free_high)
2776{
2777 int high, high_min, high_max;
2778
2779 high_min = READ_ONCE(pcp->high_min);
2780 high_max = READ_ONCE(pcp->high_max);
2781 high = pcp->high = clamp(pcp->high, high_min, high_max);
2782
2783 if (unlikely(!high))
2784 return 0;
2785
2786 if (unlikely(free_high)) {
2787 pcp->high = max(high - (batch << CONFIG_PCP_BATCH_SCALE_MAX),
2788 high_min);
2789 return 0;
2790 }
2791
2792
2793
2794
2795
2796 if (test_bit(ZONE_RECLAIM_ACTIVE, &zone->flags)) {
2797 int free_count = max_t(int, pcp->free_count, batch);
2798
2799 pcp->high = max(high - free_count, high_min);
2800 return min(batch << 2, pcp->high);
2801 }
2802
2803 if (high_min == high_max)
2804 return high;
2805
2806 if (test_bit(ZONE_BELOW_HIGH, &zone->flags)) {
2807 int free_count = max_t(int, pcp->free_count, batch);
2808
2809 pcp->high = max(high - free_count, high_min);
2810 high = max(pcp->count, high_min);
2811 } else if (pcp->count >= high) {
2812 int need_high = pcp->free_count + batch;
2813
2814
2815 if (pcp->high < need_high)
2816 pcp->high = clamp(need_high, high_min, high_max);
2817 }
2818
2819 return high;
2820}
2821
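/*
 * Queue a frozen page on the appropriate pcp list and, unless the caller
 * used FPI_TRYLOCK (and therefore must not contend on zone->lock), trim
 * the list back down once it has grown past the pcp->high limit.
 */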
2822static void free_frozen_page_commit(struct zone *zone,
2823 struct per_cpu_pages *pcp, struct page *page, int migratetype,
2824 unsigned int order, fpi_t fpi_flags)
2825{
2826 int high, batch;
2827 int pindex;
2828 bool free_high = false;
2829
2830
2831
2832
2833
2834
2835 pcp->alloc_factor >>= 1;
2836 __count_vm_events(PGFREE, 1 << order);
2837 pindex = order_to_pindex(migratetype, order);
2838 list_add(&page->pcp_list, &pcp->lists[pindex]);
2839 pcp->count += 1 << order;
2840
2841 batch = READ_ONCE(pcp->batch);
2842
2843
2844
2845
2846
2847
2848 if (order && order <= PAGE_ALLOC_COSTLY_ORDER) {
2849 free_high = (pcp->free_count >= (batch + pcp->high_min / 2) &&
2850 (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) &&
2851 (!(pcp->flags & PCPF_FREE_HIGH_BATCH) ||
2852 pcp->count >= batch));
2853 pcp->flags |= PCPF_PREV_FREE_HIGH_ORDER;
2854 } else if (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) {
2855 pcp->flags &= ~PCPF_PREV_FREE_HIGH_ORDER;
2856 }
2857 if (pcp->free_count < (batch << CONFIG_PCP_BATCH_SCALE_MAX))
2858 pcp->free_count += (1 << order);
2859
2860 if (unlikely(fpi_flags & FPI_TRYLOCK)) {
		/*
		 * Do not attempt to take a zone lock. Let pcp->count get
		 * over high mark temporarily.
		 */
2865 return;
2866 }
2867 high = nr_pcp_high(pcp, zone, batch, free_high);
2868 if (pcp->count >= high) {
2869 free_pcppages_bulk(zone, nr_pcp_free(pcp, batch, high, free_high),
2870 pcp, pindex);
2871 if (test_bit(ZONE_BELOW_HIGH, &zone->flags) &&
2872 zone_watermark_ok(zone, 0, high_wmark_pages(zone),
2873 ZONE_MOVABLE, 0))
2874 clear_bit(ZONE_BELOW_HIGH, &zone->flags);
2875 }
2876}
2877
2878
2879
2880
2881static void __free_frozen_pages(struct page *page, unsigned int order,
2882 fpi_t fpi_flags)
2883{
2884 unsigned long __maybe_unused UP_flags;
2885 struct per_cpu_pages *pcp;
2886 struct zone *zone;
2887 unsigned long pfn = page_to_pfn(page);
2888 int migratetype;
2889
2890 if (!pcp_allowed_order(order)) {
2891 __free_pages_ok(page, order, fpi_flags);
2892 return;
2893 }
2894
2895 if (!free_pages_prepare(page, order))
2896 return;
2897
2898
2899
2900
2901
2902
2903
2904
2905 zone = page_zone(page);
2906 migratetype = get_pfnblock_migratetype(page, pfn);
2907 if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
2908 if (unlikely(is_migrate_isolate(migratetype))) {
2909 free_one_page(zone, page, pfn, order, fpi_flags);
2910 return;
2911 }
2912 migratetype = MIGRATE_MOVABLE;
2913 }
2914
2915 if (unlikely((fpi_flags & FPI_TRYLOCK) && IS_ENABLED(CONFIG_PREEMPT_RT)
2916 && (in_nmi() || in_hardirq()))) {
2917 add_page_to_zone_llist(zone, page, order);
2918 return;
2919 }
2920 pcp_trylock_prepare(UP_flags);
2921 pcp = pcp_spin_trylock(zone->per_cpu_pageset);
2922 if (pcp) {
2923 free_frozen_page_commit(zone, pcp, page, migratetype, order, fpi_flags);
2924 pcp_spin_unlock(pcp);
2925 } else {
2926 free_one_page(zone, page, pfn, order, fpi_flags);
2927 }
2928 pcp_trylock_finish(UP_flags);
2929}
2930
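/* Free a page whose reference count has already dropped to zero. */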
2931void free_frozen_pages(struct page *page, unsigned int order)
2932{
2933 __free_frozen_pages(page, order, FPI_NONE);
2934}
2935
/*
 * Free a batch of folios
 */
2939void free_unref_folios(struct folio_batch *folios)
2940{
2941 unsigned long __maybe_unused UP_flags;
2942 struct per_cpu_pages *pcp = NULL;
2943 struct zone *locked_zone = NULL;
2944 int i, j;
2945
2946
2947 for (i = 0, j = 0; i < folios->nr; i++) {
2948 struct folio *folio = folios->folios[i];
2949 unsigned long pfn = folio_pfn(folio);
2950 unsigned int order = folio_order(folio);
2951
2952 if (!free_pages_prepare(&folio->page, order))
2953 continue;
2954
2955
2956
2957
2958 if (!pcp_allowed_order(order)) {
2959 free_one_page(folio_zone(folio), &folio->page,
2960 pfn, order, FPI_NONE);
2961 continue;
2962 }
2963 folio->private = (void *)(unsigned long)order;
2964 if (j != i)
2965 folios->folios[j] = folio;
2966 j++;
2967 }
2968 folios->nr = j;
2969
2970 for (i = 0; i < folios->nr; i++) {
2971 struct folio *folio = folios->folios[i];
2972 struct zone *zone = folio_zone(folio);
2973 unsigned long pfn = folio_pfn(folio);
2974 unsigned int order = (unsigned long)folio->private;
2975 int migratetype;
2976
2977 folio->private = NULL;
2978 migratetype = get_pfnblock_migratetype(&folio->page, pfn);
2979
2980
2981 if (zone != locked_zone ||
2982 is_migrate_isolate(migratetype)) {
2983 if (pcp) {
2984 pcp_spin_unlock(pcp);
2985 pcp_trylock_finish(UP_flags);
2986 locked_zone = NULL;
2987 pcp = NULL;
2988 }
2989
2990
2991
2992
2993
2994 if (is_migrate_isolate(migratetype)) {
2995 free_one_page(zone, &folio->page, pfn,
2996 order, FPI_NONE);
2997 continue;
2998 }
2999
3000
3001
3002
3003
3004 pcp_trylock_prepare(UP_flags);
3005 pcp = pcp_spin_trylock(zone->per_cpu_pageset);
3006 if (unlikely(!pcp)) {
3007 pcp_trylock_finish(UP_flags);
3008 free_one_page(zone, &folio->page, pfn,
3009 order, FPI_NONE);
3010 continue;
3011 }
3012 locked_zone = zone;
3013 }
3014
3015
3016
3017
3018
3019 if (unlikely(migratetype >= MIGRATE_PCPTYPES))
3020 migratetype = MIGRATE_MOVABLE;
3021
3022 trace_mm_page_free_batched(&folio->page);
3023 free_frozen_page_commit(zone, pcp, &folio->page, migratetype,
3024 order, FPI_NONE);
3025 }
3026
3027 if (pcp) {
3028 pcp_spin_unlock(pcp);
3029 pcp_trylock_finish(UP_flags);
3030 }
3031 folio_batch_reinit(folios);
3032}
3033
/*
 * split_page takes a non-compound higher-order page, and splits it into
 * n (1<<order) sub-pages: page[0..n]
 * Each sub-page must be freed individually.
 *
 * Note: this is probably too low level an operation for use in drivers.
 * Please consult with lkml before using this in your driver.
 */
3042void split_page(struct page *page, unsigned int order)
3043{
3044 int i;
3045
3046 VM_BUG_ON_PAGE(PageCompound(page), page);
3047 VM_BUG_ON_PAGE(!page_count(page), page);
3048
3049 for (i = 1; i < (1 << order); i++)
3050 set_page_refcounted(page + i);
3051 split_page_owner(page, order, 0);
3052 pgalloc_tag_split(page_folio(page), order, 0);
3053 split_page_memcg(page, order);
3054}
3055EXPORT_SYMBOL_GPL(split_page);
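
/*
 * Illustrative sketch (not part of the kernel sources): a caller that
 * allocated a non-compound high-order page and wants to hand out the
 * sub-pages individually might do something like:
 *
 *	struct page *page = alloc_pages(GFP_KERNEL, 2);
 *	int i;
 *
 *	if (page) {
 *		split_page(page, 2);
 *		for (i = 0; i < 4; i++)
 *			__free_page(page + i);
 *	}
 *
 * After split_page(), each of the 1 << order sub-pages carries its own
 * reference count and can be freed independently.
 */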
3056
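/*
 * Pull @page out of the buddy freelists for the caller's exclusive use.
 * Returns the number of base pages isolated (1UL << order) on success, or
 * 0 if doing so would push the zone below its minimum watermark.
 */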
3057int __isolate_free_page(struct page *page, unsigned int order)
3058{
3059 struct zone *zone = page_zone(page);
3060 int mt = get_pageblock_migratetype(page);
3061
3062 if (!is_migrate_isolate(mt)) {
3063 unsigned long watermark;
3064
3065
3066
3067
3068
3069
3070 watermark = zone->_watermark[WMARK_MIN] + (1UL << order);
3071 if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
3072 return 0;
3073 }
3074
3075 del_page_from_free_list(page, zone, order, mt);
3076
3077
3078
3079
3080
3081 if (order >= pageblock_order - 1) {
3082 struct page *endpage = page + (1 << order) - 1;
3083 for (; page < endpage; page += pageblock_nr_pages) {
3084 int mt = get_pageblock_migratetype(page);
3085
3086
3087
3088
3089 if (migratetype_is_mergeable(mt))
3090 move_freepages_block(zone, page, mt,
3091 MIGRATE_MOVABLE);
3092 }
3093 }
3094
3095 return 1UL << order;
3096}
3097
/**
 * __putback_isolated_page - Return a now-isolated page back where we got it
 * @page: Page that was isolated
 * @order: Order of the isolated page
 * @mt: The page's pageblock's migratetype
 *
 * This function is meant to return a page pulled from the free lists via
 * __isolate_free_page back to the free lists they were pulled from.
 */
3107void __putback_isolated_page(struct page *page, unsigned int order, int mt)
3108{
3109 struct zone *zone = page_zone(page);
3110
	/* zone lock should be held when this function is called */
3112 lockdep_assert_held(&zone->lock);
3113
	/* Return isolated page to tail of freelist. */
3115 __free_one_page(page, page_to_pfn(page), zone, order, mt,
3116 FPI_SKIP_REPORT_NOTIFY | FPI_TO_TAIL);
3117}
3118
/*
 * Update NUMA hit/miss statistics.
 */
3122static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
3123 long nr_account)
3124{
3125#ifdef CONFIG_NUMA
3126 enum numa_stat_item local_stat = NUMA_LOCAL;
3127
3128
3129 if (!static_branch_likely(&vm_numa_stat_key))
3130 return;
3131
3132 if (zone_to_nid(z) != numa_node_id())
3133 local_stat = NUMA_OTHER;
3134
3135 if (zone_to_nid(z) == zone_to_nid(preferred_zone))
3136 __count_numa_events(z, NUMA_HIT, nr_account);
3137 else {
3138 __count_numa_events(z, NUMA_MISS, nr_account);
3139 __count_numa_events(preferred_zone, NUMA_FOREIGN, nr_account);
3140 }
3141 __count_numa_events(z, local_stat, nr_account);
3142#endif
3143}
3144
3145static __always_inline
3146struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
3147 unsigned int order, unsigned int alloc_flags,
3148 int migratetype)
3149{
3150 struct page *page;
3151 unsigned long flags;
3152
3153 do {
3154 page = NULL;
3155 if (unlikely(alloc_flags & ALLOC_TRYLOCK)) {
3156 if (!spin_trylock_irqsave(&zone->lock, flags))
3157 return NULL;
3158 } else {
3159 spin_lock_irqsave(&zone->lock, flags);
3160 }
3161 if (alloc_flags & ALLOC_HIGHATOMIC)
3162 page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
3163 if (!page) {
3164 enum rmqueue_mode rmqm = RMQUEUE_NORMAL;
3165
3166 page = __rmqueue(zone, order, migratetype, alloc_flags, &rmqm);
3167
3168
3169
3170
3171
3172
3173
3174 if (!page && (alloc_flags & (ALLOC_OOM|ALLOC_NON_BLOCK)))
3175 page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
3176
3177 if (!page) {
3178 spin_unlock_irqrestore(&zone->lock, flags);
3179 return NULL;
3180 }
3181 }
3182 spin_unlock_irqrestore(&zone->lock, flags);
3183 } while (check_new_pages(page, order));
3184
3185 __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
3186 zone_statistics(preferred_zone, zone, 1);
3187
3188 return page;
3189}
3190
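/*
 * Work out how many pages to request from the buddy allocator when
 * refilling an empty pcp list, scaling the batch by the allocation order
 * and auto-tuning pcp->high and pcp->alloc_factor along the way.
 */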
3191static int nr_pcp_alloc(struct per_cpu_pages *pcp, struct zone *zone, int order)
3192{
3193 int high, base_batch, batch, max_nr_alloc;
3194 int high_max, high_min;
3195
3196 base_batch = READ_ONCE(pcp->batch);
3197 high_min = READ_ONCE(pcp->high_min);
3198 high_max = READ_ONCE(pcp->high_max);
3199 high = pcp->high = clamp(pcp->high, high_min, high_max);
3200
3201
3202 if (unlikely(high < base_batch))
3203 return 1;
3204
3205 if (order)
3206 batch = base_batch;
3207 else
3208 batch = (base_batch << pcp->alloc_factor);
3209
3210
3211
3212
3213
3214 if (high_min != high_max && !test_bit(ZONE_BELOW_HIGH, &zone->flags))
3215 high = pcp->high = min(high + batch, high_max);
3216
3217 if (!order) {
3218 max_nr_alloc = max(high - pcp->count - base_batch, base_batch);
3219
3220
3221
3222
3223 if (batch <= max_nr_alloc &&
3224 pcp->alloc_factor < CONFIG_PCP_BATCH_SCALE_MAX)
3225 pcp->alloc_factor++;
3226 batch = min(batch, max_nr_alloc);
3227 }
3228
3229
3230
3231
3232
3233
3234
3235 if (batch > 1)
3236 batch = max(batch >> order, 2);
3237
3238 return batch;
3239}
3240
/* Remove page from the per-cpu list, caller must protect the list */
3242static inline
3243struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
3244 int migratetype,
3245 unsigned int alloc_flags,
3246 struct per_cpu_pages *pcp,
3247 struct list_head *list)
3248{
3249 struct page *page;
3250
3251 do {
3252 if (list_empty(list)) {
3253 int batch = nr_pcp_alloc(pcp, zone, order);
3254 int alloced;
3255
3256 alloced = rmqueue_bulk(zone, order,
3257 batch, list,
3258 migratetype, alloc_flags);
3259
3260 pcp->count += alloced << order;
3261 if (unlikely(list_empty(list)))
3262 return NULL;
3263 }
3264
3265 page = list_first_entry(list, struct page, pcp_list);
3266 list_del(&page->pcp_list);
3267 pcp->count -= 1 << order;
3268 } while (check_new_pages(page, order));
3269
3270 return page;
3271}
3272
/* Lock and remove page from the per-cpu list */
3274static struct page *rmqueue_pcplist(struct zone *preferred_zone,
3275 struct zone *zone, unsigned int order,
3276 int migratetype, unsigned int alloc_flags)
3277{
3278 struct per_cpu_pages *pcp;
3279 struct list_head *list;
3280 struct page *page;
3281 unsigned long __maybe_unused UP_flags;
3282
3283
3284 pcp_trylock_prepare(UP_flags);
3285 pcp = pcp_spin_trylock(zone->per_cpu_pageset);
3286 if (!pcp) {
3287 pcp_trylock_finish(UP_flags);
3288 return NULL;
3289 }
3290
3291
3292
3293
3294
3295
3296 pcp->free_count >>= 1;
3297 list = &pcp->lists[order_to_pindex(migratetype, order)];
3298 page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
3299 pcp_spin_unlock(pcp);
3300 pcp_trylock_finish(UP_flags);
3301 if (page) {
3302 __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
3303 zone_statistics(preferred_zone, zone, 1);
3304 }
3305 return page;
3306}
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319__no_sanitize_memory
3320static inline
3321struct page *rmqueue(struct zone *preferred_zone,
3322 struct zone *zone, unsigned int order,
3323 gfp_t gfp_flags, unsigned int alloc_flags,
3324 int migratetype)
3325{
3326 struct page *page;
3327
3328 if (likely(pcp_allowed_order(order))) {
3329 page = rmqueue_pcplist(preferred_zone, zone, order,
3330 migratetype, alloc_flags);
3331 if (likely(page))
3332 goto out;
3333 }
3334
3335 page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
3336 migratetype);
3337
3338out:
3339
3340 if ((alloc_flags & ALLOC_KSWAPD) &&
3341 unlikely(test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags))) {
3342 clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
3343 wakeup_kswapd(zone, 0, 0, zone_idx(zone));
3344 }
3345
3346 VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
3347 return page;
3348}
3349
/*
 * Reserve the pageblock(s) surrounding an allocation request for
 * exclusive use of high-order atomic allocations if there are no
 * empty page blocks that contain a page with a suitable order.
 */
3355static void reserve_highatomic_pageblock(struct page *page, int order,
3356 struct zone *zone)
3357{
3358 int mt;
3359 unsigned long max_managed, flags;
3360
3361
3362
3363
3364
3365
3366
3367 if ((zone_managed_pages(zone) / 100) < pageblock_nr_pages)
3368 return;
3369 max_managed = ALIGN((zone_managed_pages(zone) / 100), pageblock_nr_pages);
3370 if (zone->nr_reserved_highatomic >= max_managed)
3371 return;
3372
3373 spin_lock_irqsave(&zone->lock, flags);
3374
3375
3376 if (zone->nr_reserved_highatomic >= max_managed)
3377 goto out_unlock;
3378
3379
3380 mt = get_pageblock_migratetype(page);
3381
3382 if (!migratetype_is_mergeable(mt))
3383 goto out_unlock;
3384
3385 if (order < pageblock_order) {
3386 if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1)
3387 goto out_unlock;
3388 zone->nr_reserved_highatomic += pageblock_nr_pages;
3389 } else {
3390 change_pageblock_range(page, order, MIGRATE_HIGHATOMIC);
3391 zone->nr_reserved_highatomic += 1 << order;
3392 }
3393
3394out_unlock:
3395 spin_unlock_irqrestore(&zone->lock, flags);
3396}
3397
/*
 * Used when an allocation is about to fail under memory pressure. This
 * potentially hurts the reliability of high-order atomic allocations when
 * under intense memory pressure but failed atomic allocations should be
 * easier to recover from than an OOM.
 *
 * If @force is true, try to unreserve pageblocks even when that would
 * leave less than one pageblock worth of highatomic reserve.
 */
3407static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
3408 bool force)
3409{
3410 struct zonelist *zonelist = ac->zonelist;
3411 unsigned long flags;
3412 struct zoneref *z;
3413 struct zone *zone;
3414 struct page *page;
3415 int order;
3416 int ret;
3417
3418 for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
3419 ac->nodemask) {
3420
3421
3422
3423
3424 if (!force && zone->nr_reserved_highatomic <=
3425 pageblock_nr_pages)
3426 continue;
3427
3428 spin_lock_irqsave(&zone->lock, flags);
3429 for (order = 0; order < NR_PAGE_ORDERS; order++) {
3430 struct free_area *area = &(zone->free_area[order]);
3431 unsigned long size;
3432
3433 page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
3434 if (!page)
3435 continue;
3436
3437 size = max(pageblock_nr_pages, 1UL << order);
3438
3439
3440
3441
3442
3443
3444
3445 if (WARN_ON_ONCE(size > zone->nr_reserved_highatomic))
3446 size = zone->nr_reserved_highatomic;
3447 zone->nr_reserved_highatomic -= size;
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458 if (order < pageblock_order)
3459 ret = move_freepages_block(zone, page,
3460 MIGRATE_HIGHATOMIC,
3461 ac->migratetype);
3462 else {
3463 move_to_free_list(page, zone, order,
3464 MIGRATE_HIGHATOMIC,
3465 ac->migratetype);
3466 change_pageblock_range(page, order,
3467 ac->migratetype);
3468 ret = 1;
3469 }
3470
3471
3472
3473
3474 WARN_ON_ONCE(ret == -1);
3475 if (ret > 0) {
3476 spin_unlock_irqrestore(&zone->lock, flags);
3477 return ret;
3478 }
3479 }
3480 spin_unlock_irqrestore(&zone->lock, flags);
3481 }
3482
3483 return false;
3484}
3485
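/*
 * Number of "free" pages this allocation cannot actually use: the
 * worst-case rounding slack for an order-sized block, plus the
 * highatomic reserve (unless the request may dip into reserves) and the
 * free CMA pages (unless ALLOC_CMA is set).
 */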
3486static inline long __zone_watermark_unusable_free(struct zone *z,
3487 unsigned int order, unsigned int alloc_flags)
3488{
3489 long unusable_free = (1 << order) - 1;
3490
3491
3492
3493
3494
3495 if (likely(!(alloc_flags & ALLOC_RESERVES)))
3496 unusable_free += READ_ONCE(z->nr_free_highatomic);
3497
3498#ifdef CONFIG_CMA
3499
3500 if (!(alloc_flags & ALLOC_CMA))
3501 unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
3502#endif
3503
3504 return unusable_free;
3505}
3506
/*
 * Return true if free base pages are above 'mark'. For high-order checks it
 * will return true if the order-0 watermark is reached and there is at least
 * one free page of a suitable size. Checking now avoids taking the zone lock
 * to check in the allocation paths if no pages are free.
 */
3513bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
3514 int highest_zoneidx, unsigned int alloc_flags,
3515 long free_pages)
3516{
3517 long min = mark;
3518 int o;
3519
3520
3521 free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
3522
3523 if (unlikely(alloc_flags & ALLOC_RESERVES)) {
3524
3525
3526
3527
3528 if (alloc_flags & ALLOC_MIN_RESERVE) {
3529 min -= min / 2;
3530
3531
3532
3533
3534
3535
3536
3537
3538 if (alloc_flags & ALLOC_NON_BLOCK)
3539 min -= min / 4;
3540 }
3541
3542
3543
3544
3545
3546
3547
3548 if (alloc_flags & ALLOC_OOM)
3549 min -= min / 2;
3550 }
3551
3552
3553
3554
3555
3556
3557 if (free_pages <= min + z->lowmem_reserve[highest_zoneidx])
3558 return false;
3559
3560
3561 if (!order)
3562 return true;
3563
3564
3565 for (o = order; o < NR_PAGE_ORDERS; o++) {
3566 struct free_area *area = &z->free_area[o];
3567 int mt;
3568
3569 if (!area->nr_free)
3570 continue;
3571
3572 for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
3573 if (!free_area_empty(area, mt))
3574 return true;
3575 }
3576
3577#ifdef CONFIG_CMA
3578 if ((alloc_flags & ALLOC_CMA) &&
3579 !free_area_empty(area, MIGRATE_CMA)) {
3580 return true;
3581 }
3582#endif
3583 if ((alloc_flags & (ALLOC_HIGHATOMIC|ALLOC_OOM)) &&
3584 !free_area_empty(area, MIGRATE_HIGHATOMIC)) {
3585 return true;
3586 }
3587 }
3588 return false;
3589}
3590
3591bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
3592 int highest_zoneidx, unsigned int alloc_flags)
3593{
3594 return __zone_watermark_ok(z, order, mark, highest_zoneidx, alloc_flags,
3595 zone_page_state(z, NR_FREE_PAGES));
3596}
3597
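/*
 * Cheaper variant of __zone_watermark_ok(): for order-0 requests, first
 * check whether the free pages minus the unusable portion comfortably
 * clear the mark plus the lowmem reserve, and only fall back to the full
 * per-order check (and the boost-ignoring retry for ALLOC_MIN_RESERVE)
 * when that quick test fails.
 */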
3598static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
3599 unsigned long mark, int highest_zoneidx,
3600 unsigned int alloc_flags, gfp_t gfp_mask)
3601{
3602 long free_pages;
3603
3604 free_pages = zone_page_state(z, NR_FREE_PAGES);
3605
3606
3607
3608
3609
3610 if (!order) {
3611 long usable_free;
3612 long reserved;
3613
3614 usable_free = free_pages;
3615 reserved = __zone_watermark_unusable_free(z, 0, alloc_flags);
3616
3617
3618 usable_free -= min(usable_free, reserved);
3619 if (usable_free > mark + z->lowmem_reserve[highest_zoneidx])
3620 return true;
3621 }
3622
3623 if (__zone_watermark_ok(z, order, mark, highest_zoneidx, alloc_flags,
3624 free_pages))
3625 return true;
3626
3627
3628
3629
3630
3631
3632
3633 if (unlikely(!order && (alloc_flags & ALLOC_MIN_RESERVE) && z->watermark_boost
3634 && ((alloc_flags & ALLOC_WMARK_MASK) == WMARK_MIN))) {
3635 mark = z->_watermark[WMARK_MIN];
3636 return __zone_watermark_ok(z, order, mark, highest_zoneidx,
3637 alloc_flags, free_pages);
3638 }
3639
3640 return false;
3641}
3642
3643#ifdef CONFIG_NUMA
3644int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
3645
3646static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
3647{
3648 return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <=
3649 node_reclaim_distance;
3650}
3651#else
3652static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
3653{
3654 return true;
3655}
3656#endif
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666static inline unsigned int
3667alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
3668{
3669 unsigned int alloc_flags;
3670
3671
3672
3673
3674
3675 alloc_flags = (__force int) (gfp_mask & __GFP_KSWAPD_RECLAIM);
3676
3677 if (defrag_mode) {
3678 alloc_flags |= ALLOC_NOFRAGMENT;
3679 return alloc_flags;
3680 }
3681
3682#ifdef CONFIG_ZONE_DMA32
3683 if (!zone)
3684 return alloc_flags;
3685
3686 if (zone_idx(zone) != ZONE_NORMAL)
3687 return alloc_flags;
3688
3689
3690
3691
3692
3693
3694 BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1);
3695 if (nr_online_nodes > 1 && !populated_zone(--zone))
3696 return alloc_flags;
3697
3698 alloc_flags |= ALLOC_NOFRAGMENT;
3699#endif
3700 return alloc_flags;
3701}
3702
/* Must be called after current_gfp_context() which can change gfp_mask */
3704static inline unsigned int gfp_to_alloc_flags_cma(gfp_t gfp_mask,
3705 unsigned int alloc_flags)
3706{
3707#ifdef CONFIG_CMA
3708 if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
3709 alloc_flags |= ALLOC_CMA;
3710#endif
3711 return alloc_flags;
3712}
3713
/*
 * get_page_from_freelist goes through the zonelist trying to allocate
 * a page.
 */
3718static struct page *
3719get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
3720 const struct alloc_context *ac)
3721{
3722 struct zoneref *z;
3723 struct zone *zone;
3724 struct pglist_data *last_pgdat = NULL;
3725 bool last_pgdat_dirty_ok = false;
3726 bool no_fallback;
3727
3728retry:
3729
3730
3731
3732
3733 no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
3734 z = ac->preferred_zoneref;
3735 for_next_zone_zonelist_nodemask(zone, z, ac->highest_zoneidx,
3736 ac->nodemask) {
3737 struct page *page;
3738 unsigned long mark;
3739
3740 if (cpusets_enabled() &&
3741 (alloc_flags & ALLOC_CPUSET) &&
3742 !__cpuset_zone_allowed(zone, gfp_mask))
3743 continue;
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763 if (ac->spread_dirty_pages) {
3764 if (last_pgdat != zone->zone_pgdat) {
3765 last_pgdat = zone->zone_pgdat;
3766 last_pgdat_dirty_ok = node_dirty_ok(zone->zone_pgdat);
3767 }
3768
3769 if (!last_pgdat_dirty_ok)
3770 continue;
3771 }
3772
3773 if (no_fallback && !defrag_mode && nr_online_nodes > 1 &&
3774 zone != zonelist_zone(ac->preferred_zoneref)) {
3775 int local_nid;
3776
3777
3778
3779
3780
3781
3782 local_nid = zonelist_node_idx(ac->preferred_zoneref);
3783 if (zone_to_nid(zone) != local_nid) {
3784 alloc_flags &= ~ALLOC_NOFRAGMENT;
3785 goto retry;
3786 }
3787 }
3788
3789 cond_accept_memory(zone, order, alloc_flags);
3790
3791
3792
3793
3794
3795
3796
3797
3798 if (test_bit(ZONE_BELOW_HIGH, &zone->flags))
3799 goto check_alloc_wmark;
3800
3801 mark = high_wmark_pages(zone);
3802 if (zone_watermark_fast(zone, order, mark,
3803 ac->highest_zoneidx, alloc_flags,
3804 gfp_mask))
3805 goto try_this_zone;
3806 else
3807 set_bit(ZONE_BELOW_HIGH, &zone->flags);
3808
3809check_alloc_wmark:
3810 mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
3811 if (!zone_watermark_fast(zone, order, mark,
3812 ac->highest_zoneidx, alloc_flags,
3813 gfp_mask)) {
3814 int ret;
3815
3816 if (cond_accept_memory(zone, order, alloc_flags))
3817 goto try_this_zone;
3818
3819
3820
3821
3822
3823 if (deferred_pages_enabled()) {
3824 if (_deferred_grow_zone(zone, order))
3825 goto try_this_zone;
3826 }
3827
3828 BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
3829 if (alloc_flags & ALLOC_NO_WATERMARKS)
3830 goto try_this_zone;
3831
3832 if (!node_reclaim_enabled() ||
3833 !zone_allows_reclaim(zonelist_zone(ac->preferred_zoneref), zone))
3834 continue;
3835
3836 ret = node_reclaim(zone->zone_pgdat, gfp_mask, order);
3837 switch (ret) {
3838 case NODE_RECLAIM_NOSCAN:
3839
3840 continue;
3841 case NODE_RECLAIM_FULL:
3842
3843 continue;
3844 default:
3845
3846 if (zone_watermark_ok(zone, order, mark,
3847 ac->highest_zoneidx, alloc_flags))
3848 goto try_this_zone;
3849
3850 continue;
3851 }
3852 }
3853
3854try_this_zone:
3855 page = rmqueue(zonelist_zone(ac->preferred_zoneref), zone, order,
3856 gfp_mask, alloc_flags, ac->migratetype);
3857 if (page) {
3858 prep_new_page(page, order, gfp_mask, alloc_flags);
3859
3860
3861
3862
3863
3864 if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
3865 reserve_highatomic_pageblock(page, order, zone);
3866
3867 return page;
3868 } else {
3869 if (cond_accept_memory(zone, order, alloc_flags))
3870 goto try_this_zone;
3871
3872
3873 if (deferred_pages_enabled()) {
3874 if (_deferred_grow_zone(zone, order))
3875 goto try_this_zone;
3876 }
3877 }
3878 }
3879
3880
3881
3882
3883
3884 if (no_fallback && !defrag_mode) {
3885 alloc_flags &= ~ALLOC_NOFRAGMENT;
3886 goto retry;
3887 }
3888
3889 return NULL;
3890}
3891
3892static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
3893{
3894 unsigned int filter = SHOW_MEM_FILTER_NODES;
3895
3896
3897
3898
3899
3900
3901 if (!(gfp_mask & __GFP_NOMEMALLOC))
3902 if (tsk_is_oom_victim(current) ||
3903 (current->flags & (PF_MEMALLOC | PF_EXITING)))
3904 filter &= ~SHOW_MEM_FILTER_NODES;
3905 if (!in_task() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
3906 filter &= ~SHOW_MEM_FILTER_NODES;
3907
3908 __show_mem(filter, nodemask, gfp_zone(gfp_mask));
3909}
3910
3911void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
3912{
3913 struct va_format vaf;
3914 va_list args;
3915 static DEFINE_RATELIMIT_STATE(nopage_rs, 10*HZ, 1);
3916
3917 if ((gfp_mask & __GFP_NOWARN) ||
3918 !__ratelimit(&nopage_rs) ||
3919 ((gfp_mask & __GFP_DMA) && !has_managed_dma()))
3920 return;
3921
3922 va_start(args, fmt);
3923 vaf.fmt = fmt;
3924 vaf.va = &args;
3925 pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl",
3926 current->comm, &vaf, gfp_mask, &gfp_mask,
3927 nodemask_pr_args(nodemask));
3928 va_end(args);
3929
3930 cpuset_print_current_mems_allowed();
3931 pr_cont("\n");
3932 dump_stack();
3933 warn_alloc_show_mem(gfp_mask, nodemask);
3934}
3935
3936static inline struct page *
3937__alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order,
3938 unsigned int alloc_flags,
3939 const struct alloc_context *ac)
3940{
3941 struct page *page;
3942
3943 page = get_page_from_freelist(gfp_mask, order,
3944 alloc_flags|ALLOC_CPUSET, ac);
3945
3946
3947
3948
3949 if (!page)
3950 page = get_page_from_freelist(gfp_mask, order,
3951 alloc_flags, ac);
3952 return page;
3953}
3954
3955static inline struct page *
3956__alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
3957 const struct alloc_context *ac, unsigned long *did_some_progress)
3958{
3959 struct oom_control oc = {
3960 .zonelist = ac->zonelist,
3961 .nodemask = ac->nodemask,
3962 .memcg = NULL,
3963 .gfp_mask = gfp_mask,
3964 .order = order,
3965 };
3966 struct page *page;
3967
3968 *did_some_progress = 0;
3969
3970
3971
3972
3973
3974 if (!mutex_trylock(&oom_lock)) {
3975 *did_some_progress = 1;
3976 schedule_timeout_uninterruptible(1);
3977 return NULL;
3978 }
3979
3980
3981
3982
3983
3984
3985
3986
3987 page = get_page_from_freelist((gfp_mask | __GFP_HARDWALL) &
3988 ~__GFP_DIRECT_RECLAIM, order,
3989 ALLOC_WMARK_HIGH|ALLOC_CPUSET, ac);
3990 if (page)
3991 goto out;
3992
3993
3994 if (current->flags & PF_DUMPCORE)
3995 goto out;
3996
3997 if (order > PAGE_ALLOC_COSTLY_ORDER)
3998 goto out;
3999
4000
4001
4002
4003
4004
4005
4006
4007 if (gfp_mask & (__GFP_RETRY_MAYFAIL | __GFP_THISNODE))
4008 goto out;
4009
4010 if (ac->highest_zoneidx < ZONE_NORMAL)
4011 goto out;
4012 if (pm_suspended_storage())
4013 goto out;
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025 if (out_of_memory(&oc) ||
4026 WARN_ON_ONCE_GFP(gfp_mask & __GFP_NOFAIL, gfp_mask)) {
4027 *did_some_progress = 1;
4028
4029
4030
4031
4032
4033 if (gfp_mask & __GFP_NOFAIL)
4034 page = __alloc_pages_cpuset_fallback(gfp_mask, order,
4035 ALLOC_NO_WATERMARKS, ac);
4036 }
4037out:
4038 mutex_unlock(&oom_lock);
4039 return page;
4040}
4041
/*
 * Maximum number of compaction retries with a progress before OOM
 * killer is considered as the only way to move forward.
 */
4046#define MAX_COMPACT_RETRIES 16
4047
4048#ifdef CONFIG_COMPACTION
4049
4050static struct page *
4051__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
4052 unsigned int alloc_flags, const struct alloc_context *ac,
4053 enum compact_priority prio, enum compact_result *compact_result)
4054{
4055 struct page *page = NULL;
4056 unsigned long pflags;
4057 unsigned int noreclaim_flag;
4058
4059 if (!order)
4060 return NULL;
4061
4062 psi_memstall_enter(&pflags);
4063 delayacct_compact_start();
4064 noreclaim_flag = memalloc_noreclaim_save();
4065
4066 *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
4067 prio, &page);
4068
4069 memalloc_noreclaim_restore(noreclaim_flag);
4070 psi_memstall_leave(&pflags);
4071 delayacct_compact_end();
4072
4073 if (*compact_result == COMPACT_SKIPPED)
4074 return NULL;
4075
4076
4077
4078
4079 count_vm_event(COMPACTSTALL);
4080
4081
4082 if (page)
4083 prep_new_page(page, order, gfp_mask, alloc_flags);
4084
4085
4086 if (!page)
4087 page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
4088
4089 if (page) {
4090 struct zone *zone = page_zone(page);
4091
4092 zone->compact_blockskip_flush = false;
4093 compaction_defer_reset(zone, order, true);
4094 count_vm_event(COMPACTSUCCESS);
4095 return page;
4096 }
4097
4098
4099
4100
4101
4102 count_vm_event(COMPACTFAIL);
4103
4104 cond_resched();
4105
4106 return NULL;
4107}
4108
4109static inline bool
4110should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
4111 enum compact_result compact_result,
4112 enum compact_priority *compact_priority,
4113 int *compaction_retries)
4114{
4115 int max_retries = MAX_COMPACT_RETRIES;
4116 int min_priority;
4117 bool ret = false;
4118 int retries = *compaction_retries;
4119 enum compact_priority priority = *compact_priority;
4120
4121 if (!order)
4122 return false;
4123
4124 if (fatal_signal_pending(current))
4125 return false;
4126
4127
4128
4129
4130
4131 if (compact_result == COMPACT_SKIPPED) {
4132 ret = compaction_zonelist_suitable(ac, order, alloc_flags);
4133 goto out;
4134 }
4135
4136
4137
4138
4139
4140 if (compact_result == COMPACT_SUCCESS) {
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150 if (order > PAGE_ALLOC_COSTLY_ORDER)
4151 max_retries /= 4;
4152
4153 if (++(*compaction_retries) <= max_retries) {
4154 ret = true;
4155 goto out;
4156 }
4157 }
4158
4159
4160
4161
4162 min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
4163 MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
4164
4165 if (*compact_priority > min_priority) {
4166 (*compact_priority)--;
4167 *compaction_retries = 0;
4168 ret = true;
4169 }
4170out:
4171 trace_compact_retry(order, priority, compact_result, retries, max_retries, ret);
4172 return ret;
4173}
4174#else
4175static inline struct page *
4176__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
4177 unsigned int alloc_flags, const struct alloc_context *ac,
4178 enum compact_priority prio, enum compact_result *compact_result)
4179{
4180 *compact_result = COMPACT_SKIPPED;
4181 return NULL;
4182}
4183
4184static inline bool
4185should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
4186 enum compact_result compact_result,
4187 enum compact_priority *compact_priority,
4188 int *compaction_retries)
4189{
4190 struct zone *zone;
4191 struct zoneref *z;
4192
4193 if (!order || order > PAGE_ALLOC_COSTLY_ORDER)
4194 return false;
4195
4196
4197
4198
4199
4200
4201
4202 for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
4203 ac->highest_zoneidx, ac->nodemask) {
4204 if (zone_watermark_ok(zone, 0, min_wmark_pages(zone),
4205 ac->highest_zoneidx, alloc_flags))
4206 return true;
4207 }
4208 return false;
4209}
4210#endif
4211
4212#ifdef CONFIG_LOCKDEP
4213static struct lockdep_map __fs_reclaim_map =
4214 STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map);
4215
4216static bool __need_reclaim(gfp_t gfp_mask)
4217{
4218
4219 if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
4220 return false;
4221
4222
4223 if (current->flags & PF_MEMALLOC)
4224 return false;
4225
4226 if (gfp_mask & __GFP_NOLOCKDEP)
4227 return false;
4228
4229 return true;
4230}
4231
4232void __fs_reclaim_acquire(unsigned long ip)
4233{
4234 lock_acquire_exclusive(&__fs_reclaim_map, 0, 0, NULL, ip);
4235}
4236
4237void __fs_reclaim_release(unsigned long ip)
4238{
4239 lock_release(&__fs_reclaim_map, ip);
4240}
4241
4242void fs_reclaim_acquire(gfp_t gfp_mask)
4243{
4244 gfp_mask = current_gfp_context(gfp_mask);
4245
4246 if (__need_reclaim(gfp_mask)) {
4247 if (gfp_mask & __GFP_FS)
4248 __fs_reclaim_acquire(_RET_IP_);
4249
4250#ifdef CONFIG_MMU_NOTIFIER
4251 lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
4252 lock_map_release(&__mmu_notifier_invalidate_range_start_map);
4253#endif
4254
4255 }
4256}
4257EXPORT_SYMBOL_GPL(fs_reclaim_acquire);
4258
4259void fs_reclaim_release(gfp_t gfp_mask)
4260{
4261 gfp_mask = current_gfp_context(gfp_mask);
4262
4263 if (__need_reclaim(gfp_mask)) {
4264 if (gfp_mask & __GFP_FS)
4265 __fs_reclaim_release(_RET_IP_);
4266 }
4267}
4268EXPORT_SYMBOL_GPL(fs_reclaim_release);
4269#endif
4270
4271
4272
4273
4274
4275
4276
4277static DEFINE_SEQLOCK(zonelist_update_seq);
4278
4279static unsigned int zonelist_iter_begin(void)
4280{
4281 if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
4282 return read_seqbegin(&zonelist_update_seq);
4283
4284 return 0;
4285}
4286
4287static unsigned int check_retry_zonelist(unsigned int seq)
4288{
4289 if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
4290 return read_seqretry(&zonelist_update_seq, seq);
4291
4292 return seq;
4293}
4294
/* Perform direct synchronous page reclaim */
4296static unsigned long
4297__perform_reclaim(gfp_t gfp_mask, unsigned int order,
4298 const struct alloc_context *ac)
4299{
4300 unsigned int noreclaim_flag;
4301 unsigned long progress;
4302
4303 cond_resched();
4304
4305
4306 cpuset_memory_pressure_bump();
4307 fs_reclaim_acquire(gfp_mask);
4308 noreclaim_flag = memalloc_noreclaim_save();
4309
4310 progress = try_to_free_pages(ac->zonelist, order, gfp_mask,
4311 ac->nodemask);
4312
4313 memalloc_noreclaim_restore(noreclaim_flag);
4314 fs_reclaim_release(gfp_mask);
4315
4316 cond_resched();
4317
4318 return progress;
4319}
4320
/* The really slow allocator path where we enter direct reclaim */
4322static inline struct page *
4323__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
4324 unsigned int alloc_flags, const struct alloc_context *ac,
4325 unsigned long *did_some_progress)
4326{
4327 struct page *page = NULL;
4328 unsigned long pflags;
4329 bool drained = false;
4330
4331 psi_memstall_enter(&pflags);
4332 *did_some_progress = __perform_reclaim(gfp_mask, order, ac);
4333 if (unlikely(!(*did_some_progress)))
4334 goto out;
4335
4336retry:
4337 page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
4338
4339
4340
4341
4342
4343
4344 if (!page && !drained) {
4345 unreserve_highatomic_pageblock(ac, false);
4346 drain_all_pages(NULL);
4347 drained = true;
4348 goto retry;
4349 }
4350out:
4351 psi_memstall_leave(&pflags);
4352
4353 return page;
4354}
4355
4356static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
4357 const struct alloc_context *ac)
4358{
4359 struct zoneref *z;
4360 struct zone *zone;
4361 pg_data_t *last_pgdat = NULL;
4362 enum zone_type highest_zoneidx = ac->highest_zoneidx;
4363 unsigned int reclaim_order;
4364
4365 if (defrag_mode)
4366 reclaim_order = max(order, pageblock_order);
4367 else
4368 reclaim_order = order;
4369
4370 for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, highest_zoneidx,
4371 ac->nodemask) {
4372 if (!managed_zone(zone))
4373 continue;
4374 if (last_pgdat == zone->zone_pgdat)
4375 continue;
4376 wakeup_kswapd(zone, gfp_mask, reclaim_order, highest_zoneidx);
4377 last_pgdat = zone->zone_pgdat;
4378 }
4379}
4380
4381static inline unsigned int
4382gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
4383{
4384 unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
4385
	/*
	 * __GFP_HIGH is assumed to be the same as ALLOC_MIN_RESERVE
	 * and __GFP_KSWAPD_RECLAIM is assumed to be the same as ALLOC_KSWAPD
	 * to save two branches.
	 */
4391 BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_MIN_RESERVE);
4392 BUILD_BUG_ON(__GFP_KSWAPD_RECLAIM != (__force gfp_t) ALLOC_KSWAPD);
4393
4394
4395
4396
4397
4398
4399
4400 alloc_flags |= (__force int)
4401 (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM));
4402
4403 if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
4404
4405
4406
4407
4408 if (!(gfp_mask & __GFP_NOMEMALLOC)) {
4409 alloc_flags |= ALLOC_NON_BLOCK;
4410
4411 if (order > 0)
4412 alloc_flags |= ALLOC_HIGHATOMIC;
4413 }
4414
4415
4416
4417
4418
4419
4420 if (alloc_flags & ALLOC_MIN_RESERVE)
4421 alloc_flags &= ~ALLOC_CPUSET;
4422 } else if (unlikely(rt_or_dl_task(current)) && in_task())
4423 alloc_flags |= ALLOC_MIN_RESERVE;
4424
4425 alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
4426
4427 if (defrag_mode)
4428 alloc_flags |= ALLOC_NOFRAGMENT;
4429
4430 return alloc_flags;
4431}
4432
4433static bool oom_reserves_allowed(struct task_struct *tsk)
4434{
4435 if (!tsk_is_oom_victim(tsk))
4436 return false;
4437
4438
4439
4440
4441
4442 if (!IS_ENABLED(CONFIG_MMU) && !test_thread_flag(TIF_MEMDIE))
4443 return false;
4444
4445 return true;
4446}
4447
/*
 * Distinguish requests which really need access to full memory
 * reserves from oom victims which can live with a portion of it
 */
4452static inline int __gfp_pfmemalloc_flags(gfp_t gfp_mask)
4453{
4454 if (unlikely(gfp_mask & __GFP_NOMEMALLOC))
4455 return 0;
4456 if (gfp_mask & __GFP_MEMALLOC)
4457 return ALLOC_NO_WATERMARKS;
4458 if (in_serving_softirq() && (current->flags & PF_MEMALLOC))
4459 return ALLOC_NO_WATERMARKS;
4460 if (!in_interrupt()) {
4461 if (current->flags & PF_MEMALLOC)
4462 return ALLOC_NO_WATERMARKS;
4463 else if (oom_reserves_allowed(current))
4464 return ALLOC_OOM;
4465 }
4466
4467 return 0;
4468}
4469
4470bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
4471{
4472 return !!__gfp_pfmemalloc_flags(gfp_mask);
4473}
4474
/*
 * Checks whether it makes sense to retry the reclaim to make a forward progress
 * for the given allocation request.
 *
 * We give up when we either have tried MAX_RECLAIM_RETRIES in a row
 * without success, or when we couldn't even meet the watermark if we
 * reclaimed all remaining pages on the LRU lists.
 *
 * Returns true if a retry is viable or false to enter the oom path.
 */
4485static inline bool
4486should_reclaim_retry(gfp_t gfp_mask, unsigned order,
4487 struct alloc_context *ac, int alloc_flags,
4488 bool did_some_progress, int *no_progress_loops)
4489{
4490 struct zone *zone;
4491 struct zoneref *z;
4492 bool ret = false;
4493
4494
4495
4496
4497
4498
4499 if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
4500 *no_progress_loops = 0;
4501 else
4502 (*no_progress_loops)++;
4503
4504 if (*no_progress_loops > MAX_RECLAIM_RETRIES)
4505 goto out;
4506
4507
4508
4509
4510
4511
4512
4513
4514 for_each_zone_zonelist_nodemask(zone, z, ac->zonelist,
4515 ac->highest_zoneidx, ac->nodemask) {
4516 unsigned long available;
4517 unsigned long reclaimable;
4518 unsigned long min_wmark = min_wmark_pages(zone);
4519 bool wmark;
4520
4521 if (cpusets_enabled() &&
4522 (alloc_flags & ALLOC_CPUSET) &&
4523 !__cpuset_zone_allowed(zone, gfp_mask))
4524 continue;
4525
4526 available = reclaimable = zone_reclaimable_pages(zone);
4527 available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
4528
4529
4530
4531
4532
4533 wmark = __zone_watermark_ok(zone, order, min_wmark,
4534 ac->highest_zoneidx, alloc_flags, available);
4535 trace_reclaim_retry_zone(z, order, reclaimable,
4536 available, min_wmark, *no_progress_loops, wmark);
4537 if (wmark) {
4538 ret = true;
4539 break;
4540 }
4541 }
4542
4543
4544
4545
4546
4547
4548
4549
4550 if (current->flags & PF_WQ_WORKER)
4551 schedule_timeout_uninterruptible(1);
4552 else
4553 cond_resched();
4554out:
4555
4556 if (!ret)
4557 return unreserve_highatomic_pageblock(ac, true);
4558
4559 return ret;
4560}
4561
4562static inline bool
4563check_retry_cpuset(int cpuset_mems_cookie, struct alloc_context *ac)
4564{
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576 if (cpusets_enabled() && ac->nodemask &&
4577 !cpuset_nodemask_valid_mems_allowed(ac->nodemask)) {
4578 ac->nodemask = NULL;
4579 return true;
4580 }
4581
4582
4583
4584
4585
4586
4587
4588
4589 if (read_mems_allowed_retry(cpuset_mems_cookie))
4590 return true;
4591
4592 return false;
4593}
4594
4595static inline struct page *
4596__alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
4597 struct alloc_context *ac)
4598{
4599 bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
4600 bool can_compact = gfp_compaction_allowed(gfp_mask);
4601 bool nofail = gfp_mask & __GFP_NOFAIL;
4602 const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
4603 struct page *page = NULL;
4604 unsigned int alloc_flags;
4605 unsigned long did_some_progress;
4606 enum compact_priority compact_priority;
4607 enum compact_result compact_result;
4608 int compaction_retries;
4609 int no_progress_loops;
4610 unsigned int cpuset_mems_cookie;
4611 unsigned int zonelist_iter_cookie;
4612 int reserve_flags;
4613
4614 if (unlikely(nofail)) {
4615
4616
4617
4618
4619 WARN_ON_ONCE(order > 1);
4620
4621
4622
4623
4624 WARN_ON_ONCE(!can_direct_reclaim);
4625
4626
4627
4628
4629
4630 WARN_ON_ONCE(current->flags & PF_MEMALLOC);
4631 }
4632
4633restart:
4634 compaction_retries = 0;
4635 no_progress_loops = 0;
4636 compact_result = COMPACT_SKIPPED;
4637 compact_priority = DEF_COMPACT_PRIORITY;
4638 cpuset_mems_cookie = read_mems_allowed_begin();
4639 zonelist_iter_cookie = zonelist_iter_begin();
4640
4641
4642
4643
4644
4645
4646 alloc_flags = gfp_to_alloc_flags(gfp_mask, order);
4647
4648
4649
4650
4651
4652
4653
4654 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
4655 ac->highest_zoneidx, ac->nodemask);
4656 if (!zonelist_zone(ac->preferred_zoneref))
4657 goto nopage;
4658
4659
4660
4661
4662
4663
4664 if (cpusets_insane_config() && (gfp_mask & __GFP_HARDWALL)) {
4665 struct zoneref *z = first_zones_zonelist(ac->zonelist,
4666 ac->highest_zoneidx,
4667 &cpuset_current_mems_allowed);
4668 if (!zonelist_zone(z))
4669 goto nopage;
4670 }
4671
4672 if (alloc_flags & ALLOC_KSWAPD)
4673 wake_all_kswapds(order, gfp_mask, ac);
4674
4675
4676
4677
4678
4679 page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
4680 if (page)
4681 goto got_pg;
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692 if (can_direct_reclaim && can_compact &&
4693 (costly_order ||
4694 (order > 0 && ac->migratetype != MIGRATE_MOVABLE))
4695 && !gfp_pfmemalloc_allowed(gfp_mask)) {
4696 page = __alloc_pages_direct_compact(gfp_mask, order,
4697 alloc_flags, ac,
4698 INIT_COMPACT_PRIORITY,
4699 &compact_result);
4700 if (page)
4701 goto got_pg;
4702
4703
4704
4705
4706
4707 if (costly_order && (gfp_mask & __GFP_NORETRY)) {
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725 if (compact_result == COMPACT_SKIPPED ||
4726 compact_result == COMPACT_DEFERRED)
4727 goto nopage;
4728
4729
4730
4731
4732
4733
4734 compact_priority = INIT_COMPACT_PRIORITY;
4735 }
4736 }
4737
4738retry:
4739
4740
4741
4742
4743 if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
4744 check_retry_zonelist(zonelist_iter_cookie))
4745 goto restart;
4746
4747
4748 if (alloc_flags & ALLOC_KSWAPD)
4749 wake_all_kswapds(order, gfp_mask, ac);
4750
4751 reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
4752 if (reserve_flags)
4753 alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags) |
4754 (alloc_flags & ALLOC_KSWAPD);
4755
4756
4757
4758
4759
4760
4761 if (!(alloc_flags & ALLOC_CPUSET) || reserve_flags) {
4762 ac->nodemask = NULL;
4763 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
4764 ac->highest_zoneidx, ac->nodemask);
4765 }
4766
4767
4768 page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
4769 if (page)
4770 goto got_pg;
4771
4772
4773 if (!can_direct_reclaim)
4774 goto nopage;
4775
4776
4777 if (current->flags & PF_MEMALLOC)
4778 goto nopage;
4779
4780
4781 page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
4782 &did_some_progress);
4783 if (page)
4784 goto got_pg;
4785
4786
4787 page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac,
4788 compact_priority, &compact_result);
4789 if (page)
4790 goto got_pg;
4791
4792
4793 if (gfp_mask & __GFP_NORETRY)
4794 goto nopage;
4795
4796
4797
4798
4799
4800 if (costly_order && (!can_compact ||
4801 !(gfp_mask & __GFP_RETRY_MAYFAIL)))
4802 goto nopage;
4803
4804 if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
4805 did_some_progress > 0, &no_progress_loops))
4806 goto retry;
4807
4808
4809
4810
4811
4812
4813
4814 if (did_some_progress > 0 && can_compact &&
4815 should_compact_retry(ac, order, alloc_flags,
4816 compact_result, &compact_priority,
4817 &compaction_retries))
4818 goto retry;
4819
4820
4821 if (defrag_mode && (alloc_flags & ALLOC_NOFRAGMENT)) {
4822 alloc_flags &= ~ALLOC_NOFRAGMENT;
4823 goto retry;
4824 }
4825
4826
4827
4828
4829
4830 if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
4831 check_retry_zonelist(zonelist_iter_cookie))
4832 goto restart;
4833
4834
4835 page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress);
4836 if (page)
4837 goto got_pg;
4838
4839
4840 if (tsk_is_oom_victim(current) &&
4841 (alloc_flags & ALLOC_OOM ||
4842 (gfp_mask & __GFP_NOMEMALLOC)))
4843 goto nopage;
4844
4845
4846 if (did_some_progress) {
4847 no_progress_loops = 0;
4848 goto retry;
4849 }
4850
4851nopage:
4852
4853
4854
4855
4856 if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
4857 check_retry_zonelist(zonelist_iter_cookie))
4858 goto restart;
4859
4860
4861
4862
4863
4864 if (unlikely(nofail)) {
4865
4866
4867
4868
4869
4870 if (!can_direct_reclaim)
4871 goto fail;
4872
4873
4874
4875
4876
4877
4878
4879
4880 page = __alloc_pages_cpuset_fallback(gfp_mask, order, ALLOC_MIN_RESERVE, ac);
4881 if (page)
4882 goto got_pg;
4883
4884 cond_resched();
4885 goto retry;
4886 }
4887fail:
4888 warn_alloc(gfp_mask, ac->nodemask,
4889 "page allocation failure: order:%u", order);
4890got_pg:
4891 return page;
4892}
4893
4894static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
4895 int preferred_nid, nodemask_t *nodemask,
4896 struct alloc_context *ac, gfp_t *alloc_gfp,
4897 unsigned int *alloc_flags)
4898{
4899 ac->highest_zoneidx = gfp_zone(gfp_mask);
4900 ac->zonelist = node_zonelist(preferred_nid, gfp_mask);
4901 ac->nodemask = nodemask;
4902 ac->migratetype = gfp_migratetype(gfp_mask);
4903
4904 if (cpusets_enabled()) {
4905 *alloc_gfp |= __GFP_HARDWALL;
4906
4907
4908
4909
4910 if (in_task() && !ac->nodemask)
4911 ac->nodemask = &cpuset_current_mems_allowed;
4912 else
4913 *alloc_flags |= ALLOC_CPUSET;
4914 }
4915
4916 might_alloc(gfp_mask);
4917
4918
4919
4920
4921
4922 if (!(*alloc_flags & ALLOC_TRYLOCK) &&
4923 should_fail_alloc_page(gfp_mask, order))
4924 return false;
4925
4926 *alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, *alloc_flags);
4927
4928
4929 ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);
4930
4931
4932
4933
4934
4935
4936 ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
4937 ac->highest_zoneidx, ac->nodemask);
4938
4939 return true;
4940}
4941
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952
4953
4954
4955
4956
4957
4958unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
4959 nodemask_t *nodemask, int nr_pages,
4960 struct page **page_array)
4961{
4962 struct page *page;
4963 unsigned long __maybe_unused UP_flags;
4964 struct zone *zone;
4965 struct zoneref *z;
4966 struct per_cpu_pages *pcp;
4967 struct list_head *pcp_list;
4968 struct alloc_context ac;
4969 gfp_t alloc_gfp;
4970 unsigned int alloc_flags = ALLOC_WMARK_LOW;
4971 int nr_populated = 0, nr_account = 0;
4972
4973
4974
4975
4976
4977 while (nr_populated < nr_pages && page_array[nr_populated])
4978 nr_populated++;
4979
4980
4981 if (unlikely(nr_pages <= 0))
4982 goto out;
4983
4984
4985 if (unlikely(nr_pages - nr_populated == 0))
4986 goto out;
4987
4988
4989 if (memcg_kmem_online() && (gfp & __GFP_ACCOUNT))
4990 goto failed;
4991
4992
4993 if (nr_pages - nr_populated == 1)
4994 goto failed;
4995
4996#ifdef CONFIG_PAGE_OWNER
4997
4998
4999
5000
5001
5002
5003
5004 if (static_branch_unlikely(&page_owner_inited))
5005 goto failed;
5006#endif
5007
5008
5009 gfp &= gfp_allowed_mask;
5010 alloc_gfp = gfp;
5011 if (!prepare_alloc_pages(gfp, 0, preferred_nid, nodemask, &ac, &alloc_gfp, &alloc_flags))
5012 goto out;
5013 gfp = alloc_gfp;
5014
5015
5016 z = ac.preferred_zoneref;
5017 for_next_zone_zonelist_nodemask(zone, z, ac.highest_zoneidx, ac.nodemask) {
5018 unsigned long mark;
5019
5020 if (cpusets_enabled() && (alloc_flags & ALLOC_CPUSET) &&
5021 !__cpuset_zone_allowed(zone, gfp)) {
5022 continue;
5023 }
5024
5025 if (nr_online_nodes > 1 && zone != zonelist_zone(ac.preferred_zoneref) &&
5026 zone_to_nid(zone) != zonelist_node_idx(ac.preferred_zoneref)) {
5027 goto failed;
5028 }
5029
5030 cond_accept_memory(zone, 0, alloc_flags);
5031retry_this_zone:
5032 mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages;
5033 if (zone_watermark_fast(zone, 0, mark,
5034 zonelist_zone_idx(ac.preferred_zoneref),
5035 alloc_flags, gfp)) {
5036 break;
5037 }
5038
5039 if (cond_accept_memory(zone, 0, alloc_flags))
5040 goto retry_this_zone;
5041
5042
5043 if (deferred_pages_enabled()) {
5044 if (_deferred_grow_zone(zone, 0))
5045 goto retry_this_zone;
5046 }
5047 }
5048
5049
5050
5051
5052
5053 if (unlikely(!zone))
5054 goto failed;
5055
5056
5057 pcp_trylock_prepare(UP_flags);
5058 pcp = pcp_spin_trylock(zone->per_cpu_pageset);
5059 if (!pcp)
5060 goto failed_irq;
5061
5062
5063 pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
5064 while (nr_populated < nr_pages) {
5065
5066
5067 if (page_array[nr_populated]) {
5068 nr_populated++;
5069 continue;
5070 }
5071
5072 page = __rmqueue_pcplist(zone, 0, ac.migratetype, alloc_flags,
5073 pcp, pcp_list);
5074 if (unlikely(!page)) {
5075
5076 if (!nr_account) {
5077 pcp_spin_unlock(pcp);
5078 goto failed_irq;
5079 }
5080 break;
5081 }
5082 nr_account++;
5083
5084 prep_new_page(page, 0, gfp, 0);
5085 set_page_refcounted(page);
5086 page_array[nr_populated++] = page;
5087 }
5088
5089 pcp_spin_unlock(pcp);
5090 pcp_trylock_finish(UP_flags);
5091
5092 __count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
5093 zone_statistics(zonelist_zone(ac.preferred_zoneref), zone, nr_account);
5094
5095out:
5096 return nr_populated;
5097
5098failed_irq:
5099 pcp_trylock_finish(UP_flags);
5100
5101failed:
5102 page = __alloc_pages_noprof(gfp, 0, preferred_nid, nodemask);
5103 if (page)
5104 page_array[nr_populated++] = page;
5105 goto out;
5106}
5107EXPORT_SYMBOL_GPL(alloc_pages_bulk_noprof);
5108
/*
 * This is the 'heart' of the zoned buddy allocator.
 */
5112struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
5113 int preferred_nid, nodemask_t *nodemask)
5114{
5115 struct page *page;
5116 unsigned int alloc_flags = ALLOC_WMARK_LOW;
5117 gfp_t alloc_gfp;
5118 struct alloc_context ac = { };
5119
5120
5121
5122
5123
5124 if (WARN_ON_ONCE_GFP(order > MAX_PAGE_ORDER, gfp))
5125 return NULL;
5126
5127 gfp &= gfp_allowed_mask;
5128
5129
5130
5131
5132
5133
5134
5135 gfp = current_gfp_context(gfp);
5136 alloc_gfp = gfp;
5137 if (!prepare_alloc_pages(gfp, order, preferred_nid, nodemask, &ac,
5138 &alloc_gfp, &alloc_flags))
5139 return NULL;
5140
5141
5142
5143
5144
5145 alloc_flags |= alloc_flags_nofragment(zonelist_zone(ac.preferred_zoneref), gfp);
5146
5147
5148 page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
5149 if (likely(page))
5150 goto out;
5151
5152 alloc_gfp = gfp;
5153 ac.spread_dirty_pages = false;
5154
5155
5156
5157
5158
5159 ac.nodemask = nodemask;
5160
5161 page = __alloc_pages_slowpath(alloc_gfp, order, &ac);
5162
5163out:
5164 if (memcg_kmem_online() && (gfp & __GFP_ACCOUNT) && page &&
5165 unlikely(__memcg_kmem_charge_page(page, gfp, order) != 0)) {
5166 free_frozen_pages(page, order);
5167 page = NULL;
5168 }
5169
5170 trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
5171 kmsan_alloc_page(page, order, alloc_gfp);
5172
5173 return page;
5174}
5175EXPORT_SYMBOL(__alloc_frozen_pages_noprof);
5176
5177struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order,
5178 int preferred_nid, nodemask_t *nodemask)
5179{
5180 struct page *page;
5181
5182 page = __alloc_frozen_pages_noprof(gfp, order, preferred_nid, nodemask);
5183 if (page)
5184 set_page_refcounted(page);
5185 return page;
5186}
5187EXPORT_SYMBOL(__alloc_pages_noprof);
5188
5189struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_nid,
5190 nodemask_t *nodemask)
5191{
5192 struct page *page = __alloc_pages_noprof(gfp | __GFP_COMP, order,
5193 preferred_nid, nodemask);
5194 return page_rmappable_folio(page);
5195}
5196EXPORT_SYMBOL(__folio_alloc_noprof);
5197
5198
5199
5200
5201
5202
5203unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order)
5204{
5205 struct page *page;
5206
5207 page = alloc_pages_noprof(gfp_mask & ~__GFP_HIGHMEM, order);
5208 if (!page)
5209 return 0;
5210 return (unsigned long) page_address(page);
5211}
5212EXPORT_SYMBOL(get_free_pages_noprof);
5213
5214unsigned long get_zeroed_page_noprof(gfp_t gfp_mask)
5215{
5216 return get_free_pages_noprof(gfp_mask | __GFP_ZERO, 0);
5217}
5218EXPORT_SYMBOL(get_zeroed_page_noprof);
5219
5220static void ___free_pages(struct page *page, unsigned int order,
5221 fpi_t fpi_flags)
5222{
5223
5224 int head = PageHead(page);
5225
5226 struct alloc_tag *tag = pgalloc_tag_get(page);
5227
5228 if (put_page_testzero(page))
5229 __free_frozen_pages(page, order, fpi_flags);
5230 else if (!head) {
5231 pgalloc_tag_sub_pages(tag, (1 << order) - 1);
5232 while (order-- > 0)
5233 __free_frozen_pages(page + (1 << order), order,
5234 fpi_flags);
5235 }
5236}
5237
/**
 * __free_pages - Free pages allocated with alloc_pages().
 * @page: The page pointer returned from alloc_pages().
 * @order: The order of the allocation.
 *
 * This function can free multi-page allocations that are not compound
 * pages.  It does not check that the @order passed in matches that of
 * the allocation, so it is easy to leak memory.  Freeing memory without
 * first allocating it is not a good idea, so if you run into trouble
 * you may leave a note in your calendar.
 *
 * If you want to use the page's reference count to decide when to free
 * the allocation, you should allocate a compound page, and use put_page()
 * instead of __free_pages().
 *
 * Context: May be called in interrupt context or while holding a normal
 * spinlock, but not in NMI context or while holding a raw spinlock.
 */
5258void __free_pages(struct page *page, unsigned int order)
5259{
5260 ___free_pages(page, order, FPI_NONE);
5261}
5262EXPORT_SYMBOL(__free_pages);
5263
5264
5265
5266
5267
5268void free_pages_nolock(struct page *page, unsigned int order)
5269{
5270 ___free_pages(page, order, FPI_TRYLOCK);
5271}
5272
5273void free_pages(unsigned long addr, unsigned int order)
5274{
5275 if (addr != 0) {
5276 VM_BUG_ON(!virt_addr_valid((void *)addr));
5277 __free_pages(virt_to_page((void *)addr), order);
5278 }
5279}
5280
5281EXPORT_SYMBOL(free_pages);
5282
5283static void *make_alloc_exact(unsigned long addr, unsigned int order,
5284 size_t size)
5285{
5286 if (addr) {
5287 unsigned long nr = DIV_ROUND_UP(size, PAGE_SIZE);
5288 struct page *page = virt_to_page((void *)addr);
5289 struct page *last = page + nr;
5290
5291 split_page_owner(page, order, 0);
5292 pgalloc_tag_split(page_folio(page), order, 0);
5293 split_page_memcg(page, order);
5294 while (page < --last)
5295 set_page_refcounted(last);
5296
5297 last = page + (1UL << order);
5298 for (page += nr; page < last; page++)
5299 __free_pages_ok(page, 0, FPI_TO_TAIL);
5300 }
5301 return (void *)addr;
5302}
5303
/**
 * alloc_pages_exact - allocate an exact number of physically-contiguous pages.
 * @size: the number of bytes to allocate
 * @gfp_mask: GFP flags for the allocation, must not contain __GFP_COMP
 *
 * This function is similar to alloc_pages(), except that it allocates the
 * minimum number of pages to satisfy the request.  alloc_pages() can only
 * allocate memory in power-of-two pages.
 *
 * This function is also limited by MAX_PAGE_ORDER.
 *
 * Memory allocated by this function must be released by free_pages_exact().
 *
 * Return: pointer to the allocated area or %NULL in case of error.
 */
5319void *alloc_pages_exact_noprof(size_t size, gfp_t gfp_mask)
5320{
5321 unsigned int order = get_order(size);
5322 unsigned long addr;
5323
5324 if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
5325 gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
5326
5327 addr = get_free_pages_noprof(gfp_mask, order);
5328 return make_alloc_exact(addr, order, size);
5329}
5330EXPORT_SYMBOL(alloc_pages_exact_noprof);
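
/*
 * Illustrative sketch (not part of the kernel sources): pairing
 * alloc_pages_exact() with free_pages_exact() for a buffer whose size is
 * not a power of two:
 *
 *	void *buf = alloc_pages_exact(12 * 1024, GFP_KERNEL);
 *
 *	if (buf) {
 *		...
 *		free_pages_exact(buf, 12 * 1024);
 *	}
 *
 * With 4KiB pages, only the three pages backing the 12KiB request are
 * kept; the unused tail of the underlying order-2 block is freed again.
 */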
5331
5332
5333
5334
5335
5336
5337
5338
5339
5340
5341
5342
5343
5344void * __meminit alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask)
5345{
5346 unsigned int order = get_order(size);
5347 struct page *p;
5348
5349 if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
5350 gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
5351
5352 p = alloc_pages_node_noprof(nid, gfp_mask, order);
5353 if (!p)
5354 return NULL;
5355 return make_alloc_exact((unsigned long)page_address(p), order, size);
5356}
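/*
 * Illustrative usage (not part of this file): the _exact helpers take a byte
 * count instead of an order, so a 5-page buffer does not pin a whole order-3
 * block; the surplus pages are returned to the buddy allocator up front:
 *
 *	void *buf = alloc_pages_exact(5 * PAGE_SIZE, GFP_KERNEL);
 *
 *	if (buf)
 *		free_pages_exact(buf, 5 * PAGE_SIZE);
 */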
}

/**
 * free_pages_exact - release memory allocated via alloc_pages_exact()
 * @virt: the value returned by alloc_pages_exact()
 * @size: size of allocation, same value as passed to alloc_pages_exact()
 *
 * Release the memory allocated by a previous call to alloc_pages_exact().
 */
void free_pages_exact(void *virt, size_t size)
5366{
5367 unsigned long addr = (unsigned long)virt;
5368 unsigned long end = addr + PAGE_ALIGN(size);
5369
5370 while (addr < end) {
5371 free_page(addr);
5372 addr += PAGE_SIZE;
5373 }
5374}
EXPORT_SYMBOL(free_pages_exact);

/**
 * nr_free_zone_pages - count number of pages beyond high watermark
 * @offset: the zone index of the highest zone to include
 *
 * nr_free_zone_pages() walks the local node's zonelist and, for each zone at
 * or below @offset, counts the pages beyond that zone's high watermark:
 *
 *	nr_free_zone_pages = sum(managed_pages - high_pages)
 *
 * Return: number of pages beyond the high watermarks.
 */
static unsigned long nr_free_zone_pages(int offset)
5390{
5391 struct zoneref *z;
5392 struct zone *zone;
5393
5394
5395 unsigned long sum = 0;
5396
5397 struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
5398
5399 for_each_zone_zonelist(zone, z, zonelist, offset) {
5400 unsigned long size = zone_managed_pages(zone);
5401 unsigned long high = high_wmark_pages(zone);
5402 if (size > high)
5403 sum += size - high;
5404 }
5405
5406 return sum;
}

/**
 * nr_free_buffer_pages - count number of pages beyond high watermark
 *
 * nr_free_buffer_pages() counts the number of pages which are beyond the
 * high watermark within all zones usable for %GFP_USER allocations, i.e.
 * up to and including ZONE_NORMAL on the local node.
 *
 * Return: number of pages beyond high watermark.
 */
unsigned long nr_free_buffer_pages(void)
5419{
5420 return nr_free_zone_pages(gfp_zone(GFP_USER));
5421}
5422EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
5423
5424static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
5425{
5426 zoneref->zone = zone;
5427 zoneref->zone_idx = zone_idx(zone);
5428}
}

/*
 * Add all populated zones of a node to the zonelist, highest zone first.
 * Returns the number of zoneref entries written.
 */
static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs)
5436{
5437 struct zone *zone;
5438 enum zone_type zone_type = MAX_NR_ZONES;
5439 int nr_zones = 0;
5440
5441 do {
5442 zone_type--;
5443 zone = pgdat->node_zones + zone_type;
5444 if (populated_zone(zone)) {
5445 zoneref_set_zone(zone, &zonerefs[nr_zones++]);
5446 check_highest_zone(zone_type);
5447 }
5448 } while (zone_type);
5449
5450 return nr_zones;
5451}
5452
5453#ifdef CONFIG_NUMA
5454
5455static int __parse_numa_zonelist_order(char *s)
{
	/*
	 * Only the default "node" order is supported these days.  Accept
	 * values starting with 'd'/'D' ("default") or 'n'/'N' ("node") for
	 * backwards compatibility and reject everything else with a warning.
	 */
	if (!(*s == 'd' || *s == 'D' || *s == 'n' || *s == 'N')) {
5464 pr_warn("Ignoring unsupported numa_zonelist_order value: %s\n", s);
5465 return -EINVAL;
5466 }
5467 return 0;
5468}
5469
5470static char numa_zonelist_order[] = "Node";
5471#define NUMA_ZONELIST_ORDER_LEN 16
5472
5473
5474
5475static int numa_zonelist_order_handler(const struct ctl_table *table, int write,
5476 void *buffer, size_t *length, loff_t *ppos)
5477{
5478 if (write)
5479 return __parse_numa_zonelist_order(buffer);
5480 return proc_dostring(table, write, buffer, length, ppos);
5481}
5482
static int node_load[MAX_NUMNODES];

/**
 * find_next_best_node - find the next node that should appear in a given node's fallback list
 * @node: node whose fallback list we're appending
 * @used_node_mask: nodemask_t of already used nodes
 *
 * We use a number of factors to determine which is the next node that should
 * appear on a given node's fallback list.  The node should not have appeared
 * already in @node's fallback list, it should be the next closest node
 * according to the distance array, and it should preferably be a node with
 * no CPUs, since such nodes see little allocation pressure of their own.
 *
 * Return: node id of the found node or %NUMA_NO_NODE if no node is found.
 */
int find_next_best_node(int node, nodemask_t *used_node_mask)
5501{
5502 int n, val;
5503 int min_val = INT_MAX;
5504 int best_node = NUMA_NO_NODE;
5505
5506
5507
5508
5509
5510 if (!node_isset(node, *used_node_mask) && node_state(node, N_MEMORY)) {
5511 node_set(node, *used_node_mask);
5512 return node;
5513 }
5514
5515 for_each_node_state(n, N_MEMORY) {
5516
5517
5518 if (node_isset(n, *used_node_mask))
5519 continue;
5520
5521
5522 val = node_distance(node, n);
5523
5524
5525 val += (n < node);
5526
5527
5528 if (!cpumask_empty(cpumask_of_node(n)))
5529 val += PENALTY_FOR_NODE_WITH_CPUS;
5530
5531
5532 val *= MAX_NUMNODES;
5533 val += node_load[n];
5534
5535 if (val < min_val) {
5536 min_val = val;
5537 best_node = n;
5538 }
5539 }
5540
5541 if (best_node >= 0)
5542 node_set(best_node, *used_node_mask);
5543
5544 return best_node;
}

/*
 * Build the fallback zonelist for @pgdat in the order given by @node_order[]:
 * for each node, all populated zones are appended highest zone first.
 */
static void build_zonelists_in_node_order(pg_data_t *pgdat, int *node_order,
		unsigned nr_nodes)
5555{
5556 struct zoneref *zonerefs;
5557 int i;
5558
5559 zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs;
5560
5561 for (i = 0; i < nr_nodes; i++) {
5562 int nr_zones;
5563
5564 pg_data_t *node = NODE_DATA(node_order[i]);
5565
5566 nr_zones = build_zonerefs_node(node, zonerefs);
5567 zonerefs += nr_zones;
5568 }
5569 zonerefs->zone = NULL;
5570 zonerefs->zone_idx = 0;
}

/*
 * Build the __GFP_THISNODE zonelist: it contains zones of the local node only.
 */
static void build_thisnode_zonelists(pg_data_t *pgdat)
5577{
5578 struct zoneref *zonerefs;
5579 int nr_zones;
5580
5581 zonerefs = pgdat->node_zonelists[ZONELIST_NOFALLBACK]._zonerefs;
5582 nr_zones = build_zonerefs_node(pgdat, zonerefs);
5583 zonerefs += nr_zones;
5584 zonerefs->zone = NULL;
5585 zonerefs->zone_idx = 0;
5586}
5587
5588static void build_zonelists(pg_data_t *pgdat)
5589{
5590 static int node_order[MAX_NUMNODES];
5591 int node, nr_nodes = 0;
5592 nodemask_t used_mask = NODE_MASK_NONE;
5593 int local_node, prev_node;
5594
5595
5596 local_node = pgdat->node_id;
5597 prev_node = local_node;
5598
5599 memset(node_order, 0, sizeof(node_order));
5600 while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
5601
5602
5603
5604
5605
5606 if (node_distance(local_node, node) !=
5607 node_distance(local_node, prev_node))
5608 node_load[node] += 1;
5609
5610 node_order[nr_nodes++] = node;
5611 prev_node = node;
5612 }
5613
5614 build_zonelists_in_node_order(pgdat, node_order, nr_nodes);
5615 build_thisnode_zonelists(pgdat);
5616 pr_info("Fallback order for Node %d: ", local_node);
5617 for (node = 0; node < nr_nodes; node++)
5618 pr_cont("%d ", node_order[node]);
5619 pr_cont("\n");
5620}
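/*
 * Illustrative sketch (not part of this file): on a two-node machine where
 * node 0 has ZONE_DMA32 and ZONE_NORMAL while node 1 has only ZONE_NORMAL,
 * the ZONELIST_FALLBACK list built above for node 0 ends up ordered
 *
 *	node 0 ZONE_NORMAL -> node 0 ZONE_DMA32 -> node 1 ZONE_NORMAL
 *
 * i.e. all zones of the preferred node first (highest zone first), then the
 * zones of the next-best node as picked by find_next_best_node(), and the
 * boot log prints "Fallback order for Node 0: 0 1".
 */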
5621
#ifdef CONFIG_HAVE_MEMORYLESS_NODES
/*
 * Return node id of the node used for "local" allocations, i.e. the first
 * node of the first zone in @node's GFP_KERNEL zonelist.  Used to initialise
 * the per-cpu 'numa_mem' for memoryless nodes.
 */
int local_memory_node(int node)
5630{
5631 struct zoneref *z;
5632
5633 z = first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
5634 gfp_zone(GFP_KERNEL),
5635 NULL);
5636 return zonelist_node_idx(z);
5637}
5638#endif
5639
5640static void setup_min_unmapped_ratio(void);
5641static void setup_min_slab_ratio(void);
5642#else
5643
5644static void build_zonelists(pg_data_t *pgdat)
5645{
5646 struct zoneref *zonerefs;
5647 int nr_zones;
5648
5649 zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs;
5650 nr_zones = build_zonerefs_node(pgdat, zonerefs);
5651 zonerefs += nr_zones;
5652
5653 zonerefs->zone = NULL;
5654 zonerefs->zone_idx = 0;
5655}
5656
#endif	/* CONFIG_NUMA */

/*
 * Boot pagesets: one per possible CPU, shared by all zones and nodes until
 * setup_per_cpu_pageset() allocates the real per-zone pagesets.  With high
 * set to 0 and batch set to 1, any page freed into a boot pageset is handed
 * straight back to the buddy lists.  They must remain valid at all times
 * because the per-cpu pointers keep referencing them until the switch-over.
 */
static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonestat *pzstats);
5675
5676#define BOOT_PAGESET_HIGH 0
5677#define BOOT_PAGESET_BATCH 1
5678static DEFINE_PER_CPU(struct per_cpu_pages, boot_pageset);
5679static DEFINE_PER_CPU(struct per_cpu_zonestat, boot_zonestats);
5680
5681static void __build_all_zonelists(void *data)
5682{
5683 int nid;
5684 int __maybe_unused cpu;
5685 pg_data_t *self = data;
5686 unsigned long flags;
5687
5688
5689
5690
5691
5692 write_seqlock_irqsave(&zonelist_update_seq, flags);
5693
5694
5695
5696
5697
5698
5699 printk_deferred_enter();
5700
5701#ifdef CONFIG_NUMA
5702 memset(node_load, 0, sizeof(node_load));
5703#endif
5704
5705
5706
5707
5708
5709 if (self && !node_online(self->node_id)) {
5710 build_zonelists(self);
5711 } else {
5712
5713
5714
5715
5716 for_each_node(nid) {
5717 pg_data_t *pgdat = NODE_DATA(nid);
5718
5719 build_zonelists(pgdat);
5720 }
5721
5722#ifdef CONFIG_HAVE_MEMORYLESS_NODES
5723
5724
5725
5726
5727
5728
5729
5730
5731 for_each_online_cpu(cpu)
5732 set_cpu_numa_mem(cpu, local_memory_node(cpu_to_node(cpu)));
5733#endif
5734 }
5735
5736 printk_deferred_exit();
5737 write_sequnlock_irqrestore(&zonelist_update_seq, flags);
5738}
5739
5740static noinline void __init
5741build_all_zonelists_init(void)
5742{
5743 int cpu;
5744
5745 __build_all_zonelists(NULL);
5746
5747
5748
5749
5750
5751
5752
5753
5754
5755
5756
5757
5758
5759
5760 for_each_possible_cpu(cpu)
5761 per_cpu_pages_init(&per_cpu(boot_pageset, cpu), &per_cpu(boot_zonestats, cpu));
5762
5763 mminit_verify_zonelist();
5764 cpuset_init_current_mems_allowed();
}

/*
 * Rebuild the zonelists for all nodes; during boot this also initialises the
 * per-cpu boot pagesets.  __ref is needed because build_all_zonelists_init()
 * is __init and is only reachable while system_state == SYSTEM_BOOTING.
 */
void __ref build_all_zonelists(pg_data_t *pgdat)
5774{
5775 unsigned long vm_total_pages;
5776
5777 if (system_state == SYSTEM_BOOTING) {
5778 build_all_zonelists_init();
5779 } else {
5780 __build_all_zonelists(pgdat);
5781
5782 }
5783
5784 vm_total_pages = nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE));
5785
5786
5787
5788
5789
5790
5791
5792 if (vm_total_pages < (pageblock_nr_pages * MIGRATE_TYPES))
5793 page_group_by_mobility_disabled = 1;
5794 else
5795 page_group_by_mobility_disabled = 0;
5796
5797 pr_info("Built %u zonelists, mobility grouping %s. Total pages: %ld\n",
5798 nr_online_nodes,
5799 str_off_on(page_group_by_mobility_disabled),
5800 vm_total_pages);
5801#ifdef CONFIG_NUMA
5802 pr_info("Policy zone: %s\n", zone_names[policy_zone]);
5803#endif
5804}
5805
5806static int zone_batchsize(struct zone *zone)
5807{
5808#ifdef CONFIG_MMU
5809 int batch;
5810
5811
5812
5813
5814
5815
5816
5817 batch = min(zone_managed_pages(zone) >> 10, SZ_1M / PAGE_SIZE);
5818 batch /= 4;
5819 if (batch < 1)
		batch = 1;

	/*
	 * Clamp the batch to a 2^n - 1 value.  A power-of-two value is more
	 * likely to produce bad cache aliasing: if two tasks alternately
	 * allocate batches of pages, one task can end up with mostly one half
	 * of the possible page colors and the other task with the rest.
	 */
	batch = rounddown_pow_of_two(batch + batch/2) - 1;
5833
5834 return batch;
5835
5836#else
5837
5838
5839
5840
5841
5842
5843
5844
5845
5846
5847
5848
5849
5850 return 0;
5851#endif
5852}
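/*
 * Worked example (not part of this file): for a zone managing 1GiB with 4KiB
 * pages, zone_managed_pages() is 262144, so zone_batchsize() computes
 *
 *	262144 >> 10                       = 256  (already at the 1MB cap)
 *	256 / 4                            = 64
 *	rounddown_pow_of_two(64 + 32) - 1  = 63
 *
 * i.e. a per-cpu batch of 63 pages.
 */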
5853
5854static int percpu_pagelist_high_fraction;
5855static int zone_highsize(struct zone *zone, int batch, int cpu_online,
5856 int high_fraction)
5857{
5858#ifdef CONFIG_MMU
5859 int high;
5860 int nr_split_cpus;
5861 unsigned long total_pages;
5862
5863 if (!high_fraction) {
5864
5865
5866
5867
5868
5869 total_pages = low_wmark_pages(zone);
5870 } else {
5871
5872
5873
5874
5875
5876 total_pages = zone_managed_pages(zone) / high_fraction;
5877 }
5878
5879
5880
5881
5882
5883
5884
5885
5886
5887 nr_split_cpus = cpumask_weight(cpumask_of_node(zone_to_nid(zone))) + cpu_online;
5888 if (!nr_split_cpus)
5889 nr_split_cpus = num_online_cpus();
5890 high = total_pages / nr_split_cpus;
5891
5892
5893
5894
5895
5896 high = max(high, batch << 2);
5897
5898 return high;
5899#else
5900 return 0;
5901#endif
}

/*
 * pcp->high and pcp->batch values are related and generally batch is lower
 * than high.  They are also related to pcp->count such that count stays below
 * high; once it reaches high the pcplist is flushed.
 *
 * Guaranteeing these relations at all times would require extra memory
 * barriers on both the update and read side, so the update below only
 * prevents store tearing.  Any user of pcp->batch, pcp->high_min and
 * pcp->high_max must cope with the fields changing asynchronously and may
 * fully trust only the pcp->count of the local CPU with interrupts disabled.
 *
 * Outside of boot, callers must hold pcp_batch_high_lock (or otherwise
 * guarantee there are no concurrent updaters).
 */
static void pageset_update(struct per_cpu_pages *pcp, unsigned long high_min,
		unsigned long high_max, unsigned long batch)
5923{
5924 WRITE_ONCE(pcp->batch, batch);
5925 WRITE_ONCE(pcp->high_min, high_min);
5926 WRITE_ONCE(pcp->high_max, high_max);
5927}
5928
5929static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonestat *pzstats)
5930{
5931 int pindex;
5932
5933 memset(pcp, 0, sizeof(*pcp));
5934 memset(pzstats, 0, sizeof(*pzstats));
5935
5936 spin_lock_init(&pcp->lock);
5937 for (pindex = 0; pindex < NR_PCP_LISTS; pindex++)
5938 INIT_LIST_HEAD(&pcp->lists[pindex]);
5939
5940
5941
5942
5943
5944
5945
5946 pcp->high_min = BOOT_PAGESET_HIGH;
5947 pcp->high_max = BOOT_PAGESET_HIGH;
5948 pcp->batch = BOOT_PAGESET_BATCH;
5949 pcp->free_count = 0;
5950}
5951
5952static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long high_min,
5953 unsigned long high_max, unsigned long batch)
5954{
5955 struct per_cpu_pages *pcp;
5956 int cpu;
5957
5958 for_each_possible_cpu(cpu) {
5959 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
5960 pageset_update(pcp, high_min, high_max, batch);
5961 }
}

/*
 * Calculate and set new high and batch values for all per-cpu pagesets of a
 * zone, based on the zone's size and the percpu_pagelist_high_fraction sysctl.
 */
static void zone_set_pageset_high_and_batch(struct zone *zone, int cpu_online)
5969{
5970 int new_high_min, new_high_max, new_batch;
5971
5972 new_batch = max(1, zone_batchsize(zone));
5973 if (percpu_pagelist_high_fraction) {
5974 new_high_min = zone_highsize(zone, new_batch, cpu_online,
5975 percpu_pagelist_high_fraction);
5976
5977
5978
5979
5980 new_high_max = new_high_min;
5981 } else {
5982 new_high_min = zone_highsize(zone, new_batch, cpu_online, 0);
5983 new_high_max = zone_highsize(zone, new_batch, cpu_online,
5984 MIN_PERCPU_PAGELIST_HIGH_FRACTION);
5985 }
5986
5987 if (zone->pageset_high_min == new_high_min &&
5988 zone->pageset_high_max == new_high_max &&
5989 zone->pageset_batch == new_batch)
5990 return;
5991
5992 zone->pageset_high_min = new_high_min;
5993 zone->pageset_high_max = new_high_max;
5994 zone->pageset_batch = new_batch;
5995
5996 __zone_set_pageset_high_and_batch(zone, new_high_min, new_high_max,
5997 new_batch);
5998}
5999
6000void __meminit setup_zone_pageset(struct zone *zone)
6001{
6002 int cpu;
6003
6004
6005 if (sizeof(struct per_cpu_zonestat) > 0)
6006 zone->per_cpu_zonestats = alloc_percpu(struct per_cpu_zonestat);
6007
6008 zone->per_cpu_pageset = alloc_percpu(struct per_cpu_pages);
6009 for_each_possible_cpu(cpu) {
6010 struct per_cpu_pages *pcp;
6011 struct per_cpu_zonestat *pzstats;
6012
6013 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
6014 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
6015 per_cpu_pages_init(pcp, pzstats);
6016 }
6017
6018 zone_set_pageset_high_and_batch(zone, 0);
}

/*
 * The zone indicated has a new number of managed_pages; batch sizes and percpu
 * page high values need to be recalculated.
 */
static void zone_pcp_update(struct zone *zone, int cpu_online)
6026{
6027 mutex_lock(&pcp_batch_high_lock);
6028 zone_set_pageset_high_and_batch(zone, cpu_online);
6029 mutex_unlock(&pcp_batch_high_lock);
6030}
6031
6032static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
6033{
6034 struct per_cpu_pages *pcp;
6035 struct cpu_cacheinfo *cci;
6036
6037 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
6038 cci = get_cpu_cacheinfo(cpu);
6039
6040
6041
6042
6043
6044
6045
6046 spin_lock(&pcp->lock);
6047 if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
6048 pcp->flags |= PCPF_FREE_HIGH_BATCH;
6049 else
6050 pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
6051 spin_unlock(&pcp->lock);
6052}
6053
6054void setup_pcp_cacheinfo(unsigned int cpu)
6055{
6056 struct zone *zone;
6057
6058 for_each_populated_zone(zone)
6059 zone_pcp_update_cacheinfo(zone, cpu);
}

/*
 * Allocate per-cpu pagesets and initialize them.
 * Before this call only boot pagesets were available.
 */
void __init setup_per_cpu_pageset(void)
6067{
6068 struct pglist_data *pgdat;
6069 struct zone *zone;
6070 int __maybe_unused cpu;
6071
6072 for_each_populated_zone(zone)
6073 setup_zone_pageset(zone);
6074
6075#ifdef CONFIG_NUMA
6076
6077
6078
6079
6080
6081
6082 for_each_possible_cpu(cpu) {
6083 struct per_cpu_zonestat *pzstats = &per_cpu(boot_zonestats, cpu);
6084 memset(pzstats->vm_numa_event, 0,
6085 sizeof(pzstats->vm_numa_event));
6086 }
6087#endif
6088
6089 for_each_online_pgdat(pgdat)
6090 pgdat->per_cpu_nodestats =
6091 alloc_percpu(struct per_cpu_nodestat);
6092}
6093
6094__meminit void zone_pcp_init(struct zone *zone)
6095{
6096
6097
6098
6099
6100
6101 zone->per_cpu_pageset = &boot_pageset;
6102 zone->per_cpu_zonestats = &boot_zonestats;
6103 zone->pageset_high_min = BOOT_PAGESET_HIGH;
6104 zone->pageset_high_max = BOOT_PAGESET_HIGH;
6105 zone->pageset_batch = BOOT_PAGESET_BATCH;
6106
6107 if (populated_zone(zone))
6108 pr_debug(" %s zone: %lu pages, LIFO batch:%u\n", zone->name,
6109 zone->present_pages, zone_batchsize(zone));
6110}
6111
6112static void setup_per_zone_lowmem_reserve(void);
6113
6114void adjust_managed_page_count(struct page *page, long count)
6115{
6116 atomic_long_add(count, &page_zone(page)->managed_pages);
6117 totalram_pages_add(count);
6118 setup_per_zone_lowmem_reserve();
6119}
6120EXPORT_SYMBOL(adjust_managed_page_count);
6121
6122unsigned long free_reserved_area(void *start, void *end, int poison, const char *s)
6123{
6124 void *pos;
6125 unsigned long pages = 0;
6126
6127 start = (void *)PAGE_ALIGN((unsigned long)start);
6128 end = (void *)((unsigned long)end & PAGE_MASK);
6129 for (pos = start; pos < end; pos += PAGE_SIZE, pages++) {
6130 struct page *page = virt_to_page(pos);
6131 void *direct_map_addr;
6132
6133
6134
6135
6136
6137
6138
6139
6140 direct_map_addr = page_address(page);
6141
6142
6143
6144
6145 direct_map_addr = kasan_reset_tag(direct_map_addr);
6146 if ((unsigned int)poison <= 0xFF)
6147 memset(direct_map_addr, poison, PAGE_SIZE);
6148
6149 free_reserved_page(page);
6150 }
6151
6152 if (pages && s)
6153 pr_info("Freeing %s memory: %ldK\n", s, K(pages));
6154
6155 return pages;
6156}
6157
6158void free_reserved_page(struct page *page)
6159{
6160 clear_page_tag_ref(page);
6161 ClearPageReserved(page);
6162 init_page_count(page);
6163 __free_page(page);
6164 adjust_managed_page_count(page, 1);
6165}
6166EXPORT_SYMBOL(free_reserved_page);
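/*
 * Illustrative usage (not part of this file): architectures release their
 * __init sections through this helper, e.g.
 *
 *	free_reserved_area(&__init_begin, &__init_end, POISON_FREE_INITMEM,
 *			   "unused kernel image");
 *
 * which poisons each page, clears PG_reserved, resets the refcount to one and
 * gives the pages to the buddy allocator via free_reserved_page().
 */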
6167
6168static int page_alloc_cpu_dead(unsigned int cpu)
6169{
6170 struct zone *zone;
6171
6172 lru_add_drain_cpu(cpu);
6173 mlock_drain_remote(cpu);
6174 drain_pages(cpu);
6175
6176
6177
6178
6179
6180
6181
6182 vm_events_fold_cpu(cpu);
6183
6184
6185
6186
6187
6188
6189
6190
6191 cpu_vm_stats_fold(cpu);
6192
6193 for_each_populated_zone(zone)
6194 zone_pcp_update(zone, 0);
6195
6196 return 0;
6197}
6198
6199static int page_alloc_cpu_online(unsigned int cpu)
6200{
6201 struct zone *zone;
6202
6203 for_each_populated_zone(zone)
6204 zone_pcp_update(zone, 1);
6205 return 0;
6206}
6207
6208void __init page_alloc_init_cpuhp(void)
6209{
6210 int ret;
6211
6212 ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC,
6213 "mm/page_alloc:pcp",
6214 page_alloc_cpu_online,
6215 page_alloc_cpu_dead);
6216 WARN_ON(ret < 0);
}

/*
 * calculate_totalreserve_pages - called when sysctl_lowmem_reserve_ratio
 *	or min_free_kbytes changes.
 *
 * Totalreserve is the sum, over all zones, of each zone's high watermark plus
 * its largest lowmem reserve (capped at the zone's managed pages): pages that
 * are not available to userspace allocations.
 */
static void calculate_totalreserve_pages(void)
6224{
6225 struct pglist_data *pgdat;
6226 unsigned long reserve_pages = 0;
6227 enum zone_type i, j;
6228
6229 for_each_online_pgdat(pgdat) {
6230
6231 pgdat->totalreserve_pages = 0;
6232
6233 for (i = 0; i < MAX_NR_ZONES; i++) {
6234 struct zone *zone = pgdat->node_zones + i;
6235 long max = 0;
6236 unsigned long managed_pages = zone_managed_pages(zone);
6237
6238
6239 for (j = i; j < MAX_NR_ZONES; j++) {
6240 if (zone->lowmem_reserve[j] > max)
6241 max = zone->lowmem_reserve[j];
6242 }
6243
6244
6245 max += high_wmark_pages(zone);
6246
6247 if (max > managed_pages)
6248 max = managed_pages;
6249
6250 pgdat->totalreserve_pages += max;
6251
6252 reserve_pages += max;
6253 }
6254 }
6255 totalreserve_pages = reserve_pages;
6256 trace_mm_calculate_totalreserve_pages(totalreserve_pages);
}

/*
 * setup_per_zone_lowmem_reserve - called whenever
 *	sysctl_lowmem_reserve_ratio changes.  Ensures that each zone
 *	has a correct pages reserved value, so an adequate number of
 *	pages are left in the zone after a successful __alloc_pages().
 */
static void setup_per_zone_lowmem_reserve(void)
6266{
6267 struct pglist_data *pgdat;
6268 enum zone_type i, j;
6269
6270 for_each_online_pgdat(pgdat) {
6271 for (i = 0; i < MAX_NR_ZONES - 1; i++) {
6272 struct zone *zone = &pgdat->node_zones[i];
6273 int ratio = sysctl_lowmem_reserve_ratio[i];
6274 bool clear = !ratio || !zone_managed_pages(zone);
6275 unsigned long managed_pages = 0;
6276
6277 for (j = i + 1; j < MAX_NR_ZONES; j++) {
6278 struct zone *upper_zone = &pgdat->node_zones[j];
6279
6280 managed_pages += zone_managed_pages(upper_zone);
6281
6282 if (clear)
6283 zone->lowmem_reserve[j] = 0;
6284 else
6285 zone->lowmem_reserve[j] = managed_pages / ratio;
6286 trace_mm_setup_per_zone_lowmem_reserve(zone, upper_zone,
6287 zone->lowmem_reserve[j]);
6288 }
6289 }
6290 }
6291
6292
6293 calculate_totalreserve_pages();
6294}
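/*
 * Worked example (not part of this file): with the default
 * sysctl_lowmem_reserve_ratio[ZONE_DMA32] of 256, a node with a 4GiB
 * ZONE_DMA32 below a 12GiB ZONE_NORMAL (3145728 managed pages) gets
 *
 *	DMA32->lowmem_reserve[ZONE_NORMAL] = 3145728 / 256 = 12288 pages
 *
 * so an allocation that may use ZONE_NORMAL but falls back into ZONE_DMA32
 * must leave roughly an extra 48MiB free there.
 */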
6295
6296static void __setup_per_zone_wmarks(void)
6297{
6298 unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
6299 unsigned long lowmem_pages = 0;
6300 struct zone *zone;
6301 unsigned long flags;
6302
6303
6304 for_each_zone(zone) {
6305 if (!is_highmem(zone) && zone_idx(zone) != ZONE_MOVABLE)
6306 lowmem_pages += zone_managed_pages(zone);
6307 }
6308
6309 for_each_zone(zone) {
6310 u64 tmp;
6311
6312 spin_lock_irqsave(&zone->lock, flags);
6313 tmp = (u64)pages_min * zone_managed_pages(zone);
6314 tmp = div64_ul(tmp, lowmem_pages);
6315 if (is_highmem(zone) || zone_idx(zone) == ZONE_MOVABLE) {
6316
6317
6318
6319
6320
6321
6322
6323
6324
6325 unsigned long min_pages;
6326
6327 min_pages = zone_managed_pages(zone) / 1024;
6328 min_pages = clamp(min_pages, SWAP_CLUSTER_MAX, 128UL);
6329 zone->_watermark[WMARK_MIN] = min_pages;
6330 } else {
6331
6332
6333
6334
6335 zone->_watermark[WMARK_MIN] = tmp;
6336 }
6337
6338
6339
6340
6341
6342
6343 tmp = max_t(u64, tmp >> 2,
6344 mult_frac(zone_managed_pages(zone),
6345 watermark_scale_factor, 10000));
6346
6347 zone->watermark_boost = 0;
6348 zone->_watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
6349 zone->_watermark[WMARK_HIGH] = low_wmark_pages(zone) + tmp;
6350 zone->_watermark[WMARK_PROMO] = high_wmark_pages(zone) + tmp;
6351 trace_mm_setup_per_zone_wmarks(zone);
6352
6353 spin_unlock_irqrestore(&zone->lock, flags);
6354 }
6355
6356
6357 calculate_totalreserve_pages();
}

/**
 * setup_per_zone_wmarks - called when min_free_kbytes changes
 * or when memory is hot-{added|removed}
 *
 * Ensures that the watermark[min,low,high] values for each zone are set
 * correctly with respect to min_free_kbytes.
 */
void setup_per_zone_wmarks(void)
6368{
6369 struct zone *zone;
6370 static DEFINE_SPINLOCK(lock);
6371
6372 spin_lock(&lock);
6373 __setup_per_zone_wmarks();
6374 spin_unlock(&lock);
6375
6376
6377
6378
6379
6380 for_each_zone(zone)
6381 zone_pcp_update(zone, 0);
}

/*
 * Initialise min_free_kbytes.
 *
 * The reserve scales with the square root of low memory rather than linearly:
 *
 *	min_free_kbytes = sqrt(lowmem_kbytes * 16) = 4 * sqrt(lowmem_kbytes)
 *
 * which gives, for example, 512kB at 16MB of lowmem, 4096kB at 1GB and
 * 16384kB at 16GB.  The result is clamped to the [128, 262144] kB range, and
 * is skipped entirely when the value set by the administrator through /proc
 * (user_min_free_kbytes) is already at least as large.
 */
void calculate_min_free_kbytes(void)
6409{
6410 unsigned long lowmem_kbytes;
6411 int new_min_free_kbytes;
6412
6413 lowmem_kbytes = nr_free_buffer_pages() * (PAGE_SIZE >> 10);
6414 new_min_free_kbytes = int_sqrt(lowmem_kbytes * 16);
6415
6416 if (new_min_free_kbytes > user_min_free_kbytes)
6417 min_free_kbytes = clamp(new_min_free_kbytes, 128, 262144);
6418 else
6419 pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
6420 new_min_free_kbytes, user_min_free_kbytes);
6421
6422}
6423
6424int __meminit init_per_zone_wmark_min(void)
6425{
6426 calculate_min_free_kbytes();
6427 setup_per_zone_wmarks();
6428 refresh_zone_stat_thresholds();
6429 setup_per_zone_lowmem_reserve();
6430
6431#ifdef CONFIG_NUMA
6432 setup_min_unmapped_ratio();
6433 setup_min_slab_ratio();
6434#endif
6435
6436 khugepaged_min_free_kbytes_update();
6437
6438 return 0;
6439}
postcore_initcall(init_per_zone_wmark_min)

/*
 * min_free_kbytes_sysctl_handler - wrapper around proc_dointvec() that
 *	records the user-supplied value in user_min_free_kbytes and
 *	recomputes the per-zone watermarks whenever min_free_kbytes changes.
 */
static int min_free_kbytes_sysctl_handler(const struct ctl_table *table, int write,
		void *buffer, size_t *length, loff_t *ppos)
6449{
6450 int rc;
6451
6452 rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
6453 if (rc)
6454 return rc;
6455
6456 if (write) {
6457 user_min_free_kbytes = min_free_kbytes;
6458 setup_per_zone_wmarks();
6459 }
6460 return 0;
6461}
6462
6463static int watermark_scale_factor_sysctl_handler(const struct ctl_table *table, int write,
6464 void *buffer, size_t *length, loff_t *ppos)
6465{
6466 int rc;
6467
6468 rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
6469 if (rc)
6470 return rc;
6471
6472 if (write)
6473 setup_per_zone_wmarks();
6474
6475 return 0;
6476}
6477
6478#ifdef CONFIG_NUMA
6479static void setup_min_unmapped_ratio(void)
6480{
6481 pg_data_t *pgdat;
6482 struct zone *zone;
6483
6484 for_each_online_pgdat(pgdat)
6485 pgdat->min_unmapped_pages = 0;
6486
6487 for_each_zone(zone)
6488 zone->zone_pgdat->min_unmapped_pages += (zone_managed_pages(zone) *
6489 sysctl_min_unmapped_ratio) / 100;
6490}
6491
6492
6493static int sysctl_min_unmapped_ratio_sysctl_handler(const struct ctl_table *table, int write,
6494 void *buffer, size_t *length, loff_t *ppos)
6495{
6496 int rc;
6497
6498 rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
6499 if (rc)
6500 return rc;
6501
6502 setup_min_unmapped_ratio();
6503
6504 return 0;
6505}
6506
6507static void setup_min_slab_ratio(void)
6508{
6509 pg_data_t *pgdat;
6510 struct zone *zone;
6511
6512 for_each_online_pgdat(pgdat)
6513 pgdat->min_slab_pages = 0;
6514
6515 for_each_zone(zone)
6516 zone->zone_pgdat->min_slab_pages += (zone_managed_pages(zone) *
6517 sysctl_min_slab_ratio) / 100;
6518}
6519
6520static int sysctl_min_slab_ratio_sysctl_handler(const struct ctl_table *table, int write,
6521 void *buffer, size_t *length, loff_t *ppos)
6522{
6523 int rc;
6524
6525 rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
6526 if (rc)
6527 return rc;
6528
6529 setup_min_slab_ratio();
6530
6531 return 0;
6532}
#endif

/*
 * lowmem_reserve_ratio_sysctl_handler - wrapper around proc_dointvec() that
 *	recomputes the per-zone lowmem reserves whenever the ratio array is
 *	written.  A ratio below 1 is forced to 0, which disables the lowmem
 *	protection of that zone.
 */
static int lowmem_reserve_ratio_sysctl_handler(const struct ctl_table *table,
		int write, void *buffer, size_t *length, loff_t *ppos)
6546{
6547 int i;
6548
6549 proc_dointvec_minmax(table, write, buffer, length, ppos);
6550
6551 for (i = 0; i < MAX_NR_ZONES; i++) {
6552 if (sysctl_lowmem_reserve_ratio[i] < 1)
6553 sysctl_lowmem_reserve_ratio[i] = 0;
6554 }
6555
6556 setup_per_zone_lowmem_reserve();
6557 return 0;
}

/*
 * percpu_pagelist_high_fraction - changes the pcp->high limit for each zone on
 * each CPU.  It is the fraction of a zone's pages that a hot per-cpu pagelist
 * may hold before it gets flushed back to the buddy allocator.  Writing 0
 * restores the default heuristic; non-zero values below
 * MIN_PERCPU_PAGELIST_HIGH_FRACTION are rejected.
 */
static int percpu_pagelist_high_fraction_sysctl_handler(const struct ctl_table *table,
		int write, void *buffer, size_t *length, loff_t *ppos)
6567{
6568 struct zone *zone;
6569 int old_percpu_pagelist_high_fraction;
6570 int ret;
6571
6572 mutex_lock(&pcp_batch_high_lock);
6573 old_percpu_pagelist_high_fraction = percpu_pagelist_high_fraction;
6574
6575 ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
6576 if (!write || ret < 0)
6577 goto out;
6578
6579
6580 if (percpu_pagelist_high_fraction &&
6581 percpu_pagelist_high_fraction < MIN_PERCPU_PAGELIST_HIGH_FRACTION) {
6582 percpu_pagelist_high_fraction = old_percpu_pagelist_high_fraction;
6583 ret = -EINVAL;
6584 goto out;
6585 }
6586
6587
6588 if (percpu_pagelist_high_fraction == old_percpu_pagelist_high_fraction)
6589 goto out;
6590
6591 for_each_populated_zone(zone)
6592 zone_set_pageset_high_and_batch(zone, 0);
6593out:
6594 mutex_unlock(&pcp_batch_high_lock);
6595 return ret;
6596}
6597
6598static const struct ctl_table page_alloc_sysctl_table[] = {
6599 {
6600 .procname = "min_free_kbytes",
6601 .data = &min_free_kbytes,
6602 .maxlen = sizeof(min_free_kbytes),
6603 .mode = 0644,
6604 .proc_handler = min_free_kbytes_sysctl_handler,
6605 .extra1 = SYSCTL_ZERO,
6606 },
6607 {
6608 .procname = "watermark_boost_factor",
6609 .data = &watermark_boost_factor,
6610 .maxlen = sizeof(watermark_boost_factor),
6611 .mode = 0644,
6612 .proc_handler = proc_dointvec_minmax,
6613 .extra1 = SYSCTL_ZERO,
6614 },
6615 {
6616 .procname = "watermark_scale_factor",
6617 .data = &watermark_scale_factor,
6618 .maxlen = sizeof(watermark_scale_factor),
6619 .mode = 0644,
6620 .proc_handler = watermark_scale_factor_sysctl_handler,
6621 .extra1 = SYSCTL_ONE,
6622 .extra2 = SYSCTL_THREE_THOUSAND,
6623 },
6624 {
6625 .procname = "defrag_mode",
6626 .data = &defrag_mode,
6627 .maxlen = sizeof(defrag_mode),
6628 .mode = 0644,
6629 .proc_handler = proc_dointvec_minmax,
6630 .extra1 = SYSCTL_ZERO,
6631 .extra2 = SYSCTL_ONE,
6632 },
6633 {
6634 .procname = "percpu_pagelist_high_fraction",
6635 .data = &percpu_pagelist_high_fraction,
6636 .maxlen = sizeof(percpu_pagelist_high_fraction),
6637 .mode = 0644,
6638 .proc_handler = percpu_pagelist_high_fraction_sysctl_handler,
6639 .extra1 = SYSCTL_ZERO,
6640 },
6641 {
6642 .procname = "lowmem_reserve_ratio",
6643 .data = &sysctl_lowmem_reserve_ratio,
6644 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
6645 .mode = 0644,
6646 .proc_handler = lowmem_reserve_ratio_sysctl_handler,
6647 },
6648#ifdef CONFIG_NUMA
6649 {
6650 .procname = "numa_zonelist_order",
6651 .data = &numa_zonelist_order,
6652 .maxlen = NUMA_ZONELIST_ORDER_LEN,
6653 .mode = 0644,
6654 .proc_handler = numa_zonelist_order_handler,
6655 },
6656 {
6657 .procname = "min_unmapped_ratio",
6658 .data = &sysctl_min_unmapped_ratio,
6659 .maxlen = sizeof(sysctl_min_unmapped_ratio),
6660 .mode = 0644,
6661 .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
6662 .extra1 = SYSCTL_ZERO,
6663 .extra2 = SYSCTL_ONE_HUNDRED,
6664 },
6665 {
6666 .procname = "min_slab_ratio",
6667 .data = &sysctl_min_slab_ratio,
6668 .maxlen = sizeof(sysctl_min_slab_ratio),
6669 .mode = 0644,
6670 .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
6671 .extra1 = SYSCTL_ZERO,
6672 .extra2 = SYSCTL_ONE_HUNDRED,
6673 },
6674#endif
6675};
6676
6677void __init page_alloc_sysctl_init(void)
6678{
6679 register_sysctl_init("vm", page_alloc_sysctl_table);
6680}
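/*
 * Illustrative usage (not part of this file): the table above is exposed under
 * /proc/sys/vm/, so the knobs can be inspected or tuned with e.g.
 *
 *	sysctl vm.min_free_kbytes
 *	sysctl -w vm.watermark_scale_factor=100
 *	echo 8 > /proc/sys/vm/percpu_pagelist_high_fraction
 *
 * Writes go through the handlers registered above, which recompute the zone
 * watermarks, lowmem reserves or per-cpu pageset limits as appropriate.
 */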
6681
6682#ifdef CONFIG_CONTIG_ALLOC
6683
6684static void alloc_contig_dump_pages(struct list_head *page_list)
6685{
6686 DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, "migrate failure");
6687
6688 if (DYNAMIC_DEBUG_BRANCH(descriptor)) {
6689 struct page *page;
6690
6691 dump_stack();
6692 list_for_each_entry(page, page_list, lru)
6693 dump_page(page, "migration failure");
6694 }
6695}
6696
6697
6698static int __alloc_contig_migrate_range(struct compact_control *cc,
6699 unsigned long start, unsigned long end)
6700{
6701
6702 unsigned int nr_reclaimed;
6703 unsigned long pfn = start;
6704 unsigned int tries = 0;
6705 int ret = 0;
6706 struct migration_target_control mtc = {
6707 .nid = zone_to_nid(cc->zone),
6708 .gfp_mask = cc->gfp_mask,
6709 .reason = MR_CONTIG_RANGE,
6710 };
6711
6712 lru_cache_disable();
6713
6714 while (pfn < end || !list_empty(&cc->migratepages)) {
6715 if (fatal_signal_pending(current)) {
6716 ret = -EINTR;
6717 break;
6718 }
6719
6720 if (list_empty(&cc->migratepages)) {
6721 cc->nr_migratepages = 0;
6722 ret = isolate_migratepages_range(cc, pfn, end);
6723 if (ret && ret != -EAGAIN)
6724 break;
6725 pfn = cc->migrate_pfn;
6726 tries = 0;
6727 } else if (++tries == 5) {
6728 ret = -EBUSY;
6729 break;
6730 }
6731
6732 nr_reclaimed = reclaim_clean_pages_from_list(cc->zone,
6733 &cc->migratepages);
6734 cc->nr_migratepages -= nr_reclaimed;
6735
6736 ret = migrate_pages(&cc->migratepages, alloc_migration_target,
6737 NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL);
6738
6739
6740
6741
6742
6743 if (ret == -ENOMEM)
6744 break;
6745 }
6746
6747 lru_cache_enable();
6748 if (ret < 0) {
6749 if (!(cc->gfp_mask & __GFP_NOWARN) && ret == -EBUSY)
6750 alloc_contig_dump_pages(&cc->migratepages);
6751 putback_movable_pages(&cc->migratepages);
6752 }
6753
6754 return (ret < 0) ? ret : 0;
6755}
6756
6757static void split_free_pages(struct list_head *list, gfp_t gfp_mask)
6758{
6759 int order;
6760
6761 for (order = 0; order < NR_PAGE_ORDERS; order++) {
6762 struct page *page, *next;
6763 int nr_pages = 1 << order;
6764
6765 list_for_each_entry_safe(page, next, &list[order], lru) {
6766 int i;
6767
6768 post_alloc_hook(page, order, gfp_mask);
6769 set_page_refcounted(page);
6770 if (!order)
6771 continue;
6772
6773 split_page(page, order);
6774
6775
6776 list_del(&page->lru);
6777 for (i = 0; i < nr_pages; i++)
6778 list_add_tail(&page[i].lru, &list[0]);
6779 }
6780 }
6781}
6782
6783static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask)
6784{
6785 const gfp_t reclaim_mask = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
6786 const gfp_t action_mask = __GFP_COMP | __GFP_RETRY_MAYFAIL | __GFP_NOWARN |
6787 __GFP_ZERO | __GFP_ZEROTAGS | __GFP_SKIP_ZERO;
6788 const gfp_t cc_action_mask = __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
6789
6790
6791
6792
6793
6794 gfp_mask &= ~(GFP_ZONEMASK | __GFP_RECLAIMABLE | __GFP_WRITE |
6795 __GFP_HARDWALL | __GFP_THISNODE | __GFP_MOVABLE);
6796
6797
6798
6799
6800
6801 if (gfp_mask & ~(reclaim_mask | action_mask))
6802 return -EINVAL;
6803
6804
6805
6806
6807
6808
6809
6810
6811
6812 *gfp_cc_mask = (gfp_mask & (reclaim_mask | cc_action_mask)) |
6813 __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
6814 return 0;
}

/**
 * alloc_contig_range() -- tries to allocate given range of pages
 * @start:	start PFN to allocate
 * @end:	one-past-the-last PFN to allocate
 * @alloc_flags:	allocation information
 * @gfp_mask:	GFP mask.  Node/zone/placement hints are ignored; only some
 *		action and reclaim modifiers are supported, and they control
 *		behaviour during compaction/migration/reclaim.
 *
 * The PFN range does not have to be pageblock aligned.  The PFN range must
 * belong to a single zone.
 *
 * The first thing this routine does is attempt to MIGRATE_ISOLATE all
 * pageblocks in the range.  Once isolated, the pageblocks should not
 * be modified by others.
 *
 * Return: zero on success or negative error code.  On success all
 * pages which PFN is in [start, end) are allocated for the caller and
 * need to be freed with free_contig_range().
 */
int alloc_contig_range_noprof(unsigned long start, unsigned long end,
			      acr_flags_t alloc_flags, gfp_t gfp_mask)
6839{
6840 unsigned long outer_start, outer_end;
6841 int ret = 0;
6842
6843 struct compact_control cc = {
6844 .nr_migratepages = 0,
6845 .order = -1,
6846 .zone = page_zone(pfn_to_page(start)),
6847 .mode = MIGRATE_SYNC,
6848 .ignore_skip_hint = true,
6849 .no_set_skip_hint = true,
6850 .alloc_contig = true,
6851 };
6852 INIT_LIST_HEAD(&cc.migratepages);
6853 enum pb_isolate_mode mode = (alloc_flags & ACR_FLAGS_CMA) ?
6854 PB_ISOLATE_MODE_CMA_ALLOC :
6855 PB_ISOLATE_MODE_OTHER;
6856
6857 gfp_mask = current_gfp_context(gfp_mask);
6858 if (__alloc_contig_verify_gfp_mask(gfp_mask, (gfp_t *)&cc.gfp_mask))
6859 return -EINVAL;
6860
6861
6862
6863
6864
6865
6866
6867
6868
6869
6870
6871
6872
6873
6874
6875
6876
6877
6878
6879
6880
6881
6882 ret = start_isolate_page_range(start, end, mode);
6883 if (ret)
6884 goto done;
6885
6886 drain_all_pages(cc.zone);
6887
6888
6889
6890
6891
6892
6893
6894
6895
6896
6897
6898 ret = __alloc_contig_migrate_range(&cc, start, end);
6899 if (ret && ret != -EBUSY)
6900 goto done;
6901
6902
6903
6904
6905
6906
6907
6908
6909 ret = replace_free_hugepage_folios(start, end);
6910 if (ret)
6911 goto done;
6912
6913
6914
6915
6916
6917
6918
6919
6920
6921
6922
6923
6924
6925
6926
6927
6928
6929 outer_start = find_large_buddy(start);
6930
6931
6932 if (test_pages_isolated(outer_start, end, mode)) {
6933 ret = -EBUSY;
6934 goto done;
6935 }
6936
6937
6938 outer_end = isolate_freepages_range(&cc, outer_start, end);
6939 if (!outer_end) {
6940 ret = -EBUSY;
6941 goto done;
6942 }
6943
6944 if (!(gfp_mask & __GFP_COMP)) {
6945 split_free_pages(cc.freepages, gfp_mask);
6946
6947
6948 if (start != outer_start)
6949 free_contig_range(outer_start, start - outer_start);
6950 if (end != outer_end)
6951 free_contig_range(end, outer_end - end);
6952 } else if (start == outer_start && end == outer_end && is_power_of_2(end - start)) {
6953 struct page *head = pfn_to_page(start);
6954 int order = ilog2(end - start);
6955
6956 check_new_pages(head, order);
6957 prep_new_page(head, order, gfp_mask, 0);
6958 set_page_refcounted(head);
6959 } else {
6960 ret = -EINVAL;
6961 WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, %lu)\n",
6962 start, end, outer_start, outer_end);
6963 }
6964done:
6965 undo_isolate_page_range(start, end);
6966 return ret;
6967}
6968EXPORT_SYMBOL(alloc_contig_range_noprof);
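/*
 * Illustrative usage (not part of this file): a caller that already knows a
 * suitable PFN range (error handling elided):
 *
 *	int ret = alloc_contig_range(start_pfn, start_pfn + 512,
 *				     ACR_FLAGS_NONE, GFP_KERNEL);
 *	if (!ret)
 *		free_contig_range(start_pfn, 512);
 *
 * CMA uses the ACR_FLAGS_CMA variant of this path; most other users go
 * through alloc_contig_pages() below, which searches the zonelists for a
 * range that can be isolated first.
 */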
6969
6970static int __alloc_contig_pages(unsigned long start_pfn,
6971 unsigned long nr_pages, gfp_t gfp_mask)
6972{
6973 unsigned long end_pfn = start_pfn + nr_pages;
6974
6975 return alloc_contig_range_noprof(start_pfn, end_pfn, ACR_FLAGS_NONE,
6976 gfp_mask);
6977}
6978
6979static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
6980 unsigned long nr_pages)
6981{
6982 unsigned long i, end_pfn = start_pfn + nr_pages;
6983 struct page *page;
6984
6985 for (i = start_pfn; i < end_pfn; i++) {
6986 page = pfn_to_online_page(i);
6987 if (!page)
6988 return false;
6989
6990 if (page_zone(page) != z)
6991 return false;
6992
6993 if (PageReserved(page))
6994 return false;
6995
6996 if (PageHuge(page))
6997 return false;
6998 }
6999 return true;
7000}
7001
7002static bool zone_spans_last_pfn(const struct zone *zone,
7003 unsigned long start_pfn, unsigned long nr_pages)
7004{
7005 unsigned long last_pfn = start_pfn + nr_pages - 1;
7006
7007 return zone_spans_pfn(zone, last_pfn);
}

/**
 * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
 * @nr_pages:	Number of contiguous pages to allocate
 * @gfp_mask:	GFP mask.  Node/zone/placement hints limit the search; only
 *		some action and reclaim modifiers are supported.
 * @nid:	Target node
 * @nodemask:	Mask for other possible nodes
 *
 * This routine is a wrapper around alloc_contig_range().  It scans over zones
 * on an applicable zonelist to find a contiguous pfn range which can then be
 * tried for allocation with alloc_contig_range().  This routine is intended
 * for allocation requests which can not be fulfilled with the buddy allocator.
 *
 * The allocated memory is always aligned to a page boundary.  If nr_pages is
 * a power of two, the allocated range is also guaranteed to be aligned to
 * nr_pages (e.g. a 1GB request would be aligned to 1GB).
 *
 * Allocated pages can be freed with free_contig_range() or by manually
 * calling __free_page() on each allocated page.
 *
 * Return: pointer to contiguous pages on success, or NULL if not successful.
 */
struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
				       int nid, nodemask_t *nodemask)
7035{
7036 unsigned long ret, pfn, flags;
7037 struct zonelist *zonelist;
7038 struct zone *zone;
7039 struct zoneref *z;
7040
7041 zonelist = node_zonelist(nid, gfp_mask);
7042 for_each_zone_zonelist_nodemask(zone, z, zonelist,
7043 gfp_zone(gfp_mask), nodemask) {
7044 spin_lock_irqsave(&zone->lock, flags);
7045
7046 pfn = ALIGN(zone->zone_start_pfn, nr_pages);
7047 while (zone_spans_last_pfn(zone, pfn, nr_pages)) {
7048 if (pfn_range_valid_contig(zone, pfn, nr_pages)) {
7049
7050
7051
7052
7053
7054
7055
7056 spin_unlock_irqrestore(&zone->lock, flags);
7057 ret = __alloc_contig_pages(pfn, nr_pages,
7058 gfp_mask);
7059 if (!ret)
7060 return pfn_to_page(pfn);
7061 spin_lock_irqsave(&zone->lock, flags);
7062 }
7063 pfn += nr_pages;
7064 }
7065 spin_unlock_irqrestore(&zone->lock, flags);
7066 }
7067 return NULL;
7068}
7069#endif
7070
7071void free_contig_range(unsigned long pfn, unsigned long nr_pages)
7072{
7073 unsigned long count = 0;
7074 struct folio *folio = pfn_folio(pfn);
7075
7076 if (folio_test_large(folio)) {
7077 int expected = folio_nr_pages(folio);
7078
7079 if (nr_pages == expected)
7080 folio_put(folio);
7081 else
7082 WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
7083 pfn, nr_pages, expected);
7084 return;
7085 }
7086
7087 for (; nr_pages--; pfn++) {
7088 struct page *page = pfn_to_page(pfn);
7089
7090 count += page_count(page) != 1;
7091 __free_page(page);
7092 }
7093 WARN(count != 0, "%lu pages are still in use!\n", count);
7094}
EXPORT_SYMBOL(free_contig_range);

/*
 * Effectively disable pcplists for the zone by setting the high limit to 0
 * and draining all CPUs.  A concurrent freeing on another CPU that is about
 * to put a page on the pcplist will either finish before the drain and be
 * drained, or observe the zero high limit and bypass the pcplist.
 *
 * Must be paired with a call to zone_pcp_enable(); pcp_batch_high_lock is
 * held in between.
 */
void zone_pcp_disable(struct zone *zone)
7106{
7107 mutex_lock(&pcp_batch_high_lock);
7108 __zone_set_pageset_high_and_batch(zone, 0, 0, 1);
7109 __drain_all_pages(zone, true);
7110}
7111
7112void zone_pcp_enable(struct zone *zone)
7113{
7114 __zone_set_pageset_high_and_batch(zone, zone->pageset_high_min,
7115 zone->pageset_high_max, zone->pageset_batch);
7116 mutex_unlock(&pcp_batch_high_lock);
7117}
7118
7119void zone_pcp_reset(struct zone *zone)
7120{
7121 int cpu;
7122 struct per_cpu_zonestat *pzstats;
7123
7124 if (zone->per_cpu_pageset != &boot_pageset) {
7125 for_each_online_cpu(cpu) {
7126 pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
7127 drain_zonestat(zone, pzstats);
7128 }
7129 free_percpu(zone->per_cpu_pageset);
7130 zone->per_cpu_pageset = &boot_pageset;
7131 if (zone->per_cpu_zonestats != &boot_zonestats) {
7132 free_percpu(zone->per_cpu_zonestats);
7133 zone->per_cpu_zonestats = &boot_zonestats;
7134 }
7135 }
7136}
7137
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
 * All pages in the range must belong to a single zone, must not contain
 * holes, and all free pages must already be isolated before calling this
 * function.
 *
 * Returns the number of pages in the range that were not already
 * PageOffline().
 */
unsigned long __offline_isolated_pages(unsigned long start_pfn,
				       unsigned long end_pfn)
7149{
7150 unsigned long already_offline = 0, flags;
7151 unsigned long pfn = start_pfn;
7152 struct page *page;
7153 struct zone *zone;
7154 unsigned int order;
7155
7156 offline_mem_sections(pfn, end_pfn);
7157 zone = page_zone(pfn_to_page(pfn));
7158 spin_lock_irqsave(&zone->lock, flags);
7159 while (pfn < end_pfn) {
7160 page = pfn_to_page(pfn);
7161
7162
7163
7164
7165 if (unlikely(!PageBuddy(page) && PageHWPoison(page))) {
7166 pfn++;
7167 continue;
7168 }
7169
7170
7171
7172
7173 if (PageOffline(page)) {
7174 BUG_ON(page_count(page));
7175 BUG_ON(PageBuddy(page));
7176 already_offline++;
7177 pfn++;
7178 continue;
7179 }
7180
7181 BUG_ON(page_count(page));
7182 BUG_ON(!PageBuddy(page));
7183 VM_WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE);
7184 order = buddy_order(page);
7185 del_page_from_free_list(page, zone, order, MIGRATE_ISOLATE);
7186 pfn += (1 << order);
7187 }
7188 spin_unlock_irqrestore(&zone->lock, flags);
7189
7190 return end_pfn - start_pfn - already_offline;
7191}
#endif	/* CONFIG_MEMORY_HOTREMOVE */

/*
 * This function returns a stable result only if called under zone lock.
 */
bool is_free_buddy_page(const struct page *page)
7198{
7199 unsigned long pfn = page_to_pfn(page);
7200 unsigned int order;
7201
7202 for (order = 0; order < NR_PAGE_ORDERS; order++) {
7203 const struct page *head = page - (pfn & ((1 << order) - 1));
7204
7205 if (PageBuddy(head) &&
7206 buddy_order_unsafe(head) >= order)
7207 break;
7208 }
7209
7210 return order <= MAX_PAGE_ORDER;
7211}
7212EXPORT_SYMBOL(is_free_buddy_page);
7213
7214#ifdef CONFIG_MEMORY_FAILURE
7215static inline void add_to_free_list(struct page *page, struct zone *zone,
7216 unsigned int order, int migratetype,
7217 bool tail)
7218{
7219 __add_to_free_list(page, zone, order, migratetype, tail);
7220 account_freepages(zone, 1 << order, migratetype);
}

/*
 * Break down a higher-order page in sub-pages, and keep our target out of
 * the buddy allocator.
 */
static void break_down_buddy_pages(struct zone *zone, struct page *page,
				   struct page *target, int low, int high,
				   int migratetype)
7230{
7231 unsigned long size = 1 << high;
7232 struct page *current_buddy;
7233
7234 while (high > low) {
7235 high--;
7236 size >>= 1;
7237
7238 if (target >= &page[size]) {
7239 current_buddy = page;
7240 page = page + size;
7241 } else {
7242 current_buddy = page + size;
7243 }
7244
7245 if (set_page_guard(zone, current_buddy, high))
7246 continue;
7247
7248 add_to_free_list(current_buddy, zone, high, migratetype, false);
7249 set_buddy_order(current_buddy, high);
7250 }
}

/*
 * Take a page that will be marked as poisoned off the buddy allocator.
 */
bool take_page_off_buddy(struct page *page)
7257{
7258 struct zone *zone = page_zone(page);
7259 unsigned long pfn = page_to_pfn(page);
7260 unsigned long flags;
7261 unsigned int order;
7262 bool ret = false;
7263
7264 spin_lock_irqsave(&zone->lock, flags);
7265 for (order = 0; order < NR_PAGE_ORDERS; order++) {
7266 struct page *page_head = page - (pfn & ((1 << order) - 1));
7267 int page_order = buddy_order(page_head);
7268
7269 if (PageBuddy(page_head) && page_order >= order) {
7270 unsigned long pfn_head = page_to_pfn(page_head);
7271 int migratetype = get_pfnblock_migratetype(page_head,
7272 pfn_head);
7273
7274 del_page_from_free_list(page_head, zone, page_order,
7275 migratetype);
7276 break_down_buddy_pages(zone, page_head, page, 0,
7277 page_order, migratetype);
7278 SetPageHWPoisonTakenOff(page);
7279 ret = true;
7280 break;
7281 }
7282 if (page_count(page_head) > 0)
7283 break;
7284 }
7285 spin_unlock_irqrestore(&zone->lock, flags);
7286 return ret;
7287}
}

/*
 * Cancel takeoff done by take_page_off_buddy().
 */
bool put_page_back_buddy(struct page *page)
7293{
7294 struct zone *zone = page_zone(page);
7295 unsigned long flags;
7296 bool ret = false;
7297
7298 spin_lock_irqsave(&zone->lock, flags);
7299 if (put_page_testzero(page)) {
7300 unsigned long pfn = page_to_pfn(page);
7301 int migratetype = get_pfnblock_migratetype(page, pfn);
7302
7303 ClearPageHWPoisonTakenOff(page);
7304 __free_one_page(page, pfn, zone, 0, migratetype, FPI_NONE);
7305 if (TestClearPageHWPoison(page)) {
7306 ret = true;
7307 }
7308 }
7309 spin_unlock_irqrestore(&zone->lock, flags);
7310
7311 return ret;
7312}
7313#endif
7314
7315#ifdef CONFIG_ZONE_DMA
7316bool has_managed_dma(void)
7317{
7318 struct pglist_data *pgdat;
7319
7320 for_each_online_pgdat(pgdat) {
7321 struct zone *zone = &pgdat->node_zones[ZONE_DMA];
7322
7323 if (managed_zone(zone))
7324 return true;
7325 }
7326 return false;
7327}
7328#endif
7329
7330#ifdef CONFIG_UNACCEPTED_MEMORY
7331
7332static bool lazy_accept = true;
7333
7334static int __init accept_memory_parse(char *p)
7335{
7336 if (!strcmp(p, "lazy")) {
7337 lazy_accept = true;
7338 return 0;
7339 } else if (!strcmp(p, "eager")) {
7340 lazy_accept = false;
7341 return 0;
7342 } else {
7343 return -EINVAL;
7344 }
7345}
7346early_param("accept_memory", accept_memory_parse);
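/*
 * Illustrative usage (not part of this file): guests with unaccepted memory
 * (e.g. TDX or SEV-SNP) can force up-front acceptance by booting with
 *
 *	accept_memory=eager
 *
 * while the default accept_memory=lazy keeps memory on the per-zone
 * unaccepted_pages list and accepts it on demand from cond_accept_memory().
 */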
7347
7348static bool page_contains_unaccepted(struct page *page, unsigned int order)
7349{
7350 phys_addr_t start = page_to_phys(page);
7351
7352 return range_contains_unaccepted_memory(start, PAGE_SIZE << order);
7353}
7354
7355static void __accept_page(struct zone *zone, unsigned long *flags,
7356 struct page *page)
7357{
7358 list_del(&page->lru);
7359 account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
7360 __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
7361 __ClearPageUnaccepted(page);
7362 spin_unlock_irqrestore(&zone->lock, *flags);
7363
7364 accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER);
7365
7366 __free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);
7367}
7368
7369void accept_page(struct page *page)
7370{
7371 struct zone *zone = page_zone(page);
7372 unsigned long flags;
7373
7374 spin_lock_irqsave(&zone->lock, flags);
7375 if (!PageUnaccepted(page)) {
7376 spin_unlock_irqrestore(&zone->lock, flags);
7377 return;
7378 }
7379
7380
7381 __accept_page(zone, &flags, page);
7382}
7383
7384static bool try_to_accept_memory_one(struct zone *zone)
7385{
7386 unsigned long flags;
7387 struct page *page;
7388
7389 spin_lock_irqsave(&zone->lock, flags);
7390 page = list_first_entry_or_null(&zone->unaccepted_pages,
7391 struct page, lru);
7392 if (!page) {
7393 spin_unlock_irqrestore(&zone->lock, flags);
7394 return false;
7395 }
7396
7397
7398 __accept_page(zone, &flags, page);
7399
7400 return true;
7401}
7402
7403static bool cond_accept_memory(struct zone *zone, unsigned int order,
7404 int alloc_flags)
7405{
7406 long to_accept, wmark;
7407 bool ret = false;
7408
7409 if (list_empty(&zone->unaccepted_pages))
7410 return false;
7411
7412
7413 if (alloc_flags & ALLOC_TRYLOCK)
7414 return false;
7415
7416 wmark = promo_wmark_pages(zone);
7417
7418
7419
7420
7421
7422
7423 if (!wmark)
7424 return try_to_accept_memory_one(zone);
7425
7426
7427 to_accept = wmark -
7428 (zone_page_state(zone, NR_FREE_PAGES) -
7429 __zone_watermark_unusable_free(zone, order, 0) -
7430 zone_page_state(zone, NR_UNACCEPTED));
7431
7432 while (to_accept > 0) {
7433 if (!try_to_accept_memory_one(zone))
7434 break;
7435 ret = true;
7436 to_accept -= MAX_ORDER_NR_PAGES;
7437 }
7438
7439 return ret;
7440}
7441
7442static bool __free_unaccepted(struct page *page)
7443{
7444 struct zone *zone = page_zone(page);
7445 unsigned long flags;
7446
7447 if (!lazy_accept)
7448 return false;
7449
7450 spin_lock_irqsave(&zone->lock, flags);
7451 list_add_tail(&page->lru, &zone->unaccepted_pages);
7452 account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
7453 __mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
7454 __SetPageUnaccepted(page);
7455 spin_unlock_irqrestore(&zone->lock, flags);
7456
7457 return true;
7458}
7459
7460#else
7461
7462static bool page_contains_unaccepted(struct page *page, unsigned int order)
7463{
7464 return false;
7465}
7466
7467static bool cond_accept_memory(struct zone *zone, unsigned int order,
7468 int alloc_flags)
7469{
7470 return false;
7471}
7472
7473static bool __free_unaccepted(struct page *page)
7474{
7475 BUILD_BUG();
7476 return false;
7477}
7478
#endif	/* CONFIG_UNACCEPTED_MEMORY */

/**
 * alloc_pages_nolock - opportunistic reentrant allocation from any context
 * @nid: node to allocate from
 * @order: allocation order size
 *
 * Allocates pages of a given order from the given node.  This is safe to
 * call from any context (from atomic, NMI, and also reentrant
 * allocator -> tracepoint -> alloc_pages_nolock_noprof).
 * Allocation is best effort and is expected to fail easily, so nobody should
 * rely on the success.  Failures are not reported via warn_alloc().
 * See the always-fail conditions below.
 *
 * Return: allocated page or NULL on failure.  NULL does not mean EBUSY or
 * EAGAIN; it means ENOMEM, so there is no reason to call it again and
 * expect !NULL.
 */
struct page *alloc_pages_nolock_noprof(int nid, unsigned int order)
7497{
7498
7499
7500
7501
7502
7503
7504
7505
7506
7507
7508
7509
7510
7511
7512
7513
7514
7515
7516
7517
7518 gfp_t alloc_gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC
7519 | __GFP_ACCOUNT;
7520 unsigned int alloc_flags = ALLOC_TRYLOCK;
7521 struct alloc_context ac = { };
7522 struct page *page;
7523
7524
7525
7526
7527
7528
7529
7530
7531
7532
7533
7534
7535 if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() || in_hardirq()))
7536 return NULL;
7537 if (!pcp_allowed_order(order))
7538 return NULL;
7539
7540
7541 if (deferred_pages_enabled())
7542 return NULL;
7543
7544 if (nid == NUMA_NO_NODE)
7545 nid = numa_node_id();
7546
7547 prepare_alloc_pages(alloc_gfp, order, nid, NULL, &ac,
7548 &alloc_gfp, &alloc_flags);
7549
7550
7551
7552
7553
7554 page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
7555
7556
7557
7558 if (page)
7559 set_page_refcounted(page);
7560
7561 if (memcg_kmem_online() && page &&
7562 unlikely(__memcg_kmem_charge_page(page, alloc_gfp, order) != 0)) {
7563 free_pages_nolock(page, order);
7564 page = NULL;
7565 }
7566 trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
7567 kmsan_alloc_page(page, order, alloc_gfp);
7568 return page;
7569}
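/*
 * Illustrative usage (not part of this file, wrapper name assumed to mirror
 * the _noprof variant above): an opportunistic allocation from a context that
 * must not spin on zone or pcp locks:
 *
 *	struct page *page = alloc_pages_nolock(NUMA_NO_NODE, 0);
 *
 *	if (page) {
 *		...
 *		free_pages_nolock(page, 0);
 *	}
 *
 * A NULL return should be treated as ENOMEM; retrying immediately is not
 * useful, so callers need their own fallback path.
 */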
7570