/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics: per-cpu and per-zone counters, VM event counters,
 *  and the /proc/vmstat, /proc/zoneinfo, /proc/buddyinfo and
 *  /proc/pagetypeinfo interfaces.
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/page_ext.h>
#include <linux/page_owner.h>

#include "internal.h"

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

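/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */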
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);

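/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * a foreign cpu's events.
 *
 * This is only done when a cpu is found to be offline and still
 * has some values to be cleaned up.
 */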
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}

#endif /* CONFIG_VM_EVENT_COUNTERS */

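/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */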
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

int calculate_pressure_threshold(struct zone *zone)
{
	int threshold;
	int watermark_distance;

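	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark.
	 */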
	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
	threshold = max(1, (int)(watermark_distance / num_online_cpus()));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

int calculate_normal_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

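	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 * fls() is used to have a cheap way of logarithmic scaling.
	 */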
	mem = zone->managed_pages >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

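/*
 * Refresh the thresholds for each zone.
 */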
void refresh_zone_stat_thresholds(void)
{
	struct zone *zone;
	int cpu;
	int threshold;

	for_each_populated_zone(zone) {
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_normal_threshold(zone);

		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * the min watermark could be breached by an allocation
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}

void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{
	struct zone *zone;
	int cpu;
	int threshold;
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		zone = &pgdat->node_zones[i];
		if (!zone->percpu_drift_mark)
			continue;

		threshold = (*calculate_pressure)(zone);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;
	}
}

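/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 */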
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			   long delta)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_zone_page_state);

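/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code. The increment or decrement is known and therefore
 * one boundary check can be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care. A counter cannot be incremented or decremented from
 * interrupt context; these functions do not disable interrupts.
 */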
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v - overstep, zone, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
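/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies the value overstepped by mode:
 *	-1	overstepping minus half of threshold
 *	 1	overstepping half of threshold
 */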
static inline void mod_state(struct zone *zone, enum zone_stat_item item,
			     long delta, int overstep_mode)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to zone counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a zone.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (n > t || n < -t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to zone counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		zone_page_state_add(z, zone, item);
}

void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	mod_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	mod_state(zone, item, 1, 1);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_state(page_zone(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_state(page_zone(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_zone_page_state);
#else
/*
 * Use interrupt disable to serialize counter updates
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);
#endif

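/*
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */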
static int fold_diff(int *diff)
{
	int i;
	int changes = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (diff[i]) {
			atomic_long_add(diff[i], &vm_stat[i]);
			changes++;
		}
	return changes;
}

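/*
 * Update the zone counters for the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 *
 * The function returns the number of global counters updated.
 */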
static int refresh_cpu_vm_stats(bool do_pagesets)
{
	struct zone *zone;
	int i;
	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
	int changes = 0;

	for_each_populated_zone(zone) {
		struct per_cpu_pageset __percpu *p = zone->pageset;

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
			if (v) {

				atomic_long_add(v, &zone->vm_stat[i]);
				global_diff[i] += v;
#ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
				__this_cpu_write(p->expire, 3);
#endif
			}
		}
#ifdef CONFIG_NUMA
		if (do_pagesets) {
			cond_resched();
			/*
			 * Deal with draining the remote pageset of this
			 * processor
			 *
			 * Check if there are pages remaining in this pageset
			 * if not then there is nothing to expire.
			 */
			if (!__this_cpu_read(p->expire) ||
			       !__this_cpu_read(p->pcp.count))
				continue;

			/*
			 * We never drain zones local to this processor.
			 */
			if (zone_to_nid(zone) == numa_node_id()) {
				__this_cpu_write(p->expire, 0);
				continue;
			}

			if (__this_cpu_dec_return(p->expire))
				continue;

			if (__this_cpu_read(p->pcp.count)) {
				drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
				changes++;
			}
		}
#endif
	}
	changes += fold_diff(global_diff);
	return changes;
}

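/*
 * Fold the data for an offline cpu into the global array.
 * There cannot be any access by the offline cpu and therefore
 * synchronization is simplified.
 */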
void cpu_vm_stats_fold(int cpu)
{
	struct zone *zone;
	int i;
	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p;

		p = per_cpu_ptr(zone->pageset, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (p->vm_stat_diff[i]) {
				int v;

				v = p->vm_stat_diff[i];
				p->vm_stat_diff[i] = 0;
				atomic_long_add(v, &zone->vm_stat[i]);
				global_diff[i] += v;
			}
	}

	fold_diff(global_diff);
}

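/*
 * This is only called if !populated_zone(zone), which implies no other users of
 * pset->vm_stat_diff[] exist.
 */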
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
{
	int i;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (pset->vm_stat_diff[i]) {
			int v = pset->vm_stat_diff[i];
			pset->vm_stat_diff[i] = 0;
			atomic_long_add(v, &zone->vm_stat[i]);
			atomic_long_add(v, &vm_stat[i]);
		}
}
#endif

#ifdef CONFIG_NUMA
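/*
 * Determine the per node value of a stat item by summing the
 * per zone counters of all zones on the node.
 */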
unsigned long node_page_state(int node, enum zone_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	int i;
	unsigned long count = 0;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_page_state(zones + i, item);

	return count;
}

#endif

#ifdef CONFIG_COMPACTION

struct contig_page_info {
	unsigned long free_pages;
	unsigned long free_blocks_total;
	unsigned long free_blocks_suitable;
};

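/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * done in a separate patch series if required.
 */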
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{
	unsigned int order;

	info->free_pages = 0;
	info->free_blocks_total = 0;
	info->free_blocks_suitable = 0;

	for (order = 0; order < MAX_ORDER; order++) {
		unsigned long blocks;

		/* Count number of free blocks */
		blocks = zone->free_area[order].nr_free;
		info->free_blocks_total += blocks;

		/* Count free base pages */
		info->free_pages += blocks << order;

		/* Count the suitable free blocks */
		if (order >= suitable_order)
			info->free_blocks_suitable += blocks <<
						(order - suitable_order);
	}
}

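/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used
 */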
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

	if (!info->free_blocks_total)
		return 0;

	/* Fragmentation index only makes sense when a request would fail */
	if (info->free_blocks_suitable)
		return -1000;

	/*
	 * Index is between 0 and 1 so return within 3 decimal places
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
	return 1000 - div_u64((1000 + (div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
}

/* Same as __fragmentation_index but allocs contig_page_info on stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",

const char * const vmstat_text[] = {
	/* enum zone_stat_item counters */
	"nr_free_pages",
	"nr_alloc_batch",
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_mlock",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_page_table_pages",
	"nr_kernel_stack",
	"nr_unstable",
	"nr_bounce",
	"nr_vmscan_write",
	"nr_vmscan_immediate_reclaim",
	"nr_writeback_temp",
	"nr_isolated_anon",
	"nr_isolated_file",
	"nr_shmem",
	"nr_dirtied",
	"nr_written",
	"nr_pages_scanned",

#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif
	"workingset_refault",
	"workingset_activate",
	"workingset_nodereclaim",
	"nr_anon_transparent_hugepages",
	"nr_free_cma",

	/* enum writeback_stat_item counters */
	"nr_dirty_threshold",
	"nr_dirty_background_threshold",

#ifdef CONFIG_VM_EVENT_COUNTERS
	/* enum vm_event_item counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",
	"pglazyfreed",

	TEXTS_FOR_ZONES("pgrefill")
	TEXTS_FOR_ZONES("pgsteal_kswapd")
	TEXTS_FOR_ZONES("pgsteal_direct")
	TEXTS_FOR_ZONES("pgscan_kswapd")
	TEXTS_FOR_ZONES("pgscan_direct")
	"pgscan_direct_throttle",

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"pageoutrun",
	"allocstall",

	"pgrotated",

	"drop_pagecache",
	"drop_slab",

#ifdef CONFIG_NUMA_BALANCING
	"numa_pte_updates",
	"numa_huge_pte_updates",
	"numa_hint_faults",
	"numa_hint_faults_local",
	"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
	"pgmigrate_success",
	"pgmigrate_fail",
#endif
#ifdef CONFIG_COMPACTION
	"compact_migrate_scanned",
	"compact_free_scanned",
	"compact_isolated",
	"compact_stall",
	"compact_fail",
	"compact_success",
	"compact_daemon_wake",
#endif

#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	"thp_fault_alloc",
	"thp_fault_fallback",
	"thp_collapse_alloc",
	"thp_collapse_alloc_failed",
	"thp_split_page",
	"thp_split_page_failed",
	"thp_deferred_split_page",
	"thp_split_pmd",
	"thp_zero_page_alloc",
	"thp_zero_page_alloc_failed",
#endif
#ifdef CONFIG_MEMORY_BALLOON
	"balloon_inflate",
	"balloon_deflate",
#ifdef CONFIG_BALLOON_COMPACTION
	"balloon_migrate",
#endif
#endif /* CONFIG_MEMORY_BALLOON */
#ifdef CONFIG_DEBUG_TLBFLUSH
#ifdef CONFIG_SMP
	"nr_tlb_remote_flush",
	"nr_tlb_remote_flush_received",
#endif /* CONFIG_SMP */
	"nr_tlb_local_flush_all",
	"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */

#ifdef CONFIG_DEBUG_VM_VMACACHE
	"vmacache_find_calls",
	"vmacache_find_hits",
	"vmacache_full_flushes",
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS */
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */


#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
     defined(CONFIG_PROC_FS)
static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;

	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

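/* Walk all the zones in a node and print using a callback */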
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		print(m, pgdat, zone);
		spin_unlock_irqrestore(&zone->lock, flags);
	}
}
#endif

#ifdef CONFIG_PROC_FS
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
						struct zone *zone)
{
	int order;

	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
	seq_putc(m, '\n');
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, frag_show_print);
	return 0;
}

static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int order, mtype;

	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
					pgdat->node_id,
					zone->name,
					migratetype_names[mtype]);
		for (order = 0; order < MAX_ORDER; ++order) {
			unsigned long freecount = 0;
			struct free_area *area;
			struct list_head *curr;

			area = &(zone->free_area[order]);

			list_for_each(curr, &area->free_list[mtype])
				freecount++;
			seq_printf(m, "%6lu ", freecount);
		}
		seq_putc(m, '\n');
	}
}

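/* Print out the free pages at each order for each migratetype */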
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	int order;
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* Print header */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6d ", order);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);

	return 0;
}

static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mtype;
	unsigned long pfn;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		if (!pfn_valid(pfn))
			continue;

		page = pfn_to_page(pfn);

		/* Watch for unexpected holes punched in the memmap */
		if (!memmap_valid_within(pfn, page, zone))
			continue;

		if (page_zone(page) != zone)
			continue;

		mtype = get_pageblock_migratetype(page);

		if (mtype < MIGRATE_TYPES)
			count[mtype]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12lu ", count[mtype]);
	seq_putc(m, '\n');
}

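/* Print out the number of pageblocks for each migratetype */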
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	int mtype;
	pg_data_t *pgdat = (pg_data_t *)arg;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');
	walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);

	return 0;
}

#ifdef CONFIG_PAGE_OWNER
static void pagetypeinfo_showmixedcount_print(struct seq_file *m,
							pg_data_t *pgdat,
							struct zone *zone)
{
	struct page *page;
	struct page_ext *page_ext;
	unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
	unsigned long end_pfn = pfn + zone->spanned_pages;
	unsigned long count[MIGRATE_TYPES] = { 0, };
	int pageblock_mt, page_mt;
	int i;

	/* Scan block by block. First and last block may be incomplete */
	pfn = zone->zone_start_pfn;

	/*
	 * Walk the zone in pageblock_nr_pages steps. If a page block spans
	 * a zone boundary, it will be double counted between zones. This does
	 * not matter as the mixed block count will still be correct
	 */
	for (; pfn < end_pfn; ) {
		if (!pfn_valid(pfn)) {
			pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
			continue;
		}

		block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
		block_end_pfn = min(block_end_pfn, end_pfn);

		page = pfn_to_page(pfn);
		pageblock_mt = get_pageblock_migratetype(page);

		for (; pfn < block_end_pfn; pfn++) {
			if (!pfn_valid_within(pfn))
				continue;

			page = pfn_to_page(pfn);

			if (page_zone(page) != zone)
				continue;

			if (PageBuddy(page)) {
				pfn += (1UL << page_order(page)) - 1;
				continue;
			}

			if (PageReserved(page))
				continue;

			page_ext = lookup_page_ext(page);
			if (unlikely(!page_ext))
				continue;

			if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
				continue;

			page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
			if (pageblock_mt != page_mt) {
				if (is_migrate_cma(pageblock_mt))
					count[MIGRATE_MOVABLE]++;
				else
					count[pageblock_mt]++;

				pfn = block_end_pfn;
				break;
			}
			pfn += (1UL << page_ext->order) - 1;
		}
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (i = 0; i < MIGRATE_TYPES; i++)
		seq_printf(m, "%12lu ", count[i]);
	seq_putc(m, '\n');
}
#endif /* CONFIG_PAGE_OWNER */

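/*
 * Print out the number of pageblocks for each migratetype that contain pages
 * of other types. This gives an indication of how well fallbacks are being
 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
 * to determine what is going on
 */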
static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
{
#ifdef CONFIG_PAGE_OWNER
	int mtype;

	if (!static_branch_unlikely(&page_owner_inited))
		return;

	drain_all_pages(NULL);

	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, pagetypeinfo_showmixedcount_print);
#endif /* CONFIG_PAGE_OWNER */
}

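/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */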
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	seq_printf(m, "Page block order: %d\n", pageblock_order);
	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
	seq_putc(m, '\n');
	pagetypeinfo_showfree(m, pgdat);
	pagetypeinfo_showblockcount(m, pgdat);
	pagetypeinfo_showmixedcount(m, pgdat);

	return 0;
}

static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};

static int fragmentation_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &fragmentation_op);
}

static const struct file_operations fragmentation_file_operations = {
	.open		= fragmentation_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &pagetypeinfo_op);
}

static const struct file_operations pagetypeinfo_file_ops = {
	.open		= pagetypeinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
							struct zone *zone)
{
	int i;
	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
	seq_printf(m,
		   "\n  pages free     %lu"
		   "\n        min      %lu"
		   "\n        low      %lu"
		   "\n        high     %lu"
		   "\n        scanned  %lu"
		   "\n        spanned  %lu"
		   "\n        present  %lu"
		   "\n        managed  %lu",
		   zone_page_state(zone, NR_FREE_PAGES),
		   min_wmark_pages(zone),
		   low_wmark_pages(zone),
		   high_wmark_pages(zone),
		   zone_page_state(zone, NR_PAGES_SCANNED),
		   zone->spanned_pages,
		   zone->present_pages,
		   zone->managed_pages);

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
				zone_page_state(zone, i));

	seq_printf(m,
		   "\n        protection: (%ld",
		   zone->lowmem_reserve[0]);
	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
	seq_printf(m,
		   ")"
		   "\n  pagesets");
	for_each_online_cpu(i) {
		struct per_cpu_pageset *pageset;

		pageset = per_cpu_ptr(zone->pageset, i);
		seq_printf(m,
			   "\n    cpu: %i"
			   "\n              count: %i"
			   "\n              high:  %i"
			   "\n              batch: %i",
			   i,
			   pageset->pcp.count,
			   pageset->pcp.high,
			   pageset->pcp.batch);
#ifdef CONFIG_SMP
		seq_printf(m, "\n  vm stats threshold: %d",
				pageset->stat_threshold);
#endif
	}
	seq_printf(m,
		   "\n  all_unreclaimable: %u"
		   "\n  start_pfn:         %lu"
		   "\n  inactive_ratio:    %u",
		   !zone_reclaimable(zone),
		   zone->zone_start_pfn,
		   zone->inactive_ratio);
	seq_putc(m, '\n');
}

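/*
 * Output information about zones in @pgdat.
 */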
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, zoneinfo_show_print);
	return 0;
}

static const struct seq_operations zoneinfo_op = {
	.start	= frag_start, /* iterate over all zones. The same as in
			       * fragmentation. */
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};

static int zoneinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &zoneinfo_op);
}

static const struct file_operations proc_zoneinfo_file_operations = {
	.open		= zoneinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

enum writeback_stat_item {
	NR_DIRTY_THRESHOLD,
	NR_DIRTY_BG_THRESHOLD,
	NR_VM_WRITEBACK_STAT_ITEMS,
};

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
	int i, stat_items_size;

	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);

#ifdef CONFIG_VM_EVENT_COUNTERS
	stat_items_size += sizeof(struct vm_event_state);
#endif

	v = kmalloc(stat_items_size, GFP_KERNEL);
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_page_state(i);
	v += NR_VM_ZONE_STAT_ITEMS;

	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
			    v + NR_DIRTY_THRESHOLD);
	v += NR_VM_WRITEBACK_STAT_ITEMS;

#ifdef CONFIG_VM_EVENT_COUNTERS
	all_vm_events(v);
	v[PGPGIN] /= 2;		/* sectors -> kbytes */
	v[PGPGOUT] /= 2;
#endif
	return (unsigned long *)m->private + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};

static int vmstat_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &vmstat_op);
}

static const struct file_operations proc_vmstat_file_operations = {
	.open		= vmstat_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static struct workqueue_struct *vmstat_wq;
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

#ifdef CONFIG_PROC_FS
static void refresh_vm_stats(struct work_struct *work)
{
	refresh_cpu_vm_stats(true);
}

int vmstat_refresh(struct ctl_table *table, int write,
		   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	long val;
	int err;
	int i;

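	/*
	 * The regular update, every sysctl_stat_interval, may come later
	 * than expected: leaving a significant amount in per_cpu buckets.
	 * /proc/sys/vm/stat_refresh can be used to update the stats just
	 * before reading them.
	 *
	 * Since global_page_state() etc. are so careful to hide transiently
	 * negative values, report an error here if any of the stats is
	 * negative, so we know to go looking for imbalance.
	 */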
	err = schedule_on_each_cpu(refresh_vm_stats);
	if (err)
		return err;
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
		val = atomic_long_read(&vm_stat[i]);
		if (val < 0) {
			switch (i) {
			case NR_ALLOC_BATCH:
			case NR_PAGES_SCANNED:
				/*
				 * These are often seen to go negative in
				 * recent kernels, but never go permanently
				 * negative.  Whilst it would be nicer not to
				 * have exceptions at all, it seems wrong to
				 * complain about them being briefly negative.
				 */
				break;
			default:
				pr_warn("%s: %s %ld\n",
					__func__, vmstat_text[i], val);
				err = -EINVAL;
				break;
			}
		}
	}
	if (err)
		return err;
	if (write)
		*ppos += *lenp;
	else
		*lenp = 0;
	return 0;
}
#endif /* CONFIG_PROC_FS */

static void vmstat_update(struct work_struct *w)
{
	if (refresh_cpu_vm_stats(true)) {
		/*
		 * Counters were updated so we expect more updates
		 * to occur in the future. Keep on running the
		 * update worker thread.
		 */
		queue_delayed_work_on(smp_processor_id(), vmstat_wq,
			this_cpu_ptr(&vmstat_work),
			round_jiffies_relative(sysctl_stat_interval));
	}
}

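/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */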
static bool need_update(int cpu)
{
	struct zone *zone;

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);

		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
		/*
		 * The fast way of checking if there are any vmstat diffs.
		 * This works because the diffs are byte sized items.
		 */
		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
			return true;
	}
	return false;
}

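/*
 * Switch off vmstat processing and then fold all the remaining differentials
 * until the diffs stay at zero. The function is used by NOHZ and can only be
 * invoked when tick processing is not active.
 */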
void quiet_vmstat(void)
{
	if (system_state != SYSTEM_RUNNING)
		return;

	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
		return;

	if (!need_update(smp_processor_id()))
		return;

	/*
	 * Just refresh counters and do not care about the pending delayed
	 * vmstat_update. It doesn't fire that often to matter and canceling
	 * it would be too expensive from this path.
	 * vmstat_shepherd will take care about that for us.
	 */
	refresh_cpu_vm_stats(false);
}

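/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for vm statistics updates disabled because of
 * inactivity.
 */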
static void vmstat_shepherd(struct work_struct *w);

static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);

static void vmstat_shepherd(struct work_struct *w)
{
	int cpu;

	get_online_cpus();
	/* Check processors whose vmstat worker threads have been disabled */
	for_each_online_cpu(cpu) {
		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);

		if (!delayed_work_pending(dw) && need_update(cpu))
			queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
	}
	put_online_cpus();

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

static void __init start_shepherd_timer(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
			vmstat_update);

	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

static void vmstat_cpu_dead(int node)
{
	int cpu;

	get_online_cpus();
	for_each_online_cpu(cpu)
		if (cpu_to_node(cpu) == node)
			goto end;

	node_clear_state(node, N_CPU);
end:
	put_online_cpus();
}

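/*
 * Use the cpu notifier to insure that the thresholds are recalculated
 * when necessary.
 */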
static int vmstat_cpuup_callback(struct notifier_block *nfb,
		unsigned long action,
		void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		refresh_zone_stat_thresholds();
		node_set_state(cpu_to_node(cpu), N_CPU);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		refresh_zone_stat_thresholds();
		vmstat_cpu_dead(cpu_to_node(cpu));
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block vmstat_notifier =
	{ &vmstat_cpuup_callback, NULL, 0 };
#endif

static int __init setup_vmstat(void)
{
#ifdef CONFIG_SMP
	cpu_notifier_register_begin();
	__register_cpu_notifier(&vmstat_notifier);

	start_shepherd_timer();
	cpu_notifier_register_done();
#endif
#ifdef CONFIG_PROC_FS
	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
	proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
	proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
	proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
#endif
	return 0;
}
module_init(setup_vmstat)

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)

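/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */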
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{
	/* No free memory is interpreted as all free memory is unusable */
	if (info->free_pages == 0)
		return 1000;

	/*
	 * Index should be a value between 0 and 1. Return a value to 3
	 * decimal places.
	 *
	 * 0 => no fragmentation
	 * 1 => high fragmentation
	 */
	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
}

static void unusable_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = unusable_free_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

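/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */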
static int unusable_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	walk_zones_in_node(m, pgdat, unusable_show_print);

	return 0;
}

static const struct seq_operations unusable_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= unusable_show,
};

static int unusable_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &unusable_op);
}

static const struct file_operations unusable_file_ops = {
	.open		= unusable_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static void extfrag_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;

	/* Alloc on stack as interrupts are disabled for zone walk */
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = __fragmentation_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

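/*
 * Display fragmentation index for orders that allocations would fail for
 */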
static int extfrag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, extfrag_show_print);

	return 0;
}

static const struct seq_operations extfrag_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= extfrag_show,
};

static int extfrag_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &extfrag_op);
}

static const struct file_operations extfrag_file_ops = {
	.open		= extfrag_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init extfrag_debug_init(void)
{
	struct dentry *extfrag_debug_root;

	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
	if (!extfrag_debug_root)
		return -ENOMEM;

	if (!debugfs_create_file("unusable_index", 0444,
			extfrag_debug_root, NULL, &unusable_file_ops))
		goto fail;

	if (!debugfs_create_file("extfrag_index", 0444,
			extfrag_debug_root, NULL, &extfrag_file_ops))
		goto fail;

	return 0;
fail:
	debugfs_remove_recursive(extfrag_debug_root);
	return -ENOMEM;
}

module_init(extfrag_debug_init);
#endif