1
2
3
4
5
6
7
8
9
10
11
12#include <linux/fs.h>
13#include <linux/mm.h>
14#include <linux/err.h>
15#include <linux/module.h>
16#include <linux/slab.h>
17#include <linux/cpu.h>
18#include <linux/cpumask.h>
19#include <linux/vmstat.h>
20#include <linux/sched.h>
21#include <linux/math64.h>
22#include <linux/writeback.h>
23#include <linux/compaction.h>
24#include <linux/mm_inline.h>
25#include <linux/page_ext.h>
26#include <linux/page_owner.h>
27
28#include "internal.h"
29
30#ifdef CONFIG_VM_EVENT_COUNTERS
/*
 * Per-CPU VM event counters. Each CPU has its own vm_event_state;
 * sum_vm_events() adds the per-CPU arrays together on demand.
 */
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);
33
34static void sum_vm_events(unsigned long *ret)
35{
36 int cpu;
37 int i;
38
39 memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
40
41 for_each_online_cpu(cpu) {
42 struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
43
44 for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
45 ret[i] += this->event[i];
46 }
47}
48
49
50
51
52
53
/*
 * Fill @ret (NR_VM_EVENT_ITEMS entries) with the event totals summed over
 * all online CPUs. The summation is bracketed by get_online_cpus() /
 * put_online_cpus().
 */
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);
61
62
63
64
65
66
67
68void vm_events_fold_cpu(int cpu)
69{
70 struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
71 int i;
72
73 for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
74 count_vm_events(i, fold_state->event[i]);
75 fold_state->event[i] = 0;
76 }
77}
78
79#endif
80
81
82
83
84
85
/*
 * Global page-state counters, one atomic per zone stat item. The fold
 * helpers in this file (fold_diff(), drain_zonestat()) add per-cpu
 * deltas into this array.
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_stat);
88
89#ifdef CONFIG_SMP
90
/*
 * Compute the per-cpu stat threshold used for @zone under pressure:
 * the gap between the low and min watermarks divided among the online
 * CPUs, bounded to the range [1, 125].
 */
int calculate_pressure_threshold(struct zone *zone)
{
	int wmark_gap = low_wmark_pages(zone) - min_wmark_pages(zone);
	int per_cpu = (int)(wmark_gap / num_online_cpus());

	/* Never go below one page of slack per CPU. */
	if (per_cpu < 1)
		per_cpu = 1;

	/* Upper bound; presumably keeps the s8 differentials in range. */
	if (per_cpu > 125)
		per_cpu = 125;

	return per_cpu;
}
114
115int calculate_normal_threshold(struct zone *zone)
116{
117 int threshold;
118 int mem;
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150 mem = zone->managed_pages >> (27 - PAGE_SHIFT);
151
152 threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
153
154
155
156
157 threshold = min(125, threshold);
158
159 return threshold;
160}
161
162
163
164
165void refresh_zone_stat_thresholds(void)
166{
167 struct zone *zone;
168 int cpu;
169 int threshold;
170
171 for_each_populated_zone(zone) {
172 unsigned long max_drift, tolerate_drift;
173
174 threshold = calculate_normal_threshold(zone);
175
176 for_each_online_cpu(cpu)
177 per_cpu_ptr(zone->pageset, cpu)->stat_threshold
178 = threshold;
179
180
181
182
183
184
185 tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
186 max_drift = num_online_cpus() * threshold;
187 if (max_drift > tolerate_drift)
188 zone->percpu_drift_mark = high_wmark_pages(zone) +
189 max_drift;
190 }
191}
192
193void set_pgdat_percpu_threshold(pg_data_t *pgdat,
194 int (*calculate_pressure)(struct zone *))
195{
196 struct zone *zone;
197 int cpu;
198 int threshold;
199 int i;
200
201 for (i = 0; i < pgdat->nr_zones; i++) {
202 zone = &pgdat->node_zones[i];
203 if (!zone->percpu_drift_mark)
204 continue;
205
206 threshold = (*calculate_pressure)(zone);
207 for_each_online_cpu(cpu)
208 per_cpu_ptr(zone->pageset, cpu)->stat_threshold
209 = threshold;
210 }
211}
212
213
214
215
216
217
218void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
219 int delta)
220{
221 struct per_cpu_pageset __percpu *pcp = zone->pageset;
222 s8 __percpu *p = pcp->vm_stat_diff + item;
223 long x;
224 long t;
225
226 x = delta + __this_cpu_read(*p);
227
228 t = __this_cpu_read(pcp->stat_threshold);
229
230 if (unlikely(x > t || x < -t)) {
231 zone_page_state_add(x, zone, item);
232 x = 0;
233 }
234 __this_cpu_write(*p, x);
235}
236EXPORT_SYMBOL(__mod_zone_page_state);
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
262{
263 struct per_cpu_pageset __percpu *pcp = zone->pageset;
264 s8 __percpu *p = pcp->vm_stat_diff + item;
265 s8 v, t;
266
267 v = __this_cpu_inc_return(*p);
268 t = __this_cpu_read(pcp->stat_threshold);
269 if (unlikely(v > t)) {
270 s8 overstep = t >> 1;
271
272 zone_page_state_add(v + overstep, zone, item);
273 __this_cpu_write(*p, -overstep);
274 }
275}
276
/* Increment @item for the zone that @page belongs to. */
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);
282
283void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
284{
285 struct per_cpu_pageset __percpu *pcp = zone->pageset;
286 s8 __percpu *p = pcp->vm_stat_diff + item;
287 s8 v, t;
288
289 v = __this_cpu_dec_return(*p);
290 t = __this_cpu_read(pcp->stat_threshold);
291 if (unlikely(v < - t)) {
292 s8 overstep = t >> 1;
293
294 zone_page_state_add(v - overstep, zone, item);
295 __this_cpu_write(*p, overstep);
296 }
297}
298
/* Decrement @item for the zone that @page belongs to. */
void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);
304
305#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
306
307
308
309
310
311
312
313
314
315
316
317
318static inline void mod_state(struct zone *zone,
319 enum zone_stat_item item, int delta, int overstep_mode)
320{
321 struct per_cpu_pageset __percpu *pcp = zone->pageset;
322 s8 __percpu *p = pcp->vm_stat_diff + item;
323 long o, n, t, z;
324
325 do {
326 z = 0;
327
328
329
330
331
332
333
334
335
336
337
338 t = this_cpu_read(pcp->stat_threshold);
339
340 o = this_cpu_read(*p);
341 n = delta + o;
342
343 if (n > t || n < -t) {
344 int os = overstep_mode * (t >> 1) ;
345
346
347 z = n + os;
348 n = -os;
349 }
350 } while (this_cpu_cmpxchg(*p, o, n) != o);
351
352 if (z)
353 zone_page_state_add(z, zone, item);
354}
355
/* cmpxchg-based variant: add @delta to @item in @zone with no overstep. */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
					int delta)
{
	mod_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);
362
/* cmpxchg-based variant: increment @item in @zone with positive overstep. */
void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	mod_state(zone, item, 1, 1);
}
367
/* cmpxchg-based variant: increment @item for the zone containing @page. */
void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_state(page_zone(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_zone_page_state);
373
/* cmpxchg-based variant: decrement @item for the zone containing @page. */
void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_state(page_zone(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_zone_page_state);
379#else
380
381
382
/*
 * Fallback when CONFIG_HAVE_CMPXCHG_LOCAL is not set: run
 * __mod_zone_page_state() with local interrupts disabled.
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
					int delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);
393
/* Fallback variant: run __inc_zone_state() with local interrupts disabled. */
void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
402
403void inc_zone_page_state(struct page *page, enum zone_stat_item item)
404{
405 unsigned long flags;
406 struct zone *zone;
407
408 zone = page_zone(page);
409 local_irq_save(flags);
410 __inc_zone_state(zone, item);
411 local_irq_restore(flags);
412}
413EXPORT_SYMBOL(inc_zone_page_state);
414
415void dec_zone_page_state(struct page *page, enum zone_stat_item item)
416{
417 unsigned long flags;
418
419 local_irq_save(flags);
420 __dec_zone_page_state(page, item);
421 local_irq_restore(flags);
422}
423EXPORT_SYMBOL(dec_zone_page_state);
424#endif
425
426
427
428
429
430
431static int fold_diff(int *diff)
432{
433 int i;
434 int changes = 0;
435
436 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
437 if (diff[i]) {
438 atomic_long_add(diff[i], &vm_stat[i]);
439 changes++;
440 }
441 return changes;
442}
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
/*
 * Fold the current CPU's per-cpu stat differentials into the per-zone
 * and global counters.
 *
 * With CONFIG_NUMA each pageset also carries an expire countdown: it is
 * re-armed to 3 whenever a differential was folded, and once it counts
 * down to zero for a zone on another node the per-cpu page list of that
 * zone is drained.
 *
 * Returns the number of changes made: drained pagesets plus the global
 * counters updated by fold_diff().
 */
static int refresh_cpu_vm_stats(void)
{
	struct zone *zone;
	int i;
	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
	int changes = 0;

	for_each_populated_zone(zone) {
		struct per_cpu_pageset __percpu *p = zone->pageset;

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
			int v;

			/* Take the pending delta and reset it in one step. */
			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
			if (v) {

				atomic_long_add(v, &zone->vm_stat[i]);
				global_diff[i] += v;
#ifdef CONFIG_NUMA
				/* Activity seen: re-arm the drain countdown. */
				__this_cpu_write(p->expire, 3);
#endif
			}
		}
		cond_resched();
#ifdef CONFIG_NUMA
		/*
		 * Nothing to drain if the countdown is not armed or the
		 * per-cpu list is empty.
		 */
		if (!__this_cpu_read(p->expire) ||
			       !__this_cpu_read(p->pcp.count))
			continue;

		/* Zones on the local node are not drained; disarm. */
		if (zone_to_nid(zone) == numa_node_id()) {
			__this_cpu_write(p->expire, 0);
			continue;
		}

		/* Countdown has not reached zero yet. */
		if (__this_cpu_dec_return(p->expire))
			continue;

		if (__this_cpu_read(p->pcp.count)) {
			drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
			changes++;
		}
#endif
	}
	changes += fold_diff(global_diff);
	return changes;
}
517
518
519
520
521
522
523void cpu_vm_stats_fold(int cpu)
524{
525 struct zone *zone;
526 int i;
527 int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
528
529 for_each_populated_zone(zone) {
530 struct per_cpu_pageset *p;
531
532 p = per_cpu_ptr(zone->pageset, cpu);
533
534 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
535 if (p->vm_stat_diff[i]) {
536 int v;
537
538 v = p->vm_stat_diff[i];
539 p->vm_stat_diff[i] = 0;
540 atomic_long_add(v, &zone->vm_stat[i]);
541 global_diff[i] += v;
542 }
543 }
544
545 fold_diff(global_diff);
546}
547
548
549
550
551
552void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
553{
554 int i;
555
556 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
557 if (pset->vm_stat_diff[i]) {
558 int v = pset->vm_stat_diff[i];
559 pset->vm_stat_diff[i] = 0;
560 atomic_long_add(v, &zone->vm_stat[i]);
561 atomic_long_add(v, &vm_stat[i]);
562 }
563}
564#endif
565
566#ifdef CONFIG_NUMA
567
568
569
570
571
572
573
574
575
576
577void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags)
578{
579 if (z->zone_pgdat == preferred_zone->zone_pgdat) {
580 __inc_zone_state(z, NUMA_HIT);
581 } else {
582 __inc_zone_state(z, NUMA_MISS);
583 __inc_zone_state(preferred_zone, NUMA_FOREIGN);
584 }
585 if (z->node == ((flags & __GFP_OTHER_NODE) ?
586 preferred_zone->node : numa_node_id()))
587 __inc_zone_state(z, NUMA_LOCAL);
588 else
589 __inc_zone_state(z, NUMA_OTHER);
590}
591#endif
592
593#ifdef CONFIG_COMPACTION
594
/* Free-list summary of one zone, filled in by fill_contig_page_info(). */
struct contig_page_info {
	unsigned long free_pages;		/* total free pages over all orders */
	unsigned long free_blocks_total;	/* total free blocks over all orders */
	unsigned long free_blocks_suitable;	/* blocks of the requested order obtainable */
};
600
601
602
603
604
605
606
607
608
609static void fill_contig_page_info(struct zone *zone,
610 unsigned int suitable_order,
611 struct contig_page_info *info)
612{
613 unsigned int order;
614
615 info->free_pages = 0;
616 info->free_blocks_total = 0;
617 info->free_blocks_suitable = 0;
618
619 for (order = 0; order < MAX_ORDER; order++) {
620 unsigned long blocks;
621
622
623 blocks = zone->free_area[order].nr_free;
624 info->free_blocks_total += blocks;
625
626
627 info->free_pages += blocks << order;
628
629
630 if (order >= suitable_order)
631 info->free_blocks_suitable += blocks <<
632 (order - suitable_order);
633 }
634}
635
636
637
638
639
640
641
642
643static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
644{
645 unsigned long requested = 1UL << order;
646
647 if (!info->free_blocks_total)
648 return 0;
649
650
651 if (info->free_blocks_suitable)
652 return -1000;
653
654
655
656
657
658
659
660 return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
661}
662
663
/*
 * Gather the free-list summary of @zone and return the fragmentation
 * index (see __fragmentation_index()) for an allocation of @order.
 */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
671#endif
672
673#if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
674#include <linux/proc_fs.h>
675#include <linux/seq_file.h>
676
/*
 * Printable names of the migrate types, indexed by migrate type number.
 * Used for the column and row labels of the pagetypeinfo output.
 */
static char * const migratetype_names[MIGRATE_TYPES] = {
	"Unmovable",
	"Reclaimable",
	"Movable",
	"Reserve",
#ifdef CONFIG_CMA
	"CMA",
#endif
#ifdef CONFIG_MEMORY_ISOLATION
	"Isolate",
#endif
};
689
690static void *frag_start(struct seq_file *m, loff_t *pos)
691{
692 pg_data_t *pgdat;
693 loff_t node = *pos;
694 for (pgdat = first_online_pgdat();
695 pgdat && node;
696 pgdat = next_online_pgdat(pgdat))
697 --node;
698
699 return pgdat;
700}
701
/* seq_file ->next: advance *@pos and return the next online node. */
static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}
709
/* seq_file ->stop: the node iterator holds no state, nothing to release. */
static void frag_stop(struct seq_file *m, void *arg)
{
}
713
714
715static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
716 void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
717{
718 struct zone *zone;
719 struct zone *node_zones = pgdat->node_zones;
720 unsigned long flags;
721
722 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
723 if (!populated_zone(zone))
724 continue;
725
726 spin_lock_irqsave(&zone->lock, flags);
727 print(m, pgdat, zone);
728 spin_unlock_irqrestore(&zone->lock, flags);
729 }
730}
731#endif
732
733#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
/*
 * Helpers for building vmstat_text[]: each TEXT_FOR_<zone>(xx) expands to
 * the string xx "_<zone>" followed by a comma when that zone type is
 * configured, and to nothing otherwise.
 */
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

/* One name per zone type; "_normal" and "_movable" are always present. */
#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",
754
/*
 * Names of the values exported through /proc/vmstat, in the order in
 * which vmstat_start() lays out its buffer: zone stat items first, then
 * the writeback thresholds, then (if configured) the VM event counters.
 * The zone stat part is also used for the per-zone /proc/zoneinfo output.
 */
const char * const vmstat_text[] = {
	/* Zone stat items (indexed like the zone page-state counters) */
	"nr_free_pages",
	"nr_alloc_batch",
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_mlock",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_page_table_pages",
	"nr_kernel_stack",
	"nr_unstable",
	"nr_bounce",
	"nr_vmscan_write",
	"nr_vmscan_immediate_reclaim",
	"nr_writeback_temp",
	"nr_isolated_anon",
	"nr_isolated_file",
	"nr_shmem",
	"nr_dirtied",
	"nr_written",
	"nr_pages_scanned",

#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif
	"workingset_refault",
	"workingset_activate",
	"workingset_nodereclaim",
	"nr_anon_transparent_hugepages",
	"nr_free_cma",

	/* Writeback thresholds (see enum writeback_stat_item) */
	"nr_dirty_threshold",
	"nr_dirty_background_threshold",

#ifdef CONFIG_VM_EVENT_COUNTERS
	/* VM event counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	TEXTS_FOR_ZONES("pgrefill")
	TEXTS_FOR_ZONES("pgsteal_kswapd")
	TEXTS_FOR_ZONES("pgsteal_direct")
	TEXTS_FOR_ZONES("pgscan_kswapd")
	TEXTS_FOR_ZONES("pgscan_direct")
	"pgscan_direct_throttle",

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"pageoutrun",
	"allocstall",

	"pgrotated",

	"drop_pagecache",
	"drop_slab",

#ifdef CONFIG_NUMA_BALANCING
	"numa_pte_updates",
	"numa_huge_pte_updates",
	"numa_hint_faults",
	"numa_hint_faults_local",
	"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
	"pgmigrate_success",
	"pgmigrate_fail",
#endif
#ifdef CONFIG_COMPACTION
	"compact_migrate_scanned",
	"compact_free_scanned",
	"compact_isolated",
	"compact_stall",
	"compact_fail",
	"compact_success",
#endif

#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	"thp_fault_alloc",
	"thp_fault_fallback",
	"thp_collapse_alloc",
	"thp_collapse_alloc_failed",
	"thp_split",
	"thp_zero_page_alloc",
	"thp_zero_page_alloc_failed",
#endif
#ifdef CONFIG_MEMORY_BALLOON
	"balloon_inflate",
	"balloon_deflate",
#ifdef CONFIG_BALLOON_COMPACTION
	"balloon_migrate",
#endif
#endif /* CONFIG_MEMORY_BALLOON */
#ifdef CONFIG_DEBUG_TLBFLUSH
#ifdef CONFIG_SMP
	"nr_tlb_remote_flush",
	"nr_tlb_remote_flush_received",
#endif /* CONFIG_SMP */
	"nr_tlb_local_flush_all",
	"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */

#ifdef CONFIG_DEBUG_VM_VMACACHE
	"vmacache_find_calls",
	"vmacache_find_hits",
	"vmacache_full_flushes",
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS */
};
907#endif
908
909
910#ifdef CONFIG_PROC_FS
911static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
912 struct zone *zone)
913{
914 int order;
915
916 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
917 for (order = 0; order < MAX_ORDER; ++order)
918 seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
919 seq_putc(m, '\n');
920}
921
922
923
924
/* seq_file ->show for /proc/buddyinfo: print every populated zone of the node. */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, frag_show_print);
	return 0;
}
931
932static void pagetypeinfo_showfree_print(struct seq_file *m,
933 pg_data_t *pgdat, struct zone *zone)
934{
935 int order, mtype;
936
937 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
938 seq_printf(m, "Node %4d, zone %8s, type %12s ",
939 pgdat->node_id,
940 zone->name,
941 migratetype_names[mtype]);
942 for (order = 0; order < MAX_ORDER; ++order) {
943 unsigned long freecount = 0;
944 struct free_area *area;
945 struct list_head *curr;
946
947 area = &(zone->free_area[order]);
948
949 list_for_each(curr, &area->free_list[mtype])
950 freecount++;
951 seq_printf(m, "%6lu ", freecount);
952 }
953 seq_putc(m, '\n');
954 }
955}
956
957
958static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
959{
960 int order;
961 pg_data_t *pgdat = (pg_data_t *)arg;
962
963
964 seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
965 for (order = 0; order < MAX_ORDER; ++order)
966 seq_printf(m, "%6d ", order);
967 seq_putc(m, '\n');
968
969 walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);
970
971 return 0;
972}
973
974static void pagetypeinfo_showblockcount_print(struct seq_file *m,
975 pg_data_t *pgdat, struct zone *zone)
976{
977 int mtype;
978 unsigned long pfn;
979 unsigned long start_pfn = zone->zone_start_pfn;
980 unsigned long end_pfn = zone_end_pfn(zone);
981 unsigned long count[MIGRATE_TYPES] = { 0, };
982
983 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
984 struct page *page;
985
986 if (!pfn_valid(pfn))
987 continue;
988
989 page = pfn_to_page(pfn);
990
991
992 if (!memmap_valid_within(pfn, page, zone))
993 continue;
994
995 mtype = get_pageblock_migratetype(page);
996
997 if (mtype < MIGRATE_TYPES)
998 count[mtype]++;
999 }
1000
1001
1002 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1003 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1004 seq_printf(m, "%12lu ", count[mtype]);
1005 seq_putc(m, '\n');
1006}
1007
1008
1009static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1010{
1011 int mtype;
1012 pg_data_t *pgdat = (pg_data_t *)arg;
1013
1014 seq_printf(m, "\n%-23s", "Number of blocks type ");
1015 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1016 seq_printf(m, "%12s ", migratetype_names[mtype]);
1017 seq_putc(m, '\n');
1018 walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);
1019
1020 return 0;
1021}
1022
1023#ifdef CONFIG_PAGE_OWNER
1024static void pagetypeinfo_showmixedcount_print(struct seq_file *m,
1025 pg_data_t *pgdat,
1026 struct zone *zone)
1027{
1028 struct page *page;
1029 struct page_ext *page_ext;
1030 unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
1031 unsigned long end_pfn = pfn + zone->spanned_pages;
1032 unsigned long count[MIGRATE_TYPES] = { 0, };
1033 int pageblock_mt, page_mt;
1034 int i;
1035
1036
1037 pfn = zone->zone_start_pfn;
1038
1039
1040
1041
1042
1043
1044 for (; pfn < end_pfn; ) {
1045 if (!pfn_valid(pfn)) {
1046 pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
1047 continue;
1048 }
1049
1050 block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
1051 block_end_pfn = min(block_end_pfn, end_pfn);
1052
1053 page = pfn_to_page(pfn);
1054 pageblock_mt = get_pfnblock_migratetype(page, pfn);
1055
1056 for (; pfn < block_end_pfn; pfn++) {
1057 if (!pfn_valid_within(pfn))
1058 continue;
1059
1060 page = pfn_to_page(pfn);
1061 if (PageBuddy(page)) {
1062 pfn += (1UL << page_order(page)) - 1;
1063 continue;
1064 }
1065
1066 if (PageReserved(page))
1067 continue;
1068
1069 page_ext = lookup_page_ext(page);
1070
1071 if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
1072 continue;
1073
1074 page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
1075 if (pageblock_mt != page_mt) {
1076 if (is_migrate_cma(pageblock_mt))
1077 count[MIGRATE_MOVABLE]++;
1078 else
1079 count[pageblock_mt]++;
1080
1081 pfn = block_end_pfn;
1082 break;
1083 }
1084 pfn += (1UL << page_ext->order) - 1;
1085 }
1086 }
1087
1088
1089 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1090 for (i = 0; i < MIGRATE_TYPES; i++)
1091 seq_printf(m, "%12lu ", count[i]);
1092 seq_putc(m, '\n');
1093}
1094#endif
1095
1096
1097
1098
1099
1100
1101
1102static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
1103{
1104#ifdef CONFIG_PAGE_OWNER
1105 int mtype;
1106
1107 if (!page_owner_inited)
1108 return;
1109
1110 drain_all_pages(NULL);
1111
1112 seq_printf(m, "\n%-23s", "Number of mixed blocks ");
1113 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1114 seq_printf(m, "%12s ", migratetype_names[mtype]);
1115 seq_putc(m, '\n');
1116
1117 walk_zones_in_node(m, pgdat, pagetypeinfo_showmixedcount_print);
1118#endif
1119}
1120
1121
1122
1123
1124
1125static int pagetypeinfo_show(struct seq_file *m, void *arg)
1126{
1127 pg_data_t *pgdat = (pg_data_t *)arg;
1128
1129
1130 if (!node_state(pgdat->node_id, N_MEMORY))
1131 return 0;
1132
1133 seq_printf(m, "Page block order: %d\n", pageblock_order);
1134 seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
1135 seq_putc(m, '\n');
1136 pagetypeinfo_showfree(m, pgdat);
1137 pagetypeinfo_showblockcount(m, pgdat);
1138 pagetypeinfo_showmixedcount(m, pgdat);
1139
1140 return 0;
1141}
1142
/* seq_file iterator behind /proc/buddyinfo: one record per online node. */
static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};

/* Open handler: attach the buddyinfo iterator to @file. */
static int fragmentation_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &fragmentation_op);
}

/* File operations registered for /proc/buddyinfo in setup_vmstat(). */
static const struct file_operations fragmentation_file_operations = {
	.open		= fragmentation_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1161
/* seq_file iterator behind /proc/pagetypeinfo: one record per online node. */
static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

/* Open handler: attach the pagetypeinfo iterator to @file. */
static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &pagetypeinfo_op);
}

/* File operations registered for /proc/pagetypeinfo in setup_vmstat(). */
static const struct file_operations pagetypeinfo_file_ops = {
	.open		= pagetypeinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1180
1181static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1182 struct zone *zone)
1183{
1184 int i;
1185 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1186 seq_printf(m,
1187 "\n pages free %lu"
1188 "\n min %lu"
1189 "\n low %lu"
1190 "\n high %lu"
1191 "\n scanned %lu"
1192 "\n spanned %lu"
1193 "\n present %lu"
1194 "\n managed %lu",
1195 zone_page_state(zone, NR_FREE_PAGES),
1196 min_wmark_pages(zone),
1197 low_wmark_pages(zone),
1198 high_wmark_pages(zone),
1199 zone_page_state(zone, NR_PAGES_SCANNED),
1200 zone->spanned_pages,
1201 zone->present_pages,
1202 zone->managed_pages);
1203
1204 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1205 seq_printf(m, "\n %-12s %lu", vmstat_text[i],
1206 zone_page_state(zone, i));
1207
1208 seq_printf(m,
1209 "\n protection: (%ld",
1210 zone->lowmem_reserve[0]);
1211 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
1212 seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
1213 seq_printf(m,
1214 ")"
1215 "\n pagesets");
1216 for_each_online_cpu(i) {
1217 struct per_cpu_pageset *pageset;
1218
1219 pageset = per_cpu_ptr(zone->pageset, i);
1220 seq_printf(m,
1221 "\n cpu: %i"
1222 "\n count: %i"
1223 "\n high: %i"
1224 "\n batch: %i",
1225 i,
1226 pageset->pcp.count,
1227 pageset->pcp.high,
1228 pageset->pcp.batch);
1229#ifdef CONFIG_SMP
1230 seq_printf(m, "\n vm stats threshold: %d",
1231 pageset->stat_threshold);
1232#endif
1233 }
1234 seq_printf(m,
1235 "\n all_unreclaimable: %u"
1236 "\n start_pfn: %lu"
1237 "\n inactive_ratio: %u",
1238 !zone_reclaimable(zone),
1239 zone->zone_start_pfn,
1240 zone->inactive_ratio);
1241 seq_putc(m, '\n');
1242}
1243
1244
1245
1246
/* seq_file ->show for /proc/zoneinfo: print every populated zone of the node. */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, zoneinfo_show_print);
	return 0;
}
1253
/* seq_file iterator behind /proc/zoneinfo; reuses the node iterator helpers. */
static const struct seq_operations zoneinfo_op = {
	.start	= frag_start,

	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};

/* Open handler: attach the zoneinfo iterator to @file. */
static int zoneinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &zoneinfo_op);
}

/* File operations registered for /proc/zoneinfo in setup_vmstat(). */
static const struct file_operations proc_zoneinfo_file_operations = {
	.open		= zoneinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1273
/*
 * Slots of the writeback section of the /proc/vmstat buffer, which sits
 * right after the zone stat items (see vmstat_start()).
 */
enum writeback_stat_item {
	NR_DIRTY_THRESHOLD,
	NR_DIRTY_BG_THRESHOLD,
	NR_VM_WRITEBACK_STAT_ITEMS,	/* number of slots, not a real item */
};
1279
1280static void *vmstat_start(struct seq_file *m, loff_t *pos)
1281{
1282 unsigned long *v;
1283 int i, stat_items_size;
1284
1285 if (*pos >= ARRAY_SIZE(vmstat_text))
1286 return NULL;
1287 stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
1288 NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
1289
1290#ifdef CONFIG_VM_EVENT_COUNTERS
1291 stat_items_size += sizeof(struct vm_event_state);
1292#endif
1293
1294 v = kmalloc(stat_items_size, GFP_KERNEL);
1295 m->private = v;
1296 if (!v)
1297 return ERR_PTR(-ENOMEM);
1298 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1299 v[i] = global_page_state(i);
1300 v += NR_VM_ZONE_STAT_ITEMS;
1301
1302 global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
1303 v + NR_DIRTY_THRESHOLD);
1304 v += NR_VM_WRITEBACK_STAT_ITEMS;
1305
1306#ifdef CONFIG_VM_EVENT_COUNTERS
1307 all_vm_events(v);
1308 v[PGPGIN] /= 2;
1309 v[PGPGOUT] /= 2;
1310#endif
1311 return (unsigned long *)m->private + *pos;
1312}
1313
1314static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1315{
1316 (*pos)++;
1317 if (*pos >= ARRAY_SIZE(vmstat_text))
1318 return NULL;
1319 return (unsigned long *)m->private + *pos;
1320}
1321
1322static int vmstat_show(struct seq_file *m, void *arg)
1323{
1324 unsigned long *l = arg;
1325 unsigned long off = l - (unsigned long *)m->private;
1326
1327 seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
1328 return 0;
1329}
1330
/*
 * seq_file ->stop for /proc/vmstat: free the snapshot allocated by
 * vmstat_start() and clear the pointer so it is not freed twice.
 */
static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}
1336
/* seq_file iterator behind /proc/vmstat: one record per vmstat_text[] entry. */
static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};

/* Open handler: attach the vmstat iterator to @file. */
static int vmstat_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &vmstat_op);
}

/* File operations registered for /proc/vmstat in setup_vmstat(). */
static const struct file_operations proc_vmstat_file_operations = {
	.open		= vmstat_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1355#endif
1356
1357#ifdef CONFIG_SMP
/* Per-CPU delayed work that runs vmstat_update() on its CPU. */
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
/* Interval, in jiffies, between stat updates and shepherd runs. */
int sysctl_stat_interval __read_mostly = HZ;
/*
 * CPUs whose vmstat_work is currently not scheduled: vmstat_update() sets
 * a CPU's bit when it stops re-arming itself, vmstat_shepherd() clears it
 * when it schedules the work again.
 */
static cpumask_var_t cpu_stat_off;
1361
1362static void vmstat_update(struct work_struct *w)
1363{
1364 if (refresh_cpu_vm_stats())
1365
1366
1367
1368
1369
1370 schedule_delayed_work(this_cpu_ptr(&vmstat_work),
1371 round_jiffies_relative(sysctl_stat_interval));
1372 else {
1373
1374
1375
1376
1377
1378
1379
1380 int r;
1381
1382
1383
1384
1385
1386
1387
1388 r = cpumask_test_and_set_cpu(smp_processor_id(),
1389 cpu_stat_off);
1390 VM_BUG_ON(r);
1391 }
1392}
1393
1394
1395
1396
1397
1398static bool need_update(int cpu)
1399{
1400 struct zone *zone;
1401
1402 for_each_populated_zone(zone) {
1403 struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
1404
1405 BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
1406
1407
1408
1409
1410 if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
1411 return true;
1412
1413 }
1414 return false;
1415}
1416
1417
1418
1419
1420
1421
1422
1423
/* Forward declaration: the work item below needs the handler's address. */
static void vmstat_shepherd(struct work_struct *w);

/* Single global delayed work that periodically runs vmstat_shepherd(). */
static DECLARE_DELAYED_WORK(shepherd, vmstat_shepherd);
1427
1428static void vmstat_shepherd(struct work_struct *w)
1429{
1430 int cpu;
1431
1432 get_online_cpus();
1433
1434 for_each_cpu(cpu, cpu_stat_off)
1435 if (need_update(cpu) &&
1436 cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
1437
1438 schedule_delayed_work_on(cpu, &per_cpu(vmstat_work, cpu),
1439 __round_jiffies_relative(sysctl_stat_interval, cpu));
1440
1441 put_online_cpus();
1442
1443 schedule_delayed_work(&shepherd,
1444 round_jiffies_relative(sysctl_stat_interval));
1445
1446}
1447
1448static void __init start_shepherd_timer(void)
1449{
1450 int cpu;
1451
1452 for_each_possible_cpu(cpu)
1453 INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
1454 vmstat_update);
1455
1456 if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
1457 BUG();
1458 cpumask_copy(cpu_stat_off, cpu_online_mask);
1459
1460 schedule_delayed_work(&shepherd,
1461 round_jiffies_relative(sysctl_stat_interval));
1462}
1463
1464static void vmstat_cpu_dead(int node)
1465{
1466 int cpu;
1467
1468 get_online_cpus();
1469 for_each_online_cpu(cpu)
1470 if (cpu_to_node(cpu) == node)
1471 goto end;
1472
1473 node_clear_state(node, N_CPU);
1474end:
1475 put_online_cpus();
1476}
1477
1478
1479
1480
1481
/*
 * CPU hotplug notifier. Keeps the stat thresholds, the N_CPU node state
 * and the cpu_stat_off mask in step with CPUs coming and going.
 *
 * @hcpu carries the CPU number. Always returns NOTIFY_OK.
 */
static int vmstat_cpuup_callback(struct notifier_block *nfb,
		unsigned long action,
		void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		/* Thresholds depend on the number of online CPUs. */
		refresh_zone_stat_thresholds();
		node_set_state(cpu_to_node(cpu), N_CPU);
		/* Park the new CPU; the shepherd starts its worker. */
		cpumask_set_cpu(cpu, cpu_stat_off);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		/* Stop the worker and keep the shepherd from restarting it. */
		cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
		cpumask_clear_cpu(cpu, cpu_stat_off);
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		/* Offlining was aborted: make the CPU eligible again. */
		cpumask_set_cpu(cpu, cpu_stat_off);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		refresh_zone_stat_thresholds();
		vmstat_cpu_dead(cpu_to_node(cpu));
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}
1514
1515static struct notifier_block vmstat_notifier =
1516 { &vmstat_cpuup_callback, NULL, 0 };
1517#endif
1518
/*
 * Init call for the vmstat subsystem.
 *
 * With CONFIG_SMP, registers the CPU hotplug notifier and starts the
 * shepherd; both happen inside one cpu_notifier_register_begin() /
 * cpu_notifier_register_done() section. With CONFIG_PROC_FS, creates the
 * buddyinfo, pagetypeinfo, vmstat and zoneinfo proc entries.
 *
 * Always returns 0; the results of proc_create() are not checked.
 */
static int __init setup_vmstat(void)
{
#ifdef CONFIG_SMP
	cpu_notifier_register_begin();
	__register_cpu_notifier(&vmstat_notifier);

	start_shepherd_timer();
	cpu_notifier_register_done();
#endif
#ifdef CONFIG_PROC_FS
	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
	proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
	proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
	proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
#endif
	return 0;
}
module_init(setup_vmstat)
1537
1538#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
1539#include <linux/debugfs.h>
1540
1541
1542
1543
1544
1545
1546static int unusable_free_index(unsigned int order,
1547 struct contig_page_info *info)
1548{
1549
1550 if (info->free_pages == 0)
1551 return 1000;
1552
1553
1554
1555
1556
1557
1558
1559
1560 return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
1561
1562}
1563
1564static void unusable_show_print(struct seq_file *m,
1565 pg_data_t *pgdat, struct zone *zone)
1566{
1567 unsigned int order;
1568 int index;
1569 struct contig_page_info info;
1570
1571 seq_printf(m, "Node %d, zone %8s ",
1572 pgdat->node_id,
1573 zone->name);
1574 for (order = 0; order < MAX_ORDER; ++order) {
1575 fill_contig_page_info(zone, order, &info);
1576 index = unusable_free_index(order, &info);
1577 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1578 }
1579
1580 seq_putc(m, '\n');
1581}
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592static int unusable_show(struct seq_file *m, void *arg)
1593{
1594 pg_data_t *pgdat = (pg_data_t *)arg;
1595
1596
1597 if (!node_state(pgdat->node_id, N_MEMORY))
1598 return 0;
1599
1600 walk_zones_in_node(m, pgdat, unusable_show_print);
1601
1602 return 0;
1603}
1604
/* seq_file iterator behind the unusable_index debugfs file. */
static const struct seq_operations unusable_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= unusable_show,
};

/* Open handler: attach the unusable_index iterator to @file. */
static int unusable_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &unusable_op);
}

/* File operations registered by extfrag_debug_init(). */
static const struct file_operations unusable_file_ops = {
	.open		= unusable_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1623
1624static void extfrag_show_print(struct seq_file *m,
1625 pg_data_t *pgdat, struct zone *zone)
1626{
1627 unsigned int order;
1628 int index;
1629
1630
1631 struct contig_page_info info;
1632
1633 seq_printf(m, "Node %d, zone %8s ",
1634 pgdat->node_id,
1635 zone->name);
1636 for (order = 0; order < MAX_ORDER; ++order) {
1637 fill_contig_page_info(zone, order, &info);
1638 index = __fragmentation_index(order, &info);
1639 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1640 }
1641
1642 seq_putc(m, '\n');
1643}
1644
1645
1646
1647
/*
 * seq_file ->show for the extfrag_index debugfs file: print every
 * populated zone of the node.
 */
static int extfrag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, extfrag_show_print);

	return 0;
}
1656
/* seq_file iterator behind the extfrag_index debugfs file. */
static const struct seq_operations extfrag_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= extfrag_show,
};

/* Open handler: attach the extfrag_index iterator to @file. */
static int extfrag_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &extfrag_op);
}

/* File operations registered by extfrag_debug_init(). */
static const struct file_operations extfrag_file_ops = {
	.open		= extfrag_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1675
1676static int __init extfrag_debug_init(void)
1677{
1678 struct dentry *extfrag_debug_root;
1679
1680 extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
1681 if (!extfrag_debug_root)
1682 return -ENOMEM;
1683
1684 if (!debugfs_create_file("unusable_index", 0444,
1685 extfrag_debug_root, NULL, &unusable_file_ops))
1686 goto fail;
1687
1688 if (!debugfs_create_file("extfrag_index", 0444,
1689 extfrag_debug_root, NULL, &extfrag_file_ops))
1690 goto fail;
1691
1692 return 0;
1693fail:
1694 debugfs_remove_recursive(extfrag_debug_root);
1695 return -ENOMEM;
1696}
1697
1698module_init(extfrag_debug_init);
1699#endif
1700