1
2
3
4
5
6
7
8
9
10
11
12#include <linux/fs.h>
13#include <linux/mm.h>
14#include <linux/err.h>
15#include <linux/module.h>
16#include <linux/slab.h>
17#include <linux/cpu.h>
18#include <linux/cpumask.h>
19#include <linux/vmstat.h>
20#include <linux/proc_fs.h>
21#include <linux/seq_file.h>
22#include <linux/debugfs.h>
23#include <linux/sched.h>
24#include <linux/math64.h>
25#include <linux/writeback.h>
26#include <linux/compaction.h>
27#include <linux/mm_inline.h>
28#include <linux/page_ext.h>
29#include <linux/page_owner.h>
30
31#include "internal.h"
32
33#ifdef CONFIG_VM_EVENT_COUNTERS
/*
 * Per-cpu VM event counters, zero-initialised.  The symbol is exported so
 * that code outside this file can reference the per-cpu variable.
 */
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);
36
37static void sum_vm_events(unsigned long *ret)
38{
39 int cpu;
40 int i;
41
42 memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
43
44 for_each_online_cpu(cpu) {
45 struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
46
47 for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
48 ret[i] += this->event[i];
49 }
50}
51
52
53
54
55
56
/*
 * Fill @ret with the VM event counts summed over all online CPUs.
 *
 * The summation is bracketed by get_online_cpus()/put_online_cpus().
 * @ret must have room for NR_VM_EVENT_ITEMS entries, since
 * sum_vm_events() clears and fills that many.
 */
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);
64
65
66
67
68
69
70
71void vm_events_fold_cpu(int cpu)
72{
73 struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
74 int i;
75
76 for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
77 count_vm_events(i, fold_state->event[i]);
78 fold_state->event[i] = 0;
79 }
80}
81
82#endif
83
84
85
86
87
88
/*
 * Global counters, one atomic per zone stat item.  The per-zone and
 * per-cpu code below folds its deltas into this array.
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_stat);
91
92#ifdef CONFIG_SMP
93
/*
 * Compute a per-cpu stat threshold for @zone from the gap between its low
 * and min watermarks, divided among the online CPUs.
 *
 * Returns a value clamped to the range [1, 125].
 */
int calculate_pressure_threshold(struct zone *zone)
{
	int wmark_gap = low_wmark_pages(zone) - min_wmark_pages(zone);
	int threshold;

	/* Share the gap between the online CPUs, but never drop below 1. */
	threshold = max(1, (int)(wmark_gap / num_online_cpus()));

	/*
	 * Upper bound of 125 -- presumably so the value always fits the
	 * s8 per-cpu differentials used elsewhere in this file.
	 */
	return min(125, threshold);
}
117
118int calculate_normal_threshold(struct zone *zone)
119{
120 int threshold;
121 int mem;
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153 mem = zone->managed_pages >> (27 - PAGE_SHIFT);
154
155 threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
156
157
158
159
160 threshold = min(125, threshold);
161
162 return threshold;
163}
164
165
166
167
168void refresh_zone_stat_thresholds(void)
169{
170 struct zone *zone;
171 int cpu;
172 int threshold;
173
174 for_each_populated_zone(zone) {
175 unsigned long max_drift, tolerate_drift;
176
177 threshold = calculate_normal_threshold(zone);
178
179 for_each_online_cpu(cpu)
180 per_cpu_ptr(zone->pageset, cpu)->stat_threshold
181 = threshold;
182
183
184
185
186
187
188 tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
189 max_drift = num_online_cpus() * threshold;
190 if (max_drift > tolerate_drift)
191 zone->percpu_drift_mark = high_wmark_pages(zone) +
192 max_drift;
193 }
194}
195
196void set_pgdat_percpu_threshold(pg_data_t *pgdat,
197 int (*calculate_pressure)(struct zone *))
198{
199 struct zone *zone;
200 int cpu;
201 int threshold;
202 int i;
203
204 for (i = 0; i < pgdat->nr_zones; i++) {
205 zone = &pgdat->node_zones[i];
206 if (!zone->percpu_drift_mark)
207 continue;
208
209 threshold = (*calculate_pressure)(zone);
210 for_each_online_cpu(cpu)
211 per_cpu_ptr(zone->pageset, cpu)->stat_threshold
212 = threshold;
213 }
214}
215
216
217
218
219
220
221void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
222 int delta)
223{
224 struct per_cpu_pageset __percpu *pcp = zone->pageset;
225 s8 __percpu *p = pcp->vm_stat_diff + item;
226 long x;
227 long t;
228
229 x = delta + __this_cpu_read(*p);
230
231 t = __this_cpu_read(pcp->stat_threshold);
232
233 if (unlikely(x > t || x < -t)) {
234 zone_page_state_add(x, zone, item);
235 x = 0;
236 }
237 __this_cpu_write(*p, x);
238}
239EXPORT_SYMBOL(__mod_zone_page_state);
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
265{
266 struct per_cpu_pageset __percpu *pcp = zone->pageset;
267 s8 __percpu *p = pcp->vm_stat_diff + item;
268 s8 v, t;
269
270 v = __this_cpu_inc_return(*p);
271 t = __this_cpu_read(pcp->stat_threshold);
272 if (unlikely(v > t)) {
273 s8 overstep = t >> 1;
274
275 zone_page_state_add(v + overstep, zone, item);
276 __this_cpu_write(*p, -overstep);
277 }
278}
279
280void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
281{
282 __inc_zone_state(page_zone(page), item);
283}
284EXPORT_SYMBOL(__inc_zone_page_state);
285
286void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
287{
288 struct per_cpu_pageset __percpu *pcp = zone->pageset;
289 s8 __percpu *p = pcp->vm_stat_diff + item;
290 s8 v, t;
291
292 v = __this_cpu_dec_return(*p);
293 t = __this_cpu_read(pcp->stat_threshold);
294 if (unlikely(v < - t)) {
295 s8 overstep = t >> 1;
296
297 zone_page_state_add(v - overstep, zone, item);
298 __this_cpu_write(*p, overstep);
299 }
300}
301
302void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
303{
304 __dec_zone_state(page_zone(page), item);
305}
306EXPORT_SYMBOL(__dec_zone_page_state);
307
308#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
309
310
311
312
313
314
315
316
317
318
319
320
321static inline void mod_state(struct zone *zone,
322 enum zone_stat_item item, int delta, int overstep_mode)
323{
324 struct per_cpu_pageset __percpu *pcp = zone->pageset;
325 s8 __percpu *p = pcp->vm_stat_diff + item;
326 long o, n, t, z;
327
328 do {
329 z = 0;
330
331
332
333
334
335
336
337
338
339
340
341 t = this_cpu_read(pcp->stat_threshold);
342
343 o = this_cpu_read(*p);
344 n = delta + o;
345
346 if (n > t || n < -t) {
347 int os = overstep_mode * (t >> 1) ;
348
349
350 z = n + os;
351 n = -os;
352 }
353 } while (this_cpu_cmpxchg(*p, o, n) != o);
354
355 if (z)
356 zone_page_state_add(z, zone, item);
357}
358
/*
 * cmpxchg-based variant: add @delta to @item of @zone through mod_state()
 * with no overstep.
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
					int delta)
{
	mod_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);
365
/*
 * cmpxchg-based variant: increment @item of @zone by one, overstepping in
 * the positive direction on overflow.
 */
void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	mod_state(zone, item, 1, 1);
}
370
371void inc_zone_page_state(struct page *page, enum zone_stat_item item)
372{
373 mod_state(page_zone(page), item, 1, 1);
374}
375EXPORT_SYMBOL(inc_zone_page_state);
376
377void dec_zone_page_state(struct page *page, enum zone_stat_item item)
378{
379 mod_state(page_zone(page), item, -1, -1);
380}
381EXPORT_SYMBOL(dec_zone_page_state);
382#else
383
384
385
/*
 * Fallback variant used when CONFIG_HAVE_CMPXCHG_LOCAL is not set: run
 * __mod_zone_page_state() with local interrupts disabled.
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
					int delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);
396
/*
 * Fallback variant used when CONFIG_HAVE_CMPXCHG_LOCAL is not set: run
 * __inc_zone_state() with local interrupts disabled.
 */
void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
405
406void inc_zone_page_state(struct page *page, enum zone_stat_item item)
407{
408 unsigned long flags;
409 struct zone *zone;
410
411 zone = page_zone(page);
412 local_irq_save(flags);
413 __inc_zone_state(zone, item);
414 local_irq_restore(flags);
415}
416EXPORT_SYMBOL(inc_zone_page_state);
417
418void dec_zone_page_state(struct page *page, enum zone_stat_item item)
419{
420 unsigned long flags;
421
422 local_irq_save(flags);
423 __dec_zone_page_state(page, item);
424 local_irq_restore(flags);
425}
426EXPORT_SYMBOL(dec_zone_page_state);
427#endif
428
429
430
431
432
433
434static int fold_diff(int *diff)
435{
436 int i;
437 int changes = 0;
438
439 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
440 if (diff[i]) {
441 atomic_long_add(diff[i], &vm_stat[i]);
442 changes++;
443 }
444 return changes;
445}
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463static int refresh_cpu_vm_stats(void)
464{
465 struct zone *zone;
466 int i;
467 int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
468 int changes = 0;
469
470 for_each_populated_zone(zone) {
471 struct per_cpu_pageset __percpu *p = zone->pageset;
472
473 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
474 int v;
475
476 v = this_cpu_xchg(p->vm_stat_diff[i], 0);
477 if (v) {
478
479 atomic_long_add(v, &zone->vm_stat[i]);
480 global_diff[i] += v;
481#ifdef CONFIG_NUMA
482
483 __this_cpu_write(p->expire, 3);
484#endif
485 }
486 }
487 cond_resched();
488#ifdef CONFIG_NUMA
489
490
491
492
493
494
495
496 if (!__this_cpu_read(p->expire) ||
497 !__this_cpu_read(p->pcp.count))
498 continue;
499
500
501
502
503 if (zone_to_nid(zone) == numa_node_id()) {
504 __this_cpu_write(p->expire, 0);
505 continue;
506 }
507
508 if (__this_cpu_dec_return(p->expire))
509 continue;
510
511 if (__this_cpu_read(p->pcp.count)) {
512 drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
513 changes++;
514 }
515#endif
516 }
517 changes += fold_diff(global_diff);
518 return changes;
519}
520
521
522
523
524
525
526void cpu_vm_stats_fold(int cpu)
527{
528 struct zone *zone;
529 int i;
530 int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
531
532 for_each_populated_zone(zone) {
533 struct per_cpu_pageset *p;
534
535 p = per_cpu_ptr(zone->pageset, cpu);
536
537 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
538 if (p->vm_stat_diff[i]) {
539 int v;
540
541 v = p->vm_stat_diff[i];
542 p->vm_stat_diff[i] = 0;
543 atomic_long_add(v, &zone->vm_stat[i]);
544 global_diff[i] += v;
545 }
546 }
547
548 fold_diff(global_diff);
549}
550
551
552
553
554
555void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
556{
557 int i;
558
559 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
560 if (pset->vm_stat_diff[i]) {
561 int v = pset->vm_stat_diff[i];
562 pset->vm_stat_diff[i] = 0;
563 atomic_long_add(v, &zone->vm_stat[i]);
564 atomic_long_add(v, &vm_stat[i]);
565 }
566}
567#endif
568
569#ifdef CONFIG_NUMA
570
571
572
573
574
575
576
577
578
579
580void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags)
581{
582 if (z->zone_pgdat == preferred_zone->zone_pgdat) {
583 __inc_zone_state(z, NUMA_HIT);
584 } else {
585 __inc_zone_state(z, NUMA_MISS);
586 __inc_zone_state(preferred_zone, NUMA_FOREIGN);
587 }
588 if (z->node == ((flags & __GFP_OTHER_NODE) ?
589 preferred_zone->node : numa_node_id()))
590 __inc_zone_state(z, NUMA_LOCAL);
591 else
592 __inc_zone_state(z, NUMA_OTHER);
593}
594#endif
595
596#ifdef CONFIG_COMPACTION
597
/* Free-list summary of a zone relative to a requested order. */
struct contig_page_info {
	/* Total free pages: sum over orders of nr_free << order. */
	unsigned long free_pages;
	/* Total number of free blocks, regardless of order. */
	unsigned long free_blocks_total;
	/* Number of blocks of the requested order obtainable from free blocks of that order or larger. */
	unsigned long free_blocks_suitable;
};
603
604
605
606
607
608
609
610
611
612static void fill_contig_page_info(struct zone *zone,
613 unsigned int suitable_order,
614 struct contig_page_info *info)
615{
616 unsigned int order;
617
618 info->free_pages = 0;
619 info->free_blocks_total = 0;
620 info->free_blocks_suitable = 0;
621
622 for (order = 0; order < MAX_ORDER; order++) {
623 unsigned long blocks;
624
625
626 blocks = zone->free_area[order].nr_free;
627 info->free_blocks_total += blocks;
628
629
630 info->free_pages += blocks << order;
631
632
633 if (order >= suitable_order)
634 info->free_blocks_suitable += blocks <<
635 (order - suitable_order);
636 }
637}
638
639
640
641
642
643
644
645
646static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
647{
648 unsigned long requested = 1UL << order;
649
650 if (!info->free_blocks_total)
651 return 0;
652
653
654 if (info->free_blocks_suitable)
655 return -1000;
656
657
658
659
660
661
662
663 return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
664}
665
666
/*
 * Return the fragmentation index (scaled by 1000) of @zone for a request
 * of @order, computed from a fresh summary of the zone's free lists.
 */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
674#endif
675
676#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
/*
 * Helpers that build the per-zone string names used in vmstat_text[].
 * Each TEXT_FOR_<zone>(xx) expands to the string literal xx with the zone
 * suffix appended, followed by a comma, or to nothing when the matching
 * zone type is not configured.
 */
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

/* One name per configured zone type; "_normal" and "_movable" always exist. */
#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",
697
/*
 * Names of the statistics, in the order the values are laid out by
 * vmstat_start(): zone stat items first, then the writeback thresholds,
 * then (with CONFIG_VM_EVENT_COUNTERS) the VM event counters.
 * The order must stay in sync with the corresponding enums.
 */
const char * const vmstat_text[] = {
	/* zone stat items (first NR_VM_ZONE_STAT_ITEMS entries) */
	"nr_free_pages",
	"nr_alloc_batch",
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_mlock",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_page_table_pages",
	"nr_kernel_stack",
	"nr_unstable",
	"nr_bounce",
	"nr_vmscan_write",
	"nr_vmscan_immediate_reclaim",
	"nr_writeback_temp",
	"nr_isolated_anon",
	"nr_isolated_file",
	"nr_shmem",
	"nr_dirtied",
	"nr_written",
	"nr_pages_scanned",

#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif
	"workingset_refault",
	"workingset_activate",
	"workingset_nodereclaim",
	"nr_anon_transparent_hugepages",
	"nr_free_cma",

	/* writeback thresholds (see enum writeback_stat_item) */
	"nr_dirty_threshold",
	"nr_dirty_background_threshold",

#ifdef CONFIG_VM_EVENT_COUNTERS
	/* VM event counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	TEXTS_FOR_ZONES("pgrefill")
	TEXTS_FOR_ZONES("pgsteal_kswapd")
	TEXTS_FOR_ZONES("pgsteal_direct")
	TEXTS_FOR_ZONES("pgscan_kswapd")
	TEXTS_FOR_ZONES("pgscan_direct")
	"pgscan_direct_throttle",

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"pageoutrun",
	"allocstall",

	"pgrotated",

	"drop_pagecache",
	"drop_slab",

#ifdef CONFIG_NUMA_BALANCING
	"numa_pte_updates",
	"numa_huge_pte_updates",
	"numa_hint_faults",
	"numa_hint_faults_local",
	"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
	"pgmigrate_success",
	"pgmigrate_fail",
#endif
#ifdef CONFIG_COMPACTION
	"compact_migrate_scanned",
	"compact_free_scanned",
	"compact_isolated",
	"compact_stall",
	"compact_fail",
	"compact_success",
#endif

#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	"thp_fault_alloc",
	"thp_fault_fallback",
	"thp_collapse_alloc",
	"thp_collapse_alloc_failed",
	"thp_split",
	"thp_zero_page_alloc",
	"thp_zero_page_alloc_failed",
#endif
#ifdef CONFIG_MEMORY_BALLOON
	"balloon_inflate",
	"balloon_deflate",
#ifdef CONFIG_BALLOON_COMPACTION
	"balloon_migrate",
#endif
#endif /* CONFIG_MEMORY_BALLOON */
#ifdef CONFIG_DEBUG_TLBFLUSH
#ifdef CONFIG_SMP
	"nr_tlb_remote_flush",
	"nr_tlb_remote_flush_received",
#endif /* CONFIG_SMP */
	"nr_tlb_local_flush_all",
	"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */

#ifdef CONFIG_DEBUG_VM_VMACACHE
	"vmacache_find_calls",
	"vmacache_find_hits",
	"vmacache_full_flushes",
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS */
};
850#endif
851
852
853#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
854 defined(CONFIG_PROC_FS)
855static void *frag_start(struct seq_file *m, loff_t *pos)
856{
857 pg_data_t *pgdat;
858 loff_t node = *pos;
859
860 for (pgdat = first_online_pgdat();
861 pgdat && node;
862 pgdat = next_online_pgdat(pgdat))
863 --node;
864
865 return pgdat;
866}
867
868static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
869{
870 pg_data_t *pgdat = (pg_data_t *)arg;
871
872 (*pos)++;
873 return next_online_pgdat(pgdat);
874}
875
/* seq_file stop callback: nothing to release for the node iterators. */
static void frag_stop(struct seq_file *m, void *arg)
{
}
879
880
881static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
882 void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
883{
884 struct zone *zone;
885 struct zone *node_zones = pgdat->node_zones;
886 unsigned long flags;
887
888 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
889 if (!populated_zone(zone))
890 continue;
891
892 spin_lock_irqsave(&zone->lock, flags);
893 print(m, pgdat, zone);
894 spin_unlock_irqrestore(&zone->lock, flags);
895 }
896}
897#endif
898
899#ifdef CONFIG_PROC_FS
/*
 * Printable names for the migrate types, indexed by migrate type.
 * NOTE(review): the order presumably has to match the migrate type enum
 * -- confirm against its definition.
 */
static char * const migratetype_names[MIGRATE_TYPES] = {
	"Unmovable",
	"Reclaimable",
	"Movable",
	"Reserve",
#ifdef CONFIG_CMA
	"CMA",
#endif
#ifdef CONFIG_MEMORY_ISOLATION
	"Isolate",
#endif
};
912
913static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
914 struct zone *zone)
915{
916 int order;
917
918 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
919 for (order = 0; order < MAX_ORDER; ++order)
920 seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
921 seq_putc(m, '\n');
922}
923
924
925
926
927static int frag_show(struct seq_file *m, void *arg)
928{
929 pg_data_t *pgdat = (pg_data_t *)arg;
930 walk_zones_in_node(m, pgdat, frag_show_print);
931 return 0;
932}
933
934static void pagetypeinfo_showfree_print(struct seq_file *m,
935 pg_data_t *pgdat, struct zone *zone)
936{
937 int order, mtype;
938
939 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
940 seq_printf(m, "Node %4d, zone %8s, type %12s ",
941 pgdat->node_id,
942 zone->name,
943 migratetype_names[mtype]);
944 for (order = 0; order < MAX_ORDER; ++order) {
945 unsigned long freecount = 0;
946 struct free_area *area;
947 struct list_head *curr;
948
949 area = &(zone->free_area[order]);
950
951 list_for_each(curr, &area->free_list[mtype])
952 freecount++;
953 seq_printf(m, "%6lu ", freecount);
954 }
955 seq_putc(m, '\n');
956 }
957}
958
959
960static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
961{
962 int order;
963 pg_data_t *pgdat = (pg_data_t *)arg;
964
965
966 seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
967 for (order = 0; order < MAX_ORDER; ++order)
968 seq_printf(m, "%6d ", order);
969 seq_putc(m, '\n');
970
971 walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);
972
973 return 0;
974}
975
976static void pagetypeinfo_showblockcount_print(struct seq_file *m,
977 pg_data_t *pgdat, struct zone *zone)
978{
979 int mtype;
980 unsigned long pfn;
981 unsigned long start_pfn = zone->zone_start_pfn;
982 unsigned long end_pfn = zone_end_pfn(zone);
983 unsigned long count[MIGRATE_TYPES] = { 0, };
984
985 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
986 struct page *page;
987
988 if (!pfn_valid(pfn))
989 continue;
990
991 page = pfn_to_page(pfn);
992
993
994 if (!memmap_valid_within(pfn, page, zone))
995 continue;
996
997 mtype = get_pageblock_migratetype(page);
998
999 if (mtype < MIGRATE_TYPES)
1000 count[mtype]++;
1001 }
1002
1003
1004 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1005 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1006 seq_printf(m, "%12lu ", count[mtype]);
1007 seq_putc(m, '\n');
1008}
1009
1010
1011static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1012{
1013 int mtype;
1014 pg_data_t *pgdat = (pg_data_t *)arg;
1015
1016 seq_printf(m, "\n%-23s", "Number of blocks type ");
1017 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1018 seq_printf(m, "%12s ", migratetype_names[mtype]);
1019 seq_putc(m, '\n');
1020 walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);
1021
1022 return 0;
1023}
1024
1025#ifdef CONFIG_PAGE_OWNER
1026static void pagetypeinfo_showmixedcount_print(struct seq_file *m,
1027 pg_data_t *pgdat,
1028 struct zone *zone)
1029{
1030 struct page *page;
1031 struct page_ext *page_ext;
1032 unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
1033 unsigned long end_pfn = pfn + zone->spanned_pages;
1034 unsigned long count[MIGRATE_TYPES] = { 0, };
1035 int pageblock_mt, page_mt;
1036 int i;
1037
1038
1039 pfn = zone->zone_start_pfn;
1040
1041
1042
1043
1044
1045
1046 for (; pfn < end_pfn; ) {
1047 if (!pfn_valid(pfn)) {
1048 pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
1049 continue;
1050 }
1051
1052 block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
1053 block_end_pfn = min(block_end_pfn, end_pfn);
1054
1055 page = pfn_to_page(pfn);
1056 pageblock_mt = get_pfnblock_migratetype(page, pfn);
1057
1058 for (; pfn < block_end_pfn; pfn++) {
1059 if (!pfn_valid_within(pfn))
1060 continue;
1061
1062 page = pfn_to_page(pfn);
1063 if (PageBuddy(page)) {
1064 pfn += (1UL << page_order(page)) - 1;
1065 continue;
1066 }
1067
1068 if (PageReserved(page))
1069 continue;
1070
1071 page_ext = lookup_page_ext(page);
1072
1073 if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
1074 continue;
1075
1076 page_mt = gfpflags_to_migratetype(page_ext->gfp_mask);
1077 if (pageblock_mt != page_mt) {
1078 if (is_migrate_cma(pageblock_mt))
1079 count[MIGRATE_MOVABLE]++;
1080 else
1081 count[pageblock_mt]++;
1082
1083 pfn = block_end_pfn;
1084 break;
1085 }
1086 pfn += (1UL << page_ext->order) - 1;
1087 }
1088 }
1089
1090
1091 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1092 for (i = 0; i < MIGRATE_TYPES; i++)
1093 seq_printf(m, "%12lu ", count[i]);
1094 seq_putc(m, '\n');
1095}
1096#endif
1097
1098
1099
1100
1101
1102
1103
1104static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
1105{
1106#ifdef CONFIG_PAGE_OWNER
1107 int mtype;
1108
1109 if (!page_owner_inited)
1110 return;
1111
1112 drain_all_pages(NULL);
1113
1114 seq_printf(m, "\n%-23s", "Number of mixed blocks ");
1115 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1116 seq_printf(m, "%12s ", migratetype_names[mtype]);
1117 seq_putc(m, '\n');
1118
1119 walk_zones_in_node(m, pgdat, pagetypeinfo_showmixedcount_print);
1120#endif
1121}
1122
1123
1124
1125
1126
1127static int pagetypeinfo_show(struct seq_file *m, void *arg)
1128{
1129 pg_data_t *pgdat = (pg_data_t *)arg;
1130
1131
1132 if (!node_state(pgdat->node_id, N_MEMORY))
1133 return 0;
1134
1135 seq_printf(m, "Page block order: %d\n", pageblock_order);
1136 seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
1137 seq_putc(m, '\n');
1138 pagetypeinfo_showfree(m, pgdat);
1139 pagetypeinfo_showblockcount(m, pgdat);
1140 pagetypeinfo_showmixedcount(m, pgdat);
1141
1142 return 0;
1143}
1144
/* seq_file iterator over online nodes; each record is one node's free-block counts. */
static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};
1151
/* open() handler: attach the fragmentation_op iterator to @file. */
static int fragmentation_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &fragmentation_op);
}
1156
/* File operations of the "buddyinfo" proc entry (registered in setup_vmstat()). */
static const struct file_operations fragmentation_file_operations = {
	.open		= fragmentation_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1163
/* seq_file iterator over online nodes; each record is one node's page type tables. */
static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};
1170
/* open() handler: attach the pagetypeinfo_op iterator to @file. */
static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &pagetypeinfo_op);
}
1175
/* File operations of the "pagetypeinfo" proc entry (registered in setup_vmstat()). */
static const struct file_operations pagetypeinfo_file_ops = {
	.open		= pagetypeinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1182
1183static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1184 struct zone *zone)
1185{
1186 int i;
1187 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1188 seq_printf(m,
1189 "\n pages free %lu"
1190 "\n min %lu"
1191 "\n low %lu"
1192 "\n high %lu"
1193 "\n scanned %lu"
1194 "\n spanned %lu"
1195 "\n present %lu"
1196 "\n managed %lu",
1197 zone_page_state(zone, NR_FREE_PAGES),
1198 min_wmark_pages(zone),
1199 low_wmark_pages(zone),
1200 high_wmark_pages(zone),
1201 zone_page_state(zone, NR_PAGES_SCANNED),
1202 zone->spanned_pages,
1203 zone->present_pages,
1204 zone->managed_pages);
1205
1206 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1207 seq_printf(m, "\n %-12s %lu", vmstat_text[i],
1208 zone_page_state(zone, i));
1209
1210 seq_printf(m,
1211 "\n protection: (%ld",
1212 zone->lowmem_reserve[0]);
1213 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
1214 seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
1215 seq_printf(m,
1216 ")"
1217 "\n pagesets");
1218 for_each_online_cpu(i) {
1219 struct per_cpu_pageset *pageset;
1220
1221 pageset = per_cpu_ptr(zone->pageset, i);
1222 seq_printf(m,
1223 "\n cpu: %i"
1224 "\n count: %i"
1225 "\n high: %i"
1226 "\n batch: %i",
1227 i,
1228 pageset->pcp.count,
1229 pageset->pcp.high,
1230 pageset->pcp.batch);
1231#ifdef CONFIG_SMP
1232 seq_printf(m, "\n vm stats threshold: %d",
1233 pageset->stat_threshold);
1234#endif
1235 }
1236 seq_printf(m,
1237 "\n all_unreclaimable: %u"
1238 "\n start_pfn: %lu"
1239 "\n inactive_ratio: %u",
1240 !zone_reclaimable(zone),
1241 zone->zone_start_pfn,
1242 zone->inactive_ratio);
1243 seq_putc(m, '\n');
1244}
1245
1246
1247
1248
1249static int zoneinfo_show(struct seq_file *m, void *arg)
1250{
1251 pg_data_t *pgdat = (pg_data_t *)arg;
1252 walk_zones_in_node(m, pgdat, zoneinfo_show_print);
1253 return 0;
1254}
1255
/* seq_file iterator over online nodes; each record is one node's zone details. */
static const struct seq_operations zoneinfo_op = {
	.start	= frag_start, /* shares the node iterator with the fragmentation files */

	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};
1263
/* open() handler: attach the zoneinfo_op iterator to @file. */
static int zoneinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &zoneinfo_op);
}
1268
/* File operations of the "zoneinfo" proc entry (registered in setup_vmstat()). */
static const struct file_operations proc_zoneinfo_file_operations = {
	.open		= zoneinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1275
/*
 * Slots of the writeback section of the vmstat buffer, which follows the
 * zone stat items (see vmstat_start()).
 */
enum writeback_stat_item {
	NR_DIRTY_THRESHOLD,
	NR_DIRTY_BG_THRESHOLD,
	NR_VM_WRITEBACK_STAT_ITEMS,	/* number of slots, not a real item */
};
1281
1282static void *vmstat_start(struct seq_file *m, loff_t *pos)
1283{
1284 unsigned long *v;
1285 int i, stat_items_size;
1286
1287 if (*pos >= ARRAY_SIZE(vmstat_text))
1288 return NULL;
1289 stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
1290 NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
1291
1292#ifdef CONFIG_VM_EVENT_COUNTERS
1293 stat_items_size += sizeof(struct vm_event_state);
1294#endif
1295
1296 v = kmalloc(stat_items_size, GFP_KERNEL);
1297 m->private = v;
1298 if (!v)
1299 return ERR_PTR(-ENOMEM);
1300 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1301 v[i] = global_page_state(i);
1302 v += NR_VM_ZONE_STAT_ITEMS;
1303
1304 global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
1305 v + NR_DIRTY_THRESHOLD);
1306 v += NR_VM_WRITEBACK_STAT_ITEMS;
1307
1308#ifdef CONFIG_VM_EVENT_COUNTERS
1309 all_vm_events(v);
1310 v[PGPGIN] /= 2;
1311 v[PGPGOUT] /= 2;
1312#endif
1313 return (unsigned long *)m->private + *pos;
1314}
1315
1316static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1317{
1318 (*pos)++;
1319 if (*pos >= ARRAY_SIZE(vmstat_text))
1320 return NULL;
1321 return (unsigned long *)m->private + *pos;
1322}
1323
1324static int vmstat_show(struct seq_file *m, void *arg)
1325{
1326 unsigned long *l = arg;
1327 unsigned long off = l - (unsigned long *)m->private;
1328
1329 seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
1330 return 0;
1331}
1332
/*
 * seq_file stop callback: free the snapshot allocated by vmstat_start()
 * and clear the pointer so it cannot be freed twice.
 */
static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}
1338
/* seq_file iterator over the entries of the vmstat snapshot buffer. */
static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};
1345
/* open() handler: attach the vmstat_op iterator to @file. */
static int vmstat_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &vmstat_op);
}
1350
/* File operations of the "vmstat" proc entry (registered in setup_vmstat()). */
static const struct file_operations proc_vmstat_file_operations = {
	.open		= vmstat_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1357#endif
1358
1359#ifdef CONFIG_SMP
/* Per-cpu delayed work that runs vmstat_update() on its CPU. */
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
/* Delay, in jiffies, between update runs and between shepherd runs; defaults to HZ. */
int sysctl_stat_interval __read_mostly = HZ;
/* CPUs whose vmstat_work is currently not scheduled. */
static cpumask_var_t cpu_stat_off;
1363
1364static void vmstat_update(struct work_struct *w)
1365{
1366 if (refresh_cpu_vm_stats())
1367
1368
1369
1370
1371
1372 schedule_delayed_work(this_cpu_ptr(&vmstat_work),
1373 round_jiffies_relative(sysctl_stat_interval));
1374 else {
1375
1376
1377
1378
1379
1380
1381
1382 int r;
1383
1384
1385
1386
1387
1388
1389
1390 r = cpumask_test_and_set_cpu(smp_processor_id(),
1391 cpu_stat_off);
1392 VM_BUG_ON(r);
1393 }
1394}
1395
1396
1397
1398
1399
1400static bool need_update(int cpu)
1401{
1402 struct zone *zone;
1403
1404 for_each_populated_zone(zone) {
1405 struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
1406
1407 BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
1408
1409
1410
1411
1412 if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
1413 return true;
1414
1415 }
1416 return false;
1417}
1418
1419
1420
1421
1422
1423
1424
1425
/* Forward declaration: the work item below needs the handler's address. */
static void vmstat_shepherd(struct work_struct *w);

/* Global delayed work that periodically runs vmstat_shepherd(). */
static DECLARE_DELAYED_WORK(shepherd, vmstat_shepherd);
1429
1430static void vmstat_shepherd(struct work_struct *w)
1431{
1432 int cpu;
1433
1434 get_online_cpus();
1435
1436 for_each_cpu(cpu, cpu_stat_off)
1437 if (need_update(cpu) &&
1438 cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
1439
1440 schedule_delayed_work_on(cpu,
1441 &per_cpu(vmstat_work, cpu), 0);
1442
1443 put_online_cpus();
1444
1445 schedule_delayed_work(&shepherd,
1446 round_jiffies_relative(sysctl_stat_interval));
1447
1448}
1449
1450static void __init start_shepherd_timer(void)
1451{
1452 int cpu;
1453
1454 for_each_possible_cpu(cpu)
1455 INIT_DELAYED_WORK(per_cpu_ptr(&vmstat_work, cpu),
1456 vmstat_update);
1457
1458 if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
1459 BUG();
1460 cpumask_copy(cpu_stat_off, cpu_online_mask);
1461
1462 schedule_delayed_work(&shepherd,
1463 round_jiffies_relative(sysctl_stat_interval));
1464}
1465
1466static void vmstat_cpu_dead(int node)
1467{
1468 int cpu;
1469
1470 get_online_cpus();
1471 for_each_online_cpu(cpu)
1472 if (cpu_to_node(cpu) == node)
1473 goto end;
1474
1475 node_clear_state(node, N_CPU);
1476end:
1477 put_online_cpus();
1478}
1479
1480
1481
1482
1483
1484static int vmstat_cpuup_callback(struct notifier_block *nfb,
1485 unsigned long action,
1486 void *hcpu)
1487{
1488 long cpu = (long)hcpu;
1489
1490 switch (action) {
1491 case CPU_ONLINE:
1492 case CPU_ONLINE_FROZEN:
1493 refresh_zone_stat_thresholds();
1494 node_set_state(cpu_to_node(cpu), N_CPU);
1495 cpumask_set_cpu(cpu, cpu_stat_off);
1496 break;
1497 case CPU_DOWN_PREPARE:
1498 case CPU_DOWN_PREPARE_FROZEN:
1499 cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
1500 cpumask_clear_cpu(cpu, cpu_stat_off);
1501 break;
1502 case CPU_DOWN_FAILED:
1503 case CPU_DOWN_FAILED_FROZEN:
1504 cpumask_set_cpu(cpu, cpu_stat_off);
1505 break;
1506 case CPU_DEAD:
1507 case CPU_DEAD_FROZEN:
1508 refresh_zone_stat_thresholds();
1509 vmstat_cpu_dead(cpu_to_node(cpu));
1510 break;
1511 default:
1512 break;
1513 }
1514 return NOTIFY_OK;
1515}
1516
/*
 * Hotplug notifier block for vmstat_cpuup_callback(), initialised
 * positionally; NOTE(review): the fields are presumably callback, next
 * pointer and priority -- confirm against struct notifier_block.
 */
static struct notifier_block vmstat_notifier =
	{ &vmstat_cpuup_callback, NULL, 0 };
1519#endif
1520
/*
 * Init hook: on SMP, register the CPU hotplug notifier and start the
 * shepherd (both inside the cpu_notifier_register_begin()/done() pair);
 * with CONFIG_PROC_FS, create the read-only proc entries buddyinfo,
 * pagetypeinfo, vmstat and zoneinfo.  Always returns 0; the return values
 * of proc_create() are not checked.
 */
static int __init setup_vmstat(void)
{
#ifdef CONFIG_SMP
	cpu_notifier_register_begin();
	__register_cpu_notifier(&vmstat_notifier);

	start_shepherd_timer();
	cpu_notifier_register_done();
#endif
#ifdef CONFIG_PROC_FS
	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
	proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
	proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
	proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
#endif
	return 0;
}
module_init(setup_vmstat)
1539
1540#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
1541
1542
1543
1544
1545
1546static int unusable_free_index(unsigned int order,
1547 struct contig_page_info *info)
1548{
1549
1550 if (info->free_pages == 0)
1551 return 1000;
1552
1553
1554
1555
1556
1557
1558
1559
1560 return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
1561
1562}
1563
1564static void unusable_show_print(struct seq_file *m,
1565 pg_data_t *pgdat, struct zone *zone)
1566{
1567 unsigned int order;
1568 int index;
1569 struct contig_page_info info;
1570
1571 seq_printf(m, "Node %d, zone %8s ",
1572 pgdat->node_id,
1573 zone->name);
1574 for (order = 0; order < MAX_ORDER; ++order) {
1575 fill_contig_page_info(zone, order, &info);
1576 index = unusable_free_index(order, &info);
1577 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1578 }
1579
1580 seq_putc(m, '\n');
1581}
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592static int unusable_show(struct seq_file *m, void *arg)
1593{
1594 pg_data_t *pgdat = (pg_data_t *)arg;
1595
1596
1597 if (!node_state(pgdat->node_id, N_MEMORY))
1598 return 0;
1599
1600 walk_zones_in_node(m, pgdat, unusable_show_print);
1601
1602 return 0;
1603}
1604
/* seq_file iterator over online nodes; each record is one node's unusable free index. */
static const struct seq_operations unusable_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= unusable_show,
};
1611
/* open() handler: attach the unusable_op iterator to @file. */
static int unusable_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &unusable_op);
}
1616
/* File operations of the debugfs "unusable_index" file (created in extfrag_debug_init()). */
static const struct file_operations unusable_file_ops = {
	.open		= unusable_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1623
1624static void extfrag_show_print(struct seq_file *m,
1625 pg_data_t *pgdat, struct zone *zone)
1626{
1627 unsigned int order;
1628 int index;
1629
1630
1631 struct contig_page_info info;
1632
1633 seq_printf(m, "Node %d, zone %8s ",
1634 pgdat->node_id,
1635 zone->name);
1636 for (order = 0; order < MAX_ORDER; ++order) {
1637 fill_contig_page_info(zone, order, &info);
1638 index = __fragmentation_index(order, &info);
1639 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1640 }
1641
1642 seq_putc(m, '\n');
1643}
1644
1645
1646
1647
1648static int extfrag_show(struct seq_file *m, void *arg)
1649{
1650 pg_data_t *pgdat = (pg_data_t *)arg;
1651
1652 walk_zones_in_node(m, pgdat, extfrag_show_print);
1653
1654 return 0;
1655}
1656
/* seq_file iterator over online nodes; each record is one node's fragmentation index. */
static const struct seq_operations extfrag_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= extfrag_show,
};
1663
/* open() handler: attach the extfrag_op iterator to @file. */
static int extfrag_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &extfrag_op);
}
1668
/* File operations of the debugfs "extfrag_index" file (created in extfrag_debug_init()). */
static const struct file_operations extfrag_file_ops = {
	.open		= extfrag_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1675
1676static int __init extfrag_debug_init(void)
1677{
1678 struct dentry *extfrag_debug_root;
1679
1680 extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
1681 if (!extfrag_debug_root)
1682 return -ENOMEM;
1683
1684 if (!debugfs_create_file("unusable_index", 0444,
1685 extfrag_debug_root, NULL, &unusable_file_ops))
1686 goto fail;
1687
1688 if (!debugfs_create_file("extfrag_index", 0444,
1689 extfrag_debug_root, NULL, &extfrag_file_ops))
1690 goto fail;
1691
1692 return 0;
1693fail:
1694 debugfs_remove_recursive(extfrag_debug_root);
1695 return -ENOMEM;
1696}
1697
1698module_init(extfrag_debug_init);
1699#endif
1700