/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>

#ifndef __GENKSYMS__
#include <linux/mm_inline.h>
#include "internal.h"
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

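/*
 * Sum the per-cpu VM event counters of every online CPU into @ret.
 * Callers are expected to keep the set of online CPUs stable while this
 * runs (see all_vm_events()).
 */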
static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);

/*
 * Fold the event counters of an offlined cpu into the current cpu's
 * counters and clear the source, so no events are lost across cpu hotplug.
 */
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

int calculate_pressure_threshold(struct zone *zone)
{
	int threshold;
	int watermark_distance;

	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark.
	 */
	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
	threshold = max(1, (int)(watermark_distance / num_online_cpus()));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

int calculate_normal_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 *
	 * fls() in C is equivalent to log2(x)+1 if x is != 0. If x is 0 then
	 * fls() returns 0.
	 */
	mem = zone->managed_pages >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

/*
 * Refresh the thresholds for each zone.
 */
void refresh_zone_stat_thresholds(void)
{
	struct zone *zone;
	int cpu;
	int threshold;

	for_each_populated_zone(zone) {
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_normal_threshold(zone);

		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * the min watermark could be breached by an allocation
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}

void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{
	struct zone *zone;
	int cpu;
	int threshold;
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		zone = &pgdat->node_zones[i];
		if (!zone->percpu_drift_mark)
			continue;

		threshold = (*calculate_pressure)(zone);
		for_each_possible_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;
	}
}

/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			   long delta)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_zone_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * When the per-cpu diff crosses the threshold, overstep by half a threshold
 * so that the global zone counter does not have to be touched again right
 * away.
 */
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v - overstep, zone, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies how overstep should be handled:
 *         0       No overstepping
 *         1       Overstepping half of threshold
 *        -1       Overstepping minus half of threshold
 */
static inline void mod_state(struct zone *zone, enum zone_stat_item item,
			     long delta, int overstep_mode)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to zone counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a zone.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (n > t || n < -t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to zone counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		zone_page_state_add(z, zone, item);
}

void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	mod_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	mod_state(zone, item, 1, 1);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_state(page_zone(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_state(page_zone(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_zone_page_state);
#else
/*
 * Use interrupt disable to serialize counter updates
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);
#endif /* CONFIG_HAVE_CMPXCHG_LOCAL */

/*
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */
static int fold_diff(int *diff)
{
	int i;
	int changes = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (diff[i]) {
			atomic_long_add(diff[i], &vm_stat[i]);
			changes++;
		}
	return changes;
}

/*
 * Update the zone counters for the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 *
 * The function returns the number of global counters updated.
 */
static int refresh_cpu_vm_stats(bool do_pagesets)
{
	struct zone *zone;
	int i;
	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
	int changes = 0;

	for_each_populated_zone(zone) {
		struct per_cpu_pageset __percpu *p = zone->pageset;

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
			if (v) {
				atomic_long_add(v, &zone->vm_stat[i]);
				global_diff[i] += v;
#ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
				__this_cpu_write(p->expire, 3);
#endif
			}
		}
#ifdef CONFIG_NUMA
		if (do_pagesets) {
			cond_resched();
			/*
			 * Remote pagesets are drained lazily: skip this zone
			 * if nothing is cached locally or the expiration
			 * counter is not set.
			 */
			if (!__this_cpu_read(p->expire) ||
			    !__this_cpu_read(p->pcp.count))
				continue;

			/*
			 * We never drain zones local to this processor.
			 */
			if (zone_to_nid(zone) == numa_node_id()) {
				__this_cpu_write(p->expire, 0);
				continue;
			}

			if (__this_cpu_dec_return(p->expire))
				continue;

			if (__this_cpu_read(p->pcp.count)) {
				drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
				changes++;
			}
		}
#endif
	}
	changes += fold_diff(global_diff);
	return changes;
}

/*
 * Fold the data for an offline cpu into the global array.
 * There cannot be any access by the offline cpu and therefore
 * synchronization is simplified.
 */
void cpu_vm_stats_fold(int cpu)
{
	struct zone *zone;
	int i;
	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p;

		p = per_cpu_ptr(zone->pageset, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (p->vm_stat_diff[i]) {
				int v;

				v = p->vm_stat_diff[i];
				p->vm_stat_diff[i] = 0;
				atomic_long_add(v, &zone->vm_stat[i]);
				global_diff[i] += v;
			}
	}

	fold_diff(global_diff);
}

/*
 * This is only called if !populated_zone(zone), which implies no other users
 * of pset->vm_stat_diff[] exist.
 */
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
{
	int i;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (pset->vm_stat_diff[i]) {
			int v = pset->vm_stat_diff[i];
			pset->vm_stat_diff[i] = 0;
			atomic_long_add(v, &zone->vm_stat[i]);
			atomic_long_add(v, &vm_stat[i]);
		}
}
#endif /* CONFIG_SMP */

#ifdef CONFIG_NUMA
/*
 * Update NUMA hit/miss statistics.
 *
 * z              = the zone we allocated from
 * preferred_zone = the zone the allocator preferred
 *
 * Must be called with interrupts disabled. When __GFP_OTHER_NODE is set,
 * the allocation counts as "local" if it lands on the preferred zone's
 * node rather than on the node of the currently executing cpu.
 */
void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags)
{
	if (z->zone_pgdat == preferred_zone->zone_pgdat) {
		__inc_zone_state(z, NUMA_HIT);
	} else {
		__inc_zone_state(z, NUMA_MISS);
		__inc_zone_state(preferred_zone, NUMA_FOREIGN);
	}
	if (z->node == ((flags & __GFP_OTHER_NODE) ?
			preferred_zone->node : numa_node_id()))
		__inc_zone_state(z, NUMA_LOCAL);
	else
		__inc_zone_state(z, NUMA_OTHER);
}
#endif

#ifdef CONFIG_COMPACTION

struct contig_page_info {
	unsigned long free_pages;
	unsigned long free_blocks_total;
	unsigned long free_blocks_suitable;
};

/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * done in a separate fill_contig_page_info if callers really care about it.
 */
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{
	unsigned int order;

	info->free_pages = 0;
	info->free_blocks_total = 0;
	info->free_blocks_suitable = 0;

	for (order = 0; order < MAX_ORDER; order++) {
		unsigned long blocks;

		/* Count number of free blocks */
		blocks = zone->free_area[order].nr_free;
		info->free_blocks_total += blocks;

		/* Count free base pages */
		info->free_pages += blocks << order;

		/* Count the suitable free blocks */
		if (order >= suitable_order)
			info->free_blocks_suitable += blocks <<
						(order - suitable_order);
	}
}

/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used.
 */
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

	if (!info->free_blocks_total)
		return 0;

	/* Fragmentation index only makes sense when a request would fail */
	if (info->free_blocks_suitable)
		return -1000;

	/*
	 * Index is between 0 and 1 so return within 3 decimal places
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
	return 1000 - div_u64((1000 + (div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
}

/* Same as __fragmentation_index but allocs contig_page_info on demand */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static char * const migratetype_names[MIGRATE_TYPES] = {
	"Unmovable",
	"Reclaimable",
	"Movable",
	"Reserve",
#ifdef CONFIG_CMA
	"CMA",
#endif
#ifdef CONFIG_MEMORY_ISOLATION
	"Isolate",
#endif
};

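/*
 * seq_file iterator callbacks shared by the /proc interfaces below:
 * frag_start() walks to the online pgdat at position *pos, frag_next()
 * advances to the next online pgdat and frag_stop() has nothing to clean up.
 */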
static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;
	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/* Walk all the zones in a node and print using a callback */
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		print(m, pgdat, zone);
		spin_unlock_irqrestore(&zone->lock, flags);
	}
}
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",

const char * const vmstat_text[] = {
	/* enum zone_stat_item counters */
	"nr_free_pages",
	"nr_alloc_batch",
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_mlock",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_page_table_pages",
	"nr_kernel_stack",
	"nr_unstable",
	"nr_bounce",
	"nr_vmscan_write",
	"nr_vmscan_immediate_reclaim",
	"nr_writeback_temp",
	"nr_isolated_anon",
	"nr_isolated_file",
	"nr_shmem",
	"nr_dirtied",
	"nr_written",

#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif
	"workingset_refault",
	"workingset_activate",
	"workingset_nodereclaim",
	"nr_anon_transparent_hugepages",
	"nr_free_cma",

	/* enum writeback_stat_item counters */
	"nr_dirty_threshold",
	"nr_dirty_background_threshold",

#ifdef CONFIG_VM_EVENT_COUNTERS
	/* enum vm_event_item counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",
	"pglazyfreed",

	TEXTS_FOR_ZONES("pgrefill")
	TEXTS_FOR_ZONES("pgsteal_kswapd")
	TEXTS_FOR_ZONES("pgsteal_direct")
	TEXTS_FOR_ZONES("pgscan_kswapd")
	TEXTS_FOR_ZONES("pgscan_direct")
	"pgscan_direct_throttle",

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"pageoutrun",
	"allocstall",

	"pgrotated",

	"drop_pagecache",
	"drop_slab",

#ifdef CONFIG_NUMA_BALANCING
	"numa_pte_updates",
	"numa_huge_pte_updates",
	"numa_hint_faults",
	"numa_hint_faults_local",
	"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
	"pgmigrate_success",
	"pgmigrate_fail",
#endif
#ifdef CONFIG_COMPACTION
	"compact_migrate_scanned",
	"compact_free_scanned",
	"compact_isolated",
	"compact_stall",
	"compact_fail",
	"compact_success",
#endif

#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	"thp_fault_alloc",
	"thp_fault_fallback",
	"thp_collapse_alloc",
	"thp_collapse_alloc_failed",
	"thp_split",
	"thp_zero_page_alloc",
	"thp_zero_page_alloc_failed",
#endif
#ifdef CONFIG_MEMORY_BALLOON
	"balloon_inflate",
	"balloon_deflate",
#ifdef CONFIG_BALLOON_COMPACTION
	"balloon_migrate",
#endif
#endif
#ifdef CONFIG_SWAP
	"swap_ra",
	"swap_ra_hit",
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS */
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */


#ifdef CONFIG_PROC_FS
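/*
 * Print the free-area counts at each order for one zone; this produces a
 * single row of /proc/buddyinfo.
 */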
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
						struct zone *zone)
{
	int order;

	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
	seq_putc(m, '\n');
}

/*
 * This walks the free areas for each zone in each node and prints them.
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, frag_show_print);
	return 0;
}

static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int order, mtype;

	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
					pgdat->node_id,
					zone->name,
					migratetype_names[mtype]);
		for (order = 0; order < MAX_ORDER; ++order) {
			unsigned long freecount = 0;
			struct free_area *area;
			struct list_head *curr;

			area = &(zone->free_area[order]);

			list_for_each(curr, &area->free_list[mtype])
				freecount++;
			seq_printf(m, "%6lu ", freecount);
		}
		seq_putc(m, '\n');
	}
}

/* Print out the free pages at each order for each migratetype */
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	int order;
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* Print header */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6d ", order);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);

	return 0;
}

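/*
 * Walk the zone's pageblocks and count how many blocks are currently marked
 * with each migratetype.
 */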
static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mtype;
	unsigned long pfn;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		if (!pfn_valid(pfn))
			continue;

		page = pfn_to_page(pfn);

		/* Watch for unexpected holes punched in the memmap */
		if (!memmap_valid_within(pfn, page, zone))
			continue;

		mtype = get_pageblock_migratetype(page);

		if (mtype < MIGRATE_TYPES)
			count[mtype]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12lu ", count[mtype]);
	seq_putc(m, '\n');
}

/* Print out the number of pageblocks for each migratetype */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	int mtype;
	pg_data_t *pgdat = (pg_data_t *)arg;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');
	walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);

	return 0;
}

/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	seq_printf(m, "Page block order: %d\n", pageblock_order);
	seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
	seq_putc(m, '\n');
	pagetypeinfo_showfree(m, pgdat);
	pagetypeinfo_showblockcount(m, pgdat);

	return 0;
}

static const struct seq_operations fragmentation_op = {
	.start = frag_start,
	.next = frag_next,
	.stop = frag_stop,
	.show = frag_show,
};

static int fragmentation_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &fragmentation_op);
}

static const struct file_operations fragmentation_file_operations = {
	.open = fragmentation_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static const struct seq_operations pagetypeinfo_op = {
	.start = frag_start,
	.next = frag_next,
	.stop = frag_stop,
	.show = pagetypeinfo_show,
};

static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &pagetypeinfo_op);
}

static const struct file_operations pagetypeinfo_file_ops = {
	.open = pagetypeinfo_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

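/*
 * Print one zone's entry for /proc/zoneinfo: watermarks, per-item vmstat
 * counters, lowmem protection and the per-cpu pageset state.
 */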
static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
							struct zone *zone)
{
	int i;
	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
	seq_printf(m,
		   "\n  pages free     %lu"
		   "\n        min      %lu"
		   "\n        low      %lu"
		   "\n        high     %lu"
		   "\n        scanned  %lu"
		   "\n        spanned  %lu"
		   "\n        present  %lu"
		   "\n        managed  %lu",
		   zone_page_state(zone, NR_FREE_PAGES),
		   min_wmark_pages(zone),
		   low_wmark_pages(zone),
		   high_wmark_pages(zone),
		   zone->pages_scanned,
		   zone->spanned_pages,
		   zone->present_pages,
		   zone->managed_pages);

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
				zone_page_state(zone, i));

	seq_printf(m,
		   "\n        protection: (%lu",
		   zone->lowmem_reserve[0]);
	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
		seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
	seq_printf(m,
		   ")"
		   "\n  pagesets");
	for_each_online_cpu(i) {
		struct per_cpu_pageset *pageset;

		pageset = per_cpu_ptr(zone->pageset, i);
		seq_printf(m,
			   "\n    cpu: %i"
			   "\n              count: %i"
			   "\n              high:  %i"
			   "\n              batch: %i",
			   i,
			   pageset->pcp.count,
			   pageset->pcp.high,
			   pageset->pcp.batch);
#ifdef CONFIG_SMP
		seq_printf(m, "\n  vm stats threshold: %d",
				pageset->stat_threshold);
#endif
	}
	seq_printf(m,
		   "\n  all_unreclaimable: %u"
		   "\n  start_pfn:         %lu"
		   "\n  inactive_ratio:    %u",
		   !zone_reclaimable(zone),
		   zone->zone_start_pfn,
		   zone->inactive_ratio);
	seq_putc(m, '\n');
}

/*
 * Output information about zones in @pgdat.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, zoneinfo_show_print);
	return 0;
}

static const struct seq_operations zoneinfo_op = {
	.start = frag_start, /* iterate over all zones. The same as in
			      * fragmentation. */
	.next = frag_next,
	.stop = frag_stop,
	.show = zoneinfo_show,
};

static int zoneinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &zoneinfo_op);
}

static const struct file_operations proc_zoneinfo_file_operations = {
	.open = zoneinfo_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

enum writeback_stat_item {
	NR_DIRTY_THRESHOLD,
	NR_DIRTY_BG_THRESHOLD,
	NR_VM_WRITEBACK_STAT_ITEMS,
};

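/*
 * The /proc/vmstat buffer is laid out as the zone counters, followed by the
 * writeback thresholds and finally the vm event counters; vmstat_start()
 * allocates and fills that buffer, and *pos indexes into it.
 */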
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
	int i, stat_items_size;

	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);

#ifdef CONFIG_VM_EVENT_COUNTERS
	stat_items_size += sizeof(struct vm_event_state);
#endif

	v = kmalloc(stat_items_size, GFP_KERNEL);
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_page_state(i);
	v += NR_VM_ZONE_STAT_ITEMS;

	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
			    v + NR_DIRTY_THRESHOLD);
	v += NR_VM_WRITEBACK_STAT_ITEMS;

#ifdef CONFIG_VM_EVENT_COUNTERS
	all_vm_events(v);
	v[PGPGIN] /= 2;		/* sectors -> kbytes */
	v[PGPGOUT] /= 2;
#endif
	return (unsigned long *)m->private + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

static const struct seq_operations vmstat_op = {
	.start = vmstat_start,
	.next = vmstat_next,
	.stop = vmstat_stop,
	.show = vmstat_show,
};

static int vmstat_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &vmstat_op);
}

static const struct file_operations proc_vmstat_file_operations = {
	.open = vmstat_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
static cpumask_var_t cpu_stat_off;

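/*
 * Per-cpu deferred work: fold this cpu's vmstat differentials. If anything
 * was folded, more activity is expected and the work reschedules itself;
 * otherwise the cpu is marked idle in cpu_stat_off and the shepherd will
 * restart the work when differentials reappear.
 */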
static void vmstat_update(struct work_struct *w)
{
	if (refresh_cpu_vm_stats(true)) {
		/*
		 * Counters were updated so we expect more updates
		 * to occur in the future. Keep on running the
		 * update worker thread.
		 * If we were marked on cpu_stat_off clear the flag
		 * so that vmstat_shepherd doesn't schedule us again.
		 */
		if (!cpumask_test_and_clear_cpu(smp_processor_id(),
						cpu_stat_off)) {
			schedule_delayed_work(this_cpu_ptr(&vmstat_work),
				round_jiffies_relative(sysctl_stat_interval));
		}
	} else {
		/*
		 * We did not update any counters so the app may be in
		 * a mode where it does not cause counter updates.
		 * We may be uselessly running vmstat_update.
		 * Defer the checking for differentials to the
		 * shepherd thread on a different processor.
		 */
		cpumask_set_cpu(smp_processor_id(), cpu_stat_off);
	}
}

/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */
static bool need_update(int cpu)
{
	struct zone *zone;

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);

		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
		/*
		 * The fast way of checking if there are any vm_stat diffs
		 * different from zero. This works because the diffs are
		 * byte sized items.
		 */
		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
			return true;

	}
	return false;
}

void quiet_vmstat(void)
{
	if (system_state != SYSTEM_RUNNING)
		return;

	/*
	 * If we are already in hands of the shepherd then there
	 * is nothing for us to do here.
	 */
	if (cpumask_test_and_set_cpu(smp_processor_id(), cpu_stat_off))
		return;

	if (!need_update(smp_processor_id()))
		return;

	/*
	 * Just refresh counters and do not care about the pending delayed
	 * vmstat_update. It doesn't fire that often to matter and canceling
	 * it would be too expensive from this path.
	 * vmstat_shepherd will take care about that for us.
	 */
	refresh_cpu_vm_stats(false);
}

/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for vm statistics updates disabled because of
 * inactivity.
 */
static void vmstat_shepherd(struct work_struct *w);

static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);

static void vmstat_shepherd(struct work_struct *w)
{
	int cpu;

	get_online_cpus();
	/* Check processors whose vmstat worker threads have been disabled */
	for_each_cpu(cpu, cpu_stat_off) {
		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);

		if (need_update(cpu)) {
			if (cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
				schedule_delayed_work_on(cpu, dw, 0);
		} else {
			/*
			 * Cancel the work if quiet_vmstat has put this
			 * cpu on cpu_stat_off because the work item might
			 * still be scheduled.
			 */
			cancel_delayed_work(dw);
		}
	}

	put_online_cpus();

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

static void __init start_shepherd_timer(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
			vmstat_update);

	if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
		BUG();
	cpumask_copy(cpu_stat_off, cpu_online_mask);

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

/*
 * Use the cpu notifier to insure that the thresholds are recalculated
 * when necessary.
 */
static int vmstat_cpuup_callback(struct notifier_block *nfb,
		unsigned long action,
		void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		refresh_zone_stat_thresholds();
		node_set_state(cpu_to_node(cpu), N_CPU);
		cpumask_set_cpu(cpu, cpu_stat_off);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
		cpumask_clear_cpu(cpu, cpu_stat_off);
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		cpumask_set_cpu(cpu, cpu_stat_off);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		refresh_zone_stat_thresholds();
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block vmstat_notifier =
	{ &vmstat_cpuup_callback, NULL, 0 };
#endif

static int __init setup_vmstat(void)
{
#ifdef CONFIG_SMP
	cpu_notifier_register_begin();
	__register_cpu_notifier(&vmstat_notifier);

	start_shepherd_timer();
	cpu_notifier_register_done();
#endif
#ifdef CONFIG_PROC_FS
	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
	proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
	proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
	proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
#endif
	return 0;
}
module_init(setup_vmstat)

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
#include <linux/debugfs.h>

/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{
	/* No free memory is interpreted as all free memory is unusable */
	if (info->free_pages == 0)
		return 1000;

	/*
	 * Index should be a value between 0 and 1. Return a value to 3
	 * decimal places.
	 *
	 * 0 => no fragmentation
	 * 1 => high fragmentation
	 */
	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
}

static void unusable_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = unusable_free_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */
static int unusable_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	walk_zones_in_node(m, pgdat, unusable_show_print);

	return 0;
}

static const struct seq_operations unusable_op = {
	.start = frag_start,
	.next = frag_next,
	.stop = frag_stop,
	.show = unusable_show,
};

static int unusable_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &unusable_op);
}

static const struct file_operations unusable_file_ops = {
	.open = unusable_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static void extfrag_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;

	/* Alloc on stack as interrupts are disabled for zone walk */
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = __fragmentation_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display fragmentation index for orders that allocations would fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, extfrag_show_print);

	return 0;
}

static const struct seq_operations extfrag_op = {
	.start = frag_start,
	.next = frag_next,
	.stop = frag_stop,
	.show = extfrag_show,
};

static int extfrag_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &extfrag_op);
}

static const struct file_operations extfrag_file_ops = {
	.open = extfrag_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

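/*
 * Register the "extfrag" debugfs directory with the unusable_index and
 * extfrag_index interfaces; tear everything down again if either file
 * cannot be created.
 */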
static int __init extfrag_debug_init(void)
{
	struct dentry *extfrag_debug_root;

	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
	if (!extfrag_debug_root)
		return -ENOMEM;

	if (!debugfs_create_file("unusable_index", 0444,
			extfrag_debug_root, NULL, &unusable_file_ops))
		goto fail;

	if (!debugfs_create_file("extfrag_index", 0444,
			extfrag_debug_root, NULL, &extfrag_file_ops))
		goto fail;

	return 0;
fail:
	debugfs_remove_recursive(extfrag_debug_root);
	return -ENOMEM;
}

module_init(extfrag_debug_init);
#endif