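/*
 * Manages zoned VM statistics: per-cpu counter differentials that are
 * periodically folded into zone and global counters, VM event counters,
 * and the /proc and debugfs reporting interfaces (vmstat, zoneinfo,
 * buddyinfo, pagetypeinfo, extfrag).
 */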
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}
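
/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */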
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);
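
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * a value from another processor. Typically used for
 * folding the events from the dead processor.
 */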
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}

#endif /* CONFIG_VM_EVENT_COUNTERS */
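
/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */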
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

int calculate_pressure_threshold(struct zone *zone)
{
	int threshold;
	int watermark_distance;

	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark.
	 */
	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
	threshold = max(1, (int)(watermark_distance / num_online_cpus()));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

int calculate_normal_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 *
	 * fls() is used for cheap logarithmic scaling: the threshold grows
	 * slowly with the online cpu count and the zone size (mem below is
	 * the amount of managed memory in units of 128MB) and is capped at
	 * 125 by the clamp further down.
	 */
	mem = zone->managed_pages >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}
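
/*
 * Refresh the thresholds for each zone.
 */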
void refresh_zone_stat_thresholds(void)
{
	struct zone *zone;
	int cpu;
	int threshold;

	for_each_populated_zone(zone) {
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_normal_threshold(zone);

		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * the min watermark could be breached by an allocation
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}

void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{
	struct zone *zone;
	int cpu;
	int threshold;
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		zone = &pgdat->node_zones[i];
		if (!zone->percpu_drift_mark)
			continue;

		threshold = (*calculate_pressure)(zone);
		for_each_possible_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;
	}
}
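
/*
 * For use when we know that interrupts are disabled.
 */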
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				int delta)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_zone_page_state);
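
/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */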
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v - overstep, zone, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
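
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies that the counter should be pushed to the
 * (positive or negative) overstep value to leave some slack before the
 * threshold is hit again.
 */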
static inline void mod_state(struct zone *zone,
			enum zone_stat_item item, int delta, int overstep_mode)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;	/* overflow to zone counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong CPU if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a zone.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (n > t || n < -t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to zone counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		zone_page_state_add(z, zone, item);
}

void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
					int delta)
{
	mod_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	mod_state(zone, item, 1, 1);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_state(page_zone(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_state(page_zone(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_zone_page_state);
#else
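
/*
 * Use interrupt disable to serialize counter updates
 */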
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
					int delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);
#endif /* CONFIG_HAVE_CMPXCHG_LOCAL */
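
/*
 * Update the zone counters for one cpu.
 *
 * The cpu specified must be either the current cpu or a processor that
 * is not online. If it is the current cpu then the execution thread must
 * be pinned to the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 */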
void refresh_cpu_vm_stats(int cpu)
{
	struct zone *zone;
	int i;
	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p;

		p = per_cpu_ptr(zone->pageset, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (p->vm_stat_diff[i]) {
				unsigned long flags;
				int v;

				local_irq_save(flags);
				v = p->vm_stat_diff[i];
				p->vm_stat_diff[i] = 0;
				local_irq_restore(flags);
				atomic_long_add(v, &zone->vm_stat[i]);
				global_diff[i] += v;
#ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
				p->expire = 3;
#endif
			}
		cond_resched();
#ifdef CONFIG_NUMA
		/*
		 * Deal with draining the remote pageset of this
		 * processor
		 *
		 * Check if there are pages remaining in this pageset
		 * if not then there is nothing to expire.
		 */
		if (!p->expire || !p->pcp.count)
			continue;

		/*
		 * We never drain zones local to this processor.
		 */
		if (zone_to_nid(zone) == numa_node_id()) {
			p->expire = 0;
			continue;
		}

		p->expire--;
		if (p->expire)
			continue;

		if (p->pcp.count)
			drain_zone_pages(zone, &p->pcp);
#endif
	}

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (global_diff[i])
			atomic_long_add(global_diff[i], &vm_stat[i]);
}
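
/*
 * This is only called if !populated_zone(zone), which implies no other
 * users of pset->vm_stat_diff[] exist: fold any remaining differentials
 * straight into the zone and global counters.
 */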
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
{
	int i;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (pset->vm_stat_diff[i]) {
			int v = pset->vm_stat_diff[i];
			pset->vm_stat_diff[i] = 0;
			atomic_long_add(v, &zone->vm_stat[i]);
			atomic_long_add(v, &vm_stat[i]);
		}
}
#endif /* CONFIG_SMP */

#ifdef CONFIG_NUMA
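
/*
 * zone_statistics: record NUMA hit/miss statistics for an allocation
 * that was satisfied from zone @z when @preferred_zone was requested.
 *
 * Must be called with interrupts disabled.
 *
 * When __GFP_OTHER_NODE is set assume the node of the preferred
 * zone is the local node. This is useful for daemons who allocate
 * memory on behalf of other processes.
 */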
void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags)
{
	if (z->zone_pgdat == preferred_zone->zone_pgdat) {
		__inc_zone_state(z, NUMA_HIT);
	} else {
		__inc_zone_state(z, NUMA_MISS);
		__inc_zone_state(preferred_zone, NUMA_FOREIGN);
	}
	if (z->node == ((flags & __GFP_OTHER_NODE) ?
			preferred_zone->node : numa_node_id()))
		__inc_zone_state(z, NUMA_LOCAL);
	else
		__inc_zone_state(z, NUMA_OTHER);
}
#endif /* CONFIG_NUMA */

#ifdef CONFIG_COMPACTION

struct contig_page_info {
	unsigned long free_pages;
	unsigned long free_blocks_total;
	unsigned long free_blocks_suitable;
};
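
/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * done in a separate fragmentation index calculation.
 */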
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{
	unsigned int order;

	info->free_pages = 0;
	info->free_blocks_total = 0;
	info->free_blocks_suitable = 0;

	for (order = 0; order < MAX_ORDER; order++) {
		unsigned long blocks;

		/* Count number of free blocks */
		blocks = zone->free_area[order].nr_free;
		info->free_blocks_total += blocks;

		/* Count free base pages */
		info->free_pages += blocks << order;

		/* Count the suitable free blocks */
		if (order >= suitable_order)
			info->free_blocks_suitable += blocks <<
						(order - suitable_order);
	}
}
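
/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used.
 */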
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

	if (!info->free_blocks_total)
		return 0;

	/* Fragmentation index only makes sense when a request would fail */
	if (info->free_blocks_suitable)
		return -1000;

	/*
	 * Index is between 0 and 1 so return within 3 decimal places
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
	return 1000 - div_u64((1000 +
			div_u64(info->free_pages * 1000ULL, requested)),
			info->free_blocks_total);
}

/* Same as __fragmentation_index but allocates contig_page_info on stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
#endif /* CONFIG_COMPACTION */

#if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static char * const migratetype_names[MIGRATE_TYPES] = {
	"Unmovable",
	"Reclaimable",
	"Movable",
	"Reserve",
#ifdef CONFIG_CMA
	"CMA",
#endif
#ifdef CONFIG_MEMORY_ISOLATION
	"Isolate",
#endif
};

static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;
	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/* Walk all the zones in a node and print using a callback */
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		print(m, pgdat, zone);
		spin_unlock_irqrestore(&zone->lock, flags);
	}
}
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",

const char * const vmstat_text[] = {
	/* Zoned VM counters */
	"nr_free_pages",
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_mlock",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_page_table_pages",
	"nr_kernel_stack",
	"nr_unstable",
	"nr_bounce",
	"nr_vmscan_write",
	"nr_vmscan_immediate_reclaim",
	"nr_writeback_temp",
	"nr_isolated_anon",
	"nr_isolated_file",
	"nr_shmem",
	"nr_dirtied",
	"nr_written",

#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif
	"nr_anon_transparent_hugepages",
	"nr_free_cma",
	"nr_dirty_threshold",
	"nr_dirty_background_threshold",

#ifdef CONFIG_VM_EVENT_COUNTERS
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	TEXTS_FOR_ZONES("pgrefill")
	TEXTS_FOR_ZONES("pgsteal_kswapd")
	TEXTS_FOR_ZONES("pgsteal_direct")
	TEXTS_FOR_ZONES("pgscan_kswapd")
	TEXTS_FOR_ZONES("pgscan_direct")
	"pgscan_direct_throttle",

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"pageoutrun",
	"allocstall",

	"pgrotated",

#ifdef CONFIG_NUMA_BALANCING
	"numa_pte_updates",
	"numa_hint_faults",
	"numa_hint_faults_local",
	"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
	"pgmigrate_success",
	"pgmigrate_fail",
#endif
#ifdef CONFIG_COMPACTION
	"compact_migrate_scanned",
	"compact_free_scanned",
	"compact_isolated",
	"compact_stall",
	"compact_fail",
	"compact_success",
#endif

#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	"thp_fault_alloc",
	"thp_fault_fallback",
	"thp_collapse_alloc",
	"thp_collapse_alloc_failed",
	"thp_split",
	"thp_zero_page_alloc",
	"thp_zero_page_alloc_failed",
#endif

#endif /* CONFIG_VM_EVENT_COUNTERS */
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */

#ifdef CONFIG_PROC_FS
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
						struct zone *zone)
{
	int order;

	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
	seq_putc(m, '\n');
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, frag_show_print);
	return 0;
}

static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int order, mtype;

	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
					pgdat->node_id,
					zone->name,
					migratetype_names[mtype]);
		for (order = 0; order < MAX_ORDER; ++order) {
			unsigned long freecount = 0;
			struct free_area *area;
			struct list_head *curr;

			area = &(zone->free_area[order]);

			list_for_each(curr, &area->free_list[mtype])
				freecount++;
			seq_printf(m, "%6lu ", freecount);
		}
		seq_putc(m, '\n');
	}
}

/* Print out the free pages at each order for each migratetype */
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	int order;
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* Print header */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6d ", order);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);

	return 0;
}

static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mtype;
	unsigned long pfn;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		if (!pfn_valid(pfn))
			continue;

		page = pfn_to_page(pfn);

		/* Watch for unexpected holes punched in the memmap */
		if (!memmap_valid_within(pfn, page, zone))
			continue;

		mtype = get_pageblock_migratetype(page);

		if (mtype < MIGRATE_TYPES)
			count[mtype]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12lu ", count[mtype]);
	seq_putc(m, '\n');
}

/* Print out the number of pageblocks for each migratetype */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	int mtype;
	pg_data_t *pgdat = (pg_data_t *)arg;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');
	walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);

	return 0;
}

/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	seq_printf(m, "Page block order: %d\n", pageblock_order);
	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
	seq_putc(m, '\n');
	pagetypeinfo_showfree(m, pgdat);
	pagetypeinfo_showblockcount(m, pgdat);

	return 0;
}

static const struct seq_operations fragmentation_op = {
	.start = frag_start,
	.next = frag_next,
	.stop = frag_stop,
	.show = frag_show,
};

static int fragmentation_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &fragmentation_op);
}

static const struct file_operations fragmentation_file_operations = {
	.open = fragmentation_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static const struct seq_operations pagetypeinfo_op = {
	.start = frag_start,
	.next = frag_next,
	.stop = frag_stop,
	.show = pagetypeinfo_show,
};

static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &pagetypeinfo_op);
}

static const struct file_operations pagetypeinfo_file_ops = {
	.open = pagetypeinfo_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
							struct zone *zone)
{
	int i;
	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
	seq_printf(m,
		   "\n  pages free     %lu"
		   "\n        min      %lu"
		   "\n        low      %lu"
		   "\n        high     %lu"
		   "\n        scanned  %lu"
		   "\n        spanned  %lu"
		   "\n        present  %lu"
		   "\n        managed  %lu",
		   zone_page_state(zone, NR_FREE_PAGES),
		   min_wmark_pages(zone),
		   low_wmark_pages(zone),
		   high_wmark_pages(zone),
		   zone->pages_scanned,
		   zone->spanned_pages,
		   zone->present_pages,
		   zone->managed_pages);

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
				zone_page_state(zone, i));

	seq_printf(m,
		   "\n        protection: (%lu",
		   zone->lowmem_reserve[0]);
	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
		seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
	seq_printf(m,
		   ")"
		   "\n  pagesets");
	for_each_online_cpu(i) {
		struct per_cpu_pageset *pageset;

		pageset = per_cpu_ptr(zone->pageset, i);
		seq_printf(m,
			   "\n    cpu: %i"
			   "\n              count: %i"
			   "\n              high:  %i"
			   "\n              batch: %i",
			   i,
			   pageset->pcp.count,
			   pageset->pcp.high,
			   pageset->pcp.batch);
#ifdef CONFIG_SMP
		seq_printf(m, "\n  vm stats threshold: %d",
				pageset->stat_threshold);
#endif
	}
	seq_printf(m,
		   "\n  all_unreclaimable: %u"
		   "\n  start_pfn:         %lu"
		   "\n  inactive_ratio:    %u",
		   zone->all_unreclaimable,
		   zone->zone_start_pfn,
		   zone->inactive_ratio);
	seq_putc(m, '\n');
}

/*
 * Output information about zones in @pgdat.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, zoneinfo_show_print);
	return 0;
}

static const struct seq_operations zoneinfo_op = {
	.start = frag_start, /* iterate over all zones, same as fragmentation */
	.next = frag_next,
	.stop = frag_stop,
	.show = zoneinfo_show,
};

static int zoneinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &zoneinfo_op);
}

static const struct file_operations proc_zoneinfo_file_operations = {
	.open = zoneinfo_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

enum writeback_stat_item {
	NR_DIRTY_THRESHOLD,
	NR_DIRTY_BG_THRESHOLD,
	NR_VM_WRITEBACK_STAT_ITEMS,
};

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
	int i, stat_items_size;

	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);

#ifdef CONFIG_VM_EVENT_COUNTERS
	stat_items_size += sizeof(struct vm_event_state);
#endif

	v = kmalloc(stat_items_size, GFP_KERNEL);
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_page_state(i);
	v += NR_VM_ZONE_STAT_ITEMS;

	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
			    v + NR_DIRTY_THRESHOLD);
	v += NR_VM_WRITEBACK_STAT_ITEMS;

#ifdef CONFIG_VM_EVENT_COUNTERS
	all_vm_events(v);
	v[PGPGIN] /= 2;		/* sectors -> kbytes */
	v[PGPGOUT] /= 2;
#endif
	return (unsigned long *)m->private + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

static const struct seq_operations vmstat_op = {
	.start = vmstat_start,
	.next = vmstat_next,
	.stop = vmstat_stop,
	.show = vmstat_show,
};

static int vmstat_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &vmstat_op);
}

static const struct file_operations proc_vmstat_file_operations = {
	.open = vmstat_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

static void vmstat_update(struct work_struct *w)
{
	refresh_cpu_vm_stats(smp_processor_id());
	schedule_delayed_work(&__get_cpu_var(vmstat_work),
		round_jiffies_relative(sysctl_stat_interval));
}

static void __cpuinit start_cpu_timer(int cpu)
{
	struct delayed_work *work = &per_cpu(vmstat_work, cpu);

	INIT_DEFERRABLE_WORK(work, vmstat_update);
	schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
}
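
/*
 * Use the cpu notifier to insure that the thresholds are recalculated
 * when necessary.
 */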
static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
		unsigned long action,
		void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		refresh_zone_stat_thresholds();
		start_cpu_timer(cpu);
		node_set_state(cpu_to_node(cpu), N_CPU);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
		per_cpu(vmstat_work, cpu).work.func = NULL;
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		start_cpu_timer(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		refresh_zone_stat_thresholds();
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata vmstat_notifier =
	{ &vmstat_cpuup_callback, NULL, 0 };
#endif /* CONFIG_SMP */

static int __init setup_vmstat(void)
{
#ifdef CONFIG_SMP
	int cpu;

	register_cpu_notifier(&vmstat_notifier);

	for_each_online_cpu(cpu)
		start_cpu_timer(cpu);
#endif
#ifdef CONFIG_PROC_FS
	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
	proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
	proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
	proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
#endif
	return 0;
}
module_init(setup_vmstat)

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
#include <linux/debugfs.h>
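
/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */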
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{
	/* No free memory is interpreted as all free memory is unusable */
	if (info->free_pages == 0)
		return 1000;

	/*
	 * Index should be a value between 0 and 1. Return a value to 3
	 * decimal places.
	 *
	 * 0 => no fragmentation
	 * 1 => high fragmentation
	 */
	return div_u64((info->free_pages -
			(info->free_blocks_suitable << order)) * 1000ULL,
			info->free_pages);
}

static void unusable_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = unusable_free_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}
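
/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */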
static int unusable_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	walk_zones_in_node(m, pgdat, unusable_show_print);

	return 0;
}

static const struct seq_operations unusable_op = {
	.start = frag_start,
	.next = frag_next,
	.stop = frag_stop,
	.show = unusable_show,
};

static int unusable_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &unusable_op);
}

static const struct file_operations unusable_file_ops = {
	.open = unusable_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static void extfrag_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;

	/* Alloc on stack as interrupts are disabled for zone walk */
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = __fragmentation_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display fragmentation index for orders that allocations would fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, extfrag_show_print);

	return 0;
}

static const struct seq_operations extfrag_op = {
	.start = frag_start,
	.next = frag_next,
	.stop = frag_stop,
	.show = extfrag_show,
};

static int extfrag_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &extfrag_op);
}

static const struct file_operations extfrag_file_ops = {
	.open = extfrag_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static int __init extfrag_debug_init(void)
{
	struct dentry *extfrag_debug_root;

	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
	if (!extfrag_debug_root)
		return -ENOMEM;

	if (!debugfs_create_file("unusable_index", 0444,
			extfrag_debug_root, NULL, &unusable_file_ops))
		goto fail;

	if (!debugfs_create_file("extfrag_index", 0444,
			extfrag_debug_root, NULL, &extfrag_file_ops))
		goto fail;

	return 0;
fail:
	debugfs_remove_recursive(extfrag_debug_root);
	return -ENOMEM;
}

module_init(extfrag_debug_init);
#endif /* CONFIG_DEBUG_FS && CONFIG_COMPACTION */