/*
 *  linux/mm/vmstat.c
 *
 *  Manages zone-based and global VM statistics and exports them through
 *  /proc/vmstat, /proc/zoneinfo, /proc/buddyinfo, /proc/pagetypeinfo and
 *  the extfrag debugfs interface.
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/vmstat.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

/*
 * Accumulate the vm event counters across all online CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);

#ifdef CONFIG_HOTPLUG
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * while removing the events of another processor.
 */
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}
#endif /* CONFIG_HOTPLUG */

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_stat);

#ifdef CONFIG_SMP

int calculate_pressure_threshold(struct zone *zone)
{
	int threshold;
	int watermark_distance;

	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark.
	 */
	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
	threshold = max(1, (int)(watermark_distance / num_online_cpus()));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

int calculate_normal_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the amount of memory in the zone and
	 * with the number of processors.
	 *
	 * With more CPUs the per-cpu differentials can drift further before
	 * they are folded back into the zone counter, so the threshold
	 * grows with fls(num_online_cpus()).  Larger zones can tolerate a
	 * larger absolute drift, so the threshold also grows with
	 * fls(zone size in 128MB units).
	 *
	 * The threshold is capped below so that the worst-case per-cpu
	 * drift stays bounded even on very large machines.
	 */
	mem = zone->present_pages >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

/*
 * Refresh the per-cpu stat thresholds and the percpu_drift_mark
 * for each populated zone.
 */
void refresh_zone_stat_thresholds(void)
{
	struct zone *zone;
	int cpu;
	int threshold;

	for_each_populated_zone(zone) {
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_normal_threshold(zone);

		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * the min watermark could be breached by an allocation.
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}

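/*
 * Apply a threshold derived from @calculate_pressure to every zone in
 * @pgdat that has a percpu_drift_mark set.  Zones without a drift mark
 * cannot drift far enough for the threshold to matter and are skipped.
 */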
void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{
	struct zone *zone;
	int cpu;
	int threshold;
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		zone = &pgdat->node_zones[i];
		if (!zone->percpu_drift_mark)
			continue;

		threshold = (*calculate_pressure)(zone);
		for_each_possible_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;
	}
}

/*
 * For use when we know that interrupts are disabled.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
				int delta)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_zone_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.  The increment or decrement is known and therefore
 * one boundary check can be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * When the differential crosses the threshold, half of the threshold is
 * "overstepped" into the zone counter so that the per-cpu differential
 * does not immediately hit the threshold again on the next update.
 */
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v - overstep, zone, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies how overstep should be handled:
 *     0       No overstepping
 *     1       Overstepping half of threshold
 *     -1      Overstepping minus half of threshold
 */
static inline void mod_state(struct zone *zone,
       enum zone_stat_item item, int delta, int overstep_mode)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to zone counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a zone.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (n > t || n < -t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to zone counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		zone_page_state_add(z, zone, item);
}

void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
					int delta)
{
	mod_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	mod_state(zone, item, 1, 1);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_state(page_zone(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_state(page_zone(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_zone_page_state);
#else
/*
 * Use interrupt disable to serialize counter updates
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
					int delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);
#endif /* CONFIG_HAVE_CMPXCHG_LOCAL */

/*
 * Update the zone counters for one cpu.
 *
 * The cpu specified must be either the current cpu or a processor that
 * is not online. If it is the current cpu then the execution thread must
 * be pinned to the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only allowed from certain processors.
 */
void refresh_cpu_vm_stats(int cpu)
{
	struct zone *zone;
	int i;
	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p;

		p = per_cpu_ptr(zone->pageset, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (p->vm_stat_diff[i]) {
				unsigned long flags;
				int v;

				local_irq_save(flags);
				v = p->vm_stat_diff[i];
				p->vm_stat_diff[i] = 0;
				local_irq_restore(flags);
				atomic_long_add(v, &zone->vm_stat[i]);
				global_diff[i] += v;
#ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
				p->expire = 3;
#endif
			}
		cond_resched();
#ifdef CONFIG_NUMA
		/*
		 * Deal with draining the remote pageset of this
		 * processor
		 *
		 * Check if there are pages remaining in this pageset
		 * if not then there is nothing to expire.
		 */
		if (!p->expire || !p->pcp.count)
			continue;

		/*
		 * We never drain zones local to this processor.
		 */
		if (zone_to_nid(zone) == numa_node_id()) {
			p->expire = 0;
			continue;
		}

		p->expire--;
		if (p->expire)
			continue;

		if (p->pcp.count)
			drain_zone_pages(zone, &p->pcp);
#endif
	}

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (global_diff[i])
			atomic_long_add(global_diff[i], &vm_stat[i]);
}

#endif /* CONFIG_SMP */

#ifdef CONFIG_NUMA
/*
 * Count NUMA allocation events for a zone.
 *
 * z              = the zone the page was actually allocated from
 * preferred_zone = the zone the allocator preferred
 * flags          = the allocation's gfp flags
 *
 * Must be called with interrupts disabled.
 *
 * When __GFP_OTHER_NODE is set, the allocation is being performed on
 * behalf of another node, so "local" is judged against the preferred
 * zone's node rather than the current cpu's node.
 */
void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags)
{
	if (z->zone_pgdat == preferred_zone->zone_pgdat) {
		__inc_zone_state(z, NUMA_HIT);
	} else {
		__inc_zone_state(z, NUMA_MISS);
		__inc_zone_state(preferred_zone, NUMA_FOREIGN);
	}
	if (z->node == ((flags & __GFP_OTHER_NODE) ?
			preferred_zone->node : numa_node_id()))
		__inc_zone_state(z, NUMA_LOCAL);
	else
		__inc_zone_state(z, NUMA_OTHER);
}
#endif

#ifdef CONFIG_COMPACTION

struct contig_page_info {
	unsigned long free_pages;
	unsigned long free_blocks_total;
	unsigned long free_blocks_suitable;
};

/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is expensive and can be done on-demand.
 */
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{
	unsigned int order;

	info->free_pages = 0;
	info->free_blocks_total = 0;
	info->free_blocks_suitable = 0;

	for (order = 0; order < MAX_ORDER; order++) {
		unsigned long blocks;

		/* Count number of free blocks */
		blocks = zone->free_area[order].nr_free;
		info->free_blocks_total += blocks;

		/* Count free base pages */
		info->free_pages += blocks << order;

		/* Count the suitable free blocks */
		if (order >= suitable_order)
			info->free_blocks_suitable += blocks <<
						(order - suitable_order);
	}
}

/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used.
 */
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

	if (!info->free_blocks_total)
		return 0;

	/* Fragmentation index only makes sense when a request would fail */
	if (info->free_blocks_suitable)
		return -1000;

	/*
	 * Index is between 0 and 1 so return within 3 decimal places
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
	return 1000 - div_u64((1000 + (div_u64(info->free_pages * 1000ULL,
				requested))), info->free_blocks_total);
}

/* Same as __fragmentation_index but allocates contig_page_info on the stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static char * const migratetype_names[MIGRATE_TYPES] = {
	"Unmovable",
	"Reclaimable",
	"Movable",
	"Reserve",
#ifdef CONFIG_CMA
	"CMA",
#endif
	"Isolate",
};

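/*
 * seq_file iterator helpers shared by /proc/buddyinfo, /proc/pagetypeinfo
 * and /proc/zoneinfo: *pos is interpreted as an index into the list of
 * online nodes, so frag_start() skips to the pos'th online pgdat and
 * frag_next() simply advances to the next one.
 */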
static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;

	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/* Walk all the zones in a node and print using a callback */
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (!populated_zone(zone))
			continue;

		spin_lock_irqsave(&zone->lock, flags);
		print(m, pgdat, zone);
		spin_unlock_irqrestore(&zone->lock, flags);
	}
}
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",

const char * const vmstat_text[] = {
	/* Zoned VM counters (enum zone_stat_item) */
	"nr_free_pages",
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_mlock",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_page_table_pages",
	"nr_kernel_stack",
	"nr_unstable",
	"nr_bounce",
	"nr_vmscan_write",
	"nr_vmscan_immediate_reclaim",
	"nr_writeback_temp",
	"nr_isolated_anon",
	"nr_isolated_file",
	"nr_shmem",
	"nr_dirtied",
	"nr_written",

#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif
	"nr_anon_transparent_hugepages",
	/* enum writeback_stat_item counters */
	"nr_dirty_threshold",
	"nr_dirty_background_threshold",

#ifdef CONFIG_VM_EVENT_COUNTERS
	/* enum vm_event_item counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	TEXTS_FOR_ZONES("pgrefill")
	TEXTS_FOR_ZONES("pgsteal_kswapd")
	TEXTS_FOR_ZONES("pgsteal_direct")
	TEXTS_FOR_ZONES("pgscan_kswapd")
	TEXTS_FOR_ZONES("pgscan_direct")

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"kswapd_skip_congestion_wait",
	"pageoutrun",
	"allocstall",

	"pgrotated",

#ifdef CONFIG_COMPACTION
	"compact_blocks_moved",
	"compact_pages_moved",
	"compact_pagemigrate_failed",
	"compact_stall",
	"compact_fail",
	"compact_success",
#endif

#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",
	"unevictable_pgs_mlockfreed",

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	"thp_fault_alloc",
	"thp_fault_fallback",
	"thp_collapse_alloc",
	"thp_collapse_alloc_failed",
	"thp_split",
#endif

#endif /* CONFIG_VM_EVENT_COUNTERS */
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */


#ifdef CONFIG_PROC_FS
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
						struct zone *zone)
{
	int order;

	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
	seq_putc(m, '\n');
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, frag_show_print);
	return 0;
}

static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int order, mtype;

	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
					pgdat->node_id,
					zone->name,
					migratetype_names[mtype]);
		for (order = 0; order < MAX_ORDER; ++order) {
			unsigned long freecount = 0;
			struct free_area *area;
			struct list_head *curr;

			area = &(zone->free_area[order]);

			list_for_each(curr, &area->free_list[mtype])
				freecount++;
			seq_printf(m, "%6lu ", freecount);
		}
		seq_putc(m, '\n');
	}
}

/* Print out the free pages at each order for each migrate type */
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	int order;
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* Print header */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6d ", order);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);

	return 0;
}

static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mtype;
	unsigned long pfn;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = start_pfn + zone->spanned_pages;
	unsigned long count[MIGRATE_TYPES] = { 0, };

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		if (!pfn_valid(pfn))
			continue;

		page = pfn_to_page(pfn);

		/* Watch for unexpected holes punched in the memmap */
		if (!memmap_valid_within(pfn, page, zone))
			continue;

		mtype = get_pageblock_migratetype(page);

		if (mtype < MIGRATE_TYPES)
			count[mtype]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12lu ", count[mtype]);
	seq_putc(m, '\n');
}

/* Print out the number of pageblocks for each migratetype */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	int mtype;
	pg_data_t *pgdat = (pg_data_t *)arg;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');
	walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);

	return 0;
}

/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
		return 0;

	seq_printf(m, "Page block order: %d\n", pageblock_order);
	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
	seq_putc(m, '\n');
	pagetypeinfo_showfree(m, pgdat);
	pagetypeinfo_showblockcount(m, pgdat);

	return 0;
}

static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};

static int fragmentation_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &fragmentation_op);
}

static const struct file_operations fragmentation_file_operations = {
	.open		= fragmentation_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

static int pagetypeinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &pagetypeinfo_op);
}

static const struct file_operations pagetypeinfo_file_ops = {
	.open		= pagetypeinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

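/*
 * Emit one zone's entry for /proc/zoneinfo: the free page count and
 * watermarks, every zone_stat_item counter, the lowmem_reserve protection
 * array and the state of each online cpu's pageset.
 */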
static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
							struct zone *zone)
{
	int i;
	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
	seq_printf(m,
		   "\n  pages free     %lu"
		   "\n        min      %lu"
		   "\n        low      %lu"
		   "\n        high     %lu"
		   "\n        scanned  %lu"
		   "\n        spanned  %lu"
		   "\n        present  %lu",
		   zone_page_state(zone, NR_FREE_PAGES),
		   min_wmark_pages(zone),
		   low_wmark_pages(zone),
		   high_wmark_pages(zone),
		   zone->pages_scanned,
		   zone->spanned_pages,
		   zone->present_pages);

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
				zone_page_state(zone, i));

	seq_printf(m,
		   "\n        protection: (%lu",
		   zone->lowmem_reserve[0]);
	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
		seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
	seq_printf(m,
		   ")"
		   "\n  pagesets");
	for_each_online_cpu(i) {
		struct per_cpu_pageset *pageset;

		pageset = per_cpu_ptr(zone->pageset, i);
		seq_printf(m,
			   "\n    cpu: %i"
			   "\n              count: %i"
			   "\n              high:  %i"
			   "\n              batch: %i",
			   i,
			   pageset->pcp.count,
			   pageset->pcp.high,
			   pageset->pcp.batch);
#ifdef CONFIG_SMP
		seq_printf(m, "\n  vm stats threshold: %d",
				pageset->stat_threshold);
#endif
	}
	seq_printf(m,
		   "\n  all_unreclaimable: %u"
		   "\n  start_pfn:         %lu"
		   "\n  inactive_ratio:    %u",
		   zone->all_unreclaimable,
		   zone->zone_start_pfn,
		   zone->inactive_ratio);
	seq_putc(m, '\n');
}

/*
 * Output information about zones in @pgdat.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, zoneinfo_show_print);
	return 0;
}

static const struct seq_operations zoneinfo_op = {
	.start	= frag_start, /* iterate over all zones. The same as in
			       * fragmentation. */
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};

static int zoneinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &zoneinfo_op);
}

static const struct file_operations proc_zoneinfo_file_operations = {
	.open		= zoneinfo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

enum writeback_stat_item {
	NR_DIRTY_THRESHOLD,
	NR_DIRTY_BG_THRESHOLD,
	NR_VM_WRITEBACK_STAT_ITEMS,
};

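/*
 * /proc/vmstat is built from one flat array of counters: the zone counters
 * come first, then the two dirty thresholds, then (if configured) the VM
 * event counters.  vmstat_text[] lists the names in exactly this order, so
 * *pos indexes both the value array and the name table.
 */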
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
	int i, stat_items_size;

	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);

#ifdef CONFIG_VM_EVENT_COUNTERS
	stat_items_size += sizeof(struct vm_event_state);
#endif

	v = kmalloc(stat_items_size, GFP_KERNEL);
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_page_state(i);
	v += NR_VM_ZONE_STAT_ITEMS;

	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
			    v + NR_DIRTY_THRESHOLD);
	v += NR_VM_WRITEBACK_STAT_ITEMS;

#ifdef CONFIG_VM_EVENT_COUNTERS
	all_vm_events(v);
	v[PGPGIN] /= 2;		/* sectors -> kbytes */
	v[PGPGOUT] /= 2;
#endif
	return (unsigned long *)m->private + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};

static int vmstat_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &vmstat_op);
}

static const struct file_operations proc_vmstat_file_operations = {
	.open		= vmstat_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

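/*
 * Periodic per-cpu work: fold this cpu's counter differentials into the
 * zone and global counters, then re-arm itself sysctl_stat_interval
 * jiffies later.
 */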
static void vmstat_update(struct work_struct *w)
{
	refresh_cpu_vm_stats(smp_processor_id());
	schedule_delayed_work(&__get_cpu_var(vmstat_work),
		round_jiffies_relative(sysctl_stat_interval));
}

static void __cpuinit start_cpu_timer(int cpu)
{
	struct delayed_work *work = &per_cpu(vmstat_work, cpu);

	INIT_DELAYED_WORK_DEFERRABLE(work, vmstat_update);
	schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
}

/*
 * Use the cpu notifier to insure that the thresholds are recalculated
 * when necessary.
 */
static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
		unsigned long action,
		void *hcpu)
{
	long cpu = (long)hcpu;

	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		refresh_zone_stat_thresholds();
		start_cpu_timer(cpu);
		node_set_state(cpu_to_node(cpu), N_CPU);
		break;
	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
		per_cpu(vmstat_work, cpu).work.func = NULL;
		break;
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
		start_cpu_timer(cpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		refresh_zone_stat_thresholds();
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata vmstat_notifier =
	{ &vmstat_cpuup_callback, NULL, 0 };
#endif /* CONFIG_SMP */

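/*
 * Boot-time initialisation: hook the cpu notifier, start the per-cpu
 * vmstat folding timers and create the /proc interfaces.
 */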
static int __init setup_vmstat(void)
{
#ifdef CONFIG_SMP
	int cpu;

	register_cpu_notifier(&vmstat_notifier);

	for_each_online_cpu(cpu)
		start_cpu_timer(cpu);
#endif
#ifdef CONFIG_PROC_FS
	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
	proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
	proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
	proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
#endif
	return 0;
}
module_init(setup_vmstat)

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
#include <linux/debugfs.h>

/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the given order.
 */
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{
	/* No free memory is interpreted as all free memory is unusable */
	if (info->free_pages == 0)
		return 1000;

	/*
	 * Index should be a value between 0 and 1. Return a value to 3
	 * decimal places.
	 *
	 * 0 => no fragmentation
	 * 1 => high fragmentation
	 */
	return div_u64((info->free_pages -
			(info->free_blocks_suitable << order)) *
			1000ULL, info->free_pages);
}

static void unusable_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = unusable_free_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */
static int unusable_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_HIGH_MEMORY))
		return 0;

	walk_zones_in_node(m, pgdat, unusable_show_print);

	return 0;
}

static const struct seq_operations unusable_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= unusable_show,
};

static int unusable_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &unusable_op);
}

static const struct file_operations unusable_file_ops = {
	.open		= unusable_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static void extfrag_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;

	/* Alloc on stack as interrupts are disabled for zone walk */
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = __fragmentation_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display fragmentation index for orders that allocations would fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, extfrag_show_print);

	return 0;
}

static const struct seq_operations extfrag_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= extfrag_show,
};

static int extfrag_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &extfrag_op);
}

static const struct file_operations extfrag_file_ops = {
	.open		= extfrag_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init extfrag_debug_init(void)
{
	struct dentry *extfrag_debug_root;

	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
	if (!extfrag_debug_root)
		return -ENOMEM;

	if (!debugfs_create_file("unusable_index", 0444,
			extfrag_debug_root, NULL, &unusable_file_ops))
		goto fail;

	if (!debugfs_create_file("extfrag_index", 0444,
			extfrag_debug_root, NULL, &extfrag_file_ops))
		goto fail;

	return 0;
fail:
	debugfs_remove_recursive(extfrag_debug_root);
	return -ENOMEM;
}

module_init(extfrag_debug_init);
#endif /* CONFIG_DEBUG_FS && CONFIG_COMPACTION */