/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *		Christoph Lameter <christoph@lameter.com>
 *  Copyright (C) 2008-2014 Christoph Lameter
 */
12#include <linux/fs.h>
13#include <linux/mm.h>
14#include <linux/err.h>
15#include <linux/module.h>
16#include <linux/slab.h>
17#include <linux/cpu.h>
18#include <linux/cpumask.h>
19#include <linux/vmstat.h>
20#include <linux/proc_fs.h>
21#include <linux/seq_file.h>
22#include <linux/debugfs.h>
23#include <linux/sched.h>
24#include <linux/math64.h>
25#include <linux/writeback.h>
26#include <linux/compaction.h>
27#include <linux/mm_inline.h>
28#include <linux/page_ext.h>
29#include <linux/page_owner.h>
30
31#include "internal.h"
32
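/*
 * The per-cpu NUMA event counters are u16; fold them into the global
 * atomics shortly before they would overflow.
 */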
33#define NUMA_STATS_THRESHOLD (U16_MAX - 2)
34
35#ifdef CONFIG_VM_EVENT_COUNTERS
36DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
37EXPORT_PER_CPU_SYMBOL(vm_event_states);
38
39static void sum_vm_events(unsigned long *ret)
40{
41 int cpu;
42 int i;
43
44 memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
45
46 for_each_online_cpu(cpu) {
47 struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
48
49 for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
50 ret[i] += this->event[i];
51 }
52}
53
/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
59void all_vm_events(unsigned long *ret)
60{
61 get_online_cpus();
62 sum_vm_events(ret);
63 put_online_cpus();
64}
65EXPORT_SYMBOL_GPL(all_vm_events);
66
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * from the events on another processor.
 */
73void vm_events_fold_cpu(int cpu)
74{
75 struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
76 int i;
77
78 for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
79 count_vm_events(i, fold_state->event[i]);
80 fold_state->event[i] = 0;
81 }
82}
83
84#endif
85
/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
91atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
92atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
93atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
94EXPORT_SYMBOL(vm_zone_stat);
95EXPORT_SYMBOL(vm_numa_stat);
96EXPORT_SYMBOL(vm_node_stat);
97
98#ifdef CONFIG_SMP
99
100int calculate_pressure_threshold(struct zone *zone)
101{
102 int threshold;
103 int watermark_distance;
104
 /*
  * As vmstats are not up to date, there is drift between the estimated
  * and real values. For high thresholds and a high number of CPUs, it
  * is possible for the min watermark to be breached while the estimated
  * value looks fine. The pressure threshold is a reduced value such
  * that even the maximum amount of drift will not accidentally breach
  * the min watermark.
  */
113 watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
114 threshold = max(1, (int)(watermark_distance / num_online_cpus()));
115
 /*
  * Maximum threshold is 125
  */
119 threshold = min(125, threshold);
120
121 return threshold;
122}
123
124int calculate_normal_threshold(struct zone *zone)
125{
126 int threshold;
127 int mem;
128
 /*
  * The threshold scales with the number of processors and the amount
  * of memory per zone. More memory means that we can defer updates for
  * longer, more processors could lead to more contention.
  * fls() is used to have a cheap way of logarithmic scaling.
  *
  * Small zones and few CPUs end up with thresholds of only a few
  * pages, while large NUMA machines approach the cap of 125 that is
  * applied below.
  */
159 mem = zone->managed_pages >> (27 - PAGE_SHIFT);
160
161 threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));
162
 /*
  * Maximum threshold is 125
  */
166 threshold = min(125, threshold);
167
168 return threshold;
169}
170
/*
 * Refresh the thresholds for each zone.
 */
174void refresh_zone_stat_thresholds(void)
175{
176 struct pglist_data *pgdat;
177 struct zone *zone;
178 int cpu;
179 int threshold;
180
 /* Zero current pgdat thresholds */
182 for_each_online_pgdat(pgdat) {
183 for_each_online_cpu(cpu) {
184 per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
185 }
186 }
187
188 for_each_populated_zone(zone) {
189 struct pglist_data *pgdat = zone->zone_pgdat;
190 unsigned long max_drift, tolerate_drift;
191
192 threshold = calculate_normal_threshold(zone);
193
194 for_each_online_cpu(cpu) {
195 int pgdat_threshold;
196
197 per_cpu_ptr(zone->pageset, cpu)->stat_threshold
198 = threshold;
199
 /* Base nodestat threshold on the largest populated zone. */
201 pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
202 per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
203 = max(threshold, pgdat_threshold);
204 }
205
 /*
  * Only set percpu_drift_mark if there is a danger that
  * NR_FREE_PAGES reports the low watermark is ok when in fact
  * the min watermark could be breached by an allocation
  */
211 tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
212 max_drift = num_online_cpus() * threshold;
213 if (max_drift > tolerate_drift)
214 zone->percpu_drift_mark = high_wmark_pages(zone) +
215 max_drift;
216 }
217}
218
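/*
 * Apply the threshold computed by the given callback (either
 * calculate_pressure_threshold or calculate_normal_threshold) to the
 * per-cpu pagesets of every zone in the node that has a
 * percpu_drift_mark. kswapd uses this to tighten the thresholds while
 * a node is under reclaim and to relax them again afterwards.
 */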
219void set_pgdat_percpu_threshold(pg_data_t *pgdat,
220 int (*calculate_pressure)(struct zone *))
221{
222 struct zone *zone;
223 int cpu;
224 int threshold;
225 int i;
226
227 for (i = 0; i < pgdat->nr_zones; i++) {
228 zone = &pgdat->node_zones[i];
229 if (!zone->percpu_drift_mark)
230 continue;
231
232 threshold = (*calculate_pressure)(zone);
233 for_each_online_cpu(cpu)
234 per_cpu_ptr(zone->pageset, cpu)->stat_threshold
235 = threshold;
236 }
237}
238
/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 */
244void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
245 long delta)
246{
247 struct per_cpu_pageset __percpu *pcp = zone->pageset;
248 s8 __percpu *p = pcp->vm_stat_diff + item;
249 long x;
250 long t;
251
252 x = delta + __this_cpu_read(*p);
253
254 t = __this_cpu_read(pcp->stat_threshold);
255
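 /* Fold into the zone counter once the per-cpu delta exceeds the threshold */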
256 if (unlikely(x > t || x < -t)) {
257 zone_page_state_add(x, zone, item);
258 x = 0;
259 }
260 __this_cpu_write(*p, x);
261}
262EXPORT_SYMBOL(__mod_zone_page_state);
263
264void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
265 long delta)
266{
267 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
268 s8 __percpu *p = pcp->vm_node_stat_diff + item;
269 long x;
270 long t;
271
272 x = delta + __this_cpu_read(*p);
273
274 t = __this_cpu_read(pcp->stat_threshold);
275
276 if (unlikely(x > t || x < -t)) {
277 node_page_state_add(x, pgdat, item);
278 x = 0;
279 }
280 __this_cpu_write(*p, x);
281}
282EXPORT_SYMBOL(__mod_node_page_state);
283
/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * atomic method to avoid the disabling of interrupts. Therefore the code must
 * operate in interrupt disable sections.
 *
 * The functions below do not necessarily disable interrupts (if they do it
 * is indicated in the functions)
 */
307void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
308{
309 struct per_cpu_pageset __percpu *pcp = zone->pageset;
310 s8 __percpu *p = pcp->vm_stat_diff + item;
311 s8 v, t;
312
313 v = __this_cpu_inc_return(*p);
314 t = __this_cpu_read(pcp->stat_threshold);
315 if (unlikely(v > t)) {
316 s8 overstep = t >> 1;
317
318 zone_page_state_add(v + overstep, zone, item);
319 __this_cpu_write(*p, -overstep);
320 }
321}
322
323void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
324{
325 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
326 s8 __percpu *p = pcp->vm_node_stat_diff + item;
327 s8 v, t;
328
329 v = __this_cpu_inc_return(*p);
330 t = __this_cpu_read(pcp->stat_threshold);
331 if (unlikely(v > t)) {
332 s8 overstep = t >> 1;
333
334 node_page_state_add(v + overstep, pgdat, item);
335 __this_cpu_write(*p, -overstep);
336 }
337}
338
339void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
340{
341 __inc_zone_state(page_zone(page), item);
342}
343EXPORT_SYMBOL(__inc_zone_page_state);
344
345void __inc_node_page_state(struct page *page, enum node_stat_item item)
346{
347 __inc_node_state(page_pgdat(page), item);
348}
349EXPORT_SYMBOL(__inc_node_page_state);
350
351void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
352{
353 struct per_cpu_pageset __percpu *pcp = zone->pageset;
354 s8 __percpu *p = pcp->vm_stat_diff + item;
355 s8 v, t;
356
357 v = __this_cpu_dec_return(*p);
358 t = __this_cpu_read(pcp->stat_threshold);
359 if (unlikely(v < - t)) {
360 s8 overstep = t >> 1;
361
362 zone_page_state_add(v - overstep, zone, item);
363 __this_cpu_write(*p, overstep);
364 }
365}
366
367void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
368{
369 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
370 s8 __percpu *p = pcp->vm_node_stat_diff + item;
371 s8 v, t;
372
373 v = __this_cpu_dec_return(*p);
374 t = __this_cpu_read(pcp->stat_threshold);
375 if (unlikely(v < - t)) {
376 s8 overstep = t >> 1;
377
378 node_page_state_add(v - overstep, pgdat, item);
379 __this_cpu_write(*p, overstep);
380 }
381}
382
383void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
384{
385 __dec_zone_state(page_zone(page), item);
386}
387EXPORT_SYMBOL(__dec_zone_page_state);
388
389void __dec_node_page_state(struct page *page, enum node_stat_item item)
390{
391 __dec_node_state(page_pgdat(page), item);
392}
393EXPORT_SYMBOL(__dec_node_page_state);
394
395#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies whether, on crossing the threshold, the counter
 * should overshoot by half the threshold (1 when incrementing, -1 when
 * decrementing, 0 for plain modifications) so that subsequent updates in
 * the same direction keep hitting the per-cpu differential.
 */
408static inline void mod_zone_state(struct zone *zone,
409 enum zone_stat_item item, long delta, int overstep_mode)
410{
411 struct per_cpu_pageset __percpu *pcp = zone->pageset;
412 s8 __percpu *p = pcp->vm_stat_diff + item;
413 long o, n, t, z;
414
415 do {
416 z = 0;
417
 /*
  * The fetching of the stat_threshold is racy. We may apply
  * a counter threshold to the wrong cpu if we get
  * rescheduled while executing here. However, the next
  * counter update will apply the threshold again and
  * therefore bring the counter under the threshold again.
  *
  * Most of the time the thresholds are the same anyways
  * for all cpus in a zone.
  */
428 t = this_cpu_read(pcp->stat_threshold);
429
430 o = this_cpu_read(*p);
431 n = delta + o;
432
433 if (n > t || n < -t) {
434 int os = overstep_mode * (t >> 1) ;
435
 /* Overflow must be added to zone counters */
437 z = n + os;
438 n = -os;
439 }
440 } while (this_cpu_cmpxchg(*p, o, n) != o);
441
442 if (z)
443 zone_page_state_add(z, zone, item);
444}
445
446void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
447 long delta)
448{
449 mod_zone_state(zone, item, delta, 0);
450}
451EXPORT_SYMBOL(mod_zone_page_state);
452
453void inc_zone_page_state(struct page *page, enum zone_stat_item item)
454{
455 mod_zone_state(page_zone(page), item, 1, 1);
456}
457EXPORT_SYMBOL(inc_zone_page_state);
458
459void dec_zone_page_state(struct page *page, enum zone_stat_item item)
460{
461 mod_zone_state(page_zone(page), item, -1, -1);
462}
463EXPORT_SYMBOL(dec_zone_page_state);
464
465static inline void mod_node_state(struct pglist_data *pgdat,
466 enum node_stat_item item, int delta, int overstep_mode)
467{
468 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
469 s8 __percpu *p = pcp->vm_node_stat_diff + item;
470 long o, n, t, z;
471
472 do {
473 z = 0;
474
 /*
  * The fetching of the stat_threshold is racy. We may apply
  * a counter threshold to the wrong cpu if we get
  * rescheduled while executing here. However, the next
  * counter update will apply the threshold again and
  * therefore bring the counter under the threshold again.
  *
  * Most of the time the thresholds are the same anyways
  * for all cpus in a node.
  */
485 t = this_cpu_read(pcp->stat_threshold);
486
487 o = this_cpu_read(*p);
488 n = delta + o;
489
490 if (n > t || n < -t) {
491 int os = overstep_mode * (t >> 1) ;
492
 /* Overflow must be added to node counters */
494 z = n + os;
495 n = -os;
496 }
497 } while (this_cpu_cmpxchg(*p, o, n) != o);
498
499 if (z)
500 node_page_state_add(z, pgdat, item);
501}
502
503void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
504 long delta)
505{
506 mod_node_state(pgdat, item, delta, 0);
507}
508EXPORT_SYMBOL(mod_node_page_state);
509
510void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
511{
512 mod_node_state(pgdat, item, 1, 1);
513}
514
515void inc_node_page_state(struct page *page, enum node_stat_item item)
516{
517 mod_node_state(page_pgdat(page), item, 1, 1);
518}
519EXPORT_SYMBOL(inc_node_page_state);
520
521void dec_node_page_state(struct page *page, enum node_stat_item item)
522{
523 mod_node_state(page_pgdat(page), item, -1, -1);
524}
525EXPORT_SYMBOL(dec_node_page_state);
526#else
/*
 * Use interrupt disable to serialize counter updates
 */
530void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
531 long delta)
532{
533 unsigned long flags;
534
535 local_irq_save(flags);
536 __mod_zone_page_state(zone, item, delta);
537 local_irq_restore(flags);
538}
539EXPORT_SYMBOL(mod_zone_page_state);
540
541void inc_zone_page_state(struct page *page, enum zone_stat_item item)
542{
543 unsigned long flags;
544 struct zone *zone;
545
546 zone = page_zone(page);
547 local_irq_save(flags);
548 __inc_zone_state(zone, item);
549 local_irq_restore(flags);
550}
551EXPORT_SYMBOL(inc_zone_page_state);
552
553void dec_zone_page_state(struct page *page, enum zone_stat_item item)
554{
555 unsigned long flags;
556
557 local_irq_save(flags);
558 __dec_zone_page_state(page, item);
559 local_irq_restore(flags);
560}
561EXPORT_SYMBOL(dec_zone_page_state);
562
563void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
564{
565 unsigned long flags;
566
567 local_irq_save(flags);
568 __inc_node_state(pgdat, item);
569 local_irq_restore(flags);
570}
571EXPORT_SYMBOL(inc_node_state);
572
573void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
574 long delta)
575{
576 unsigned long flags;
577
578 local_irq_save(flags);
579 __mod_node_page_state(pgdat, item, delta);
580 local_irq_restore(flags);
581}
582EXPORT_SYMBOL(mod_node_page_state);
583
584void inc_node_page_state(struct page *page, enum node_stat_item item)
585{
586 unsigned long flags;
587 struct pglist_data *pgdat;
588
589 pgdat = page_pgdat(page);
590 local_irq_save(flags);
591 __inc_node_state(pgdat, item);
592 local_irq_restore(flags);
593}
594EXPORT_SYMBOL(inc_node_page_state);
595
596void dec_node_page_state(struct page *page, enum node_stat_item item)
597{
598 unsigned long flags;
599
600 local_irq_save(flags);
601 __dec_node_page_state(page, item);
602 local_irq_restore(flags);
603}
604EXPORT_SYMBOL(dec_node_page_state);
605#endif
606
/*
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */
611#ifdef CONFIG_NUMA
612static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
613{
614 int i;
615 int changes = 0;
616
617 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
618 if (zone_diff[i]) {
619 atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
620 changes++;
621 }
622
623 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
624 if (numa_diff[i]) {
625 atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
626 changes++;
627 }
628
629 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
630 if (node_diff[i]) {
631 atomic_long_add(node_diff[i], &vm_node_stat[i]);
632 changes++;
633 }
634 return changes;
635}
636#else
637static int fold_diff(int *zone_diff, int *node_diff)
638{
639 int i;
640 int changes = 0;
641
642 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
643 if (zone_diff[i]) {
644 atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
645 changes++;
646 }
647
648 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
649 if (node_diff[i]) {
650 atomic_long_add(node_diff[i], &vm_node_stat[i]);
651 changes++;
652 }
653 return changes;
654}
655#endif
656
/*
 * Update the zone counters for the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 *
 * The function returns the number of global counters updated.
 */
673static int refresh_cpu_vm_stats(bool do_pagesets)
674{
675 struct pglist_data *pgdat;
676 struct zone *zone;
677 int i;
678 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
679#ifdef CONFIG_NUMA
680 int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
681#endif
682 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
683 int changes = 0;
684
685 for_each_populated_zone(zone) {
686 struct per_cpu_pageset __percpu *p = zone->pageset;
687
688 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
689 int v;
690
691 v = this_cpu_xchg(p->vm_stat_diff[i], 0);
692 if (v) {
693
694 atomic_long_add(v, &zone->vm_stat[i]);
695 global_zone_diff[i] += v;
696#ifdef CONFIG_NUMA
 /* 3 seconds idle till flush */
698 __this_cpu_write(p->expire, 3);
699#endif
700 }
701 }
702#ifdef CONFIG_NUMA
703 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
704 int v;
705
706 v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
707 if (v) {
708
709 atomic_long_add(v, &zone->vm_numa_stat[i]);
710 global_numa_diff[i] += v;
711 __this_cpu_write(p->expire, 3);
712 }
713 }
714
715 if (do_pagesets) {
716 cond_resched();
 /*
  * Deal with draining the remote pageset of this
  * processor
  *
  * Check if there are pages remaining in this pageset
  * if not then there is nothing to expire.
  */
724 if (!__this_cpu_read(p->expire) ||
725 !__this_cpu_read(p->pcp.count))
726 continue;
727
 /*
  * We never drain zone local pagesets.
  */
731 if (zone_to_nid(zone) == numa_node_id()) {
732 __this_cpu_write(p->expire, 0);
733 continue;
734 }
735
736 if (__this_cpu_dec_return(p->expire))
737 continue;
738
739 if (__this_cpu_read(p->pcp.count)) {
740 drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
741 changes++;
742 }
743 }
744#endif
745 }
746
747 for_each_online_pgdat(pgdat) {
748 struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
749
750 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
751 int v;
752
753 v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
754 if (v) {
755 atomic_long_add(v, &pgdat->vm_stat[i]);
756 global_node_diff[i] += v;
757 }
758 }
759 }
760
761#ifdef CONFIG_NUMA
762 changes += fold_diff(global_zone_diff, global_numa_diff,
763 global_node_diff);
764#else
765 changes += fold_diff(global_zone_diff, global_node_diff);
766#endif
767 return changes;
768}
769
/*
 * Fold the data for an offline cpu into the global array.
 * There cannot be any access by the offline cpu and therefore
 * synchronization is simplified.
 */
775void cpu_vm_stats_fold(int cpu)
776{
777 struct pglist_data *pgdat;
778 struct zone *zone;
779 int i;
780 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
781#ifdef CONFIG_NUMA
782 int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
783#endif
784 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
785
786 for_each_populated_zone(zone) {
787 struct per_cpu_pageset *p;
788
789 p = per_cpu_ptr(zone->pageset, cpu);
790
791 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
792 if (p->vm_stat_diff[i]) {
793 int v;
794
795 v = p->vm_stat_diff[i];
796 p->vm_stat_diff[i] = 0;
797 atomic_long_add(v, &zone->vm_stat[i]);
798 global_zone_diff[i] += v;
799 }
800
801#ifdef CONFIG_NUMA
802 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
803 if (p->vm_numa_stat_diff[i]) {
804 int v;
805
806 v = p->vm_numa_stat_diff[i];
807 p->vm_numa_stat_diff[i] = 0;
808 atomic_long_add(v, &zone->vm_numa_stat[i]);
809 global_numa_diff[i] += v;
810 }
811#endif
812 }
813
814 for_each_online_pgdat(pgdat) {
815 struct per_cpu_nodestat *p;
816
817 p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
818
819 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
820 if (p->vm_node_stat_diff[i]) {
821 int v;
822
823 v = p->vm_node_stat_diff[i];
824 p->vm_node_stat_diff[i] = 0;
825 atomic_long_add(v, &pgdat->vm_stat[i]);
826 global_node_diff[i] += v;
827 }
828 }
829
830#ifdef CONFIG_NUMA
831 fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
832#else
833 fold_diff(global_zone_diff, global_node_diff);
834#endif
835}
836
/*
 * This is only called if !populated_zone(zone), which implies no other users of
 * pset->vm_stat_diff[] exist.
 */
841void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
842{
843 int i;
844
845 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
846 if (pset->vm_stat_diff[i]) {
847 int v = pset->vm_stat_diff[i];
848 pset->vm_stat_diff[i] = 0;
849 atomic_long_add(v, &zone->vm_stat[i]);
850 atomic_long_add(v, &vm_zone_stat[i]);
851 }
852
853#ifdef CONFIG_NUMA
854 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
855 if (pset->vm_numa_stat_diff[i]) {
856 int v = pset->vm_numa_stat_diff[i];
857
858 pset->vm_numa_stat_diff[i] = 0;
859 atomic_long_add(v, &zone->vm_numa_stat[i]);
860 atomic_long_add(v, &vm_numa_stat[i]);
861 }
862#endif
863}
864#endif
865
866#ifdef CONFIG_NUMA
867void __inc_numa_state(struct zone *zone,
868 enum numa_stat_item item)
869{
870 struct per_cpu_pageset __percpu *pcp = zone->pageset;
871 u16 __percpu *p = pcp->vm_numa_stat_diff + item;
872 u16 v;
873
874 v = __this_cpu_inc_return(*p);
875
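 /*
  * The NUMA counters are u16 and may drift much further than the zone
  * counters; only fold into the global counter shortly before the
  * per-cpu value would overflow.
  */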
876 if (unlikely(v > NUMA_STATS_THRESHOLD)) {
877 zone_numa_state_add(v, zone, item);
878 __this_cpu_write(*p, 0);
879 }
880}
881
/*
 * Determine the per node value of a stat item. This function
 * is called frequently in a NUMA machine, so try to be as
 * frugal as possible.
 */
887unsigned long sum_zone_node_page_state(int node,
888 enum zone_stat_item item)
889{
890 struct zone *zones = NODE_DATA(node)->node_zones;
891 int i;
892 unsigned long count = 0;
893
894 for (i = 0; i < MAX_NR_ZONES; i++)
895 count += zone_page_state(zones + i, item);
896
897 return count;
898}
899
/*
 * Determine the per node value of a numa stat item. To avoid deviation,
 * the per cpu stat number in vm_numa_stat_diff[] is also included.
 */
904unsigned long sum_zone_numa_state(int node,
905 enum numa_stat_item item)
906{
907 struct zone *zones = NODE_DATA(node)->node_zones;
908 int i;
909 unsigned long count = 0;
910
911 for (i = 0; i < MAX_NR_ZONES; i++)
912 count += zone_numa_state_snapshot(zones + i, item);
913
914 return count;
915}
916
/*
 * Determine the per node value of a stat item.
 */
920unsigned long node_page_state(struct pglist_data *pgdat,
921 enum node_stat_item item)
922{
923 long x = atomic_long_read(&pgdat->vm_stat[item]);
924#ifdef CONFIG_SMP
925 if (x < 0)
926 x = 0;
927#endif
928 return x;
929}
930#endif
931
932#ifdef CONFIG_COMPACTION
933
934struct contig_page_info {
935 unsigned long free_pages;
936 unsigned long free_blocks_total;
937 unsigned long free_blocks_suitable;
938};
939
/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and not done here.
 */
948static void fill_contig_page_info(struct zone *zone,
949 unsigned int suitable_order,
950 struct contig_page_info *info)
951{
952 unsigned int order;
953
954 info->free_pages = 0;
955 info->free_blocks_total = 0;
956 info->free_blocks_suitable = 0;
957
958 for (order = 0; order < MAX_ORDER; order++) {
959 unsigned long blocks;
960
 /* Count number of free blocks */
962 blocks = zone->free_area[order].nr_free;
963 info->free_blocks_total += blocks;
964
 /* Count free base pages */
966 info->free_pages += blocks << order;
967
 /* Count the suitable free blocks */
969 if (order >= suitable_order)
970 info->free_blocks_suitable += blocks <<
971 (order - suitable_order);
972 }
973}
974
/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used
 */
982static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
983{
984 unsigned long requested = 1UL << order;
985
986 if (WARN_ON_ONCE(order >= MAX_ORDER))
987 return 0;
988
989 if (!info->free_blocks_total)
990 return 0;
991
 /* Fragmentation index only makes sense when a request would fail */
993 if (info->free_blocks_suitable)
994 return -1000;
995
 /*
  * Index is between 0 and 1 so return within 3 decimal places
  *
  * 0 => allocation would fail due to lack of memory
  * 1 => allocation would fail due to fragmentation
  */
1002 return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
1003}
1004
/* Same as __fragmentation_index but allocs contig_page_info on stack */
1006int fragmentation_index(struct zone *zone, unsigned int order)
1007{
1008 struct contig_page_info info;
1009
1010 fill_contig_page_info(zone, order, &info);
1011 return __fragmentation_index(order, &info);
1012}
1013#endif
1014
1015#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
1016#ifdef CONFIG_ZONE_DMA
1017#define TEXT_FOR_DMA(xx) xx "_dma",
1018#else
1019#define TEXT_FOR_DMA(xx)
1020#endif
1021
1022#ifdef CONFIG_ZONE_DMA32
1023#define TEXT_FOR_DMA32(xx) xx "_dma32",
1024#else
1025#define TEXT_FOR_DMA32(xx)
1026#endif
1027
1028#ifdef CONFIG_HIGHMEM
1029#define TEXT_FOR_HIGHMEM(xx) xx "_high",
1030#else
1031#define TEXT_FOR_HIGHMEM(xx)
1032#endif
1033
1034#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
1035 TEXT_FOR_HIGHMEM(xx) xx "_movable",
1036
1037const char * const vmstat_text[] = {
 /* enum zone_stat_item counters */
1039 "nr_free_pages",
1040 "nr_zone_inactive_anon",
1041 "nr_zone_active_anon",
1042 "nr_zone_inactive_file",
1043 "nr_zone_active_file",
1044 "nr_zone_unevictable",
1045 "nr_zone_write_pending",
1046 "nr_mlock",
1047 "nr_page_table_pages",
1048 "nr_kernel_stack",
1049 "nr_bounce",
1050#if IS_ENABLED(CONFIG_ZSMALLOC)
1051 "nr_zspages",
1052#endif
1053 "nr_free_cma",
1054
 /* enum numa_stat_item counters */
1056#ifdef CONFIG_NUMA
1057 "numa_hit",
1058 "numa_miss",
1059 "numa_foreign",
1060 "numa_interleave",
1061 "numa_local",
1062 "numa_other",
1063#endif
1064
 /* Node-based counters */
1066 "nr_inactive_anon",
1067 "nr_active_anon",
1068 "nr_inactive_file",
1069 "nr_active_file",
1070 "nr_unevictable",
1071 "nr_slab_reclaimable",
1072 "nr_slab_unreclaimable",
1073 "nr_isolated_anon",
1074 "nr_isolated_file",
1075 "workingset_refault",
1076 "workingset_activate",
1077 "workingset_nodereclaim",
1078 "nr_anon_pages",
1079 "nr_mapped",
1080 "nr_file_pages",
1081 "nr_dirty",
1082 "nr_writeback",
1083 "nr_writeback_temp",
1084 "nr_shmem",
1085 "nr_shmem_hugepages",
1086 "nr_shmem_pmdmapped",
1087 "nr_anon_transparent_hugepages",
1088 "nr_unstable",
1089 "nr_vmscan_write",
1090 "nr_vmscan_immediate_reclaim",
1091 "nr_dirtied",
1092 "nr_written",
1093
 /* enum writeback_stat_item counters */
1095 "nr_dirty_threshold",
1096 "nr_dirty_background_threshold",
1097
1098#ifdef CONFIG_VM_EVENT_COUNTERS
 /* enum vm_event_item counters */
1100 "pgpgin",
1101 "pgpgout",
1102 "pswpin",
1103 "pswpout",
1104
1105 TEXTS_FOR_ZONES("pgalloc")
1106 TEXTS_FOR_ZONES("allocstall")
1107 TEXTS_FOR_ZONES("pgskip")
1108
1109 "pgfree",
1110 "pgactivate",
1111 "pgdeactivate",
1112 "pglazyfree",
1113
1114 "pgfault",
1115 "pgmajfault",
1116 "pglazyfreed",
1117
1118 "pgrefill",
1119 "pgsteal_kswapd",
1120 "pgsteal_direct",
1121 "pgscan_kswapd",
1122 "pgscan_direct",
1123 "pgscan_direct_throttle",
1124
1125#ifdef CONFIG_NUMA
1126 "zone_reclaim_failed",
1127#endif
1128 "pginodesteal",
1129 "slabs_scanned",
1130 "kswapd_inodesteal",
1131 "kswapd_low_wmark_hit_quickly",
1132 "kswapd_high_wmark_hit_quickly",
1133 "pageoutrun",
1134
1135 "pgrotated",
1136
1137 "drop_pagecache",
1138 "drop_slab",
1139 "oom_kill",
1140
1141#ifdef CONFIG_NUMA_BALANCING
1142 "numa_pte_updates",
1143 "numa_huge_pte_updates",
1144 "numa_hint_faults",
1145 "numa_hint_faults_local",
1146 "numa_pages_migrated",
1147#endif
1148#ifdef CONFIG_MIGRATION
1149 "pgmigrate_success",
1150 "pgmigrate_fail",
1151#endif
1152#ifdef CONFIG_COMPACTION
1153 "compact_migrate_scanned",
1154 "compact_free_scanned",
1155 "compact_isolated",
1156 "compact_stall",
1157 "compact_fail",
1158 "compact_success",
1159 "compact_daemon_wake",
1160 "compact_daemon_migrate_scanned",
1161 "compact_daemon_free_scanned",
1162#endif
1163
1164#ifdef CONFIG_HUGETLB_PAGE
1165 "htlb_buddy_alloc_success",
1166 "htlb_buddy_alloc_fail",
1167#endif
1168 "unevictable_pgs_culled",
1169 "unevictable_pgs_scanned",
1170 "unevictable_pgs_rescued",
1171 "unevictable_pgs_mlocked",
1172 "unevictable_pgs_munlocked",
1173 "unevictable_pgs_cleared",
1174 "unevictable_pgs_stranded",
1175
1176#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1177 "thp_fault_alloc",
1178 "thp_fault_fallback",
1179 "thp_collapse_alloc",
1180 "thp_collapse_alloc_failed",
1181 "thp_file_alloc",
1182 "thp_file_mapped",
1183 "thp_split_page",
1184 "thp_split_page_failed",
1185 "thp_deferred_split_page",
1186 "thp_split_pmd",
1187#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
1188 "thp_split_pud",
1189#endif
1190 "thp_zero_page_alloc",
1191 "thp_zero_page_alloc_failed",
1192 "thp_swpout",
1193 "thp_swpout_fallback",
1194#endif
1195#ifdef CONFIG_MEMORY_BALLOON
1196 "balloon_inflate",
1197 "balloon_deflate",
1198#ifdef CONFIG_BALLOON_COMPACTION
1199 "balloon_migrate",
1200#endif
1201#endif
1202#ifdef CONFIG_DEBUG_TLBFLUSH
1203#ifdef CONFIG_SMP
1204 "nr_tlb_remote_flush",
1205 "nr_tlb_remote_flush_received",
1206#endif
1207 "nr_tlb_local_flush_all",
1208 "nr_tlb_local_flush_one",
1209#endif
1210
1211#ifdef CONFIG_DEBUG_VM_VMACACHE
1212 "vmacache_find_calls",
1213 "vmacache_find_hits",
1214 "vmacache_full_flushes",
1215#endif
1216#ifdef CONFIG_SWAP
1217 "swap_ra",
1218 "swap_ra_hit",
1219#endif
1220#endif
1221};
1222#endif
1223
1224#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
1225 defined(CONFIG_PROC_FS)
1226static void *frag_start(struct seq_file *m, loff_t *pos)
1227{
1228 pg_data_t *pgdat;
1229 loff_t node = *pos;
1230
1231 for (pgdat = first_online_pgdat();
1232 pgdat && node;
1233 pgdat = next_online_pgdat(pgdat))
1234 --node;
1235
1236 return pgdat;
1237}
1238
1239static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
1240{
1241 pg_data_t *pgdat = (pg_data_t *)arg;
1242
1243 (*pos)++;
1244 return next_online_pgdat(pgdat);
1245}
1246
1247static void frag_stop(struct seq_file *m, void *arg)
1248{
1249}
1250
/*
 * Walk zones in a node and print using a callback.
 * If @assert_populated is true, only use callback for zones that are populated.
 */
1255static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
1256 bool assert_populated, bool nolock,
1257 void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
1258{
1259 struct zone *zone;
1260 struct zone *node_zones = pgdat->node_zones;
1261 unsigned long flags;
1262
1263 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
1264 if (assert_populated && !populated_zone(zone))
1265 continue;
1266
1267 if (!nolock)
1268 spin_lock_irqsave(&zone->lock, flags);
1269 print(m, pgdat, zone);
1270 if (!nolock)
1271 spin_unlock_irqrestore(&zone->lock, flags);
1272 }
1273}
1274#endif
1275
1276#ifdef CONFIG_PROC_FS
1277static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
1278 struct zone *zone)
1279{
1280 int order;
1281
1282 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1283 for (order = 0; order < MAX_ORDER; ++order)
1284 seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
1285 seq_putc(m, '\n');
1286}
1287
/*
 * This walks the free areas for each zone in each node in the system.
 */
1291static int frag_show(struct seq_file *m, void *arg)
1292{
1293 pg_data_t *pgdat = (pg_data_t *)arg;
1294 walk_zones_in_node(m, pgdat, true, false, frag_show_print);
1295 return 0;
1296}
1297
1298static void pagetypeinfo_showfree_print(struct seq_file *m,
1299 pg_data_t *pgdat, struct zone *zone)
1300{
1301 int order, mtype;
1302
1303 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1304 seq_printf(m, "Node %4d, zone %8s, type %12s ",
1305 pgdat->node_id,
1306 zone->name,
1307 migratetype_names[mtype]);
1308 for (order = 0; order < MAX_ORDER; ++order) {
1309 unsigned long freecount = 0;
1310 struct free_area *area;
1311 struct list_head *curr;
1312
1313 area = &(zone->free_area[order]);
1314
1315 list_for_each(curr, &area->free_list[mtype])
1316 freecount++;
1317 seq_printf(m, "%6lu ", freecount);
1318 }
1319 seq_putc(m, '\n');
1320 }
1321}
1322
/* Print out the free pages at each order for each migratetype */
1324static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
1325{
1326 int order;
1327 pg_data_t *pgdat = (pg_data_t *)arg;
1328
 /* Print header */
1330 seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
1331 for (order = 0; order < MAX_ORDER; ++order)
1332 seq_printf(m, "%6d ", order);
1333 seq_putc(m, '\n');
1334
1335 walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
1336
1337 return 0;
1338}
1339
1340static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1341 pg_data_t *pgdat, struct zone *zone)
1342{
1343 int mtype;
1344 unsigned long pfn;
1345 unsigned long start_pfn = zone->zone_start_pfn;
1346 unsigned long end_pfn = zone_end_pfn(zone);
1347 unsigned long count[MIGRATE_TYPES] = { 0, };
1348
1349 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1350 struct page *page;
1351
1352 page = pfn_to_online_page(pfn);
1353 if (!page)
1354 continue;
1355
 /* Watch for unexpected holes punched in the memmap */
1357 if (!memmap_valid_within(pfn, page, zone))
1358 continue;
1359
1360 if (page_zone(page) != zone)
1361 continue;
1362
1363 mtype = get_pageblock_migratetype(page);
1364
1365 if (mtype < MIGRATE_TYPES)
1366 count[mtype]++;
1367 }
1368
 /* Print counts */
1370 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1371 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1372 seq_printf(m, "%12lu ", count[mtype]);
1373 seq_putc(m, '\n');
1374}
1375
/* Print out the number of pageblocks for each migratetype */
1377static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1378{
1379 int mtype;
1380 pg_data_t *pgdat = (pg_data_t *)arg;
1381
1382 seq_printf(m, "\n%-23s", "Number of blocks type ");
1383 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1384 seq_printf(m, "%12s ", migratetype_names[mtype]);
1385 seq_putc(m, '\n');
1386 walk_zones_in_node(m, pgdat, true, false,
1387 pagetypeinfo_showblockcount_print);
1388
1389 return 0;
1390}
1391
/*
 * Print out the number of pageblocks for each migratetype that contain pages
 * of other types. This gives an indication of how well fallbacks are being
 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
 * to determine what is going on
 */
1398static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
1399{
1400#ifdef CONFIG_PAGE_OWNER
1401 int mtype;
1402
1403 if (!static_branch_unlikely(&page_owner_inited))
1404 return;
1405
1406 drain_all_pages(NULL);
1407
1408 seq_printf(m, "\n%-23s", "Number of mixed blocks ");
1409 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1410 seq_printf(m, "%12s ", migratetype_names[mtype]);
1411 seq_putc(m, '\n');
1412
1413 walk_zones_in_node(m, pgdat, true, true,
1414 pagetypeinfo_showmixedcount_print);
1415#endif
1416}
1417
/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
1422static int pagetypeinfo_show(struct seq_file *m, void *arg)
1423{
1424 pg_data_t *pgdat = (pg_data_t *)arg;
1425
 /* check memoryless node */
1427 if (!node_state(pgdat->node_id, N_MEMORY))
1428 return 0;
1429
1430 seq_printf(m, "Page block order: %d\n", pageblock_order);
1431 seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
1432 seq_putc(m, '\n');
1433 pagetypeinfo_showfree(m, pgdat);
1434 pagetypeinfo_showblockcount(m, pgdat);
1435 pagetypeinfo_showmixedcount(m, pgdat);
1436
1437 return 0;
1438}
1439
1440static const struct seq_operations fragmentation_op = {
1441 .start = frag_start,
1442 .next = frag_next,
1443 .stop = frag_stop,
1444 .show = frag_show,
1445};
1446
1447static int fragmentation_open(struct inode *inode, struct file *file)
1448{
1449 return seq_open(file, &fragmentation_op);
1450}
1451
1452static const struct file_operations buddyinfo_file_operations = {
1453 .open = fragmentation_open,
1454 .read = seq_read,
1455 .llseek = seq_lseek,
1456 .release = seq_release,
1457};
1458
1459static const struct seq_operations pagetypeinfo_op = {
1460 .start = frag_start,
1461 .next = frag_next,
1462 .stop = frag_stop,
1463 .show = pagetypeinfo_show,
1464};
1465
1466static int pagetypeinfo_open(struct inode *inode, struct file *file)
1467{
1468 return seq_open(file, &pagetypeinfo_op);
1469}
1470
1471static const struct file_operations pagetypeinfo_file_operations = {
1472 .open = pagetypeinfo_open,
1473 .read = seq_read,
1474 .llseek = seq_lseek,
1475 .release = seq_release,
1476};
1477
1478static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
1479{
1480 int zid;
1481
1482 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1483 struct zone *compare = &pgdat->node_zones[zid];
1484
1485 if (populated_zone(compare))
1486 return zone == compare;
1487 }
1488
1489 return false;
1490}
1491
1492static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1493 struct zone *zone)
1494{
1495 int i;
1496 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1497 if (is_zone_first_populated(pgdat, zone)) {
1498 seq_printf(m, "\n per-node stats");
1499 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
1500 seq_printf(m, "\n %-12s %lu",
1501 vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
1502 NR_VM_NUMA_STAT_ITEMS],
1503 node_page_state(pgdat, i));
1504 }
1505 }
1506 seq_printf(m,
1507 "\n pages free %lu"
1508 "\n min %lu"
1509 "\n low %lu"
1510 "\n high %lu"
1511 "\n spanned %lu"
1512 "\n present %lu"
1513 "\n managed %lu",
1514 zone_page_state(zone, NR_FREE_PAGES),
1515 min_wmark_pages(zone),
1516 low_wmark_pages(zone),
1517 high_wmark_pages(zone),
1518 zone->spanned_pages,
1519 zone->present_pages,
1520 zone->managed_pages);
1521
1522 seq_printf(m,
1523 "\n protection: (%ld",
1524 zone->lowmem_reserve[0]);
1525 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
1526 seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
1527 seq_putc(m, ')');
1528
 /* If unpopulated, no other information is useful */
1530 if (!populated_zone(zone)) {
1531 seq_putc(m, '\n');
1532 return;
1533 }
1534
1535 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1536 seq_printf(m, "\n %-12s %lu", vmstat_text[i],
1537 zone_page_state(zone, i));
1538
1539#ifdef CONFIG_NUMA
1540 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
1541 seq_printf(m, "\n %-12s %lu",
1542 vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
1543 zone_numa_state_snapshot(zone, i));
1544#endif
1545
1546 seq_printf(m, "\n pagesets");
1547 for_each_online_cpu(i) {
1548 struct per_cpu_pageset *pageset;
1549
1550 pageset = per_cpu_ptr(zone->pageset, i);
1551 seq_printf(m,
1552 "\n cpu: %i"
1553 "\n count: %i"
1554 "\n high: %i"
1555 "\n batch: %i",
1556 i,
1557 pageset->pcp.count,
1558 pageset->pcp.high,
1559 pageset->pcp.batch);
1560#ifdef CONFIG_SMP
1561 seq_printf(m, "\n vm stats threshold: %d",
1562 pageset->stat_threshold);
1563#endif
1564 }
1565 seq_printf(m,
1566 "\n node_unreclaimable: %u"
1567 "\n start_pfn: %lu"
1568 "\n node_inactive_ratio: %u",
1569 pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
1570 zone->zone_start_pfn,
1571 zone->zone_pgdat->inactive_ratio);
1572 seq_putc(m, '\n');
1573}
1574
/*
 * Output information about zones in @pgdat.  All zones are printed regardless
 * of whether they are populated or not: lowmem_reserve_ratio operates on the
 * counts of all zones, hence zones that are never populated are not skipped
 * and must be printed.
 */
1581static int zoneinfo_show(struct seq_file *m, void *arg)
1582{
1583 pg_data_t *pgdat = (pg_data_t *)arg;
1584 walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
1585 return 0;
1586}
1587
1588static const struct seq_operations zoneinfo_op = {
1589 .start = frag_start,
 /* iterate over all zones. The same as in fragmentation. */
1591 .next = frag_next,
1592 .stop = frag_stop,
1593 .show = zoneinfo_show,
1594};
1595
1596static int zoneinfo_open(struct inode *inode, struct file *file)
1597{
1598 return seq_open(file, &zoneinfo_op);
1599}
1600
1601static const struct file_operations zoneinfo_file_operations = {
1602 .open = zoneinfo_open,
1603 .read = seq_read,
1604 .llseek = seq_lseek,
1605 .release = seq_release,
1606};
1607
1608enum writeback_stat_item {
1609 NR_DIRTY_THRESHOLD,
1610 NR_DIRTY_BG_THRESHOLD,
1611 NR_VM_WRITEBACK_STAT_ITEMS,
1612};
1613
1614static void *vmstat_start(struct seq_file *m, loff_t *pos)
1615{
1616 unsigned long *v;
1617 int i, stat_items_size;
1618
1619 if (*pos >= ARRAY_SIZE(vmstat_text))
1620 return NULL;
1621 stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
1622 NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
1623 NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
1624 NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
1625
1626#ifdef CONFIG_VM_EVENT_COUNTERS
1627 stat_items_size += sizeof(struct vm_event_state);
1628#endif
1629
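 /*
  * Snapshot every counter into one flat buffer; the /proc/vmstat
  * sequence then indexes into it by *pos.
  */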
1630 v = kmalloc(stat_items_size, GFP_KERNEL);
1631 m->private = v;
1632 if (!v)
1633 return ERR_PTR(-ENOMEM);
1634 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1635 v[i] = global_zone_page_state(i);
1636 v += NR_VM_ZONE_STAT_ITEMS;
1637
1638#ifdef CONFIG_NUMA
1639 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
1640 v[i] = global_numa_state(i);
1641 v += NR_VM_NUMA_STAT_ITEMS;
1642#endif
1643
1644 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
1645 v[i] = global_node_page_state(i);
1646 v += NR_VM_NODE_STAT_ITEMS;
1647
1648 global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
1649 v + NR_DIRTY_THRESHOLD);
1650 v += NR_VM_WRITEBACK_STAT_ITEMS;
1651
1652#ifdef CONFIG_VM_EVENT_COUNTERS
1653 all_vm_events(v);
1654 v[PGPGIN] /= 2;
1655 v[PGPGOUT] /= 2;
1656#endif
1657 return (unsigned long *)m->private + *pos;
1658}
1659
1660static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1661{
1662 (*pos)++;
1663 if (*pos >= ARRAY_SIZE(vmstat_text))
1664 return NULL;
1665 return (unsigned long *)m->private + *pos;
1666}
1667
1668static int vmstat_show(struct seq_file *m, void *arg)
1669{
1670 unsigned long *l = arg;
1671 unsigned long off = l - (unsigned long *)m->private;
1672
1673 seq_puts(m, vmstat_text[off]);
1674 seq_put_decimal_ull(m, " ", *l);
1675 seq_putc(m, '\n');
1676 return 0;
1677}
1678
1679static void vmstat_stop(struct seq_file *m, void *arg)
1680{
1681 kfree(m->private);
1682 m->private = NULL;
1683}
1684
1685static const struct seq_operations vmstat_op = {
1686 .start = vmstat_start,
1687 .next = vmstat_next,
1688 .stop = vmstat_stop,
1689 .show = vmstat_show,
1690};
1691
1692static int vmstat_open(struct inode *inode, struct file *file)
1693{
1694 return seq_open(file, &vmstat_op);
1695}
1696
1697static const struct file_operations vmstat_file_operations = {
1698 .open = vmstat_open,
1699 .read = seq_read,
1700 .llseek = seq_lseek,
1701 .release = seq_release,
1702};
1703#endif
1704
1705#ifdef CONFIG_SMP
1706static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
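/* Interval between vmstat diff folds, in jiffies (vm.stat_interval sysctl) */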
1707int sysctl_stat_interval __read_mostly = HZ;
1708
1709#ifdef CONFIG_PROC_FS
1710static void refresh_vm_stats(struct work_struct *work)
1711{
1712 refresh_cpu_vm_stats(true);
1713}
1714
1715int vmstat_refresh(struct ctl_table *table, int write,
1716 void __user *buffer, size_t *lenp, loff_t *ppos)
1717{
1718 long val;
1719 int err;
1720 int i;
1721
 /*
  * The regular update, every sysctl_stat_interval, may come later
  * than expected: leaving a significant amount in per_cpu buckets.
  * This is particularly misleading when checking a quantity of HUGE
  * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
  * which can equally be echo'ed to or cat'ted from (by root),
  * can be used to update the stats just before reading them.
  *
  * Oh, and since global_zone_page_state() etc. are so careful to hide
  * transiently negative values, report an error here if any of
  * the stats is negative, so we know to go looking for imbalance.
  */
1734 err = schedule_on_each_cpu(refresh_vm_stats);
1735 if (err)
1736 return err;
1737 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
1738 val = atomic_long_read(&vm_zone_stat[i]);
1739 if (val < 0) {
1740 pr_warn("%s: %s %ld\n",
1741 __func__, vmstat_text[i], val);
1742 err = -EINVAL;
1743 }
1744 }
1745#ifdef CONFIG_NUMA
1746 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
1747 val = atomic_long_read(&vm_numa_stat[i]);
1748 if (val < 0) {
1749 pr_warn("%s: %s %ld\n",
1750 __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
1751 err = -EINVAL;
1752 }
1753 }
1754#endif
1755 if (err)
1756 return err;
1757 if (write)
1758 *ppos += *lenp;
1759 else
1760 *lenp = 0;
1761 return 0;
1762}
1763#endif
1764
1765static void vmstat_update(struct work_struct *w)
1766{
1767 if (refresh_cpu_vm_stats(true)) {
 /*
  * Counters were updated so we expect more updates
  * to occur in the future. Keep on running the
  * update work.
  */
1773 queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
1774 this_cpu_ptr(&vmstat_work),
1775 round_jiffies_relative(sysctl_stat_interval));
1776 }
1777}
1778
/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */
1788static bool need_update(int cpu)
1789{
1790 struct zone *zone;
1791
1792 for_each_populated_zone(zone) {
1793 struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
1794
1795 BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
1796#ifdef CONFIG_NUMA
1797 BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
1798#endif
1799
 /*
  * The fast way of checking if there are any vmstat diffs.
  * This works because the diffs are byte sized items.
  */
1804 if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
1805 return true;
1806#ifdef CONFIG_NUMA
1807 if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS))
1808 return true;
1809#endif
1810 }
1811 return false;
1812}
1813
/*
 * Switch off vmstat processing and then fold all the remaining differentials
 * until the diffs stay at zero. The function is used by NOHZ and can only be
 * invoked when tick processing is not active.
 */
1819void quiet_vmstat(void)
1820{
1821 if (system_state != SYSTEM_RUNNING)
1822 return;
1823
1824 if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
1825 return;
1826
1827 if (!need_update(smp_processor_id()))
1828 return;
1829
 /*
  * Just refresh counters and do not care about the pending delayed
  * vmstat_update. It doesn't fire that often to matter and canceling
  * it would be too expensive from this path.
  * vmstat_shepherd will take care about that for us.
  */
1836 refresh_cpu_vm_stats(false);
1837}
1838
/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for vm statistics updates disabled because of
 * inactivity.
 */
1845static void vmstat_shepherd(struct work_struct *w);
1846
1847static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
1848
1849static void vmstat_shepherd(struct work_struct *w)
1850{
1851 int cpu;
1852
1853 get_online_cpus();
 /* Check processors whose vmstat worker threads have been disabled */
1855 for_each_online_cpu(cpu) {
1856 struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
1857
1858 if (!delayed_work_pending(dw) && need_update(cpu))
1859 queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
1860 }
1861 put_online_cpus();
1862
1863 schedule_delayed_work(&shepherd,
1864 round_jiffies_relative(sysctl_stat_interval));
1865}
1866
1867static void __init start_shepherd_timer(void)
1868{
1869 int cpu;
1870
1871 for_each_possible_cpu(cpu)
1872 INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
1873 vmstat_update);
1874
1875 schedule_delayed_work(&shepherd,
1876 round_jiffies_relative(sysctl_stat_interval));
1877}
1878
1879static void __init init_cpu_node_state(void)
1880{
1881 int node;
1882
1883 for_each_online_node(node) {
1884 if (cpumask_weight(cpumask_of_node(node)) > 0)
1885 node_set_state(node, N_CPU);
1886 }
1887}
1888
1889static int vmstat_cpu_online(unsigned int cpu)
1890{
1891 refresh_zone_stat_thresholds();
1892 node_set_state(cpu_to_node(cpu), N_CPU);
1893 return 0;
1894}
1895
1896static int vmstat_cpu_down_prep(unsigned int cpu)
1897{
1898 cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
1899 return 0;
1900}
1901
1902static int vmstat_cpu_dead(unsigned int cpu)
1903{
1904 const struct cpumask *node_cpus;
1905 int node;
1906
1907 node = cpu_to_node(cpu);
1908
1909 refresh_zone_stat_thresholds();
1910 node_cpus = cpumask_of_node(node);
1911 if (cpumask_weight(node_cpus) > 0)
1912 return 0;
1913
1914 node_clear_state(node, N_CPU);
1915 return 0;
1916}
1917
1918#endif
1919
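/* Workqueue used for vmstat and other deferred per-cpu MM housekeeping */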
1920struct workqueue_struct *mm_percpu_wq;
1921
1922void __init init_mm_internals(void)
1923{
1924 int ret __maybe_unused;
1925
1926 mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
1927
1928#ifdef CONFIG_SMP
1929 ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
1930 NULL, vmstat_cpu_dead);
1931 if (ret < 0)
1932 pr_err("vmstat: failed to register 'dead' hotplug state\n");
1933
1934 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
1935 vmstat_cpu_online,
1936 vmstat_cpu_down_prep);
1937 if (ret < 0)
1938 pr_err("vmstat: failed to register 'online' hotplug state\n");
1939
1940 get_online_cpus();
1941 init_cpu_node_state();
1942 put_online_cpus();
1943
1944 start_shepherd_timer();
1945#endif
1946#ifdef CONFIG_PROC_FS
1947 proc_create("buddyinfo", 0444, NULL, &buddyinfo_file_operations);
1948 proc_create("pagetypeinfo", 0444, NULL, &pagetypeinfo_file_operations);
1949 proc_create("vmstat", 0444, NULL, &vmstat_file_operations);
1950 proc_create("zoneinfo", 0444, NULL, &zoneinfo_file_operations);
1951#endif
1952}
1953
1954#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
1955
/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */
1960static int unusable_free_index(unsigned int order,
1961 struct contig_page_info *info)
1962{
 /* No free memory is interpreted as all free memory is unusable */
1964 if (info->free_pages == 0)
1965 return 1000;
1966
 /*
  * Index should be a value between 0 and 1. Return a value to 3
  * decimal places.
  *
  * 0 => no fragmentation
  * 1 => high fragmentation
  */
1974 return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
1975
1976}
1977
1978static void unusable_show_print(struct seq_file *m,
1979 pg_data_t *pgdat, struct zone *zone)
1980{
1981 unsigned int order;
1982 int index;
1983 struct contig_page_info info;
1984
1985 seq_printf(m, "Node %d, zone %8s ",
1986 pgdat->node_id,
1987 zone->name);
1988 for (order = 0; order < MAX_ORDER; ++order) {
1989 fill_contig_page_info(zone, order, &info);
1990 index = unusable_free_index(order, &info);
1991 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
1992 }
1993
1994 seq_putc(m, '\n');
1995}
1996
/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */
2006static int unusable_show(struct seq_file *m, void *arg)
2007{
2008 pg_data_t *pgdat = (pg_data_t *)arg;
2009
 /* check memoryless node */
2011 if (!node_state(pgdat->node_id, N_MEMORY))
2012 return 0;
2013
2014 walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
2015
2016 return 0;
2017}
2018
2019static const struct seq_operations unusable_op = {
2020 .start = frag_start,
2021 .next = frag_next,
2022 .stop = frag_stop,
2023 .show = unusable_show,
2024};
2025
2026static int unusable_open(struct inode *inode, struct file *file)
2027{
2028 return seq_open(file, &unusable_op);
2029}
2030
2031static const struct file_operations unusable_file_ops = {
2032 .open = unusable_open,
2033 .read = seq_read,
2034 .llseek = seq_lseek,
2035 .release = seq_release,
2036};
2037
2038static void extfrag_show_print(struct seq_file *m,
2039 pg_data_t *pgdat, struct zone *zone)
2040{
2041 unsigned int order;
2042 int index;
2043
 /* Alloc on stack as interrupts are disabled for zone walk */
2045 struct contig_page_info info;
2046
2047 seq_printf(m, "Node %d, zone %8s ",
2048 pgdat->node_id,
2049 zone->name);
2050 for (order = 0; order < MAX_ORDER; ++order) {
2051 fill_contig_page_info(zone, order, &info);
2052 index = __fragmentation_index(order, &info);
2053 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2054 }
2055
2056 seq_putc(m, '\n');
2057}
2058
/*
 * Display fragmentation index for orders that allocations would fail for
 */
2062static int extfrag_show(struct seq_file *m, void *arg)
2063{
2064 pg_data_t *pgdat = (pg_data_t *)arg;
2065
2066 walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
2067
2068 return 0;
2069}
2070
2071static const struct seq_operations extfrag_op = {
2072 .start = frag_start,
2073 .next = frag_next,
2074 .stop = frag_stop,
2075 .show = extfrag_show,
2076};
2077
2078static int extfrag_open(struct inode *inode, struct file *file)
2079{
2080 return seq_open(file, &extfrag_op);
2081}
2082
2083static const struct file_operations extfrag_file_ops = {
2084 .open = extfrag_open,
2085 .read = seq_read,
2086 .llseek = seq_lseek,
2087 .release = seq_release,
2088};
2089
2090static int __init extfrag_debug_init(void)
2091{
2092 struct dentry *extfrag_debug_root;
2093
2094 extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
2095 if (!extfrag_debug_root)
2096 return -ENOMEM;
2097
2098 if (!debugfs_create_file("unusable_index", 0444,
2099 extfrag_debug_root, NULL, &unusable_file_ops))
2100 goto fail;
2101
2102 if (!debugfs_create_file("extfrag_index", 0444,
2103 extfrag_debug_root, NULL, &extfrag_file_ops))
2104 goto fail;
2105
2106 return 0;
2107fail:
2108 debugfs_remove_recursive(extfrag_debug_root);
2109 return -ENOMEM;
2110}
2111
2112module_init(extfrag_debug_init);
2113#endif
2114