/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *		Christoph Lameter <christoph@lameter.com>
 *  Copyright (C) 2008-2014 Christoph Lameter
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/page_ext.h>
#include <linux/page_owner.h>

#include "internal.h"

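/*
 * The per-cpu NUMA event differentials (vm_numa_stat_diff[]) are u16s, so
 * they are folded into the zone counters before they can wrap.
 */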
#define NUMA_STATS_THRESHOLD (U16_MAX - 2)

#ifdef CONFIG_NUMA
int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;

/* zero numa counters within a zone */
static void zero_zone_numa_counters(struct zone *zone)
{
	int item, cpu;

	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
		atomic_long_set(&zone->vm_numa_stat[item], 0);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
						= 0;
	}
}

/* zero numa counters of all the populated zones */
static void zero_zones_numa_counters(void)
{
	struct zone *zone;

	for_each_populated_zone(zone)
		zero_zone_numa_counters(zone);
}

/* zero global numa counters */
static void zero_global_numa_counters(void)
{
	int item;

	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
		atomic_long_set(&vm_numa_stat[item], 0);
}

static void invalid_numa_statistics(void)
{
	zero_zones_numa_counters();
	zero_global_numa_counters();
}

static DEFINE_MUTEX(vm_numa_stat_lock);

int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *length, loff_t *ppos)
{
	int ret, oldval;

	mutex_lock(&vm_numa_stat_lock);
	if (write)
		oldval = sysctl_vm_numa_stat;
	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
	if (ret || !write)
		goto out;

	if (oldval == sysctl_vm_numa_stat)
		goto out;
	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
		static_branch_enable(&vm_numa_stat_key);
		pr_info("enable numa statistics\n");
	} else {
		static_branch_disable(&vm_numa_stat_key);
		invalid_numa_statistics();
		pr_info("disable numa statistics, and clear numa counters\n");
	}

out:
	mutex_unlock(&vm_numa_stat_lock);
	return ret;
}
#endif
105
#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);

/*
 * Fold the event counts of an offline cpu into the current cpu's counts.
 * There can be no concurrent updates on the source cpu because it is
 * already offline when this is called.
 */
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters.
 *
 * The vm_zone_stat, vm_numa_stat and vm_node_stat arrays contain the
 * global counters.
 */
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
EXPORT_SYMBOL(vm_numa_stat);
EXPORT_SYMBOL(vm_node_stat);
168
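/*
 * On SMP, counter updates first land in small per-cpu differentials
 * (vm_stat_diff[], vm_node_stat_diff[]) and are only folded into the
 * atomic global and per-zone/per-node counters above once they exceed a
 * per-cpu threshold computed below.
 */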
#ifdef CONFIG_SMP

int calculate_pressure_threshold(struct zone *zone)
{
	int threshold;
	int watermark_distance;

	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark.
	 */
	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
	threshold = max(1, (int)(watermark_distance / num_online_cpus()));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}
194
int calculate_normal_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 * fls() is used to have a cheap way of logarithmic scaling.
	 */
	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}
241
/*
 * Refresh the thresholds for each zone.
 */
void refresh_zone_stat_thresholds(void)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int cpu;
	int threshold;

	/* Zero current pgdat thresholds */
	for_each_online_pgdat(pgdat) {
		for_each_online_cpu(cpu) {
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
		}
	}

	for_each_populated_zone(zone) {
		struct pglist_data *pgdat = zone->zone_pgdat;
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_normal_threshold(zone);

		for_each_online_cpu(cpu) {
			int pgdat_threshold;

			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;

			/* Base nodestat threshold on the largest populated zone. */
			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
				= max(threshold, pgdat_threshold);
		}

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * the min watermark could be breached by an allocation.
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}
289
void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{
	struct zone *zone;
	int cpu;
	int threshold;
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		zone = &pgdat->node_zones[i];
		if (!zone->percpu_drift_mark)
			continue;

		threshold = (*calculate_pressure)(zone);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;
	}
}

/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			   long delta)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_zone_page_state);

void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
				long delta)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		node_page_state_add(x, pgdat, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_node_page_state);
354
/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}
}

void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v + overstep, pgdat, item);
		__this_cpu_write(*p, -overstep);
	}
}
409
410void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
411{
412 __inc_zone_state(page_zone(page), item);
413}
414EXPORT_SYMBOL(__inc_zone_page_state);
415
416void __inc_node_page_state(struct page *page, enum node_stat_item item)
417{
418 __inc_node_state(page_pgdat(page), item);
419}
420EXPORT_SYMBOL(__inc_node_page_state);
421
422void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
423{
424 struct per_cpu_pageset __percpu *pcp = zone->pageset;
425 s8 __percpu *p = pcp->vm_stat_diff + item;
426 s8 v, t;
427
428 v = __this_cpu_dec_return(*p);
429 t = __this_cpu_read(pcp->stat_threshold);
430 if (unlikely(v < - t)) {
431 s8 overstep = t >> 1;
432
433 zone_page_state_add(v - overstep, zone, item);
434 __this_cpu_write(*p, overstep);
435 }
436}
437
438void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
439{
440 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
441 s8 __percpu *p = pcp->vm_node_stat_diff + item;
442 s8 v, t;
443
444 v = __this_cpu_dec_return(*p);
445 t = __this_cpu_read(pcp->stat_threshold);
446 if (unlikely(v < - t)) {
447 s8 overstep = t >> 1;
448
449 node_page_state_add(v - overstep, pgdat, item);
450 __this_cpu_write(*p, overstep);
451 }
452}
453
454void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
455{
456 __dec_zone_state(page_zone(page), item);
457}
458EXPORT_SYMBOL(__dec_zone_page_state);
459
460void __dec_node_page_state(struct page *page, enum node_stat_item item)
461{
462 __dec_node_state(page_pgdat(page), item);
463}
464EXPORT_SYMBOL(__dec_node_page_state);
465
466#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_zone_state() and mod_node_state() modify the counter state through
 * atomic per cpu operations.
 *
 * Overstep mode specifies how overstep should be handled:
 *        0       No overstepping
 *        1       Overstepping half of threshold
 *        -1      Overstepping minus half of threshold
 */
479static inline void mod_zone_state(struct zone *zone,
480 enum zone_stat_item item, long delta, int overstep_mode)
481{
482 struct per_cpu_pageset __percpu *pcp = zone->pageset;
483 s8 __percpu *p = pcp->vm_stat_diff + item;
484 long o, n, t, z;
485
486 do {
487 z = 0;
		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a zone.
		 */
499 t = this_cpu_read(pcp->stat_threshold);
500
501 o = this_cpu_read(*p);
502 n = delta + o;
503
504 if (n > t || n < -t) {
505 int os = overstep_mode * (t >> 1) ;
506
507
508 z = n + os;
509 n = -os;
510 }
511 } while (this_cpu_cmpxchg(*p, o, n) != o);
512
513 if (z)
514 zone_page_state_add(z, zone, item);
515}
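/*
 * The wrappers below pass overstep_mode 0 for plain modifications and
 * +1/-1 for increments and decrements, so that after a fold the per-cpu
 * differential is biased half a threshold in the direction of change,
 * reducing how often monotonic update streams touch the global counters.
 */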
516
517void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
518 long delta)
519{
520 mod_zone_state(zone, item, delta, 0);
521}
522EXPORT_SYMBOL(mod_zone_page_state);
523
524void inc_zone_page_state(struct page *page, enum zone_stat_item item)
525{
526 mod_zone_state(page_zone(page), item, 1, 1);
527}
528EXPORT_SYMBOL(inc_zone_page_state);
529
530void dec_zone_page_state(struct page *page, enum zone_stat_item item)
531{
532 mod_zone_state(page_zone(page), item, -1, -1);
533}
534EXPORT_SYMBOL(dec_zone_page_state);
535
536static inline void mod_node_state(struct pglist_data *pgdat,
537 enum node_stat_item item, int delta, int overstep_mode)
538{
539 struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
540 s8 __percpu *p = pcp->vm_node_stat_diff + item;
541 long o, n, t, z;
542
543 do {
544 z = 0;
		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a node.
		 */
556 t = this_cpu_read(pcp->stat_threshold);
557
558 o = this_cpu_read(*p);
559 n = delta + o;
560
561 if (n > t || n < -t) {
562 int os = overstep_mode * (t >> 1) ;
563
564
565 z = n + os;
566 n = -os;
567 }
568 } while (this_cpu_cmpxchg(*p, o, n) != o);
569
570 if (z)
571 node_page_state_add(z, pgdat, item);
572}
573
574void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
575 long delta)
576{
577 mod_node_state(pgdat, item, delta, 0);
578}
579EXPORT_SYMBOL(mod_node_page_state);
580
581void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
582{
583 mod_node_state(pgdat, item, 1, 1);
584}
585
586void inc_node_page_state(struct page *page, enum node_stat_item item)
587{
588 mod_node_state(page_pgdat(page), item, 1, 1);
589}
590EXPORT_SYMBOL(inc_node_page_state);
591
592void dec_node_page_state(struct page *page, enum node_stat_item item)
593{
594 mod_node_state(page_pgdat(page), item, -1, -1);
595}
596EXPORT_SYMBOL(dec_node_page_state);
597#else
/*
 * Use interrupt disable to serialize counter updates
 */
601void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
602 long delta)
603{
604 unsigned long flags;
605
606 local_irq_save(flags);
607 __mod_zone_page_state(zone, item, delta);
608 local_irq_restore(flags);
609}
610EXPORT_SYMBOL(mod_zone_page_state);
611
612void inc_zone_page_state(struct page *page, enum zone_stat_item item)
613{
614 unsigned long flags;
615 struct zone *zone;
616
617 zone = page_zone(page);
618 local_irq_save(flags);
619 __inc_zone_state(zone, item);
620 local_irq_restore(flags);
621}
622EXPORT_SYMBOL(inc_zone_page_state);
623
624void dec_zone_page_state(struct page *page, enum zone_stat_item item)
625{
626 unsigned long flags;
627
628 local_irq_save(flags);
629 __dec_zone_page_state(page, item);
630 local_irq_restore(flags);
631}
632EXPORT_SYMBOL(dec_zone_page_state);
633
634void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
635{
636 unsigned long flags;
637
638 local_irq_save(flags);
639 __inc_node_state(pgdat, item);
640 local_irq_restore(flags);
641}
642EXPORT_SYMBOL(inc_node_state);
643
644void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
645 long delta)
646{
647 unsigned long flags;
648
649 local_irq_save(flags);
650 __mod_node_page_state(pgdat, item, delta);
651 local_irq_restore(flags);
652}
653EXPORT_SYMBOL(mod_node_page_state);
654
655void inc_node_page_state(struct page *page, enum node_stat_item item)
656{
657 unsigned long flags;
658 struct pglist_data *pgdat;
659
660 pgdat = page_pgdat(page);
661 local_irq_save(flags);
662 __inc_node_state(pgdat, item);
663 local_irq_restore(flags);
664}
665EXPORT_SYMBOL(inc_node_page_state);
666
667void dec_node_page_state(struct page *page, enum node_stat_item item)
668{
669 unsigned long flags;
670
671 local_irq_save(flags);
672 __dec_node_page_state(page, item);
673 local_irq_restore(flags);
674}
675EXPORT_SYMBOL(dec_node_page_state);
676#endif
677

/*
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */
#ifdef CONFIG_NUMA
static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
684{
685 int i;
686 int changes = 0;
687
688 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
689 if (zone_diff[i]) {
690 atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
691 changes++;
692 }
693
694 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
695 if (numa_diff[i]) {
696 atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
697 changes++;
698 }
699
700 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
701 if (node_diff[i]) {
702 atomic_long_add(node_diff[i], &vm_node_stat[i]);
703 changes++;
704 }
705 return changes;
706}
707#else
708static int fold_diff(int *zone_diff, int *node_diff)
709{
710 int i;
711 int changes = 0;
712
713 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
714 if (zone_diff[i]) {
715 atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
716 changes++;
717 }
718
719 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
720 if (node_diff[i]) {
721 atomic_long_add(node_diff[i], &vm_node_stat[i]);
722 changes++;
723 }
724 return changes;
725}
726#endif
727
/*
 * Update the zone counters for the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 *
 * The function returns the number of global counters updated.
 */
static int refresh_cpu_vm_stats(bool do_pagesets)
{
746 struct pglist_data *pgdat;
747 struct zone *zone;
748 int i;
749 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
750#ifdef CONFIG_NUMA
751 int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
752#endif
753 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
754 int changes = 0;
755
756 for_each_populated_zone(zone) {
757 struct per_cpu_pageset __percpu *p = zone->pageset;
758
759 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
760 int v;
761
762 v = this_cpu_xchg(p->vm_stat_diff[i], 0);
763 if (v) {
764
765 atomic_long_add(v, &zone->vm_stat[i]);
766 global_zone_diff[i] += v;
767#ifdef CONFIG_NUMA
768
769 __this_cpu_write(p->expire, 3);
770#endif
771 }
772 }
773#ifdef CONFIG_NUMA
774 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
775 int v;
776
777 v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
778 if (v) {
779
780 atomic_long_add(v, &zone->vm_numa_stat[i]);
781 global_numa_diff[i] += v;
782 __this_cpu_write(p->expire, 3);
783 }
784 }
785
786 if (do_pagesets) {
787 cond_resched();
			/*
			 * Deal with draining the remote pageset of this
			 * processor
			 *
			 * Check if there are pages remaining in this pageset
			 * if not then there is nothing to expire.
			 */
795 if (!__this_cpu_read(p->expire) ||
796 !__this_cpu_read(p->pcp.count))
797 continue;
798
799
800
801
802 if (zone_to_nid(zone) == numa_node_id()) {
803 __this_cpu_write(p->expire, 0);
804 continue;
805 }
806
807 if (__this_cpu_dec_return(p->expire))
808 continue;
809
810 if (__this_cpu_read(p->pcp.count)) {
811 drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
812 changes++;
813 }
814 }
815#endif
816 }
817
818 for_each_online_pgdat(pgdat) {
819 struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;
820
821 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
822 int v;
823
824 v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
825 if (v) {
826 atomic_long_add(v, &pgdat->vm_stat[i]);
827 global_node_diff[i] += v;
828 }
829 }
830 }
831
832#ifdef CONFIG_NUMA
833 changes += fold_diff(global_zone_diff, global_numa_diff,
834 global_node_diff);
835#else
836 changes += fold_diff(global_zone_diff, global_node_diff);
837#endif
838 return changes;
839}
840
/*
 * Fold the data for an offline cpu into the global array.
 * There cannot be any access by the offline cpu and therefore
 * synchronization is simplified.
 */
void cpu_vm_stats_fold(int cpu)
847{
848 struct pglist_data *pgdat;
849 struct zone *zone;
850 int i;
851 int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
852#ifdef CONFIG_NUMA
853 int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
854#endif
855 int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
856
857 for_each_populated_zone(zone) {
858 struct per_cpu_pageset *p;
859
860 p = per_cpu_ptr(zone->pageset, cpu);
861
862 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
863 if (p->vm_stat_diff[i]) {
864 int v;
865
866 v = p->vm_stat_diff[i];
867 p->vm_stat_diff[i] = 0;
868 atomic_long_add(v, &zone->vm_stat[i]);
869 global_zone_diff[i] += v;
870 }
871
872#ifdef CONFIG_NUMA
873 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
874 if (p->vm_numa_stat_diff[i]) {
875 int v;
876
877 v = p->vm_numa_stat_diff[i];
878 p->vm_numa_stat_diff[i] = 0;
879 atomic_long_add(v, &zone->vm_numa_stat[i]);
880 global_numa_diff[i] += v;
881 }
882#endif
883 }
884
885 for_each_online_pgdat(pgdat) {
886 struct per_cpu_nodestat *p;
887
888 p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);
889
890 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
891 if (p->vm_node_stat_diff[i]) {
892 int v;
893
894 v = p->vm_node_stat_diff[i];
895 p->vm_node_stat_diff[i] = 0;
896 atomic_long_add(v, &pgdat->vm_stat[i]);
897 global_node_diff[i] += v;
898 }
899 }
900
901#ifdef CONFIG_NUMA
902 fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
903#else
904 fold_diff(global_zone_diff, global_node_diff);
905#endif
906}
907
/*
 * this is only called if !populated_zone(zone), which implies no other users of
 * pset->vm_stat_diff[] exist.
 */
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
913{
914 int i;
915
916 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
917 if (pset->vm_stat_diff[i]) {
918 int v = pset->vm_stat_diff[i];
919 pset->vm_stat_diff[i] = 0;
920 atomic_long_add(v, &zone->vm_stat[i]);
921 atomic_long_add(v, &vm_zone_stat[i]);
922 }
923
924#ifdef CONFIG_NUMA
925 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
926 if (pset->vm_numa_stat_diff[i]) {
927 int v = pset->vm_numa_stat_diff[i];
928
929 pset->vm_numa_stat_diff[i] = 0;
930 atomic_long_add(v, &zone->vm_numa_stat[i]);
931 atomic_long_add(v, &vm_numa_stat[i]);
932 }
933#endif
934}
935#endif
936
937#ifdef CONFIG_NUMA
938void __inc_numa_state(struct zone *zone,
939 enum numa_stat_item item)
940{
941 struct per_cpu_pageset __percpu *pcp = zone->pageset;
942 u16 __percpu *p = pcp->vm_numa_stat_diff + item;
943 u16 v;
944
945 v = __this_cpu_inc_return(*p);
946
947 if (unlikely(v > NUMA_STATS_THRESHOLD)) {
948 zone_numa_state_add(v, zone, item);
949 __this_cpu_write(*p, 0);
950 }
951}
952
/*
 * Determine the per node value of a stat item. This function
 * is called frequently in a NUMA machine, so try to be as
 * frugal as possible.
 */
unsigned long sum_zone_node_page_state(int node,
				 enum zone_stat_item item)
960{
961 struct zone *zones = NODE_DATA(node)->node_zones;
962 int i;
963 unsigned long count = 0;
964
965 for (i = 0; i < MAX_NR_ZONES; i++)
966 count += zone_page_state(zones + i, item);
967
968 return count;
969}
970
/*
 * Determine the per node value of a numa stat item. To avoid deviation,
 * the per cpu stat number in vm_numa_stat_diff[] is also included.
 */
unsigned long sum_zone_numa_state(int node,
				 enum numa_stat_item item)
977{
978 struct zone *zones = NODE_DATA(node)->node_zones;
979 int i;
980 unsigned long count = 0;
981
982 for (i = 0; i < MAX_NR_ZONES; i++)
983 count += zone_numa_state_snapshot(zones + i, item);
984
985 return count;
986}
987
/*
 * Determine the per node value of a stat item.
 */
unsigned long node_page_state(struct pglist_data *pgdat,
				enum node_stat_item item)
993{
994 long x = atomic_long_read(&pgdat->vm_stat[item]);
995#ifdef CONFIG_SMP
996 if (x < 0)
997 x = 0;
998#endif
999 return x;
1000}
1001#endif
1002
1003#ifdef CONFIG_COMPACTION
1004
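/*
 * Summary of a zone's free lists relative to one allocation order:
 * total free pages, total free blocks, and the number of free blocks
 * large enough to satisfy a request of that order.
 */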
1005struct contig_page_info {
1006 unsigned long free_pages;
1007 unsigned long free_blocks_total;
1008 unsigned long free_blocks_suitable;
1009};
1010
/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * done in a userspace utility.
 */
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
1022{
1023 unsigned int order;
1024
1025 info->free_pages = 0;
1026 info->free_blocks_total = 0;
1027 info->free_blocks_suitable = 0;
1028
1029 for (order = 0; order < MAX_ORDER; order++) {
1030 unsigned long blocks;
1031
1032
1033 blocks = zone->free_area[order].nr_free;
1034 info->free_blocks_total += blocks;
1035
1036
1037 info->free_pages += blocks << order;
1038
1039
1040 if (order >= suitable_order)
1041 info->free_blocks_suitable += blocks <<
1042 (order - suitable_order);
1043 }
1044}
1045
/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used.
 */
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
1054{
1055 unsigned long requested = 1UL << order;
1056
1057 if (WARN_ON_ONCE(order >= MAX_ORDER))
1058 return 0;
1059
1060 if (!info->free_blocks_total)
1061 return 0;
1062
	/* Fragmentation index only makes sense when a request would fail */
	if (info->free_blocks_suitable)
		return -1000;
1066
	/*
	 * Index is between 0 and 1 so return within 3 decimal places
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
	return 1000 - div_u64( (1000+(div_u64(info->free_pages * 1000ULL, requested))), info->free_blocks_total);
1074}
1075
/* Same as __fragmentation_index but allocates contig_page_info on stack */
int fragmentation_index(struct zone *zone, unsigned int order)
1078{
1079 struct contig_page_info info;
1080
1081 fill_contig_page_info(zone, order, &info);
1082 return __fragmentation_index(order, &info);
1083}
1084#endif
1085
1086#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
1087#ifdef CONFIG_ZONE_DMA
1088#define TEXT_FOR_DMA(xx) xx "_dma",
1089#else
1090#define TEXT_FOR_DMA(xx)
1091#endif
1092
1093#ifdef CONFIG_ZONE_DMA32
1094#define TEXT_FOR_DMA32(xx) xx "_dma32",
1095#else
1096#define TEXT_FOR_DMA32(xx)
1097#endif
1098
1099#ifdef CONFIG_HIGHMEM
1100#define TEXT_FOR_HIGHMEM(xx) xx "_high",
1101#else
1102#define TEXT_FOR_HIGHMEM(xx)
1103#endif
1104
1105#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
1106 TEXT_FOR_HIGHMEM(xx) xx "_movable",
1107
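/*
 * Names of the counters exported through /proc/vmstat and /proc/zoneinfo.
 * The order must match the zone, NUMA, node and writeback stat items,
 * followed by the vm_event_item counters (see vmstat_start()).
 */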
1108const char * const vmstat_text[] = {
	/* enum zone_stat_item counters */
1110 "nr_free_pages",
1111 "nr_zone_inactive_anon",
1112 "nr_zone_active_anon",
1113 "nr_zone_inactive_file",
1114 "nr_zone_active_file",
1115 "nr_zone_unevictable",
1116 "nr_zone_write_pending",
1117 "nr_mlock",
1118 "nr_page_table_pages",
1119 "nr_kernel_stack",
1120 "nr_bounce",
1121#if IS_ENABLED(CONFIG_ZSMALLOC)
1122 "nr_zspages",
1123#endif
1124 "nr_free_cma",
1125
	/* enum numa_stat_item counters */
1127#ifdef CONFIG_NUMA
1128 "numa_hit",
1129 "numa_miss",
1130 "numa_foreign",
1131 "numa_interleave",
1132 "numa_local",
1133 "numa_other",
1134#endif
1135
	/* Node-based counters */
1137 "nr_inactive_anon",
1138 "nr_active_anon",
1139 "nr_inactive_file",
1140 "nr_active_file",
1141 "nr_unevictable",
1142 "nr_slab_reclaimable",
1143 "nr_slab_unreclaimable",
1144 "nr_isolated_anon",
1145 "nr_isolated_file",
1146 "workingset_nodes",
1147 "workingset_refault",
1148 "workingset_activate",
1149 "workingset_restore",
1150 "workingset_nodereclaim",
1151 "nr_anon_pages",
1152 "nr_mapped",
1153 "nr_file_pages",
1154 "nr_dirty",
1155 "nr_writeback",
1156 "nr_writeback_temp",
1157 "nr_shmem",
1158 "nr_shmem_hugepages",
1159 "nr_shmem_pmdmapped",
1160 "nr_anon_transparent_hugepages",
1161 "nr_unstable",
1162 "nr_vmscan_write",
1163 "nr_vmscan_immediate_reclaim",
1164 "nr_dirtied",
1165 "nr_written",
1166 "nr_kernel_misc_reclaimable",
1167
	/* enum writeback_stat_item counters */
1169 "nr_dirty_threshold",
1170 "nr_dirty_background_threshold",
1171
1172#ifdef CONFIG_VM_EVENT_COUNTERS
	/* enum vm_event_item counters */
1174 "pgpgin",
1175 "pgpgout",
1176 "pswpin",
1177 "pswpout",
1178
1179 TEXTS_FOR_ZONES("pgalloc")
1180 TEXTS_FOR_ZONES("allocstall")
1181 TEXTS_FOR_ZONES("pgskip")
1182
1183 "pgfree",
1184 "pgactivate",
1185 "pgdeactivate",
1186 "pglazyfree",
1187
1188 "pgfault",
1189 "pgmajfault",
1190 "pglazyfreed",
1191
1192 "pgrefill",
1193 "pgsteal_kswapd",
1194 "pgsteal_direct",
1195 "pgscan_kswapd",
1196 "pgscan_direct",
1197 "pgscan_direct_throttle",
1198
1199#ifdef CONFIG_NUMA
1200 "zone_reclaim_failed",
1201#endif
1202 "pginodesteal",
1203 "slabs_scanned",
1204 "kswapd_inodesteal",
1205 "kswapd_low_wmark_hit_quickly",
1206 "kswapd_high_wmark_hit_quickly",
1207 "pageoutrun",
1208
1209 "pgrotated",
1210
1211 "drop_pagecache",
1212 "drop_slab",
1213 "oom_kill",
1214
1215#ifdef CONFIG_NUMA_BALANCING
1216 "numa_pte_updates",
1217 "numa_huge_pte_updates",
1218 "numa_hint_faults",
1219 "numa_hint_faults_local",
1220 "numa_pages_migrated",
1221#endif
1222#ifdef CONFIG_MIGRATION
1223 "pgmigrate_success",
1224 "pgmigrate_fail",
1225#endif
1226#ifdef CONFIG_COMPACTION
1227 "compact_migrate_scanned",
1228 "compact_free_scanned",
1229 "compact_isolated",
1230 "compact_stall",
1231 "compact_fail",
1232 "compact_success",
1233 "compact_daemon_wake",
1234 "compact_daemon_migrate_scanned",
1235 "compact_daemon_free_scanned",
1236#endif
1237
1238#ifdef CONFIG_HUGETLB_PAGE
1239 "htlb_buddy_alloc_success",
1240 "htlb_buddy_alloc_fail",
1241#endif
1242 "unevictable_pgs_culled",
1243 "unevictable_pgs_scanned",
1244 "unevictable_pgs_rescued",
1245 "unevictable_pgs_mlocked",
1246 "unevictable_pgs_munlocked",
1247 "unevictable_pgs_cleared",
1248 "unevictable_pgs_stranded",
1249
1250#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1251 "thp_fault_alloc",
1252 "thp_fault_fallback",
1253 "thp_collapse_alloc",
1254 "thp_collapse_alloc_failed",
1255 "thp_file_alloc",
1256 "thp_file_mapped",
1257 "thp_split_page",
1258 "thp_split_page_failed",
1259 "thp_deferred_split_page",
1260 "thp_split_pmd",
1261#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
1262 "thp_split_pud",
1263#endif
1264 "thp_zero_page_alloc",
1265 "thp_zero_page_alloc_failed",
1266 "thp_swpout",
1267 "thp_swpout_fallback",
1268#endif
1269#ifdef CONFIG_MEMORY_BALLOON
1270 "balloon_inflate",
1271 "balloon_deflate",
1272#ifdef CONFIG_BALLOON_COMPACTION
1273 "balloon_migrate",
1274#endif
1275#endif
1276#ifdef CONFIG_DEBUG_TLBFLUSH
1277#ifdef CONFIG_SMP
1278 "nr_tlb_remote_flush",
1279 "nr_tlb_remote_flush_received",
1280#else
1281 "",
1282 "",
1283#endif
1284 "nr_tlb_local_flush_all",
1285 "nr_tlb_local_flush_one",
1286#endif
1287
1288#ifdef CONFIG_DEBUG_VM_VMACACHE
1289 "vmacache_find_calls",
1290 "vmacache_find_hits",
1291#endif
1292#ifdef CONFIG_SWAP
1293 "swap_ra",
1294 "swap_ra_hit",
1295#endif
1296#endif
1297};
1298#endif
1299
1300#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
1301 defined(CONFIG_PROC_FS)
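/*
 * seq_file iterator over the online nodes; *pos indexes into the list of
 * online pgdats.  Shared by the /proc and debugfs fragmentation files.
 */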
1302static void *frag_start(struct seq_file *m, loff_t *pos)
1303{
1304 pg_data_t *pgdat;
1305 loff_t node = *pos;
1306
1307 for (pgdat = first_online_pgdat();
1308 pgdat && node;
1309 pgdat = next_online_pgdat(pgdat))
1310 --node;
1311
1312 return pgdat;
1313}
1314
1315static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
1316{
1317 pg_data_t *pgdat = (pg_data_t *)arg;
1318
1319 (*pos)++;
1320 return next_online_pgdat(pgdat);
1321}
1322
1323static void frag_stop(struct seq_file *m, void *arg)
1324{
1325}
1326
/*
 * Walk zones in a node and print using a callback.
 * If @assert_populated is true, only use the callback for zones that are
 * populated.
 */
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		bool assert_populated, bool nolock,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
1334{
1335 struct zone *zone;
1336 struct zone *node_zones = pgdat->node_zones;
1337 unsigned long flags;
1338
1339 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
1340 if (assert_populated && !populated_zone(zone))
1341 continue;
1342
1343 if (!nolock)
1344 spin_lock_irqsave(&zone->lock, flags);
1345 print(m, pgdat, zone);
1346 if (!nolock)
1347 spin_unlock_irqrestore(&zone->lock, flags);
1348 }
1349}
1350#endif
1351
1352#ifdef CONFIG_PROC_FS
1353static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
1354 struct zone *zone)
1355{
1356 int order;
1357
1358 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1359 for (order = 0; order < MAX_ORDER; ++order)
1360 seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
1361 seq_putc(m, '\n');
1362}
1363
/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
1368{
1369 pg_data_t *pgdat = (pg_data_t *)arg;
1370 walk_zones_in_node(m, pgdat, true, false, frag_show_print);
1371 return 0;
1372}
1373
1374static void pagetypeinfo_showfree_print(struct seq_file *m,
1375 pg_data_t *pgdat, struct zone *zone)
1376{
1377 int order, mtype;
1378
1379 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
1380 seq_printf(m, "Node %4d, zone %8s, type %12s ",
1381 pgdat->node_id,
1382 zone->name,
1383 migratetype_names[mtype]);
1384 for (order = 0; order < MAX_ORDER; ++order) {
1385 unsigned long freecount = 0;
1386 struct free_area *area;
1387 struct list_head *curr;
1388
1389 area = &(zone->free_area[order]);
1390
1391 list_for_each(curr, &area->free_list[mtype])
1392 freecount++;
1393 seq_printf(m, "%6lu ", freecount);
1394 }
1395 seq_putc(m, '\n');
1396 }
1397}
1398
/* Print out the free pages at each order for each migratetype */
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
1401{
1402 int order;
1403 pg_data_t *pgdat = (pg_data_t *)arg;
1404
1405
1406 seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
1407 for (order = 0; order < MAX_ORDER; ++order)
1408 seq_printf(m, "%6d ", order);
1409 seq_putc(m, '\n');
1410
1411 walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
1412
1413 return 0;
1414}
1415
1416static void pagetypeinfo_showblockcount_print(struct seq_file *m,
1417 pg_data_t *pgdat, struct zone *zone)
1418{
1419 int mtype;
1420 unsigned long pfn;
1421 unsigned long start_pfn = zone->zone_start_pfn;
1422 unsigned long end_pfn = zone_end_pfn(zone);
1423 unsigned long count[MIGRATE_TYPES] = { 0, };
1424
1425 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
1426 struct page *page;
1427
1428 page = pfn_to_online_page(pfn);
1429 if (!page)
1430 continue;
1431
1432
1433 if (!memmap_valid_within(pfn, page, zone))
1434 continue;
1435
1436 if (page_zone(page) != zone)
1437 continue;
1438
1439 mtype = get_pageblock_migratetype(page);
1440
1441 if (mtype < MIGRATE_TYPES)
1442 count[mtype]++;
1443 }
1444
1445
1446 seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
1447 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1448 seq_printf(m, "%12lu ", count[mtype]);
1449 seq_putc(m, '\n');
1450}
1451
/* Print out the number of pageblocks for each migratetype */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
1454{
1455 int mtype;
1456 pg_data_t *pgdat = (pg_data_t *)arg;
1457
1458 seq_printf(m, "\n%-23s", "Number of blocks type ");
1459 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1460 seq_printf(m, "%12s ", migratetype_names[mtype]);
1461 seq_putc(m, '\n');
1462 walk_zones_in_node(m, pgdat, true, false,
1463 pagetypeinfo_showblockcount_print);
1464
1465 return 0;
1466}
1467
/*
 * Print out the number of pageblocks for each migratetype that contain pages
 * of other types. This gives an indication of how well fallbacks are being
 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
 * to determine what is going on.
 */
static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
1475{
1476#ifdef CONFIG_PAGE_OWNER
1477 int mtype;
1478
1479 if (!static_branch_unlikely(&page_owner_inited))
1480 return;
1481
1482 drain_all_pages(NULL);
1483
1484 seq_printf(m, "\n%-23s", "Number of mixed blocks ");
1485 for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
1486 seq_printf(m, "%12s ", migratetype_names[mtype]);
1487 seq_putc(m, '\n');
1488
1489 walk_zones_in_node(m, pgdat, true, true,
1490 pagetypeinfo_showmixedcount_print);
1491#endif
1492}
1493
/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
1499{
1500 pg_data_t *pgdat = (pg_data_t *)arg;
1501
1502
1503 if (!node_state(pgdat->node_id, N_MEMORY))
1504 return 0;
1505
1506 seq_printf(m, "Page block order: %d\n", pageblock_order);
1507 seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
1508 seq_putc(m, '\n');
1509 pagetypeinfo_showfree(m, pgdat);
1510 pagetypeinfo_showblockcount(m, pgdat);
1511 pagetypeinfo_showmixedcount(m, pgdat);
1512
1513 return 0;
1514}
1515
1516static const struct seq_operations fragmentation_op = {
1517 .start = frag_start,
1518 .next = frag_next,
1519 .stop = frag_stop,
1520 .show = frag_show,
1521};
1522
1523static const struct seq_operations pagetypeinfo_op = {
1524 .start = frag_start,
1525 .next = frag_next,
1526 .stop = frag_stop,
1527 .show = pagetypeinfo_show,
1528};
1529
1530static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
1531{
1532 int zid;
1533
1534 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
1535 struct zone *compare = &pgdat->node_zones[zid];
1536
1537 if (populated_zone(compare))
1538 return zone == compare;
1539 }
1540
1541 return false;
1542}
1543
1544static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1545 struct zone *zone)
1546{
1547 int i;
1548 seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
1549 if (is_zone_first_populated(pgdat, zone)) {
1550 seq_printf(m, "\n per-node stats");
1551 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
1552 seq_printf(m, "\n %-12s %lu",
1553 vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
1554 NR_VM_NUMA_STAT_ITEMS],
1555 node_page_state(pgdat, i));
1556 }
1557 }
1558 seq_printf(m,
1559 "\n pages free %lu"
1560 "\n min %lu"
1561 "\n low %lu"
1562 "\n high %lu"
1563 "\n spanned %lu"
1564 "\n present %lu"
1565 "\n managed %lu",
1566 zone_page_state(zone, NR_FREE_PAGES),
1567 min_wmark_pages(zone),
1568 low_wmark_pages(zone),
1569 high_wmark_pages(zone),
1570 zone->spanned_pages,
1571 zone->present_pages,
1572 zone_managed_pages(zone));
1573
1574 seq_printf(m,
1575 "\n protection: (%ld",
1576 zone->lowmem_reserve[0]);
1577 for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
1578 seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
1579 seq_putc(m, ')');
1580
1581
1582 if (!populated_zone(zone)) {
1583 seq_putc(m, '\n');
1584 return;
1585 }
1586
1587 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1588 seq_printf(m, "\n %-12s %lu", vmstat_text[i],
1589 zone_page_state(zone, i));
1590
1591#ifdef CONFIG_NUMA
1592 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
1593 seq_printf(m, "\n %-12s %lu",
1594 vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
1595 zone_numa_state_snapshot(zone, i));
1596#endif
1597
1598 seq_printf(m, "\n pagesets");
1599 for_each_online_cpu(i) {
1600 struct per_cpu_pageset *pageset;
1601
1602 pageset = per_cpu_ptr(zone->pageset, i);
1603 seq_printf(m,
1604 "\n cpu: %i"
1605 "\n count: %i"
1606 "\n high: %i"
1607 "\n batch: %i",
1608 i,
1609 pageset->pcp.count,
1610 pageset->pcp.high,
1611 pageset->pcp.batch);
1612#ifdef CONFIG_SMP
1613 seq_printf(m, "\n vm stats threshold: %d",
1614 pageset->stat_threshold);
1615#endif
1616 }
1617 seq_printf(m,
1618 "\n node_unreclaimable: %u"
1619 "\n start_pfn: %lu",
1620 pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
1621 zone->zone_start_pfn);
1622 seq_putc(m, '\n');
1623}
1624
/*
 * Output information about zones in @pgdat.  All zones are printed regardless
 * of whether they are populated or not: lowmem_reserve_ratio also operates on
 * zones that are not populated.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
1632{
1633 pg_data_t *pgdat = (pg_data_t *)arg;
1634 walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
1635 return 0;
1636}
1637
1638static const struct seq_operations zoneinfo_op = {
1639 .start = frag_start,
1640
1641 .next = frag_next,
1642 .stop = frag_stop,
1643 .show = zoneinfo_show,
1644};
1645
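/*
 * Dirty throttling thresholds reported via /proc/vmstat; they are not stored
 * in any stat array but computed on the fly in vmstat_start().
 */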
1646enum writeback_stat_item {
1647 NR_DIRTY_THRESHOLD,
1648 NR_DIRTY_BG_THRESHOLD,
1649 NR_VM_WRITEBACK_STAT_ITEMS,
1650};
1651
1652static void *vmstat_start(struct seq_file *m, loff_t *pos)
1653{
1654 unsigned long *v;
1655 int i, stat_items_size;
1656
1657 if (*pos >= ARRAY_SIZE(vmstat_text))
1658 return NULL;
1659 stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
1660 NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
1661 NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
1662 NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);
1663
1664#ifdef CONFIG_VM_EVENT_COUNTERS
1665 stat_items_size += sizeof(struct vm_event_state);
1666#endif
1667
1668 BUILD_BUG_ON(stat_items_size !=
1669 ARRAY_SIZE(vmstat_text) * sizeof(unsigned long));
1670 v = kmalloc(stat_items_size, GFP_KERNEL);
1671 m->private = v;
1672 if (!v)
1673 return ERR_PTR(-ENOMEM);
1674 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1675 v[i] = global_zone_page_state(i);
1676 v += NR_VM_ZONE_STAT_ITEMS;
1677
1678#ifdef CONFIG_NUMA
1679 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
1680 v[i] = global_numa_state(i);
1681 v += NR_VM_NUMA_STAT_ITEMS;
1682#endif
1683
1684 for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
1685 v[i] = global_node_page_state(i);
1686 v += NR_VM_NODE_STAT_ITEMS;
1687
1688 global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
1689 v + NR_DIRTY_THRESHOLD);
1690 v += NR_VM_WRITEBACK_STAT_ITEMS;
1691
1692#ifdef CONFIG_VM_EVENT_COUNTERS
1693 all_vm_events(v);
	v[PGPGIN] /= 2;		/* sectors -> kbytes */
	v[PGPGOUT] /= 2;
1696#endif
1697 return (unsigned long *)m->private + *pos;
1698}
1699
1700static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
1701{
1702 (*pos)++;
1703 if (*pos >= ARRAY_SIZE(vmstat_text))
1704 return NULL;
1705 return (unsigned long *)m->private + *pos;
1706}
1707
1708static int vmstat_show(struct seq_file *m, void *arg)
1709{
1710 unsigned long *l = arg;
1711 unsigned long off = l - (unsigned long *)m->private;
1712
1713 seq_puts(m, vmstat_text[off]);
1714 seq_put_decimal_ull(m, " ", *l);
1715 seq_putc(m, '\n');
1716 return 0;
1717}
1718
1719static void vmstat_stop(struct seq_file *m, void *arg)
1720{
1721 kfree(m->private);
1722 m->private = NULL;
1723}
1724
1725static const struct seq_operations vmstat_op = {
1726 .start = vmstat_start,
1727 .next = vmstat_next,
1728 .stop = vmstat_stop,
1729 .show = vmstat_show,
1730};
1731#endif
1732
1733#ifdef CONFIG_SMP
1734static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
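/* Interval between per-cpu differential folds, in jiffies (vm.stat_interval sysctl). */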
1735int sysctl_stat_interval __read_mostly = HZ;
1736
1737#ifdef CONFIG_PROC_FS
1738static void refresh_vm_stats(struct work_struct *work)
1739{
1740 refresh_cpu_vm_stats(true);
1741}
1742
1743int vmstat_refresh(struct ctl_table *table, int write,
1744 void __user *buffer, size_t *lenp, loff_t *ppos)
1745{
1746 long val;
1747 int err;
1748 int i;
1749
	/*
	 * The regular update, every sysctl_stat_interval, may come later
	 * than expected: leaving a significant amount in per_cpu buckets.
	 * This is particularly misleading when checking a quantity of HUGE
	 * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
	 * which can equally be echo'ed to or cat'ted from (by root),
	 * can be used to update the stats just before reading them.
	 *
	 * Since global_zone_page_state() etc. are so careful to hide
	 * transiently negative values, report an error here if any of
	 * the stats is negative, so we know to go looking for imbalance.
	 */
	err = schedule_on_each_cpu(refresh_vm_stats);
1763 if (err)
1764 return err;
1765 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
1766 val = atomic_long_read(&vm_zone_stat[i]);
1767 if (val < 0) {
1768 pr_warn("%s: %s %ld\n",
1769 __func__, vmstat_text[i], val);
1770 err = -EINVAL;
1771 }
1772 }
1773#ifdef CONFIG_NUMA
1774 for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
1775 val = atomic_long_read(&vm_numa_stat[i]);
1776 if (val < 0) {
1777 pr_warn("%s: %s %ld\n",
1778 __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
1779 err = -EINVAL;
1780 }
1781 }
1782#endif
1783 if (err)
1784 return err;
1785 if (write)
1786 *ppos += *lenp;
1787 else
1788 *lenp = 0;
1789 return 0;
1790}
1791#endif
1792
1793static void vmstat_update(struct work_struct *w)
1794{
1795 if (refresh_cpu_vm_stats(true)) {
		/*
		 * Counters were updated so we expect more updates
		 * to occur in the future. Keep on running the
		 * update worker thread.
		 */
1801 queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
1802 this_cpu_ptr(&vmstat_work),
1803 round_jiffies_relative(sysctl_stat_interval));
1804 }
1805}
1806
/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */
static bool need_update(int cpu)
1817{
1818 struct zone *zone;
1819
1820 for_each_populated_zone(zone) {
1821 struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
1822
1823 BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
1824#ifdef CONFIG_NUMA
1825 BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
1826#endif
1827
		/*
		 * The fast way of checking if there are any vmstat diffs.
		 */
1831 if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
1832 sizeof(p->vm_stat_diff[0])))
1833 return true;
1834#ifdef CONFIG_NUMA
1835 if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
1836 sizeof(p->vm_numa_stat_diff[0])))
1837 return true;
1838#endif
1839 }
1840 return false;
1841}
1842
/*
 * Switch off vmstat processing and then fold all the remaining
 * differentials.
 */
void quiet_vmstat(void)
1849{
1850 if (system_state != SYSTEM_RUNNING)
1851 return;
1852
1853 if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
1854 return;
1855
1856 if (!need_update(smp_processor_id()))
1857 return;
1858
	/*
	 * Just refresh counters and do not care about the pending delayed
	 * vmstat_update. It doesn't fire that often to matter and canceling
	 * it would be too expensive from this path.
	 * vmstat_shepherd will take care about that for us.
	 */
	refresh_cpu_vm_stats(false);
1866}
1867
/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for vm statistics updates disabled because of
 * inactivity.
 */
static void vmstat_shepherd(struct work_struct *w);
1875
1876static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);
1877
1878static void vmstat_shepherd(struct work_struct *w)
1879{
1880 int cpu;
1881
1882 get_online_cpus();
1883
1884 for_each_online_cpu(cpu) {
1885 struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
1886
1887 if (!delayed_work_pending(dw) && need_update(cpu))
1888 queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
1889 }
1890 put_online_cpus();
1891
1892 schedule_delayed_work(&shepherd,
1893 round_jiffies_relative(sysctl_stat_interval));
1894}
1895
1896static void __init start_shepherd_timer(void)
1897{
1898 int cpu;
1899
1900 for_each_possible_cpu(cpu)
1901 INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
1902 vmstat_update);
1903
1904 schedule_delayed_work(&shepherd,
1905 round_jiffies_relative(sysctl_stat_interval));
1906}
1907
1908static void __init init_cpu_node_state(void)
1909{
1910 int node;
1911
1912 for_each_online_node(node) {
1913 if (cpumask_weight(cpumask_of_node(node)) > 0)
1914 node_set_state(node, N_CPU);
1915 }
1916}
1917
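/*
 * CPU hotplug callbacks: keep the stat thresholds and the N_CPU node state
 * in sync as CPUs come and go.
 */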
1918static int vmstat_cpu_online(unsigned int cpu)
1919{
1920 refresh_zone_stat_thresholds();
1921 node_set_state(cpu_to_node(cpu), N_CPU);
1922 return 0;
1923}
1924
1925static int vmstat_cpu_down_prep(unsigned int cpu)
1926{
1927 cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
1928 return 0;
1929}
1930
1931static int vmstat_cpu_dead(unsigned int cpu)
1932{
1933 const struct cpumask *node_cpus;
1934 int node;
1935
1936 node = cpu_to_node(cpu);
1937
1938 refresh_zone_stat_thresholds();
1939 node_cpus = cpumask_of_node(node);
1940 if (cpumask_weight(node_cpus) > 0)
1941 return 0;
1942
1943 node_clear_state(node, N_CPU);
1944 return 0;
1945}
1946
1947#endif
1948
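/* Workqueue shared by mm for deferred per-cpu work such as vmstat updates. */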
1949struct workqueue_struct *mm_percpu_wq;
1950
1951void __init init_mm_internals(void)
1952{
1953 int ret __maybe_unused;
1954
1955 mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
1956
1957#ifdef CONFIG_SMP
1958 ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
1959 NULL, vmstat_cpu_dead);
1960 if (ret < 0)
1961 pr_err("vmstat: failed to register 'dead' hotplug state\n");
1962
1963 ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
1964 vmstat_cpu_online,
1965 vmstat_cpu_down_prep);
1966 if (ret < 0)
1967 pr_err("vmstat: failed to register 'online' hotplug state\n");
1968
1969 get_online_cpus();
1970 init_cpu_node_state();
1971 put_online_cpus();
1972
1973 start_shepherd_timer();
1974#endif
1975#ifdef CONFIG_PROC_FS
1976 proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
1977 proc_create_seq("pagetypeinfo", 0444, NULL, &pagetypeinfo_op);
1978 proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
1979 proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
1980#endif
1981}
1982
1983#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)

/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{
	/* No free memory is interpreted as all free memory is unusable */
	if (info->free_pages == 0)
		return 1000;
1995
	/*
	 * Index should be a value between 0 and 1. Return a value to 3
	 * decimal places.
	 *
	 * 0 => no fragmentation
	 * 1 => high fragmentation
	 */
	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
2004
2005}
2006
2007static void unusable_show_print(struct seq_file *m,
2008 pg_data_t *pgdat, struct zone *zone)
2009{
2010 unsigned int order;
2011 int index;
2012 struct contig_page_info info;
2013
2014 seq_printf(m, "Node %d, zone %8s ",
2015 pgdat->node_id,
2016 zone->name);
2017 for (order = 0; order < MAX_ORDER; ++order) {
2018 fill_contig_page_info(zone, order, &info);
2019 index = unusable_free_index(order, &info);
2020 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2021 }
2022
2023 seq_putc(m, '\n');
2024}
2025
/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */
static int unusable_show(struct seq_file *m, void *arg)
2036{
2037 pg_data_t *pgdat = (pg_data_t *)arg;
2038
2039
2040 if (!node_state(pgdat->node_id, N_MEMORY))
2041 return 0;
2042
2043 walk_zones_in_node(m, pgdat, true, false, unusable_show_print);
2044
2045 return 0;
2046}
2047
2048static const struct seq_operations unusable_op = {
2049 .start = frag_start,
2050 .next = frag_next,
2051 .stop = frag_stop,
2052 .show = unusable_show,
2053};
2054
2055static int unusable_open(struct inode *inode, struct file *file)
2056{
2057 return seq_open(file, &unusable_op);
2058}
2059
2060static const struct file_operations unusable_file_ops = {
2061 .open = unusable_open,
2062 .read = seq_read,
2063 .llseek = seq_lseek,
2064 .release = seq_release,
2065};
2066
2067static void extfrag_show_print(struct seq_file *m,
2068 pg_data_t *pgdat, struct zone *zone)
2069{
2070 unsigned int order;
2071 int index;

	/* Alloc on stack as interrupts are disabled for zone walk */
	struct contig_page_info info;
2075
2076 seq_printf(m, "Node %d, zone %8s ",
2077 pgdat->node_id,
2078 zone->name);
2079 for (order = 0; order < MAX_ORDER; ++order) {
2080 fill_contig_page_info(zone, order, &info);
2081 index = __fragmentation_index(order, &info);
2082 seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
2083 }
2084
2085 seq_putc(m, '\n');
2086}
2087
/*
 * Display fragmentation index for orders that allocations would fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
2092{
2093 pg_data_t *pgdat = (pg_data_t *)arg;
2094
2095 walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);
2096
2097 return 0;
2098}
2099
2100static const struct seq_operations extfrag_op = {
2101 .start = frag_start,
2102 .next = frag_next,
2103 .stop = frag_stop,
2104 .show = extfrag_show,
2105};
2106
2107static int extfrag_open(struct inode *inode, struct file *file)
2108{
2109 return seq_open(file, &extfrag_op);
2110}
2111
2112static const struct file_operations extfrag_file_ops = {
2113 .open = extfrag_open,
2114 .read = seq_read,
2115 .llseek = seq_lseek,
2116 .release = seq_release,
2117};
2118
2119static int __init extfrag_debug_init(void)
2120{
2121 struct dentry *extfrag_debug_root;
2122
2123 extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
2124 if (!extfrag_debug_root)
2125 return -ENOMEM;
2126
2127 if (!debugfs_create_file("unusable_index", 0444,
2128 extfrag_debug_root, NULL, &unusable_file_ops))
2129 goto fail;
2130
2131 if (!debugfs_create_file("extfrag_index", 0444,
2132 extfrag_debug_root, NULL, &extfrag_file_ops))
2133 goto fail;
2134
2135 return 0;
2136fail:
2137 debugfs_remove_recursive(extfrag_debug_root);
2138 return -ENOMEM;
2139}
2140
2141module_init(extfrag_debug_init);
2142#endif
2143