// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1999, 2000, 2001, 2002  Red Hat, Inc., Ingo Molnar
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *		Christoph Lameter <christoph@lameter.com>
 *  Copyright (C) 2008-2014 Christoph Lameter
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/page_ext.h>
#include <linux/page_owner.h>

#include "internal.h"

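/*
 * The per-cpu NUMA stat diffs below are u16s, so fold them into the zone
 * counters just before they could wrap; hence the small headroom under
 * U16_MAX.
 */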
#define NUMA_STATS_THRESHOLD (U16_MAX - 2)

#ifdef CONFIG_NUMA
int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;

/* zero numa counters within a zone */
static void zero_zone_numa_counters(struct zone *zone)
{
	int item, cpu;

	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
		atomic_long_set(&zone->vm_numa_stat[item], 0);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
						= 0;
	}
}

/* zero numa counters of all the populated zones */
static void zero_zones_numa_counters(void)
{
	struct zone *zone;

	for_each_populated_zone(zone)
		zero_zone_numa_counters(zone);
}

/* zero global numa counters */
static void zero_global_numa_counters(void)
{
	int item;

	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
		atomic_long_set(&vm_numa_stat[item], 0);
}

static void invalid_numa_statistics(void)
{
	zero_zones_numa_counters();
	zero_global_numa_counters();
}

static DEFINE_MUTEX(vm_numa_stat_lock);

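/*
 * Handler for /proc/sys/vm/numa_stat: writing 0 disables NUMA statistics
 * collection and clears the existing counters; writing 1 re-enables it.
 */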
int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *length, loff_t *ppos)
{
	int ret, oldval;

	mutex_lock(&vm_numa_stat_lock);
	if (write)
		oldval = sysctl_vm_numa_stat;
	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
	if (ret || !write)
		goto out;

	if (oldval == sysctl_vm_numa_stat)
		goto out;
	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
		static_branch_enable(&vm_numa_stat_key);
		pr_info("enable numa statistics\n");
	} else {
		static_branch_disable(&vm_numa_stat_key);
		invalid_numa_statistics();
		pr_info("disable numa statistics, and clear numa counters\n");
	}

out:
	mutex_unlock(&vm_numa_stat_lock);
	return ret;
}
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);

/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * a bit racy but it doesn't matter since we will
 * not be in deep trouble if a few counters are off.
 */
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
EXPORT_SYMBOL(vm_numa_stat);
EXPORT_SYMBOL(vm_node_stat);

#ifdef CONFIG_SMP

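/*
 * Per-cpu stat deltas are only folded into the global counters once they
 * exceed a per-pageset threshold, trading accuracy for scalability. The
 * two helpers below size that threshold: a conservative value for use
 * under memory pressure and a larger one for normal operation.
 */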
int calculate_pressure_threshold(struct zone *zone)
{
	int threshold;
	int watermark_distance;

	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark.
	 */
	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
	threshold = max(1, (int)(watermark_distance / num_online_cpus()));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

int calculate_normal_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 * fls() is used to have a cheap way of logarithmic scaling.
	 *
	 * The result is capped below, so even very large zones on machines
	 * with many processors end up with a modest threshold.
	 */
	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

/*
 * Refresh the thresholds for each zone.
 */
void refresh_zone_stat_thresholds(void)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int cpu;
	int threshold;

	/* Zero current pgdat thresholds */
	for_each_online_pgdat(pgdat) {
		for_each_online_cpu(cpu) {
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
		}
	}

	for_each_populated_zone(zone) {
		struct pglist_data *pgdat = zone->zone_pgdat;
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_normal_threshold(zone);

		for_each_online_cpu(cpu) {
			int pgdat_threshold;

			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;

			/* Base nodestat threshold on the largest populated zone. */
			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
				= max(threshold, pgdat_threshold);
		}

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * the min watermark could be breached by an allocation
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}

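/*
 * Switch a node's per-cpu stat thresholds using the given calculation;
 * e.g. kswapd tightens them to the pressure threshold while reclaiming
 * and restores the normal one when it sleeps. Only zones with a
 * percpu_drift_mark are affected.
 */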
void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{
	struct zone *zone;
	int cpu;
	int threshold;
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		zone = &pgdat->node_zones[i];
		if (!zone->percpu_drift_mark)
			continue;

		threshold = (*calculate_pressure)(zone);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;
	}
}

/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			   long delta)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_zone_page_state);

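/*
 * Node counters accounted in bytes must arrive page-aligned; internally
 * they are stored in pages, hence the shift below.
 */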
void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
				long delta)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long x;
	long t;

	if (vmstat_item_in_bytes(item)) {
		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
		delta >>= PAGE_SHIFT;
	}

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		node_page_state_add(x, pgdat, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_node_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}
}

void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v + overstep, pgdat, item);
		__this_cpu_write(*p, -overstep);
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __inc_node_page_state(struct page *page, enum node_stat_item item)
{
	__inc_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__inc_node_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v - overstep, zone, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v - overstep, pgdat, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

void __dec_node_page_state(struct page *page, enum node_stat_item item)
{
	__dec_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__dec_node_page_state);

#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_zone_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies that, when the threshold is crossed, the counter
 * is rearmed half a threshold away from the boundary in the direction of
 * the update, so further updates in that direction do not immediately
 * cross it again.
 */
static inline void mod_zone_state(struct zone *zone,
       enum zone_stat_item item, long delta, int overstep_mode)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to zone counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a zone.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (n > t || n < -t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to zone counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		zone_page_state_add(z, zone, item);
}

void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	mod_zone_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_zone_page_state);

static inline void mod_node_state(struct pglist_data *pgdat,
       enum node_stat_item item, int delta, int overstep_mode)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long o, n, t, z;

	if (vmstat_item_in_bytes(item)) {
		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
		delta >>= PAGE_SHIFT;
	}

	do {
		z = 0;  /* overflow to node counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a node.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (n > t || n < -t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to node counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		node_page_state_add(z, pgdat, item);
}

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
					long delta)
{
	mod_node_state(pgdat, item, delta, 0);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	mod_node_state(pgdat, item, 1, 1);
}

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_node_page_state);
#else
/*
 * Use interrupt disable to serialize counter updates
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_state);

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
					long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_node_page_state(pgdat, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;
	struct pglist_data *pgdat;

	pgdat = page_pgdat(page);
	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_node_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_node_page_state);
#endif

/*
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */
#ifdef CONFIG_NUMA
static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
{
	int i;
	int changes = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (zone_diff[i]) {
			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		if (numa_diff[i]) {
			atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		if (node_diff[i]) {
			atomic_long_add(node_diff[i], &vm_node_stat[i]);
			changes++;
		}
	return changes;
}
#else
static int fold_diff(int *zone_diff, int *node_diff)
{
	int i;
	int changes = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (zone_diff[i]) {
			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		if (node_diff[i]) {
			atomic_long_add(node_diff[i], &vm_node_stat[i]);
			changes++;
		}
	return changes;
}
#endif /* CONFIG_NUMA */

/*
 * Update the zone counters for the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 *
 * The function returns the number of global counters updated.
 */
static int refresh_cpu_vm_stats(bool do_pagesets)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
#ifdef CONFIG_NUMA
	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
#endif
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
	int changes = 0;

	for_each_populated_zone(zone) {
		struct per_cpu_pageset __percpu *p = zone->pageset;

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
			if (v) {
				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
#ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
				__this_cpu_write(p->expire, 3);
#endif
			}
		}
#ifdef CONFIG_NUMA
		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
			if (v) {
				atomic_long_add(v, &zone->vm_numa_stat[i]);
				global_numa_diff[i] += v;
				__this_cpu_write(p->expire, 3);
			}
		}

		if (do_pagesets) {
			cond_resched();
			/*
			 * Deal with draining the remote pageset of this
			 * processor
			 *
			 * Check if there are pages remaining in this pageset
			 * if not then there is nothing to expire.
			 */
			if (!__this_cpu_read(p->expire) ||
			       !__this_cpu_read(p->pcp.count))
				continue;

			/*
			 * We never drain zones local to this processor.
			 */
			if (zone_to_nid(zone) == numa_node_id()) {
				__this_cpu_write(p->expire, 0);
				continue;
			}

			if (__this_cpu_dec_return(p->expire))
				continue;

			if (__this_cpu_read(p->pcp.count)) {
				drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
				changes++;
			}
		}
#endif
	}

	for_each_online_pgdat(pgdat) {
		struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;

		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
			if (v) {
				atomic_long_add(v, &pgdat->vm_stat[i]);
				global_node_diff[i] += v;
			}
		}
	}

#ifdef CONFIG_NUMA
	changes += fold_diff(global_zone_diff, global_numa_diff,
			     global_node_diff);
#else
	changes += fold_diff(global_zone_diff, global_node_diff);
#endif
	return changes;
}

/*
 * Fold the data for an offline cpu into the global array.
 * There cannot be any access by the offline cpu and therefore
 * synchronization is simplified.
 */
void cpu_vm_stats_fold(int cpu)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
#ifdef CONFIG_NUMA
	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
#endif
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p;

		p = per_cpu_ptr(zone->pageset, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (p->vm_stat_diff[i]) {
				int v;

				v = p->vm_stat_diff[i];
				p->vm_stat_diff[i] = 0;
				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
			}

#ifdef CONFIG_NUMA
		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
			if (p->vm_numa_stat_diff[i]) {
				int v;

				v = p->vm_numa_stat_diff[i];
				p->vm_numa_stat_diff[i] = 0;
				atomic_long_add(v, &zone->vm_numa_stat[i]);
				global_numa_diff[i] += v;
			}
#endif
	}

	for_each_online_pgdat(pgdat) {
		struct per_cpu_nodestat *p;

		p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);

		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
			if (p->vm_node_stat_diff[i]) {
				int v;

				v = p->vm_node_stat_diff[i];
				p->vm_node_stat_diff[i] = 0;
				atomic_long_add(v, &pgdat->vm_stat[i]);
				global_node_diff[i] += v;
			}
	}

#ifdef CONFIG_NUMA
	fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
#else
	fold_diff(global_zone_diff, global_node_diff);
#endif
}

/*
 * this is only called if !populated_zone(zone), which implies no other users of
 * pset->vm_stat_diff[] exist.
 */
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
{
	int i;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (pset->vm_stat_diff[i]) {
			int v = pset->vm_stat_diff[i];

			pset->vm_stat_diff[i] = 0;
			atomic_long_add(v, &zone->vm_stat[i]);
			atomic_long_add(v, &vm_zone_stat[i]);
		}

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		if (pset->vm_numa_stat_diff[i]) {
			int v = pset->vm_numa_stat_diff[i];

			pset->vm_numa_stat_diff[i] = 0;
			atomic_long_add(v, &zone->vm_numa_stat[i]);
			atomic_long_add(v, &vm_numa_stat[i]);
		}
#endif
}
#endif /* CONFIG_SMP */

#ifdef CONFIG_NUMA
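/*
 * NUMA hit/miss accounting uses u16 per-cpu diffs with the fixed
 * NUMA_STATS_THRESHOLD defined above; otherwise this mirrors
 * __inc_zone_state.
 */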
void __inc_numa_state(struct zone *zone,
				 enum numa_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	u16 __percpu *p = pcp->vm_numa_stat_diff + item;
	u16 v;

	v = __this_cpu_inc_return(*p);

	if (unlikely(v > NUMA_STATS_THRESHOLD)) {
		zone_numa_state_add(v, zone, item);
		__this_cpu_write(*p, 0);
	}
}

/*
 * Determine the per node value of a stat item. This function
 * is called frequently in a NUMA machine, so try to be as
 * frugal as possible.
 */
unsigned long sum_zone_node_page_state(int node,
				 enum zone_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	int i;
	unsigned long count = 0;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_page_state(zones + i, item);

	return count;
}

/*
 * Determine the per node value of a numa stat item. To avoid deviation,
 * the per cpu stat number in vm_numa_stat_diff[] is also included.
 */
unsigned long sum_zone_numa_state(int node,
				 enum numa_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	int i;
	unsigned long count = 0;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_numa_state_snapshot(zones + i, item);

	return count;
}

/*
 * Determine the per node value of a stat item.
 */
unsigned long node_page_state_pages(struct pglist_data *pgdat,
				    enum node_stat_item item)
{
	long x = atomic_long_read(&pgdat->vm_stat[item]);
#ifdef CONFIG_SMP
	if (x < 0)
		x = 0;
#endif
	return x;
}

unsigned long node_page_state(struct pglist_data *pgdat,
			      enum node_stat_item item)
{
	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));

	return node_page_state_pages(pgdat, item);
}
#endif /* CONFIG_NUMA */

#ifdef CONFIG_COMPACTION

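/*
 * Snapshot of a zone's free lists: total free pages, total free blocks,
 * and how many free blocks are large enough for a given allocation order.
 */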
struct contig_page_info {
	unsigned long free_pages;
	unsigned long free_blocks_total;
	unsigned long free_blocks_suitable;
};

/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * done in a separate stats file.
 */
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{
	unsigned int order;

	info->free_pages = 0;
	info->free_blocks_total = 0;
	info->free_blocks_suitable = 0;

	for (order = 0; order < MAX_ORDER; order++) {
		unsigned long blocks;

		/* Count number of free blocks */
		blocks = zone->free_area[order].nr_free;
		info->free_blocks_total += blocks;

		/* Count free base pages */
		info->free_pages += blocks << order;

		/* Count the suitable free blocks */
		if (order >= suitable_order)
			info->free_blocks_suitable += blocks <<
						(order - suitable_order);
	}
}

/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used.
 */
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

	if (WARN_ON_ONCE(order >= MAX_ORDER))
		return 0;

	if (!info->free_blocks_total)
		return 0;

	/* Fragmentation index only makes sense when a request would fail */
	if (info->free_blocks_suitable)
		return -1000;

	/*
	 * Index is between 0 and 1 so return within 3 decimal places
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
	return 1000 - div_u64((1000 + div_u64(info->free_pages * 1000ULL,
					requested)), info->free_blocks_total);
}
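
/*
 * Worked example: 1000 free pages, all in order-0 blocks
 * (free_blocks_total = 1000), with an order-2 request (requested = 4):
 * 1000 - (1000 + 1000 * 1000 / 4) / 1000 = 1000 - 251 = 749, i.e. an
 * index of 0.749, so a failure would mostly be due to fragmentation.
 */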

/*
 * Calculates external fragmentation within a zone wrt the given order.
 * It is defined as the percentage of pages found in blocks of size
 * less than 1 << order. It returns values in range [0, 100].
 */
unsigned int extfrag_for_order(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	if (info.free_pages == 0)
		return 0;

	return div_u64((info.free_pages -
			(info.free_blocks_suitable << order)) * 100,
			info.free_pages);
}
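
/*
 * Example: with 1000 free pages of which 200 blocks are order-2 or
 * larger (free_blocks_suitable = 200), the external fragmentation for
 * order 2 is (1000 - 200 * 4) * 100 / 1000 = 20%.
 */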

/* Same as __fragmentation_index but allocs contig_page_info on stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
#endif /* CONFIG_COMPACTION */

#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
    defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",

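/*
 * Counter names exported through /proc/vmstat and /proc/zoneinfo. The
 * order must match the zone, numa, node, writeback and vm_event item
 * enums exactly.
 */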
const char * const vmstat_text[] = {
	/* enum zone_stat_item counters */
	"nr_free_pages",
	"nr_zone_inactive_anon",
	"nr_zone_active_anon",
	"nr_zone_inactive_file",
	"nr_zone_active_file",
	"nr_zone_unevictable",
	"nr_zone_write_pending",
	"nr_mlock",
	"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
	"nr_zspages",
#endif
	"nr_free_cma",

	/* enum numa_stat_item counters */
#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif

	/* enum node_stat_item counters */
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_isolated_anon",
	"nr_isolated_file",
	"workingset_nodes",
	"workingset_refault_anon",
	"workingset_refault_file",
	"workingset_activate_anon",
	"workingset_activate_file",
	"workingset_restore_anon",
	"workingset_restore_file",
	"workingset_nodereclaim",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_writeback_temp",
	"nr_shmem",
	"nr_shmem_hugepages",
	"nr_shmem_pmdmapped",
	"nr_file_hugepages",
	"nr_file_pmdmapped",
	"nr_anon_transparent_hugepages",
	"nr_vmscan_write",
	"nr_vmscan_immediate_reclaim",
	"nr_dirtied",
	"nr_written",
	"nr_kernel_misc_reclaimable",
	"nr_foll_pin_acquired",
	"nr_foll_pin_released",
	"nr_kernel_stack",
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
	"nr_shadow_call_stack",
#endif
	"nr_page_table_pages",
#ifdef CONFIG_SWAP
	"nr_swapcached",
#endif

	/* enum writeback_stat_item counters */
	"nr_dirty_threshold",
	"nr_dirty_background_threshold",

#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
	/* enum vm_event_item counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")
	TEXTS_FOR_ZONES("allocstall")
	TEXTS_FOR_ZONES("pgskip")

	"pgfree",
	"pgactivate",
	"pgdeactivate",
	"pglazyfree",

	"pgfault",
	"pgmajfault",
	"pglazyfreed",

	"pgrefill",
	"pgsteal_kswapd",
	"pgsteal_direct",
	"pgscan_kswapd",
	"pgscan_direct",
	"pgscan_direct_throttle",
	"pgscan_anon",
	"pgscan_file",
	"pgsteal_anon",
	"pgsteal_file",

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"pageoutrun",

	"pgrotated",

	"drop_pagecache",
	"drop_slab",
	"oom_kill",

#ifdef CONFIG_NUMA_BALANCING
	"numa_pte_updates",
	"numa_huge_pte_updates",
	"numa_hint_faults",
	"numa_hint_faults_local",
	"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
	"pgmigrate_success",
	"pgmigrate_fail",
#endif
#ifdef CONFIG_COMPACTION
	"compact_migrate_scanned",
	"compact_free_scanned",
	"compact_isolated",
	"compact_stall",
	"compact_fail",
	"compact_success",
	"compact_daemon_wake",
	"compact_daemon_migrate_scanned",
	"compact_daemon_free_scanned",
#endif

#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	"thp_fault_alloc",
	"thp_fault_fallback",
	"thp_fault_fallback_charge",
	"thp_collapse_alloc",
	"thp_collapse_alloc_failed",
	"thp_file_alloc",
	"thp_file_fallback",
	"thp_file_fallback_charge",
	"thp_file_mapped",
	"thp_split_page",
	"thp_split_page_failed",
	"thp_deferred_split_page",
	"thp_split_pmd",
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	"thp_split_pud",
#endif
	"thp_zero_page_alloc",
	"thp_zero_page_alloc_failed",
	"thp_swpout",
	"thp_swpout_fallback",
#endif
#ifdef CONFIG_MEMORY_BALLOON
	"balloon_inflate",
	"balloon_deflate",
#ifdef CONFIG_BALLOON_COMPACTION
	"balloon_migrate",
#endif
#endif /* CONFIG_MEMORY_BALLOON */
#ifdef CONFIG_DEBUG_TLBFLUSH
	"nr_tlb_remote_flush",
	"nr_tlb_remote_flush_received",
	"nr_tlb_local_flush_all",
	"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */

#ifdef CONFIG_DEBUG_VM_VMACACHE
	"vmacache_find_calls",
	"vmacache_find_hits",
#endif
#ifdef CONFIG_SWAP
	"swap_ra",
	"swap_ra_hit",
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */

#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
     defined(CONFIG_PROC_FS)
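/*
 * seq_file helpers that iterate over all online nodes; shared by the
 * buddyinfo, pagetypeinfo, zoneinfo and extfrag interfaces below.
 */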
static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;

	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/*
 * Walk zones in a node and print using a callback.
 * If @assert_populated is true, only use callback for zones that are populated.
 */
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		bool assert_populated, bool nolock,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (assert_populated && !populated_zone(zone))
			continue;

		if (!nolock)
			spin_lock_irqsave(&zone->lock, flags);
		print(m, pgdat, zone);
		if (!nolock)
			spin_unlock_irqrestore(&zone->lock, flags);
	}
}
#endif

#ifdef CONFIG_PROC_FS
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
						struct zone *zone)
{
	int order;

	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
	seq_putc(m, '\n');
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
	return 0;
}

static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int order, mtype;

	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
					pgdat->node_id,
					zone->name,
					migratetype_names[mtype]);
		for (order = 0; order < MAX_ORDER; ++order) {
			unsigned long freecount = 0;
			struct free_area *area;
			struct list_head *curr;
			bool overflow = false;

			area = &(zone->free_area[order]);

			list_for_each(curr, &area->free_list[mtype]) {
				/*
				 * Cap the free_list iteration because it might
				 * be really large and we are under a spinlock
				 * so a long time spent there could trigger a
				 * hard lockup detector. Anyway this is a
				 * debugging tool so knowing there is a handful
				 * of pages of this order should be more than
				 * sufficient.
				 */
				if (++freecount >= 100000) {
					overflow = true;
					break;
				}
			}
			seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
			spin_unlock_irq(&zone->lock);
			cond_resched();
			spin_lock_irq(&zone->lock);
		}
		seq_putc(m, '\n');
	}
}

/* Print out the free pages at each order for each migratetype */
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	int order;
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* Print header */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6d ", order);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);

	return 0;
}

static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mtype;
	unsigned long pfn;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		page = pfn_to_online_page(pfn);
		if (!page)
			continue;

		/* Watch for unexpected holes punched in the memmap */
		if (!memmap_valid_within(pfn, page, zone))
			continue;

		if (page_zone(page) != zone)
			continue;

		mtype = get_pageblock_migratetype(page);

		if (mtype < MIGRATE_TYPES)
			count[mtype]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12lu ", count[mtype]);
	seq_putc(m, '\n');
}

/* Print out the number of pageblocks for each migratetype */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	int mtype;
	pg_data_t *pgdat = (pg_data_t *)arg;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');
	walk_zones_in_node(m, pgdat, true, false,
		pagetypeinfo_showblockcount_print);

	return 0;
}

/*
 * Print out the number of pageblocks for each migratetype that contain pages
 * of other types. This gives an indication of how well fallbacks are being
 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
 * to determine what is going on.
 */
static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
{
#ifdef CONFIG_PAGE_OWNER
	int mtype;

	if (!static_branch_unlikely(&page_owner_inited))
		return;

	drain_all_pages(NULL);

	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, true, true,
		pagetypeinfo_showmixedcount_print);
#endif /* CONFIG_PAGE_OWNER */
}

/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	seq_printf(m, "Page block order: %d\n", pageblock_order);
	seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
	seq_putc(m, '\n');
	pagetypeinfo_showfree(m, pgdat);
	pagetypeinfo_showblockcount(m, pgdat);
	pagetypeinfo_showmixedcount(m, pgdat);

	return 0;
}

static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};

static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
{
	int zid;

	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
		struct zone *compare = &pgdat->node_zones[zid];

		if (populated_zone(compare))
			return zone == compare;
	}

	return false;
}

static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
							struct zone *zone)
{
	int i;

	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
	if (is_zone_first_populated(pgdat, zone)) {
		seq_printf(m, "\n  per-node stats");
		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
			unsigned long pages = node_page_state_pages(pgdat, i);

			if (vmstat_item_print_in_thp(i))
				pages /= HPAGE_PMD_NR;
			seq_printf(m, "\n      %-12s %lu", node_stat_name(i),
				   pages);
		}
	}
	seq_printf(m,
		   "\n  pages free     %lu"
		   "\n        min      %lu"
		   "\n        low      %lu"
		   "\n        high     %lu"
		   "\n        spanned  %lu"
		   "\n        present  %lu"
		   "\n        managed  %lu",
		   zone_page_state(zone, NR_FREE_PAGES),
		   min_wmark_pages(zone),
		   low_wmark_pages(zone),
		   high_wmark_pages(zone),
		   zone->spanned_pages,
		   zone->present_pages,
		   zone_managed_pages(zone));

	seq_printf(m,
		   "\n        protection: (%ld",
		   zone->lowmem_reserve[0]);
	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
	seq_putc(m, ')');

	/* If unpopulated, no other information is useful */
	if (!populated_zone(zone)) {
		seq_putc(m, '\n');
		return;
	}

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		seq_printf(m, "\n      %-12s %lu", zone_stat_name(i),
			   zone_page_state(zone, i));

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
			   zone_numa_state_snapshot(zone, i));
#endif

	seq_printf(m, "\n  pagesets");
	for_each_online_cpu(i) {
		struct per_cpu_pageset *pageset;

		pageset = per_cpu_ptr(zone->pageset, i);
		seq_printf(m,
			   "\n    cpu: %i"
			   "\n              count: %i"
			   "\n              high:  %i"
			   "\n              batch: %i",
			   i,
			   pageset->pcp.count,
			   pageset->pcp.high,
			   pageset->pcp.batch);
#ifdef CONFIG_SMP
		seq_printf(m, "\n  vm stats threshold: %d",
				pageset->stat_threshold);
#endif
	}
	seq_printf(m,
		   "\n  node_unreclaimable:  %u"
		   "\n  start_pfn:           %lu",
		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
		   zone->zone_start_pfn);
	seq_putc(m, '\n');
}

/*
 * Output information about zones in @pgdat.  All zones are printed regardless
 * of whether they are populated or not: lowmem_reserve_ratio operates on the
 * range of all zones, so it doesn't need to be updated when memory gets
 * hot-plugged or hot-removed.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
	return 0;
}

static const struct seq_operations zoneinfo_op = {
	.start	= frag_start, /* iterate over all zones. The same as in
			       * fragmentation. */
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};

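/* Total number of lines emitted by /proc/vmstat. */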
#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
			 NR_VM_NUMA_STAT_ITEMS + \
			 NR_VM_NODE_STAT_ITEMS + \
			 NR_VM_WRITEBACK_STAT_ITEMS + \
			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
			  NR_VM_EVENT_ITEMS : 0))

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
	int i;

	if (*pos >= NR_VMSTAT_ITEMS)
		return NULL;

	BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
	v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_zone_page_state(i);
	v += NR_VM_ZONE_STAT_ITEMS;

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		v[i] = global_numa_state(i);
	v += NR_VM_NUMA_STAT_ITEMS;
#endif

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
		v[i] = global_node_page_state_pages(i);
		if (vmstat_item_print_in_thp(i))
			v[i] /= HPAGE_PMD_NR;
	}
	v += NR_VM_NODE_STAT_ITEMS;

	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
			    v + NR_DIRTY_THRESHOLD);
	v += NR_VM_WRITEBACK_STAT_ITEMS;

#ifdef CONFIG_VM_EVENT_COUNTERS
	all_vm_events(v);
	v[PGPGIN] /= 2;		/* sectors -> kbytes */
	v[PGPGOUT] /= 2;
#endif
	return (unsigned long *)m->private + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= NR_VMSTAT_ITEMS)
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_puts(m, vmstat_text[off]);
	seq_put_decimal_ull(m, " ", *l);
	seq_putc(m, '\n');

	if (off == NR_VMSTAT_ITEMS - 1) {
		/*
		 * We've come to the end - add any deprecated counters to avoid
		 * breaking userspace which might depend on them being present.
		 */
		seq_puts(m, "nr_unstable 0\n");
	}
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

#ifdef CONFIG_PROC_FS
static void refresh_vm_stats(struct work_struct *work)
{
	refresh_cpu_vm_stats(true);
}

int vmstat_refresh(struct ctl_table *table, int write,
		   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	long val;
	int err;
	int i;

	/*
	 * The regular update, every sysctl_stat_interval, may come later
	 * than expected: leaving a significant amount in per_cpu buckets.
	 * This is particularly misleading when checking a quantity of HUGE
	 * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
	 * which can equally be echo'ed to or cat'ted from (by root),
	 * can be used to update the stats just before reading them.
	 *
	 * Oh, and since global_zone_page_state() etc. are so careful to hide
	 * transiently negative values, report an error here if any of
	 * the stats is negative, so we know to go looking for imbalance.
	 */
	err = schedule_on_each_cpu(refresh_vm_stats);
	if (err)
		return err;
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
		val = atomic_long_read(&vm_zone_stat[i]);
		if (val < 0) {
			pr_warn("%s: %s %ld\n",
				__func__, zone_stat_name(i), val);
			err = -EINVAL;
		}
	}
#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
		val = atomic_long_read(&vm_numa_stat[i]);
		if (val < 0) {
			pr_warn("%s: %s %ld\n",
				__func__, numa_stat_name(i), val);
			err = -EINVAL;
		}
	}
#endif
	if (err)
		return err;
	if (write)
		*ppos += *lenp;
	else
		*lenp = 0;
	return 0;
}
#endif /* CONFIG_PROC_FS */

static void vmstat_update(struct work_struct *w)
{
	if (refresh_cpu_vm_stats(true)) {
		/*
		 * Counters were updated so we expect more updates
		 * to occur shortly. If there were no updates
		 * then we let the work go idle; the shepherd will
		 * requeue it when counters start changing again.
		 */
		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
			this_cpu_ptr(&vmstat_work),
			round_jiffies_relative(sysctl_stat_interval));
	}
}

/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */
static bool need_update(int cpu)
{
	struct zone *zone;

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);

		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
#ifdef CONFIG_NUMA
		BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
#endif

		/*
		 * The fast way of checking if there are any vmstat diffs.
		 */
		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
			       sizeof(p->vm_stat_diff[0])))
			return true;
#ifdef CONFIG_NUMA
		if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
			       sizeof(p->vm_numa_stat_diff[0])))
			return true;
#endif
	}
	return false;
}

/*
 * Switch off vmstat processing and then fold all the remaining differentials
 * until the diffs stay at zero. The function is used by NOHZ and can only be
 * invoked when tick processing is not active.
 */
void quiet_vmstat(void)
{
	if (system_state != SYSTEM_RUNNING)
		return;

	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
		return;

	if (!need_update(smp_processor_id()))
		return;

	/*
	 * Just refresh counters and do not care about the pending delayed
	 * vmstat_update. It doesn't fire that often to matter and canceling
	 * it would be too expensive from this path.
	 * vmstat_shepherd will take care about that for us.
	 */
	refresh_cpu_vm_stats(false);
}

/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for vm statistics updates disabled because of
 * inactivity.
 */
static void vmstat_shepherd(struct work_struct *w);

static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);

static void vmstat_shepherd(struct work_struct *w)
{
	int cpu;

	get_online_cpus();
	/* Check processors whose vmstat worker threads have been disabled */
	for_each_online_cpu(cpu) {
		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);

		if (!delayed_work_pending(dw) && need_update(cpu))
			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
	}
	put_online_cpus();

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

static void __init start_shepherd_timer(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
			vmstat_update);

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

static void __init init_cpu_node_state(void)
{
	int node;

	for_each_online_node(node) {
		if (cpumask_weight(cpumask_of_node(node)) > 0)
			node_set_state(node, N_CPU);
	}
}

static int vmstat_cpu_online(unsigned int cpu)
{
	refresh_zone_stat_thresholds();
	node_set_state(cpu_to_node(cpu), N_CPU);
	return 0;
}

static int vmstat_cpu_down_prep(unsigned int cpu)
{
	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
	return 0;
}

static int vmstat_cpu_dead(unsigned int cpu)
{
	const struct cpumask *node_cpus;
	int node;

	node = cpu_to_node(cpu);

	refresh_zone_stat_thresholds();
	node_cpus = cpumask_of_node(node);
	if (cpumask_weight(node_cpus) > 0)
		return 0;

	node_clear_state(node, N_CPU);
	return 0;
}

#endif /* CONFIG_SMP */

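/* Workqueue on which the per-cpu vmstat workers (and other mm work) run. */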
struct workqueue_struct *mm_percpu_wq;

void __init init_mm_internals(void)
{
	int ret __maybe_unused;

	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);

#ifdef CONFIG_SMP
	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
					NULL, vmstat_cpu_dead);
	if (ret < 0)
		pr_err("vmstat: failed to register 'dead' hotplug state\n");

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
					vmstat_cpu_online,
					vmstat_cpu_down_prep);
	if (ret < 0)
		pr_err("vmstat: failed to register 'online' hotplug state\n");

	get_online_cpus();
	init_cpu_node_state();
	put_online_cpus();

	start_shepherd_timer();
#endif
#ifdef CONFIG_PROC_FS
	proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
	proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
	proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
#endif
}

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the given order.
 */
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{
	/* No free memory is interpreted as all free memory is unusable */
	if (info->free_pages == 0)
		return 1000;

	/*
	 * Index should be a value between 0 and 1. Return a value to 3
	 * decimal places.
	 *
	 * 0 => no fragmentation
	 * 1 => high fragmentation
	 */
	return div_u64((info->free_pages -
			(info->free_blocks_suitable << order)) * 1000ULL,
			info->free_pages);
}

static void unusable_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = unusable_free_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */
static int unusable_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);

	return 0;
}

static const struct seq_operations unusable_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= unusable_show,
};

static int unusable_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &unusable_op);
}

static const struct file_operations unusable_file_ops = {
	.open		= unusable_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static void extfrag_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;

	/* Alloc on stack as interrupts are disabled for zone walk */
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = __fragmentation_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display fragmentation index for orders that allocations would fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);

	return 0;
}

static const struct seq_operations extfrag_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= extfrag_show,
};

static int extfrag_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &extfrag_op);
}

static const struct file_operations extfrag_file_ops = {
	.open		= extfrag_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

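/* Expose both indices under /sys/kernel/debug/extfrag/. */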
static int __init extfrag_debug_init(void)
{
	struct dentry *extfrag_debug_root;

	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);
	if (!extfrag_debug_root)
		return -ENOMEM;

	if (!debugfs_create_file("unusable_index", 0444,
			extfrag_debug_root, NULL, &unusable_file_ops))
		goto fail;

	if (!debugfs_create_file("extfrag_index", 0444,
			extfrag_debug_root, NULL, &extfrag_file_ops))
		goto fail;

	return 0;
fail:
	debugfs_remove_recursive(extfrag_debug_root);
	return -ENOMEM;
}

module_init(extfrag_debug_init);
#endif
2219