// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1999, 2000, 2001, 2002  Rik van Riel
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *		Christoph Lameter <christoph@lameter.com>
 *  Copyright (C) 2008-2014 Christoph Lameter
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/page_ext.h>
#include <linux/page_owner.h>
#include <linux/migrate.h>

#include "internal.h"

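/*
 * Runtime toggling of NUMA allocation statistics via the
 * /proc/sys/vm/numa_stat sysctl.
 */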
#ifdef CONFIG_NUMA
int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;

/* zero numa counters within a zone */
static void zero_zone_numa_counters(struct zone *zone)
{
	int item, cpu;

	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
		atomic_long_set(&zone->vm_numa_event[item], 0);
		for_each_online_cpu(cpu) {
			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item]
						= 0;
		}
	}
}

/* zero numa counters of all the populated zones */
static void zero_zones_numa_counters(void)
{
	struct zone *zone;

	for_each_populated_zone(zone)
		zero_zone_numa_counters(zone);
}

/* zero global numa counters */
static void zero_global_numa_counters(void)
{
	int item;

	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
		atomic_long_set(&vm_numa_event[item], 0);
}

static void invalid_numa_statistics(void)
{
	zero_zones_numa_counters();
	zero_global_numa_counters();
}

static DEFINE_MUTEX(vm_numa_stat_lock);

int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
		void *buffer, size_t *length, loff_t *ppos)
{
	int ret, oldval;

	mutex_lock(&vm_numa_stat_lock);
	if (write)
		oldval = sysctl_vm_numa_stat;
	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
	if (ret || !write)
		goto out;

	if (oldval == sysctl_vm_numa_stat)
		goto out;
	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
		static_branch_enable(&vm_numa_stat_key);
		pr_info("enable numa statistics\n");
	} else {
		static_branch_disable(&vm_numa_stat_key);
		invalid_numa_statistics();
		pr_info("disable numa statistics, and clear numa counters\n");
	}

out:
	mutex_unlock(&vm_numa_stat_lock);
	return ret;
}
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

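/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */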
void all_vm_events(unsigned long *ret)
{
	cpus_read_lock();
	sum_vm_events(ret);
	cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(all_vm_events);

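/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * a copy of the events on another processor.
 */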
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}

#endif /* CONFIG_VM_EVENT_COUNTERS */

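/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */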
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
EXPORT_SYMBOL(vm_node_stat);

#ifdef CONFIG_NUMA
static void fold_vm_zone_numa_events(struct zone *zone)
{
	unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, };
	int cpu;
	enum numa_stat_item item;

	for_each_online_cpu(cpu) {
		struct per_cpu_zonestat *pzstats;

		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
		for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
			zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0);
	}

	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
		zone_numa_event_add(zone_numa_events[item], zone, item);
}

void fold_vm_numa_events(void)
{
	struct zone *zone;

	for_each_populated_zone(zone)
		fold_vm_zone_numa_events(zone);
}
#endif

#ifdef CONFIG_SMP

int calculate_pressure_threshold(struct zone *zone)
{
	int threshold;
	int watermark_distance;

	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark
	 */
	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
	threshold = max(1, (int)(watermark_distance / num_online_cpus()));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

int calculate_normal_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 * fls() is used to have a cheap way for logarithmic scaling.
	 *
	 * Some sample thresholds:
	 *
	 * Threshold	Processors	(fls)	Zonesize	fls(mem)+1
	 * ------------------------------------------------------------------
	 * 8		1		1	0.9-1 GB	4
	 * 16		2		2	0.9-1 GB	4
	 * 20		2		2	1-2 GB		5
	 * 24		2		2	2-4 GB		6
	 * 28		2		2	4-8 GB		7
	 * 32		2		2	8-16 GB		8
	 * 4		2		2	<128M		1
	 * 30		4		3	2-4 GB		5
	 * 48		4		3	8-16 GB		8
	 * 32		8		4	1-2 GB		4
	 * 32		8		4	0.9-1GB		4
	 * 10		16		5	<128M		1
	 * 40		16		5	900M		4
	 * 70		64		7	2-4 GB		5
	 * 84		64		7	4-8 GB		6
	 * 108		512		9	4-8 GB		6
	 * 125		1024		10	8-16 GB		8
	 * 125		1024		10	16-32 GB	9
	 */
	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

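/*
 * Refresh the thresholds for each zone.
 */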
void refresh_zone_stat_thresholds(void)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int cpu;
	int threshold;

	/* Zero current pgdat thresholds */
	for_each_online_pgdat(pgdat) {
		for_each_online_cpu(cpu) {
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
		}
	}

	for_each_populated_zone(zone) {
		struct pglist_data *pgdat = zone->zone_pgdat;
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_normal_threshold(zone);

		for_each_online_cpu(cpu) {
			int pgdat_threshold;

			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
							= threshold;

			/* Base nodestat threshold on the largest populated zone. */
			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
							= max(threshold, pgdat_threshold);
		}

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * the min watermark could already have been breached within
		 * the per-cpu deltas.
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}

void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{
	struct zone *zone;
	int cpu;
	int threshold;
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		zone = &pgdat->node_zones[i];
		if (!zone->percpu_drift_mark)
			continue;

		threshold = (*calculate_pressure)(zone);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
							= threshold;
	}
}

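/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 */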
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			   long delta)
{
	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x;
	long t;

	/*
	 * Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels,
	 * atomicity is provided by IRQs being disabled -- either directly
	 * or through local_lock_irq. On PREEMPT_RT, local_lock_irq only
	 * disables CPU migrations and preemption potentially corrupts a
	 * counter so disable preemption.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(abs(x) > t)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);

	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}
EXPORT_SYMBOL(__mod_zone_page_state);

void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
				long delta)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long x;
	long t;

	if (vmstat_item_in_bytes(item)) {
		/*
		 * Only cgroups use subpage accounting right now; at
		 * the global level, these items still change in
		 * multiples of whole pages. Store them as pages
		 * internally to keep the per-cpu counters compact.
		 */
		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
		delta >>= PAGE_SHIFT;
	}

	/* See __mod_zone_page_state */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(abs(x) > t)) {
		node_page_state_add(x, pgdat, item);
		x = 0;
	}
	__this_cpu_write(*p, x);

	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}
EXPORT_SYMBOL(__mod_node_page_state);

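/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 */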
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	/* See __mod_zone_page_state */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}

	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}

void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));

	/* See __mod_zone_page_state */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v + overstep, pgdat, item);
		__this_cpu_write(*p, -overstep);
	}

	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __inc_node_page_state(struct page *page, enum node_stat_item item)
{
	__inc_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__inc_node_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	/* See __mod_zone_page_state */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v - overstep, zone, item);
		__this_cpu_write(*p, overstep);
	}

	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}

void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));

	/* See __mod_zone_page_state */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v - overstep, pgdat, item);
		__this_cpu_write(*p, overstep);
	}

	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

void __dec_node_page_state(struct page *page, enum node_stat_item item)
{
	__dec_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__dec_node_page_state);

#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
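/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies how overstep should handled:
 *        0       No overstepping
 *        1       Overstepping half of threshold
 *        -1      Overstepping minus half of threshold
 */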
static inline void mod_zone_state(struct zone *zone,
	enum zone_stat_item item, long delta, int overstep_mode)
{
	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to zone counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong CPU if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a zone.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (abs(n) > t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to zone counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		zone_page_state_add(z, zone, item);
}

void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	mod_zone_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_zone_page_state);

static inline void mod_node_state(struct pglist_data *pgdat,
	enum node_stat_item item, int delta, int overstep_mode)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long o, n, t, z;

	if (vmstat_item_in_bytes(item)) {
		/*
		 * Only cgroups use subpage accounting right now; at
		 * the global level, these items still change in
		 * multiples of whole pages. Store them as pages
		 * internally to keep the per-cpu counters compact.
		 */
		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
		delta >>= PAGE_SHIFT;
	}

	do {
		z = 0;  /* overflow to node counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong CPU if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a node.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (abs(n) > t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to node counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		node_page_state_add(z, pgdat, item);
}

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
					long delta)
{
	mod_node_state(pgdat, item, delta, 0);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	mod_node_state(pgdat, item, 1, 1);
}

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_node_page_state);
#else
/*
 * Use interrupt disable to serialize counter updates
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_state);

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
					long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_node_page_state(pgdat, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;
	struct pglist_data *pgdat;

	pgdat = page_pgdat(page);
	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_node_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_node_page_state);
#endif

/*
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */
static int fold_diff(int *zone_diff, int *node_diff)
{
	int i;
	int changes = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (zone_diff[i]) {
			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		if (node_diff[i]) {
			atomic_long_add(node_diff[i], &vm_node_stat[i]);
			changes++;
		}
	return changes;
}

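/*
 * Update the zone counters for the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 *
 * The function returns the number of global counters updated.
 */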
static int refresh_cpu_vm_stats(bool do_pagesets)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
	int changes = 0;

	for_each_populated_zone(zone) {
		struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
#ifdef CONFIG_NUMA
		struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset;
#endif

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0);
			if (v) {

				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
#ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
				__this_cpu_write(pcp->expire, 3);
#endif
			}
		}
#ifdef CONFIG_NUMA

		if (do_pagesets) {
			cond_resched();
			/*
			 * Deal with draining the remote pageset of this
			 * processor
			 *
			 * Check if there are pages remaining in this pageset
			 * if not then there is nothing to expire.
			 */
			if (!__this_cpu_read(pcp->expire) ||
			       !__this_cpu_read(pcp->count))
				continue;

			/*
			 * We never drain zones local to this processor.
			 */
			if (zone_to_nid(zone) == numa_node_id()) {
				__this_cpu_write(pcp->expire, 0);
				continue;
			}

			if (__this_cpu_dec_return(pcp->expire))
				continue;

			if (__this_cpu_read(pcp->count)) {
				drain_zone_pages(zone, this_cpu_ptr(pcp));
				changes++;
			}
		}
#endif
	}

	for_each_online_pgdat(pgdat) {
		struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;

		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
			if (v) {
				atomic_long_add(v, &pgdat->vm_stat[i]);
				global_node_diff[i] += v;
			}
		}
	}

	changes += fold_diff(global_zone_diff, global_node_diff);
	return changes;
}

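/*
 * Fold the data for an offline cpu into the global array.
 * There cannot be any access by the offline cpu and therefore
 * synchronization is simplified.
 */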
void cpu_vm_stats_fold(int cpu)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
		struct per_cpu_zonestat *pzstats;

		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
			if (pzstats->vm_stat_diff[i]) {
				int v;

				v = pzstats->vm_stat_diff[i];
				pzstats->vm_stat_diff[i] = 0;
				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
			}
		}
#ifdef CONFIG_NUMA
		for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
			if (pzstats->vm_numa_event[i]) {
				unsigned long v;

				v = pzstats->vm_numa_event[i];
				pzstats->vm_numa_event[i] = 0;
				zone_numa_event_add(v, zone, i);
			}
		}
#endif
	}

	for_each_online_pgdat(pgdat) {
		struct per_cpu_nodestat *p;

		p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);

		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
			if (p->vm_node_stat_diff[i]) {
				int v;

				v = p->vm_node_stat_diff[i];
				p->vm_node_stat_diff[i] = 0;
				atomic_long_add(v, &pgdat->vm_stat[i]);
				global_node_diff[i] += v;
			}
	}

	fold_diff(global_zone_diff, global_node_diff);
}

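/*
 * This is only called if !populated_zone(zone), which implies no other users
 * of pzstats->vm_stat_diff[] exist.
 */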
void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
{
	unsigned long v;
	int i;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
		if (pzstats->vm_stat_diff[i]) {
			v = pzstats->vm_stat_diff[i];
			pzstats->vm_stat_diff[i] = 0;
			zone_page_state_add(v, zone, i);
		}
	}

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
		if (pzstats->vm_numa_event[i]) {
			v = pzstats->vm_numa_event[i];
			pzstats->vm_numa_event[i] = 0;
			zone_numa_event_add(v, zone, i);
		}
	}
#endif
}
#endif

#ifdef CONFIG_NUMA
/*
 * Determine the per node value of a stat item. This function
 * is called frequently in a NUMA machine, so try to be as
 * frugal as possible.
 */
unsigned long sum_zone_node_page_state(int node,
				 enum zone_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	int i;
	unsigned long count = 0;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_page_state(zones + i, item);

	return count;
}

/* Determine the per node value of a numa stat item. */
unsigned long sum_zone_numa_event_state(int node,
				 enum numa_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	unsigned long count = 0;
	int i;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_numa_event_state(zones + i, item);

	return count;
}

/*
 * Determine the per node value of a stat item.
 */
unsigned long node_page_state_pages(struct pglist_data *pgdat,
				    enum node_stat_item item)
{
	long x = atomic_long_read(&pgdat->vm_stat[item]);
#ifdef CONFIG_SMP
	if (x < 0)
		x = 0;
#endif
	return x;
}

unsigned long node_page_state(struct pglist_data *pgdat,
			      enum node_stat_item item)
{
	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));

	return node_page_state_pages(pgdat, item);
}
#endif

#ifdef CONFIG_COMPACTION

struct contig_page_info {
	unsigned long free_pages;
	unsigned long free_blocks_total;
	unsigned long free_blocks_suitable;
};

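/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * done in a separate fragmentation index calculation.
 */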
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{
	unsigned int order;

	info->free_pages = 0;
	info->free_blocks_total = 0;
	info->free_blocks_suitable = 0;

	for (order = 0; order < MAX_ORDER; order++) {
		unsigned long blocks;

		/*
		 * Count number of free blocks.
		 *
		 * Access to nr_free is lockless as nr_free is used only for
		 * diagnostic purposes. Use data_race to avoid KCSAN warning.
		 */
		blocks = data_race(zone->free_area[order].nr_free);
		info->free_blocks_total += blocks;

		/* Count free base pages */
		info->free_pages += blocks << order;

		/* Count the suitable free blocks */
		if (order >= suitable_order)
			info->free_blocks_suitable += blocks <<
						(order - suitable_order);
	}
}

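/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used
 */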
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

	if (WARN_ON_ONCE(order >= MAX_ORDER))
		return 0;

	if (!info->free_blocks_total)
		return 0;

	/* Fragmentation index only makes sense when a request would fail */
	if (info->free_blocks_suitable)
		return -1000;

	/*
	 * Index is between 0 and 1 so return within 3 decimal places
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
	return 1000 - div_u64( (1000+
		(div_u64(info->free_pages * 1000ULL, requested))),
		info->free_blocks_total);
}

/*
 * Calculates external fragmentation within a zone wrt the given order.
 * It is defined as the percentage of pages found in blocks of size
 * less than 1 << order. It returns values in range [0, 100].
 */
unsigned int extfrag_for_order(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	if (info.free_pages == 0)
		return 0;

	return div_u64((info.free_pages -
			(info.free_blocks_suitable << order)) * 100,
			info.free_pages);
}

/* Same as __fragmentation_index but allocs contig_page_info on stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
    defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",

const char * const vmstat_text[] = {
	/* enum zone_stat_item counters */
	"nr_free_pages",
	"nr_zone_inactive_anon",
	"nr_zone_active_anon",
	"nr_zone_inactive_file",
	"nr_zone_active_file",
	"nr_zone_unevictable",
	"nr_zone_write_pending",
	"nr_mlock",
	"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
	"nr_zspages",
#endif
	"nr_free_cma",

	/* enum numa_stat_item counters */
#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif

	/* enum node_stat_item counters */
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_isolated_anon",
	"nr_isolated_file",
	"workingset_nodes",
	"workingset_refault_anon",
	"workingset_refault_file",
	"workingset_activate_anon",
	"workingset_activate_file",
	"workingset_restore_anon",
	"workingset_restore_file",
	"workingset_nodereclaim",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_writeback_temp",
	"nr_shmem",
	"nr_shmem_hugepages",
	"nr_shmem_pmdmapped",
	"nr_file_hugepages",
	"nr_file_pmdmapped",
	"nr_anon_transparent_hugepages",
	"nr_vmscan_write",
	"nr_vmscan_immediate_reclaim",
	"nr_dirtied",
	"nr_written",
	"nr_throttled_written",
	"nr_kernel_misc_reclaimable",
	"nr_foll_pin_acquired",
	"nr_foll_pin_released",
	"nr_kernel_stack",
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
	"nr_shadow_call_stack",
#endif
	"nr_page_table_pages",
#ifdef CONFIG_SWAP
	"nr_swapcached",
#endif
#ifdef CONFIG_NUMA_BALANCING
	"pgpromote_success",
#endif

	/* enum writeback_stat_item counters */
	"nr_dirty_threshold",
	"nr_dirty_background_threshold",

#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
	/* enum vm_event_item counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")
	TEXTS_FOR_ZONES("allocstall")
	TEXTS_FOR_ZONES("pgskip")

	"pgfree",
	"pgactivate",
	"pgdeactivate",
	"pglazyfree",

	"pgfault",
	"pgmajfault",
	"pglazyfreed",

	"pgrefill",
	"pgreuse",
	"pgsteal_kswapd",
	"pgsteal_direct",
	"pgdemote_kswapd",
	"pgdemote_direct",
	"pgscan_kswapd",
	"pgscan_direct",
	"pgscan_direct_throttle",
	"pgscan_anon",
	"pgscan_file",
	"pgsteal_anon",
	"pgsteal_file",

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"pageoutrun",

	"pgrotated",

	"drop_pagecache",
	"drop_slab",
	"oom_kill",

#ifdef CONFIG_NUMA_BALANCING
	"numa_pte_updates",
	"numa_huge_pte_updates",
	"numa_hint_faults",
	"numa_hint_faults_local",
	"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
	"pgmigrate_success",
	"pgmigrate_fail",
	"thp_migration_success",
	"thp_migration_fail",
	"thp_migration_split",
#endif
#ifdef CONFIG_COMPACTION
	"compact_migrate_scanned",
	"compact_free_scanned",
	"compact_isolated",
	"compact_stall",
	"compact_fail",
	"compact_success",
	"compact_daemon_wake",
	"compact_daemon_migrate_scanned",
	"compact_daemon_free_scanned",
#endif

#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
#ifdef CONFIG_CMA
	"cma_alloc_success",
	"cma_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	"thp_fault_alloc",
	"thp_fault_fallback",
	"thp_fault_fallback_charge",
	"thp_collapse_alloc",
	"thp_collapse_alloc_failed",
	"thp_file_alloc",
	"thp_file_fallback",
	"thp_file_fallback_charge",
	"thp_file_mapped",
	"thp_split_page",
	"thp_split_page_failed",
	"thp_deferred_split_page",
	"thp_split_pmd",
	"thp_scan_exceed_none_pte",
	"thp_scan_exceed_swap_pte",
	"thp_scan_exceed_share_pte",
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	"thp_split_pud",
#endif
	"thp_zero_page_alloc",
	"thp_zero_page_alloc_failed",
	"thp_swpout",
	"thp_swpout_fallback",
#endif
#ifdef CONFIG_MEMORY_BALLOON
	"balloon_inflate",
	"balloon_deflate",
#ifdef CONFIG_BALLOON_COMPACTION
	"balloon_migrate",
#endif
#endif
#ifdef CONFIG_DEBUG_TLBFLUSH
	"nr_tlb_remote_flush",
	"nr_tlb_remote_flush_received",
	"nr_tlb_local_flush_all",
	"nr_tlb_local_flush_one",
#endif

#ifdef CONFIG_DEBUG_VM_VMACACHE
	"vmacache_find_calls",
	"vmacache_find_hits",
#endif
#ifdef CONFIG_SWAP
	"swap_ra",
	"swap_ra_hit",
#ifdef CONFIG_KSM
	"ksm_swpin_copy",
#endif
#endif
#ifdef CONFIG_X86
	"direct_map_level2_splits",
	"direct_map_level3_splits",
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
};
#endif

#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
     defined(CONFIG_PROC_FS)
static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;

	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

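/*
 * Walk zones in a node and print using a callback.
 * If @assert_populated is true, only use callback for zones that are populated.
 */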
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		bool assert_populated, bool nolock,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (assert_populated && !populated_zone(zone))
			continue;

		if (!nolock)
			spin_lock_irqsave(&zone->lock, flags);
		print(m, pgdat, zone);
		if (!nolock)
			spin_unlock_irqrestore(&zone->lock, flags);
	}
}
#endif

#ifdef CONFIG_PROC_FS
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
						struct zone *zone)
{
	int order;

	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (order = 0; order < MAX_ORDER; ++order)
		/*
		 * Access to nr_free is lockless as nr_free is used only for
		 * diagnostic purposes. Use data_race to avoid KCSAN warning.
		 */
		seq_printf(m, "%6lu ", data_race(zone->free_area[order].nr_free));
	seq_putc(m, '\n');
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
	return 0;
}

static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int order, mtype;

	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
					pgdat->node_id,
					zone->name,
					migratetype_names[mtype]);
		for (order = 0; order < MAX_ORDER; ++order) {
			unsigned long freecount = 0;
			struct free_area *area;
			struct list_head *curr;
			bool overflow = false;

			area = &(zone->free_area[order]);

			list_for_each(curr, &area->free_list[mtype]) {
				/*
				 * Cap the free_list iteration because it might
				 * be really large and we are under a spinlock
				 * so a long time spent here could trigger a
				 * hard lockup detector. This is a quite
				 * inaccurate statistic anyway, so triggering
				 * the watchdog for it is not worthwhile.
				 */
				if (++freecount >= 100000) {
					overflow = true;
					break;
				}
			}
			seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
			spin_unlock_irq(&zone->lock);
			cond_resched();
			spin_lock_irq(&zone->lock);
		}
		seq_putc(m, '\n');
	}
}

/* Print out the free pages at each order for each migratetype */
static void pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	int order;
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* Print header */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6d ", order);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
}

static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mtype;
	unsigned long pfn;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		page = pfn_to_online_page(pfn);
		if (!page)
			continue;

		if (page_zone(page) != zone)
			continue;

		mtype = get_pageblock_migratetype(page);

		if (mtype < MIGRATE_TYPES)
			count[mtype]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12lu ", count[mtype]);
	seq_putc(m, '\n');
}

/* Print out the number of pageblocks for each migratetype */
static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	int mtype;
	pg_data_t *pgdat = (pg_data_t *)arg;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');
	walk_zones_in_node(m, pgdat, true, false,
		pagetypeinfo_showblockcount_print);
}

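/*
 * Print out the number of pageblocks for each migratetype that contain pages
 * of other types. This gives an indication of how well fallbacks are being
 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
 * to determine what is going on.
 */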
static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
{
#ifdef CONFIG_PAGE_OWNER
	int mtype;

	if (!static_branch_unlikely(&page_owner_inited))
		return;

	drain_all_pages(NULL);

	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, true, true,
		pagetypeinfo_showmixedcount_print);
#endif /* CONFIG_PAGE_OWNER */
}

/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	seq_printf(m, "Page block order: %d\n", pageblock_order);
	seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
	seq_putc(m, '\n');
	pagetypeinfo_showfree(m, pgdat);
	pagetypeinfo_showblockcount(m, pgdat);
	pagetypeinfo_showmixedcount(m, pgdat);

	return 0;
}

static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};

static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
{
	int zid;

	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
		struct zone *compare = &pgdat->node_zones[zid];

		if (populated_zone(compare))
			return zone == compare;
	}

	return false;
}

static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
							struct zone *zone)
{
	int i;
	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
	if (is_zone_first_populated(pgdat, zone)) {
		seq_printf(m, "\n per-node stats");
		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
			unsigned long pages = node_page_state_pages(pgdat, i);

			if (vmstat_item_print_in_thp(i))
				pages /= HPAGE_PMD_NR;
			seq_printf(m, "\n %-12s %lu", node_stat_name(i),
				   pages);
		}
	}
	seq_printf(m,
		   "\n pages free %lu"
		   "\n boost %lu"
		   "\n min %lu"
		   "\n low %lu"
		   "\n high %lu"
		   "\n spanned %lu"
		   "\n present %lu"
		   "\n managed %lu"
		   "\n cma %lu",
		   zone_page_state(zone, NR_FREE_PAGES),
		   zone->watermark_boost,
		   min_wmark_pages(zone),
		   low_wmark_pages(zone),
		   high_wmark_pages(zone),
		   zone->spanned_pages,
		   zone->present_pages,
		   zone_managed_pages(zone),
		   zone_cma_pages(zone));

	seq_printf(m,
		   "\n protection: (%ld",
		   zone->lowmem_reserve[0]);
	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
	seq_putc(m, ')');

	/* If unpopulated, no other information is useful */
	if (!populated_zone(zone)) {
		seq_putc(m, '\n');
		return;
	}

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		seq_printf(m, "\n %-12s %lu", zone_stat_name(i),
			   zone_page_state(zone, i));

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
		seq_printf(m, "\n %-12s %lu", numa_stat_name(i),
			   zone_numa_event_state(zone, i));
#endif

	seq_printf(m, "\n pagesets");
	for_each_online_cpu(i) {
		struct per_cpu_pages *pcp;
		struct per_cpu_zonestat __maybe_unused *pzstats;

		pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
		seq_printf(m,
			   "\n cpu: %i"
			   "\n count: %i"
			   "\n high: %i"
			   "\n batch: %i",
			   i,
			   pcp->count,
			   pcp->high,
			   pcp->batch);
#ifdef CONFIG_SMP
		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
		seq_printf(m, "\n vm stats threshold: %d",
			   pzstats->stat_threshold);
#endif
	}
	seq_printf(m,
		   "\n node_unreclaimable: %u"
		   "\n start_pfn: %lu",
		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
		   zone->zone_start_pfn);
	seq_putc(m, '\n');
}

/*
 * Output information about zones in @pgdat.  All zones are printed regardless
 * of whether they are populated or not: lowmem_reserve_ratio operates on the
 * range of all zones, so it's useful for those tools that know the zone index.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
	return 0;
}

static const struct seq_operations zoneinfo_op = {
	.start	= frag_start, /* iterate over all zones. The same as in
			       * fragmentation. */
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};

#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
			 NR_VM_NUMA_EVENT_ITEMS + \
			 NR_VM_NODE_STAT_ITEMS + \
			 NR_VM_WRITEBACK_STAT_ITEMS + \
			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
			  NR_VM_EVENT_ITEMS : 0))

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
	int i;

	if (*pos >= NR_VMSTAT_ITEMS)
		return NULL;

	BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
	fold_vm_numa_events();
	v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_zone_page_state(i);
	v += NR_VM_ZONE_STAT_ITEMS;

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
		v[i] = global_numa_event_state(i);
	v += NR_VM_NUMA_EVENT_ITEMS;
#endif

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
		v[i] = global_node_page_state_pages(i);
		if (vmstat_item_print_in_thp(i))
			v[i] /= HPAGE_PMD_NR;
	}
	v += NR_VM_NODE_STAT_ITEMS;

	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
			    v + NR_DIRTY_THRESHOLD);
	v += NR_VM_WRITEBACK_STAT_ITEMS;

#ifdef CONFIG_VM_EVENT_COUNTERS
	all_vm_events(v);
	v[PGPGIN] /= 2;		/* sectors -> kbytes */
	v[PGPGOUT] /= 2;
#endif
	return (unsigned long *)m->private + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= NR_VMSTAT_ITEMS)
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_puts(m, vmstat_text[off]);
	seq_put_decimal_ull(m, " ", *l);
	seq_putc(m, '\n');

	if (off == NR_VMSTAT_ITEMS - 1) {
		/*
		 * We've come to the end - add any deprecated counters to avoid
		 * breaking userspace which might depend on them being present.
		 */
		seq_puts(m, "nr_unstable 0\n");
	}
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

#ifdef CONFIG_PROC_FS
static void refresh_vm_stats(struct work_struct *work)
{
	refresh_cpu_vm_stats(true);
}

int vmstat_refresh(struct ctl_table *table, int write,
		   void *buffer, size_t *lenp, loff_t *ppos)
{
	long val;
	int err;
	int i;

	/*
	 * The regular update, every sysctl_stat_interval, may come later
	 * than expected: leaving a significant amount in per_cpu buckets.
	 * This is particularly misleading when checking a quantity of HUGE
	 * pages, immediately after running a test. /proc/sys/vm/stat_refresh,
	 * which can equally be echo'ed to or cat'ted from (by root),
	 * can be used to update the stats just before reading them.
	 *
	 * Since global_zone_page_state() etc. are so careful to hide
	 * transiently negative values, warn here if any global stat has
	 * gone negative: that indicates an accounting imbalance.
	 */
	err = schedule_on_each_cpu(refresh_vm_stats);
	if (err)
		return err;
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
		/*
		 * Skip checking stats known to go negative occasionally.
		 */
		switch (i) {
		case NR_ZONE_WRITE_PENDING:
		case NR_FREE_CMA_PAGES:
			continue;
		}
		val = atomic_long_read(&vm_zone_stat[i]);
		if (val < 0) {
			pr_warn("%s: %s %ld\n",
				__func__, zone_stat_name(i), val);
		}
	}
	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
		/*
		 * Skip checking stats known to go negative occasionally.
		 */
		switch (i) {
		case NR_WRITEBACK:
			continue;
		}
		val = atomic_long_read(&vm_node_stat[i]);
		if (val < 0) {
			pr_warn("%s: %s %ld\n",
				__func__, node_stat_name(i), val);
		}
	}
	if (write)
		*ppos += *lenp;
	else
		*lenp = 0;
	return 0;
}
#endif /* CONFIG_PROC_FS */

static void vmstat_update(struct work_struct *w)
{
	if (refresh_cpu_vm_stats(true)) {
		/*
		 * Counters were updated so we expect more updates
		 * to occur in the future. Keep on running the
		 * update worker thread.
		 */
		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
				this_cpu_ptr(&vmstat_work),
				round_jiffies_relative(sysctl_stat_interval));
	}
}

/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */
static bool need_update(int cpu)
{
	pg_data_t *last_pgdat = NULL;
	struct zone *zone;

	for_each_populated_zone(zone) {
		struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
		struct per_cpu_nodestat *n;

		/*
		 * The fast way of checking if there are any vmstat diffs.
		 */
		if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff)))
			return true;

		if (last_pgdat == zone->zone_pgdat)
			continue;
		last_pgdat = zone->zone_pgdat;
		n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
		if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff)))
			return true;
	}
	return false;
}

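/*
 * Switch off vmstat processing and then fold all the remaining differentials
 * until the diffs stay at zero. The function is used by NOHZ and can only be
 * invoked when tick processing is not active.
 */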
void quiet_vmstat(void)
{
	if (system_state != SYSTEM_RUNNING)
		return;

	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
		return;

	if (!need_update(smp_processor_id()))
		return;

	/*
	 * Just refresh counters and do not care about the pending delayed
	 * vmstat_update. It doesn't fire that often to matter and canceling
	 * it would be too expensive from this path.
	 * vmstat_shepherd will take care about that for us.
	 */
	refresh_cpu_vm_stats(false);
}

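/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for vm statistics updates disabled because of
 * inactivity.
 */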
static void vmstat_shepherd(struct work_struct *w);

static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);

static void vmstat_shepherd(struct work_struct *w)
{
	int cpu;

	cpus_read_lock();
	/* Check processors whose vmstat worker threads have been disabled */
	for_each_online_cpu(cpu) {
		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);

		if (!delayed_work_pending(dw) && need_update(cpu))
			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);

		cond_resched();
	}
	cpus_read_unlock();

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

static void __init start_shepherd_timer(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
			vmstat_update);

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

static void __init init_cpu_node_state(void)
{
	int node;

	for_each_online_node(node) {
		if (cpumask_weight(cpumask_of_node(node)) > 0)
			node_set_state(node, N_CPU);
	}
}

static int vmstat_cpu_online(unsigned int cpu)
{
	refresh_zone_stat_thresholds();

	if (!node_state(cpu_to_node(cpu), N_CPU)) {
		node_set_state(cpu_to_node(cpu), N_CPU);
		set_migration_target_nodes();
	}

	return 0;
}

static int vmstat_cpu_down_prep(unsigned int cpu)
{
	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
	return 0;
}

static int vmstat_cpu_dead(unsigned int cpu)
{
	const struct cpumask *node_cpus;
	int node;

	node = cpu_to_node(cpu);

	refresh_zone_stat_thresholds();
	node_cpus = cpumask_of_node(node);
	if (cpumask_weight(node_cpus) > 0)
		return 0;

	node_clear_state(node, N_CPU);
	set_migration_target_nodes();

	return 0;
}

#endif

struct workqueue_struct *mm_percpu_wq;

void __init init_mm_internals(void)
{
	int ret __maybe_unused;

	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);

#ifdef CONFIG_SMP
	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
					NULL, vmstat_cpu_dead);
	if (ret < 0)
		pr_err("vmstat: failed to register 'dead' hotplug state\n");

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
					vmstat_cpu_online,
					vmstat_cpu_down_prep);
	if (ret < 0)
		pr_err("vmstat: failed to register 'online' hotplug state\n");

	cpus_read_lock();
	init_cpu_node_state();
	cpus_read_unlock();

	start_shepherd_timer();
#endif
#if defined(CONFIG_MIGRATION) && defined(CONFIG_HOTPLUG_CPU)
	migrate_on_reclaim_init();
#endif
#ifdef CONFIG_PROC_FS
	proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
	proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
	proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
#endif
}

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)

/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{
	/* No free memory is interpreted as all free memory is unusable */
	if (info->free_pages == 0)
		return 1000;

	/*
	 * Index should be a value between 0 and 1. Return a value to 3
	 * decimal places.
	 *
	 * 0 => no fragmentation
	 * 1 => high fragmentation
	 */
	return div_u64((info->free_pages -
			(info->free_blocks_suitable << order)) * 1000ULL,
			info->free_pages);

}

static void unusable_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = unusable_free_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

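/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */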
static int unusable_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);

	return 0;
}

static const struct seq_operations unusable_sops = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= unusable_show,
};

DEFINE_SEQ_ATTRIBUTE(unusable);

static void extfrag_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;

	/* Alloc on stack as interrupts are disabled for zone walk */
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = __fragmentation_index(order, &info);
		seq_printf(m, "%2d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display fragmentation index for orders that allocations would fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);

	return 0;
}

static const struct seq_operations extfrag_sops = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= extfrag_show,
};

DEFINE_SEQ_ATTRIBUTE(extfrag);

static int __init extfrag_debug_init(void)
{
	struct dentry *extfrag_debug_root;

	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);

	debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,
			    &unusable_fops);

	debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,
			    &extfrag_fops);

	return 0;
}

module_init(extfrag_debug_init);
#endif