// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  zoned VM statistics
 *  Copyright (C) 2006 Silicon Graphics, Inc.,
 *		Christoph Lameter <christoph@lameter.com>
 *  Copyright (C) 2008-2014 Christoph Lameter
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/page_ext.h>
#include <linux/page_owner.h>

#include "internal.h"

#ifdef CONFIG_NUMA
int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;

/* zero numa counters within a zone */
static void zero_zone_numa_counters(struct zone *zone)
{
	int item, cpu;

	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++) {
		atomic_long_set(&zone->vm_numa_event[item], 0);
		for_each_online_cpu(cpu) {
			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_numa_event[item]
						= 0;
		}
	}
}

/* zero numa counters of all the populated zones */
static void zero_zones_numa_counters(void)
{
	struct zone *zone;

	for_each_populated_zone(zone)
		zero_zone_numa_counters(zone);
}

/* zero global numa counters */
static void zero_global_numa_counters(void)
{
	int item;

	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
		atomic_long_set(&vm_numa_event[item], 0);
}

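/* Drop all cached NUMA event state, both per-zone/per-cpu and global. */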
static void invalid_numa_statistics(void)
{
	zero_zones_numa_counters();
	zero_global_numa_counters();
}

static DEFINE_MUTEX(vm_numa_stat_lock);

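/*
 * Handler for the vm.numa_stat sysctl: flips the static branch that gates
 * NUMA event counting and, when disabling, clears the now-stale counters
 * so that a later re-enable starts counting from zero.
 */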
int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
		void *buffer, size_t *length, loff_t *ppos)
{
	int ret, oldval;

	mutex_lock(&vm_numa_stat_lock);
	if (write)
		oldval = sysctl_vm_numa_stat;
	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
	if (ret || !write)
		goto out;

	if (oldval == sysctl_vm_numa_stat)
		goto out;
	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
		static_branch_enable(&vm_numa_stat_key);
		pr_info("enable numa statistics\n");
	} else {
		static_branch_disable(&vm_numa_stat_key);
		invalid_numa_statistics();
		pr_info("disable numa statistics, and clear numa counters\n");
	}

out:
	mutex_unlock(&vm_numa_stat_lock);
	return ret;
}
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

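/* Sum the per-cpu event counters of all online CPUs into *ret. */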
static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}

/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * in between and above the summed up counter.
 */
void all_vm_events(unsigned long *ret)
{
	cpus_read_lock();
	sum_vm_events(ret);
	cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(all_vm_events);

/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * but keeps the global counts constant.
 */
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}

#endif /* CONFIG_VM_EVENT_COUNTERS */

/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
EXPORT_SYMBOL(vm_node_stat);

#ifdef CONFIG_SMP

int calculate_pressure_threshold(struct zone *zone)
{
	int threshold;
	int watermark_distance;

	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark
	 */
	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
	threshold = max(1, (int)(watermark_distance / num_online_cpus()));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

int calculate_normal_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone. More memory means that we can defer updates for
	 * longer, more processors could lead to more contention.
	 * fls() is used to have a cheap way of logarithmic scaling.
	 *
	 * The resulting threshold ranges from a handful of pages on small
	 * uniprocessor systems up to the hard cap of 125 applied below on
	 * machines with many CPUs and large zones.
	 */
	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

/*
 * Refresh the thresholds for each zone.
 */
void refresh_zone_stat_thresholds(void)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int cpu;
	int threshold;

	/* Zero current pgdat thresholds */
	for_each_online_pgdat(pgdat) {
		for_each_online_cpu(cpu) {
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
		}
	}

	for_each_populated_zone(zone) {
		struct pglist_data *pgdat = zone->zone_pgdat;
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_normal_threshold(zone);

		for_each_online_cpu(cpu) {
			int pgdat_threshold;

			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
							= threshold;

			/* Base nodestat threshold on the largest populated zone. */
			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
				= max(threshold, pgdat_threshold);
		}

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * it is too low: the per-cpu counters can drift by up to
		 * threshold pages per CPU before being folded back.
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}

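/*
 * Override the per-cpu stat thresholds of every zone in a node that carries
 * a percpu_drift_mark, e.g. with the reduced "pressure" threshold while
 * reclaim is active on the node.
 */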
void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{
	struct zone *zone;
	int cpu;
	int threshold;
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		zone = &pgdat->node_zones[i];
		if (!zone->percpu_drift_mark)
			continue;

		threshold = (*calculate_pressure)(zone);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->per_cpu_zonestats, cpu)->stat_threshold
							= threshold;
	}
}

/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 */
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			   long delta)
{
	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x;
	long t;

	/*
	 * Accurate vmstat updates require a RMW. On !PREEMPT_RT kernels,
	 * atomicity is provided by IRQs being disabled -- either explicitly
	 * or via local_lock_irq. On PREEMPT_RT, local_lock_irq only disables
	 * CPU migrations and preemption potentially corrupts a counter so
	 * disable preemption.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(abs(x) > t)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);

	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}
EXPORT_SYMBOL(__mod_zone_page_state);

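/* Node counterpart of __mod_zone_page_state; byte-based items are stored as pages. */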
void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
			   long delta)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long x;
	long t;

	if (vmstat_item_in_bytes(item)) {
		/*
		 * Only cgroups use subpage accounting right now; at
		 * the global level, these items still change in
		 * multiples of whole pages. Store them as pages
		 * internally to keep the per-cpu counters compact.
		 */
		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
		delta >>= PAGE_SHIFT;
	}

	/* See __mod_zone_page_state */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(abs(x) > t)) {
		node_page_state_add(x, pgdat, item);
		x = 0;
	}
	__this_cpu_write(*p, x);

	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}
EXPORT_SYMBOL(__mod_node_page_state);

/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a useful way here.
 */
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	/* See __mod_zone_page_state */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}

	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}

void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));

	/* See __mod_zone_page_state */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v + overstep, pgdat, item);
		__this_cpu_write(*p, -overstep);
	}

	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __inc_node_page_state(struct page *page, enum node_stat_item item)
{
	__inc_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__inc_node_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	/* See __mod_zone_page_state */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v - overstep, zone, item);
		__this_cpu_write(*p, overstep);
	}

	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}

void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));

	/* See __mod_zone_page_state */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v - overstep, pgdat, item);
		__this_cpu_write(*p, overstep);
	}

	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

void __dec_node_page_state(struct page *page, enum node_stat_item item)
{
	__dec_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__dec_node_page_state);

#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies how overstep should be handled:
 *	0	No overstepping
 *	1	Overstepping half of threshold
 *	-1	Overstepping minus half of threshold
 */
static inline void mod_zone_state(struct zone *zone,
	enum zone_stat_item item, long delta, int overstep_mode)
{
	struct per_cpu_zonestat __percpu *pcp = zone->per_cpu_zonestats;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to zone counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong CPU if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a zone.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (abs(n) > t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to zone counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		zone_page_state_add(z, zone, item);
}

void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	mod_zone_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_zone_page_state);

static inline void mod_node_state(struct pglist_data *pgdat,
	enum node_stat_item item, int delta, int overstep_mode)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long o, n, t, z;

	if (vmstat_item_in_bytes(item)) {
		/*
		 * Only cgroups use subpage accounting right now; at
		 * the global level, these items still change in
		 * multiples of whole pages. Store them as pages
		 * internally to keep the per-cpu counters compact.
		 */
		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
		delta >>= PAGE_SHIFT;
	}

	do {
		z = 0;  /* overflow to node counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong CPU if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a node.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (abs(n) > t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to node counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		node_page_state_add(z, pgdat, item);
}

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
			 long delta)
{
	mod_node_state(pgdat, item, delta, 0);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	mod_node_state(pgdat, item, 1, 1);
}

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_node_page_state);
#else
/*
 * Use interrupt disable to serialize counter updates
 */
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_state);

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
			 long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_node_page_state(pgdat, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;
	struct pglist_data *pgdat;

	pgdat = page_pgdat(page);
	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_node_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_node_page_state);
#endif

/*
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */
static int fold_diff(int *zone_diff, int *node_diff)
{
	int i;
	int changes = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (zone_diff[i]) {
			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		if (node_diff[i]) {
			atomic_long_add(node_diff[i], &vm_node_stat[i]);
			changes++;
		}
	return changes;
}

#ifdef CONFIG_NUMA
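/* Flush a zone's per-cpu NUMA event deltas into its zone-wide counters. */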
static void fold_vm_zone_numa_events(struct zone *zone)
{
	unsigned long zone_numa_events[NR_VM_NUMA_EVENT_ITEMS] = { 0, };
	int cpu;
	enum numa_stat_item item;

	for_each_online_cpu(cpu) {
		struct per_cpu_zonestat *pzstats;

		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
		for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
			zone_numa_events[item] += xchg(&pzstats->vm_numa_event[item], 0);
	}

	for (item = 0; item < NR_VM_NUMA_EVENT_ITEMS; item++)
		zone_numa_event_add(zone_numa_events[item], zone, item);
}

void fold_vm_numa_events(void)
{
	struct zone *zone;

	for_each_populated_zone(zone)
		fold_vm_zone_numa_events(zone);
}
#endif

/*
 * Update the zone counters for the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the remote zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 *
 * The function returns the number of global counters updated.
 */
static int refresh_cpu_vm_stats(bool do_pagesets)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
	int changes = 0;

	for_each_populated_zone(zone) {
		struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
#ifdef CONFIG_NUMA
		struct per_cpu_pages __percpu *pcp = zone->per_cpu_pageset;
#endif

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(pzstats->vm_stat_diff[i], 0);
			if (v) {

				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
#ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
				__this_cpu_write(pcp->expire, 3);
#endif
			}
		}
#ifdef CONFIG_NUMA

		if (do_pagesets) {
			cond_resched();
			/*
			 * Deal with draining the remote pageset of this
			 * processor
			 *
			 * Check if there are pages remaining in this pageset
			 * if not then there is nothing to expire.
			 */
			if (!__this_cpu_read(pcp->expire) ||
			       !__this_cpu_read(pcp->count))
				continue;

			/*
			 * We never drain zones local to this processor.
			 */
			if (zone_to_nid(zone) == numa_node_id()) {
				__this_cpu_write(pcp->expire, 0);
				continue;
			}

			if (__this_cpu_dec_return(pcp->expire))
				continue;

			if (__this_cpu_read(pcp->count)) {
				drain_zone_pages(zone, this_cpu_ptr(pcp));
				changes++;
			}
		}
#endif
	}

	for_each_online_pgdat(pgdat) {
		struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;

		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
			if (v) {
				atomic_long_add(v, &pgdat->vm_stat[i]);
				global_node_diff[i] += v;
			}
		}
	}

	changes += fold_diff(global_zone_diff, global_node_diff);
	return changes;
}

/*
 * Fold the data for an offline cpu into the global array.
 * There cannot be any access by the offline cpu and therefore
 * synchronization is simplified.
 */
void cpu_vm_stats_fold(int cpu)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
		struct per_cpu_zonestat *pzstats;

		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
			if (pzstats->vm_stat_diff[i]) {
				int v;

				v = pzstats->vm_stat_diff[i];
				pzstats->vm_stat_diff[i] = 0;
				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
			}
		}
#ifdef CONFIG_NUMA
		for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
			if (pzstats->vm_numa_event[i]) {
				unsigned long v;

				v = pzstats->vm_numa_event[i];
				pzstats->vm_numa_event[i] = 0;
				zone_numa_event_add(v, zone, i);
			}
		}
#endif
	}

	for_each_online_pgdat(pgdat) {
		struct per_cpu_nodestat *p;

		p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);

		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
			if (p->vm_node_stat_diff[i]) {
				int v;

				v = p->vm_node_stat_diff[i];
				p->vm_node_stat_diff[i] = 0;
				atomic_long_add(v, &pgdat->vm_stat[i]);
				global_node_diff[i] += v;
			}
	}

	fold_diff(global_zone_diff, global_node_diff);
}

/*
 * This is only called if !populated_zone(zone), which implies no other users
 * of pzstats->vm_stat_diff[] exist.
 */
void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *pzstats)
{
	unsigned long v;
	int i;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
		if (pzstats->vm_stat_diff[i]) {
			v = pzstats->vm_stat_diff[i];
			pzstats->vm_stat_diff[i] = 0;
			zone_page_state_add(v, zone, i);
		}
	}

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++) {
		if (pzstats->vm_numa_event[i]) {
			v = pzstats->vm_numa_event[i];
			pzstats->vm_numa_event[i] = 0;
			zone_numa_event_add(v, zone, i);
		}
	}
#endif
}
#endif /* CONFIG_SMP */

#ifdef CONFIG_NUMA
/*
 * Determine the per node value of a stat item. This function
 * is called frequently in a NUMA machine, so try to be as
 * frugal as possible.
 */
unsigned long sum_zone_node_page_state(int node,
				 enum zone_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	int i;
	unsigned long count = 0;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_page_state(zones + i, item);

	return count;
}

/* Determine the per node value of a numa stat item. */
unsigned long sum_zone_numa_event_state(int node,
				 enum numa_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	unsigned long count = 0;
	int i;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_numa_event_state(zones + i, item);

	return count;
}

/*
 * Determine the per node value of a stat item.
 */
unsigned long node_page_state_pages(struct pglist_data *pgdat,
				    enum node_stat_item item)
{
	long x = atomic_long_read(&pgdat->vm_stat[item]);
#ifdef CONFIG_SMP
	if (x < 0)
		x = 0;
#endif
	return x;
}

unsigned long node_page_state(struct pglist_data *pgdat,
			      enum node_stat_item item)
{
	VM_WARN_ON_ONCE(vmstat_item_in_bytes(item));

	return node_page_state_pages(pgdat, item);
}
#endif

#ifdef CONFIG_COMPACTION

struct contig_page_info {
	unsigned long free_pages;
	unsigned long free_blocks_total;
	unsigned long free_blocks_suitable;
};

/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * done in a separate patch in a future version of this software.
 */
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{
	unsigned int order;

	info->free_pages = 0;
	info->free_blocks_total = 0;
	info->free_blocks_suitable = 0;

	for (order = 0; order < MAX_ORDER; order++) {
		unsigned long blocks;

		/* Count number of free blocks */
		blocks = zone->free_area[order].nr_free;
		info->free_blocks_total += blocks;

		/* Count free base pages */
		info->free_pages += blocks << order;

		/* Count the suitable free blocks */
		if (order >= suitable_order)
			info->free_blocks_suitable += blocks <<
						(order - suitable_order);
	}
}

/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used.
 */
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

	if (WARN_ON_ONCE(order >= MAX_ORDER))
		return 0;

	if (!info->free_blocks_total)
		return 0;

	/* Fragmentation index only makes sense when a request would fail */
	if (info->free_blocks_suitable)
		return -1000;

	/*
	 * Index is between 0 and 1 so return within 3 decimal places
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
	return 1000 - div_u64((1000 + div_u64(info->free_pages * 1000ULL, requested)), info->free_blocks_total);
}

/*
 * Calculates external fragmentation within a zone wrt the given order.
 * It is defined as the percentage of pages found in blocks of size
 * less than 1 << order. It returns values in range [0, 100].
 */
unsigned int extfrag_for_order(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	if (info.free_pages == 0)
		return 0;

	return div_u64((info.free_pages -
			(info.free_blocks_suitable << order)) * 100,
			info.free_pages);
}

/* Same as __fragmentation_index but allocs contig_page_info on stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || \
    defined(CONFIG_NUMA) || defined(CONFIG_MEMCG)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",

const char * const vmstat_text[] = {
	/* enum zone_stat_item counters */
	"nr_free_pages",
	"nr_zone_inactive_anon",
	"nr_zone_active_anon",
	"nr_zone_inactive_file",
	"nr_zone_active_file",
	"nr_zone_unevictable",
	"nr_zone_write_pending",
	"nr_mlock",
	"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
	"nr_zspages",
#endif
	"nr_free_cma",

	/* enum numa_stat_item counters */
#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif

	/* enum node_stat_item counters */
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_isolated_anon",
	"nr_isolated_file",
	"workingset_nodes",
	"workingset_refault_anon",
	"workingset_refault_file",
	"workingset_activate_anon",
	"workingset_activate_file",
	"workingset_restore_anon",
	"workingset_restore_file",
	"workingset_nodereclaim",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_writeback_temp",
	"nr_shmem",
	"nr_shmem_hugepages",
	"nr_shmem_pmdmapped",
	"nr_file_hugepages",
	"nr_file_pmdmapped",
	"nr_anon_transparent_hugepages",
	"nr_vmscan_write",
	"nr_vmscan_immediate_reclaim",
	"nr_dirtied",
	"nr_written",
	"nr_kernel_misc_reclaimable",
	"nr_foll_pin_acquired",
	"nr_foll_pin_released",
	"nr_kernel_stack",
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
	"nr_shadow_call_stack",
#endif
	"nr_page_table_pages",
#ifdef CONFIG_SWAP
	"nr_swapcached",
#endif

	/* enum writeback_stat_item counters */
	"nr_dirty_threshold",
	"nr_dirty_background_threshold",

#if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG)
	/* enum vm_event_item counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")
	TEXTS_FOR_ZONES("allocstall")
	TEXTS_FOR_ZONES("pgskip")

	"pgfree",
	"pgactivate",
	"pgdeactivate",
	"pglazyfree",

	"pgfault",
	"pgmajfault",
	"pglazyfreed",

	"pgrefill",
	"pgreuse",
	"pgsteal_kswapd",
	"pgsteal_direct",
	"pgdemote_kswapd",
	"pgdemote_direct",
	"pgscan_kswapd",
	"pgscan_direct",
	"pgscan_direct_throttle",
	"pgscan_anon",
	"pgscan_file",
	"pgsteal_anon",
	"pgsteal_file",

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"pageoutrun",

	"pgrotated",

	"drop_pagecache",
	"drop_slab",
	"oom_kill",

#ifdef CONFIG_NUMA_BALANCING
	"numa_pte_updates",
	"numa_huge_pte_updates",
	"numa_hint_faults",
	"numa_hint_faults_local",
	"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
	"pgmigrate_success",
	"pgmigrate_fail",
	"thp_migration_success",
	"thp_migration_fail",
	"thp_migration_split",
#endif
#ifdef CONFIG_COMPACTION
	"compact_migrate_scanned",
	"compact_free_scanned",
	"compact_isolated",
	"compact_stall",
	"compact_fail",
	"compact_success",
	"compact_daemon_wake",
	"compact_daemon_migrate_scanned",
	"compact_daemon_free_scanned",
#endif

#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
#ifdef CONFIG_CMA
	"cma_alloc_success",
	"cma_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	"thp_fault_alloc",
	"thp_fault_fallback",
	"thp_fault_fallback_charge",
	"thp_collapse_alloc",
	"thp_collapse_alloc_failed",
	"thp_file_alloc",
	"thp_file_fallback",
	"thp_file_fallback_charge",
	"thp_file_mapped",
	"thp_split_page",
	"thp_split_page_failed",
	"thp_deferred_split_page",
	"thp_split_pmd",
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	"thp_split_pud",
#endif
	"thp_zero_page_alloc",
	"thp_zero_page_alloc_failed",
	"thp_swpout",
	"thp_swpout_fallback",
#endif
#ifdef CONFIG_MEMORY_BALLOON
	"balloon_inflate",
	"balloon_deflate",
#ifdef CONFIG_BALLOON_COMPACTION
	"balloon_migrate",
#endif
#endif
#ifdef CONFIG_DEBUG_TLBFLUSH
	"nr_tlb_remote_flush",
	"nr_tlb_remote_flush_received",
	"nr_tlb_local_flush_all",
	"nr_tlb_local_flush_one",
#endif

#ifdef CONFIG_DEBUG_VM_VMACACHE
	"vmacache_find_calls",
	"vmacache_find_hits",
#endif
#ifdef CONFIG_SWAP
	"swap_ra",
	"swap_ra_hit",
#endif
#ifdef CONFIG_X86
	"direct_map_level2_splits",
	"direct_map_level3_splits",
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
};
#endif

#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
     defined(CONFIG_PROC_FS)
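/* Advance the seq_file position *pos to the matching online node. */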
static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;

	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}

/*
 * Walk zones in a node and print using a callback.
 * If @assert_populated is true, only use callback for zones that are populated.
 */
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		bool assert_populated, bool nolock,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (assert_populated && !populated_zone(zone))
			continue;

		if (!nolock)
			spin_lock_irqsave(&zone->lock, flags);
		print(m, pgdat, zone);
		if (!nolock)
			spin_unlock_irqrestore(&zone->lock, flags);
	}
}
#endif

#ifdef CONFIG_PROC_FS
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
						struct zone *zone)
{
	int order;

	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
	seq_putc(m, '\n');
}

/*
 * This walks the free areas for each zone.
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
	return 0;
}

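/* Print the per-order free counts for every migratetype in one zone. */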
static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int order, mtype;

	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
					pgdat->node_id,
					zone->name,
					migratetype_names[mtype]);
		for (order = 0; order < MAX_ORDER; ++order) {
			unsigned long freecount = 0;
			struct free_area *area;
			struct list_head *curr;
			bool overflow = false;

			area = &(zone->free_area[order]);

			list_for_each(curr, &area->free_list[mtype]) {
				/*
				 * Cap the free_list iteration because it might
				 * be really large and we are under a spinlock
				 * so a long time spent there could trigger a
				 * hard lockup detector. Anyway this is a
				 * debugging tool so knowing there is a handful
				 * of pages of this order should be more than
				 * sufficient.
				 */
				if (++freecount >= 100000) {
					overflow = true;
					break;
				}
			}
			seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
			spin_unlock_irq(&zone->lock);
			cond_resched();
			spin_lock_irq(&zone->lock);
		}
		seq_putc(m, '\n');
	}
}

/* Print out the free pages at each order for each migratetype */
static void pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	int order;
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* Print header */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6d ", order);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
}

static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mtype;
	unsigned long pfn;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		page = pfn_to_online_page(pfn);
		if (!page)
			continue;

		if (page_zone(page) != zone)
			continue;

		mtype = get_pageblock_migratetype(page);

		if (mtype < MIGRATE_TYPES)
			count[mtype]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12lu ", count[mtype]);
	seq_putc(m, '\n');
}

/* Print out the number of pageblocks for each migratetype */
static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	int mtype;
	pg_data_t *pgdat = (pg_data_t *)arg;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');
	walk_zones_in_node(m, pgdat, true, false,
		pagetypeinfo_showblockcount_print);
}

/*
 * Print out the number of pageblocks for each migratetype that contain pages
 * of other types. This gives an indication of how well fallbacks are being
 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
 * to determine what is going on.
 */
static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
{
#ifdef CONFIG_PAGE_OWNER
	int mtype;

	if (!static_branch_unlikely(&page_owner_inited))
		return;

	drain_all_pages(NULL);

	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, true, true,
		pagetypeinfo_showmixedcount_print);
#endif /* CONFIG_PAGE_OWNER */
}

/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	seq_printf(m, "Page block order: %d\n", pageblock_order);
	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
	seq_putc(m, '\n');
	pagetypeinfo_showfree(m, pgdat);
	pagetypeinfo_showblockcount(m, pgdat);
	pagetypeinfo_showmixedcount(m, pgdat);

	return 0;
}

static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};

static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
{
	int zid;

	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
		struct zone *compare = &pgdat->node_zones[zid];

		if (populated_zone(compare))
			return zone == compare;
	}

	return false;
}

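/* Emit one zone's watermarks, counters and per-cpu pageset state. */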
static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
							struct zone *zone)
{
	int i;
	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
	if (is_zone_first_populated(pgdat, zone)) {
		seq_printf(m, "\n  per-node stats");
		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
			unsigned long pages = node_page_state_pages(pgdat, i);

			if (vmstat_item_print_in_thp(i))
				pages /= HPAGE_PMD_NR;
			seq_printf(m, "\n      %-12s %lu", node_stat_name(i),
				   pages);
		}
	}
	seq_printf(m,
		   "\n  pages free     %lu"
		   "\n        min      %lu"
		   "\n        low      %lu"
		   "\n        high     %lu"
		   "\n        spanned  %lu"
		   "\n        present  %lu"
		   "\n        managed  %lu"
		   "\n        cma      %lu",
		   zone_page_state(zone, NR_FREE_PAGES),
		   min_wmark_pages(zone),
		   low_wmark_pages(zone),
		   high_wmark_pages(zone),
		   zone->spanned_pages,
		   zone->present_pages,
		   zone_managed_pages(zone),
		   zone_cma_pages(zone));

	seq_printf(m,
		   "\n        protection: (%ld",
		   zone->lowmem_reserve[0]);
	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
	seq_putc(m, ')');

	/* If unpopulated, no other information is useful */
	if (!populated_zone(zone)) {
		seq_putc(m, '\n');
		return;
	}

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		seq_printf(m, "\n      %-12s %lu", zone_stat_name(i),
			   zone_page_state(zone, i));

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
		seq_printf(m, "\n      %-12s %lu", numa_stat_name(i),
			   zone_numa_event_state(zone, i));
#endif

	seq_printf(m, "\n  pagesets");
	for_each_online_cpu(i) {
		struct per_cpu_pages *pcp;
		struct per_cpu_zonestat __maybe_unused *pzstats;

		pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
		seq_printf(m,
			   "\n    cpu: %i"
			   "\n              count: %i"
			   "\n              high:  %i"
			   "\n              batch: %i",
			   i,
			   pcp->count,
			   pcp->high,
			   pcp->batch);
#ifdef CONFIG_SMP
		pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
		seq_printf(m, "\n  vm stats threshold: %d",
			   pzstats->stat_threshold);
#endif
	}
	seq_printf(m,
		   "\n  node_unreclaimable:  %u"
		   "\n  start_pfn:           %lu",
		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
		   zone->zone_start_pfn);
	seq_putc(m, '\n');
}

/*
 * Output information about zones in @pgdat.  All zones are printed regardless
 * of whether they are populated or not: lowmem_reserve_ratio operates on the
 * zones that are not populated.
 */
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;
	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
	return 0;
}

static const struct seq_operations zoneinfo_op = {
	.start	= frag_start, /* iterate over all zones. The same as in
			       * fragmentation. */
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};

#define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \
			 NR_VM_NUMA_EVENT_ITEMS + \
			 NR_VM_NODE_STAT_ITEMS + \
			 NR_VM_WRITEBACK_STAT_ITEMS + \
			 (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \
			  NR_VM_EVENT_ITEMS : 0))

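/*
 * Snapshot every vmstat counter into one kmalloc'ed array so that a single
 * read of /proc/vmstat sees a consistent set of values.
 */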
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
	int i;

	if (*pos >= NR_VMSTAT_ITEMS)
		return NULL;

	BUILD_BUG_ON(ARRAY_SIZE(vmstat_text) < NR_VMSTAT_ITEMS);
	fold_vm_numa_events();
	v = kmalloc_array(NR_VMSTAT_ITEMS, sizeof(unsigned long), GFP_KERNEL);
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_zone_page_state(i);
	v += NR_VM_ZONE_STAT_ITEMS;

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_EVENT_ITEMS; i++)
		v[i] = global_numa_event_state(i);
	v += NR_VM_NUMA_EVENT_ITEMS;
#endif

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
		v[i] = global_node_page_state_pages(i);
		if (vmstat_item_print_in_thp(i))
			v[i] /= HPAGE_PMD_NR;
	}
	v += NR_VM_NODE_STAT_ITEMS;

	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
			    v + NR_DIRTY_THRESHOLD);
	v += NR_VM_WRITEBACK_STAT_ITEMS;

#ifdef CONFIG_VM_EVENT_COUNTERS
	all_vm_events(v);
	v[PGPGIN] /= 2;		/* sectors -> kbytes */
	v[PGPGOUT] /= 2;
#endif
	return (unsigned long *)m->private + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= NR_VMSTAT_ITEMS)
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_puts(m, vmstat_text[off]);
	seq_put_decimal_ull(m, " ", *l);
	seq_putc(m, '\n');

	if (off == NR_VMSTAT_ITEMS - 1) {
		/*
		 * We've come to the end - add any deprecated counters to avoid
		 * breaking userspace which might depend on them being present.
		 */
		seq_puts(m, "nr_unstable 0\n");
	}
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

#ifdef CONFIG_PROC_FS
static void refresh_vm_stats(struct work_struct *work)
{
	refresh_cpu_vm_stats(true);
}

int vmstat_refresh(struct ctl_table *table, int write,
		   void *buffer, size_t *lenp, loff_t *ppos)
{
	long val;
	int err;
	int i;

	/*
	 * The regular update, every sysctl_stat_interval, may come later
	 * than expected: leaving a significant amount in per_cpu buckets.
	 * This is particularly misleading when checking a quantity of HUGE
	 * pages, immediately after running a test.  /proc/sys/vm/stat_refresh,
	 * which can equally be echo'ed to or cat'ted from (by root),
	 * can be used to update the stats just before reading them.
	 *
	 * Oh, and since global_zone_page_state() etc. are so careful to hide
	 * transiently negative values, report an error here if any of
	 * the stats is negative, so we know to go looking for imbalance.
	 */
	err = schedule_on_each_cpu(refresh_vm_stats);
	if (err)
		return err;
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
		/*
		 * Skip checking stats known to go negative occasionally.
		 */
		switch (i) {
		case NR_ZONE_WRITE_PENDING:
		case NR_FREE_CMA_PAGES:
			continue;
		}
		val = atomic_long_read(&vm_zone_stat[i]);
		if (val < 0) {
			pr_warn("%s: %s %ld\n",
				__func__, zone_stat_name(i), val);
		}
	}
	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
		/*
		 * Skip checking stats known to go negative occasionally.
		 */
		switch (i) {
		case NR_WRITEBACK:
			continue;
		}
		val = atomic_long_read(&vm_node_stat[i]);
		if (val < 0) {
			pr_warn("%s: %s %ld\n",
				__func__, node_stat_name(i), val);
		}
	}
	if (write)
		*ppos += *lenp;
	else
		*lenp = 0;
	return 0;
}
#endif /* CONFIG_PROC_FS */

static void vmstat_update(struct work_struct *w)
{
	if (refresh_cpu_vm_stats(true)) {
		/*
		 * Counters were updated so we expect more updates
		 * to occur in the future. Keep on running the
		 * update worker thread.
		 */
		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
				this_cpu_ptr(&vmstat_work),
				round_jiffies_relative(sysctl_stat_interval));
	}
}

/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */
static bool need_update(int cpu)
{
	pg_data_t *last_pgdat = NULL;
	struct zone *zone;

	for_each_populated_zone(zone) {
		struct per_cpu_zonestat *pzstats = per_cpu_ptr(zone->per_cpu_zonestats, cpu);
		struct per_cpu_nodestat *n;

		/*
		 * The fast way of checking if there are any vmstat diffs.
		 */
		if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff)))
			return true;

		if (last_pgdat == zone->zone_pgdat)
			continue;
		last_pgdat = zone->zone_pgdat;
		n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
		if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff)))
			return true;
	}
	return false;
}

/*
 * Switch off vmstat processing and then fold all the remaining differentials
 * until the diffs stay at zero. The function is used by NOHZ and can only be
 * invoked when tick processing is not active.
 */
void quiet_vmstat(void)
{
	if (system_state != SYSTEM_RUNNING)
		return;

	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
		return;

	if (!need_update(smp_processor_id()))
		return;

	/*
	 * Just refresh counters and do not care about the pending delayed
	 * vmstat_update. It doesn't fire that often to matter and canceling
	 * it would be too expensive from this path.
	 * vmstat_shepherd will take care about that for us.
	 */
	refresh_cpu_vm_stats(false);
}

/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for vm statistics updates disabled because of
 * inactivity.
 */
static void vmstat_shepherd(struct work_struct *w);

static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);

static void vmstat_shepherd(struct work_struct *w)
{
	int cpu;

	cpus_read_lock();
	/* Check processors whose vmstat worker threads have been disabled */
	for_each_online_cpu(cpu) {
		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);

		if (!delayed_work_pending(dw) && need_update(cpu))
			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);

		cond_resched();
	}
	cpus_read_unlock();

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

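/* Arm the per-cpu vmstat workers and kick off the periodic shepherd. */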
static void __init start_shepherd_timer(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
			vmstat_update);

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

static void __init init_cpu_node_state(void)
{
	int node;

	for_each_online_node(node) {
		if (cpumask_weight(cpumask_of_node(node)) > 0)
			node_set_state(node, N_CPU);
	}
}

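/* CPU hotplug callbacks: keep thresholds and the N_CPU node state in sync. */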
static int vmstat_cpu_online(unsigned int cpu)
{
	refresh_zone_stat_thresholds();
	node_set_state(cpu_to_node(cpu), N_CPU);
	return 0;
}

static int vmstat_cpu_down_prep(unsigned int cpu)
{
	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
	return 0;
}

static int vmstat_cpu_dead(unsigned int cpu)
{
	const struct cpumask *node_cpus;
	int node;

	node = cpu_to_node(cpu);

	refresh_zone_stat_thresholds();
	node_cpus = cpumask_of_node(node);
	if (cpumask_weight(node_cpus) > 0)
		return 0;

	node_clear_state(node, N_CPU);
	return 0;
}

#endif /* CONFIG_SMP */

struct workqueue_struct *mm_percpu_wq;

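/*
 * Called once at boot: set up the mm_percpu_wq workqueue, register the
 * CPU hotplug callbacks and create the /proc interfaces.
 */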
void __init init_mm_internals(void)
{
	int ret __maybe_unused;

	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);

#ifdef CONFIG_SMP
	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
					NULL, vmstat_cpu_dead);
	if (ret < 0)
		pr_err("vmstat: failed to register 'dead' hotplug state\n");

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
					vmstat_cpu_online,
					vmstat_cpu_down_prep);
	if (ret < 0)
		pr_err("vmstat: failed to register 'online' hotplug state\n");

	cpus_read_lock();
	init_cpu_node_state();
	cpus_read_unlock();

	start_shepherd_timer();
#endif
#ifdef CONFIG_PROC_FS
	proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
	proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
	proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
#endif
}

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)

/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{
	/* No free memory is interpreted as all free memory is unusable */
	if (info->free_pages == 0)
		return 1000;

	/*
	 * Index should be a value between 0 and 1. Return a value to 3
	 * decimal places.
	 *
	 * 0 => no fragmentation
	 * 1 => high fragmentation
	 */
	return div_u64((info->free_pages -
			(info->free_blocks_suitable << order)) * 1000ULL,
			info->free_pages);
}

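/* Print the unusable free space index at each order for one zone. */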
static void unusable_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = unusable_free_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */
static int unusable_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);

	return 0;
}

static const struct seq_operations unusable_sops = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= unusable_show,
};

DEFINE_SEQ_ATTRIBUTE(unusable);

static void extfrag_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;

	/* Alloc on stack as interrupts are disabled for zone walk */
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = __fragmentation_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display fragmentation index for orders that allocations would fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);

	return 0;
}

static const struct seq_operations extfrag_sops = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= extfrag_show,
};

DEFINE_SEQ_ATTRIBUTE(extfrag);

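/* Create the debugfs files under /sys/kernel/debug/extfrag. */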
static int __init extfrag_debug_init(void)
{
	struct dentry *extfrag_debug_root;

	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);

	debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,
			    &unusable_fops);

	debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,
			    &extfrag_fops);

	return 0;
}

module_init(extfrag_debug_init);
#endif