// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/mm/vmstat.c
 *
 *  Manages VM statistics: per-cpu differentials that are periodically
 *  folded into the global, per-zone and per-node counters, and the
 *  /proc and debugfs interfaces that report them.
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmstat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include <linux/compaction.h>
#include <linux/mm_inline.h>
#include <linux/page_ext.h>
#include <linux/page_owner.h>

#include "internal.h"

#define NUMA_STATS_THRESHOLD (U16_MAX - 2)

#ifdef CONFIG_NUMA
int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;

/* zero numa counters within a zone */
static void zero_zone_numa_counters(struct zone *zone)
{
	int item, cpu;

	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++) {
		atomic_long_set(&zone->vm_numa_stat[item], 0);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]
						= 0;
	}
}

/* zero numa counters of all the populated zones */
static void zero_zones_numa_counters(void)
{
	struct zone *zone;

	for_each_populated_zone(zone)
		zero_zone_numa_counters(zone);
}

/* zero global numa counters */
static void zero_global_numa_counters(void)
{
	int item;

	for (item = 0; item < NR_VM_NUMA_STAT_ITEMS; item++)
		atomic_long_set(&vm_numa_stat[item], 0);
}

static void invalid_numa_statistics(void)
{
	zero_zones_numa_counters();
	zero_global_numa_counters();
}

static DEFINE_MUTEX(vm_numa_stat_lock);

int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *length, loff_t *ppos)
{
	int ret, oldval;

	mutex_lock(&vm_numa_stat_lock);
	if (write)
		oldval = sysctl_vm_numa_stat;
	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
	if (ret || !write)
		goto out;

	if (oldval == sysctl_vm_numa_stat)
		goto out;
	else if (sysctl_vm_numa_stat == ENABLE_NUMA_STAT) {
		static_branch_enable(&vm_numa_stat_key);
		pr_info("enable numa statistics\n");
	} else {
		static_branch_disable(&vm_numa_stat_key);
		invalid_numa_statistics();
		pr_info("disable numa statistics, and clear numa counters\n");
	}

out:
	mutex_unlock(&vm_numa_stat_lock);
	return ret;
}
#endif

#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);

/* Sum the vm event counts of all online cpus into *ret. */
static void sum_vm_events(unsigned long *ret)
{
	int cpu;
	int i;

	memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));

	for_each_online_cpu(cpu) {
		struct vm_event_state *this = &per_cpu(vm_event_states, cpu);

		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
			ret[i] += this->event[i];
	}
}
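
/*
 * Accumulate the vm event counters across all CPUs.
 * The result is unavoidably approximate - it can change
 * during and after execution of this function.
 */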
void all_vm_events(unsigned long *ret)
{
	get_online_cpus();
	sum_vm_events(ret);
	put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);
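
/*
 * Fold the foreign cpu events into our own.
 *
 * This is adding to the events on one processor
 * the events of a dead processor. This can be done
 * safely because the dead processor is no longer
 * executing, so no new events will be recorded on it
 * concurrently.
 */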
void vm_events_fold_cpu(int cpu)
{
	struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
	int i;

	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
		count_vm_events(i, fold_state->event[i]);
		fold_state->event[i] = 0;
	}
}

#endif /* CONFIG_VM_EVENT_COUNTERS */
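
/*
 * Manage combined zone based / global counters
 *
 * vm_stat contains the global counters
 */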
atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp;
atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp;
EXPORT_SYMBOL(vm_zone_stat);
EXPORT_SYMBOL(vm_numa_stat);
EXPORT_SYMBOL(vm_node_stat);

#ifdef CONFIG_SMP

int calculate_pressure_threshold(struct zone *zone)
{
	int threshold;
	int watermark_distance;

	/*
	 * As vmstats are not up to date, there is drift between the estimated
	 * and real values. For high thresholds and a high number of CPUs, it
	 * is possible for the min watermark to be breached while the estimated
	 * value looks fine. The pressure threshold is a reduced value such
	 * that even the maximum amount of drift will not accidentally breach
	 * the min watermark.
	 */
	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
	threshold = max(1, (int)(watermark_distance / num_online_cpus()));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}

int calculate_normal_threshold(struct zone *zone)
{
	int threshold;
	int mem;	/* memory in 128 MB units */

	/*
	 * The threshold scales with the number of processors and the amount
	 * of memory per zone: more cpus and more memory both allow (and
	 * require) a larger per-cpu differential before the global counter
	 * is updated. Hence:
	 *
	 *	threshold = 2 * fls(processors) * (1 + fls(mem))
	 *
	 * where mem is the zone's managed memory in units of 128MB
	 * (2^27 bytes, hence the shift by 27 - PAGE_SHIFT below). The
	 * maximum drift a counter can then accumulate is bounded by
	 * threshold * number of online cpus.
	 */
	mem = zone_managed_pages(zone) >> (27 - PAGE_SHIFT);

	threshold = 2 * fls(num_online_cpus()) * (1 + fls(mem));

	/*
	 * Maximum threshold is 125
	 */
	threshold = min(125, threshold);

	return threshold;
}
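
/*
 * Refresh the thresholds for each zone.
 */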
void refresh_zone_stat_thresholds(void)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int cpu;
	int threshold;

	/* Zero current pgdat thresholds */
	for_each_online_pgdat(pgdat) {
		for_each_online_cpu(cpu) {
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold = 0;
		}
	}

	for_each_populated_zone(zone) {
		struct pglist_data *pgdat = zone->zone_pgdat;
		unsigned long max_drift, tolerate_drift;

		threshold = calculate_normal_threshold(zone);

		for_each_online_cpu(cpu) {
			int pgdat_threshold;

			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;

			/* Base nodestat threshold on the largest populated zone. */
			pgdat_threshold = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold;
			per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->stat_threshold
				= max(threshold, pgdat_threshold);
		}

		/*
		 * Only set percpu_drift_mark if there is a danger that
		 * NR_FREE_PAGES reports the low watermark is ok when in fact
		 * the min watermark could be breached by an allocation
		 */
		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
		max_drift = num_online_cpus() * threshold;
		if (max_drift > tolerate_drift)
			zone->percpu_drift_mark = high_wmark_pages(zone) +
					max_drift;
	}
}

void set_pgdat_percpu_threshold(pg_data_t *pgdat,
				int (*calculate_pressure)(struct zone *))
{
	struct zone *zone;
	int cpu;
	int threshold;
	int i;

	for (i = 0; i < pgdat->nr_zones; i++) {
		zone = &pgdat->node_zones[i];
		if (!zone->percpu_drift_mark)
			continue;

		threshold = (*calculate_pressure)(zone);
		for_each_online_cpu(cpu)
			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
							= threshold;
	}
}
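
/*
 * For use when we know that interrupts are disabled,
 * or when we know that preemption is disabled and that
 * particular counter cannot be updated from interrupt context.
 */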
void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			   long delta)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		zone_page_state_add(x, zone, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_zone_page_state);

void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
			   long delta)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long x;
	long t;

	x = delta + __this_cpu_read(*p);

	t = __this_cpu_read(pcp->stat_threshold);

	if (unlikely(x > t || x < -t)) {
		node_page_state_add(x, pgdat, item);
		x = 0;
	}
	__this_cpu_write(*p, x);
}
EXPORT_SYMBOL(__mod_node_page_state);
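
/*
 * Optimized increment and decrement functions.
 *
 * These are only for a single page and therefore can take a struct page *
 * argument instead of struct zone *. This allows the inclusion of the code
 * generated for page_zone(page) into the optimized functions.
 *
 * No overflow check is necessary and therefore the differential can be
 * incremented or decremented in place which may allow the compilers to
 * generate better code.
 * The increment or decrement is known and therefore one boundary check can
 * be omitted.
 *
 * NOTE: These functions are very performance sensitive. Change only
 * with care.
 *
 * Some processors have inc/dec instructions that are atomic vs an interrupt.
 * However, the code must first determine the differential location in a zone
 * based on the processor number and then inc/dec the counter. There is no
 * guarantee without disabling preemption that the processor will not change
 * in between and therefore the atomicity vs. interrupt cannot be exploited
 * in a way that the increment or decrement has to be undone if interrupted.
 */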
void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v + overstep, zone, item);
		__this_cpu_write(*p, -overstep);
	}
}

void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	v = __this_cpu_inc_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v > t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v + overstep, pgdat, item);
		__this_cpu_write(*p, -overstep);
	}
}

void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__inc_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__inc_zone_page_state);

void __inc_node_page_state(struct page *page, enum node_stat_item item)
{
	__inc_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__inc_node_page_state);

void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	s8 v, t;

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		zone_page_state_add(v - overstep, zone, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	s8 v, t;

	v = __this_cpu_dec_return(*p);
	t = __this_cpu_read(pcp->stat_threshold);
	if (unlikely(v < -t)) {
		s8 overstep = t >> 1;

		node_page_state_add(v - overstep, pgdat, item);
		__this_cpu_write(*p, overstep);
	}
}

void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	__dec_zone_state(page_zone(page), item);
}
EXPORT_SYMBOL(__dec_zone_page_state);

void __dec_node_page_state(struct page *page, enum node_stat_item item)
{
	__dec_node_state(page_pgdat(page), item);
}
EXPORT_SYMBOL(__dec_node_page_state);

#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
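
/*
 * If we have cmpxchg_local support then we do not need to incur the overhead
 * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
 *
 * mod_state() modifies the zone counter state through atomic per cpu
 * operations.
 *
 * Overstep mode specifies the overstep:
 *        0       No overstepping
 *        1       Overstepping half of threshold
 *        -1      Overstepping minus half of threshold
 *
 * Illustrative example (not a call site in this file): a caller such as
 *	mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
 * only touches the per-cpu differential until the threshold is crossed,
 * at which point the accumulated value is pushed to the zone and global
 * counters in one atomic_long_add().
 */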
static inline void mod_zone_state(struct zone *zone,
       enum zone_stat_item item, long delta, int overstep_mode)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	s8 __percpu *p = pcp->vm_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to zone counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a zone.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (n > t || n < -t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to zone counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		zone_page_state_add(z, zone, item);
}

void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	mod_zone_state(zone, item, delta, 0);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	mod_zone_state(page_zone(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_zone_page_state);

static inline void mod_node_state(struct pglist_data *pgdat,
       enum node_stat_item item, int delta, int overstep_mode)
{
	struct per_cpu_nodestat __percpu *pcp = pgdat->per_cpu_nodestats;
	s8 __percpu *p = pcp->vm_node_stat_diff + item;
	long o, n, t, z;

	do {
		z = 0;  /* overflow to node counters */

		/*
		 * The fetching of the stat_threshold is racy. We may apply
		 * a counter threshold to the wrong cpu if we get
		 * rescheduled while executing here. However, the next
		 * counter update will apply the threshold again and
		 * therefore bring the counter under the threshold again.
		 *
		 * Most of the time the thresholds are the same anyways
		 * for all cpus in a node.
		 */
		t = this_cpu_read(pcp->stat_threshold);

		o = this_cpu_read(*p);
		n = delta + o;

		if (n > t || n < -t) {
			int os = overstep_mode * (t >> 1);

			/* Overflow must be added to node counters */
			z = n + os;
			n = -os;
		}
	} while (this_cpu_cmpxchg(*p, o, n) != o);

	if (z)
		node_page_state_add(z, pgdat, item);
}

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
			 long delta)
{
	mod_node_state(pgdat, item, delta, 0);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	mod_node_state(pgdat, item, 1, 1);
}

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, 1, 1);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	mod_node_state(page_pgdat(page), item, -1, -1);
}
EXPORT_SYMBOL(dec_node_page_state);
#else
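/*
 * Use interrupt disable to serialize counter updates
 */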
void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
			 long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_zone_page_state(zone, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_zone_page_state);

void inc_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;
	struct zone *zone;

	zone = page_zone(page);
	local_irq_save(flags);
	__inc_zone_state(zone, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_zone_page_state);

void dec_zone_page_state(struct page *page, enum zone_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_zone_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_zone_page_state);

void inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_state);

void mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
			 long delta)
{
	unsigned long flags;

	local_irq_save(flags);
	__mod_node_page_state(pgdat, item, delta);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(mod_node_page_state);

void inc_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;
	struct pglist_data *pgdat;

	pgdat = page_pgdat(page);
	local_irq_save(flags);
	__inc_node_state(pgdat, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(inc_node_page_state);

void dec_node_page_state(struct page *page, enum node_stat_item item)
{
	unsigned long flags;

	local_irq_save(flags);
	__dec_node_page_state(page, item);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(dec_node_page_state);
#endif /* CONFIG_HAVE_CMPXCHG_LOCAL */
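
/*
 * Fold a differential into the global counters.
 * Returns the number of counters updated.
 */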
#ifdef CONFIG_NUMA
static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff)
{
	int i;
	int changes = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (zone_diff[i]) {
			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		if (numa_diff[i]) {
			atomic_long_add(numa_diff[i], &vm_numa_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		if (node_diff[i]) {
			atomic_long_add(node_diff[i], &vm_node_stat[i]);
			changes++;
		}
	return changes;
}
#else
static int fold_diff(int *zone_diff, int *node_diff)
{
	int i;
	int changes = 0;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (zone_diff[i]) {
			atomic_long_add(zone_diff[i], &vm_zone_stat[i]);
			changes++;
		}

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		if (node_diff[i]) {
			atomic_long_add(node_diff[i], &vm_node_stat[i]);
			changes++;
		}
	return changes;
}
#endif /* CONFIG_NUMA */
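
/*
 * Update the zone counters for the current cpu.
 *
 * Note that refresh_cpu_vm_stats strives to only access
 * node local memory. The per cpu pagesets on remote zones are placed
 * in the memory local to the processor using that pageset. So the
 * loop over all zones will access a series of cachelines local to
 * the processor.
 *
 * The call to zone_page_state_add updates the cachelines with the
 * statistics in the zone struct as well as the global cachelines
 * with the global counters. These could cause remote node cache line
 * bouncing and will have to be only done when necessary.
 *
 * The function returns the number of global counters updated.
 */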
static int refresh_cpu_vm_stats(bool do_pagesets)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
#ifdef CONFIG_NUMA
	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
#endif
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };
	int changes = 0;

	for_each_populated_zone(zone) {
		struct per_cpu_pageset __percpu *p = zone->pageset;

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_stat_diff[i], 0);
			if (v) {
				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
#ifdef CONFIG_NUMA
				/* 3 seconds idle till flush */
				__this_cpu_write(p->expire, 3);
#endif
			}
		}
#ifdef CONFIG_NUMA
		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0);
			if (v) {
				atomic_long_add(v, &zone->vm_numa_stat[i]);
				global_numa_diff[i] += v;
				__this_cpu_write(p->expire, 3);
			}
		}

		if (do_pagesets) {
			cond_resched();
			/*
			 * Deal with draining the remote pageset of this
			 * processor
			 *
			 * Check if there are pages remaining in this pageset
			 * if not then there is nothing to expire.
			 */
			if (!__this_cpu_read(p->expire) ||
			    !__this_cpu_read(p->pcp.count))
				continue;

			/*
			 * We never drain zone local pages
			 */
			if (zone_to_nid(zone) == numa_node_id()) {
				__this_cpu_write(p->expire, 0);
				continue;
			}

			if (__this_cpu_dec_return(p->expire))
				continue;

			if (__this_cpu_read(p->pcp.count)) {
				drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
				changes++;
			}
		}
#endif
	}

	for_each_online_pgdat(pgdat) {
		struct per_cpu_nodestat __percpu *p = pgdat->per_cpu_nodestats;

		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
			int v;

			v = this_cpu_xchg(p->vm_node_stat_diff[i], 0);
			if (v) {
				atomic_long_add(v, &pgdat->vm_stat[i]);
				global_node_diff[i] += v;
			}
		}
	}

#ifdef CONFIG_NUMA
	changes += fold_diff(global_zone_diff, global_numa_diff,
			     global_node_diff);
#else
	changes += fold_diff(global_zone_diff, global_node_diff);
#endif
	return changes;
}
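
/*
 * Fold the data for an offline cpu into the global array.
 * There cannot be any access by the offline cpu and therefore
 * synchronization is simplified.
 */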
void cpu_vm_stats_fold(int cpu)
{
	struct pglist_data *pgdat;
	struct zone *zone;
	int i;
	int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
#ifdef CONFIG_NUMA
	int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, };
#endif
	int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, };

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p;

		p = per_cpu_ptr(zone->pageset, cpu);

		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
			if (p->vm_stat_diff[i]) {
				int v;

				v = p->vm_stat_diff[i];
				p->vm_stat_diff[i] = 0;
				atomic_long_add(v, &zone->vm_stat[i]);
				global_zone_diff[i] += v;
			}

#ifdef CONFIG_NUMA
		for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
			if (p->vm_numa_stat_diff[i]) {
				int v;

				v = p->vm_numa_stat_diff[i];
				p->vm_numa_stat_diff[i] = 0;
				atomic_long_add(v, &zone->vm_numa_stat[i]);
				global_numa_diff[i] += v;
			}
#endif
	}

	for_each_online_pgdat(pgdat) {
		struct per_cpu_nodestat *p;

		p = per_cpu_ptr(pgdat->per_cpu_nodestats, cpu);

		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
			if (p->vm_node_stat_diff[i]) {
				int v;

				v = p->vm_node_stat_diff[i];
				p->vm_node_stat_diff[i] = 0;
				atomic_long_add(v, &pgdat->vm_stat[i]);
				global_node_diff[i] += v;
			}
	}

#ifdef CONFIG_NUMA
	fold_diff(global_zone_diff, global_numa_diff, global_node_diff);
#else
	fold_diff(global_zone_diff, global_node_diff);
#endif
}
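
/*
 * This is only called if !populated_zone(zone), which implies no other users of
 * pset->vm_stat_diff[] exist.
 */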
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
{
	int i;

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		if (pset->vm_stat_diff[i]) {
			int v = pset->vm_stat_diff[i];
			pset->vm_stat_diff[i] = 0;
			atomic_long_add(v, &zone->vm_stat[i]);
			atomic_long_add(v, &vm_zone_stat[i]);
		}

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		if (pset->vm_numa_stat_diff[i]) {
			int v = pset->vm_numa_stat_diff[i];

			pset->vm_numa_stat_diff[i] = 0;
			atomic_long_add(v, &zone->vm_numa_stat[i]);
			atomic_long_add(v, &vm_numa_stat[i]);
		}
#endif
}
#endif /* CONFIG_SMP */

#ifdef CONFIG_NUMA
/*
 * NUMA event counters only ever increment, so a u16 per-cpu differential
 * with a high threshold is sufficient here.
 */
void __inc_numa_state(struct zone *zone,
				 enum numa_stat_item item)
{
	struct per_cpu_pageset __percpu *pcp = zone->pageset;
	u16 __percpu *p = pcp->vm_numa_stat_diff + item;
	u16 v;

	v = __this_cpu_inc_return(*p);

	if (unlikely(v > NUMA_STATS_THRESHOLD)) {
		zone_numa_state_add(v, zone, item);
		__this_cpu_write(*p, 0);
	}
}
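
/*
 * Determine the per node value of a stat item. This function
 * is called frequently in a NUMA machine, so try to be as
 * frugal as possible.
 */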
unsigned long sum_zone_node_page_state(int node,
				 enum zone_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	int i;
	unsigned long count = 0;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_page_state(zones + i, item);

	return count;
}
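
/*
 * Determine the per node value of a numa stat item. To avoid deviation,
 * the per cpu stat number in vm_numa_stat_diff[] is also included.
 */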
unsigned long sum_zone_numa_state(int node,
				 enum numa_stat_item item)
{
	struct zone *zones = NODE_DATA(node)->node_zones;
	int i;
	unsigned long count = 0;

	for (i = 0; i < MAX_NR_ZONES; i++)
		count += zone_numa_state_snapshot(zones + i, item);

	return count;
}
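
/*
 * Determine the per node value of a stat item.
 */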
unsigned long node_page_state(struct pglist_data *pgdat,
				enum node_stat_item item)
{
	long x = atomic_long_read(&pgdat->vm_stat[item]);
#ifdef CONFIG_SMP
	if (x < 0)
		x = 0;
#endif
	return x;
}
#endif /* CONFIG_NUMA */

#ifdef CONFIG_COMPACTION

struct contig_page_info {
	unsigned long free_pages;
	unsigned long free_blocks_total;
	unsigned long free_blocks_suitable;
};
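
/*
 * Calculate the number of free pages in a zone, how many contiguous
 * pages are free and how many are large enough to satisfy an allocation of
 * the target size. Note that this function makes no attempt to estimate
 * how many suitable free blocks there *might* be if MOVABLE pages were
 * migrated. Calculating that is possible, but expensive and can be
 * done in a separate fragmentation index calculation.
 */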
static void fill_contig_page_info(struct zone *zone,
				unsigned int suitable_order,
				struct contig_page_info *info)
{
	unsigned int order;

	info->free_pages = 0;
	info->free_blocks_total = 0;
	info->free_blocks_suitable = 0;

	for (order = 0; order < MAX_ORDER; order++) {
		unsigned long blocks;

		/* Count number of free blocks */
		blocks = zone->free_area[order].nr_free;
		info->free_blocks_total += blocks;

		/* Count free base pages */
		info->free_pages += blocks << order;

		/* Count the suitable free blocks */
		if (order >= suitable_order)
			info->free_blocks_suitable += blocks <<
						(order - suitable_order);
	}
}
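
/*
 * A fragmentation index only makes sense if an allocation of a requested
 * size would fail. If that is true, the fragmentation index indicates
 * whether external fragmentation or a lack of memory was the problem.
 * The value can be used to determine if page reclaim or compaction
 * should be used.
 */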
static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
{
	unsigned long requested = 1UL << order;

	if (WARN_ON_ONCE(order >= MAX_ORDER))
		return 0;

	if (!info->free_blocks_total)
		return 0;

	/* Fragmentation index only makes sense when a request would fail */
	if (info->free_blocks_suitable)
		return -1000;

	/*
	 * Index is between 0 and 1 so return within 3 decimal places
	 *
	 * 0 => allocation would fail due to lack of memory
	 * 1 => allocation would fail due to fragmentation
	 */
	return 1000 - div_u64((1000 + div_u64(info->free_pages * 1000ULL, requested)), info->free_blocks_total);
}

/* Same as __fragmentation_index but allocates the contig_page_info on the stack */
int fragmentation_index(struct zone *zone, unsigned int order)
{
	struct contig_page_info info;

	fill_contig_page_info(zone, order, &info);
	return __fragmentation_index(order, &info);
}
#endif /* CONFIG_COMPACTION */

#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
#ifdef CONFIG_ZONE_DMA
#define TEXT_FOR_DMA(xx) xx "_dma",
#else
#define TEXT_FOR_DMA(xx)
#endif

#ifdef CONFIG_ZONE_DMA32
#define TEXT_FOR_DMA32(xx) xx "_dma32",
#else
#define TEXT_FOR_DMA32(xx)
#endif

#ifdef CONFIG_HIGHMEM
#define TEXT_FOR_HIGHMEM(xx) xx "_high",
#else
#define TEXT_FOR_HIGHMEM(xx)
#endif

#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
					TEXT_FOR_HIGHMEM(xx) xx "_movable",

const char * const vmstat_text[] = {
	/* enum zone_stat_item counters */
	"nr_free_pages",
	"nr_zone_inactive_anon",
	"nr_zone_active_anon",
	"nr_zone_inactive_file",
	"nr_zone_active_file",
	"nr_zone_unevictable",
	"nr_zone_write_pending",
	"nr_mlock",
	"nr_page_table_pages",
	"nr_kernel_stack",
	"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
	"nr_zspages",
#endif
	"nr_free_cma",

	/* enum numa_stat_item counters */
#ifdef CONFIG_NUMA
	"numa_hit",
	"numa_miss",
	"numa_foreign",
	"numa_interleave",
	"numa_local",
	"numa_other",
#endif

	/* enum node_stat_item counters */
	"nr_inactive_anon",
	"nr_active_anon",
	"nr_inactive_file",
	"nr_active_file",
	"nr_unevictable",
	"nr_slab_reclaimable",
	"nr_slab_unreclaimable",
	"nr_isolated_anon",
	"nr_isolated_file",
	"workingset_nodes",
	"workingset_refault",
	"workingset_activate",
	"workingset_restore",
	"workingset_nodereclaim",
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_dirty",
	"nr_writeback",
	"nr_writeback_temp",
	"nr_shmem",
	"nr_shmem_hugepages",
	"nr_shmem_pmdmapped",
	"nr_anon_transparent_hugepages",
	"nr_unstable",
	"nr_vmscan_write",
	"nr_vmscan_immediate_reclaim",
	"nr_dirtied",
	"nr_written",
	"nr_kernel_misc_reclaimable",

	/* enum writeback_stat_item counters */
	"nr_dirty_threshold",
	"nr_dirty_background_threshold",

#ifdef CONFIG_VM_EVENT_COUNTERS
	/* enum vm_event_item counters */
	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	TEXTS_FOR_ZONES("pgalloc")
	TEXTS_FOR_ZONES("allocstall")
	TEXTS_FOR_ZONES("pgskip")

	"pgfree",
	"pgactivate",
	"pgdeactivate",
	"pglazyfree",

	"pgfault",
	"pgmajfault",
	"pglazyfreed",

	"pgrefill",
	"pgsteal_kswapd",
	"pgsteal_direct",
	"pgscan_kswapd",
	"pgscan_direct",
	"pgscan_direct_throttle",

#ifdef CONFIG_NUMA
	"zone_reclaim_failed",
#endif
	"pginodesteal",
	"slabs_scanned",
	"kswapd_inodesteal",
	"kswapd_low_wmark_hit_quickly",
	"kswapd_high_wmark_hit_quickly",
	"pageoutrun",

	"pgrotated",

	"drop_pagecache",
	"drop_slab",
	"oom_kill",

#ifdef CONFIG_NUMA_BALANCING
	"numa_pte_updates",
	"numa_huge_pte_updates",
	"numa_hint_faults",
	"numa_hint_faults_local",
	"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
	"pgmigrate_success",
	"pgmigrate_fail",
#endif
#ifdef CONFIG_COMPACTION
	"compact_migrate_scanned",
	"compact_free_scanned",
	"compact_isolated",
	"compact_stall",
	"compact_fail",
	"compact_success",
	"compact_daemon_wake",
	"compact_daemon_migrate_scanned",
	"compact_daemon_free_scanned",
#endif

#ifdef CONFIG_HUGETLB_PAGE
	"htlb_buddy_alloc_success",
	"htlb_buddy_alloc_fail",
#endif
	"unevictable_pgs_culled",
	"unevictable_pgs_scanned",
	"unevictable_pgs_rescued",
	"unevictable_pgs_mlocked",
	"unevictable_pgs_munlocked",
	"unevictable_pgs_cleared",
	"unevictable_pgs_stranded",

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	"thp_fault_alloc",
	"thp_fault_fallback",
	"thp_collapse_alloc",
	"thp_collapse_alloc_failed",
	"thp_file_alloc",
	"thp_file_mapped",
	"thp_split_page",
	"thp_split_page_failed",
	"thp_deferred_split_page",
	"thp_split_pmd",
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	"thp_split_pud",
#endif
	"thp_zero_page_alloc",
	"thp_zero_page_alloc_failed",
	"thp_swpout",
	"thp_swpout_fallback",
#endif
#ifdef CONFIG_MEMORY_BALLOON
	"balloon_inflate",
	"balloon_deflate",
#ifdef CONFIG_BALLOON_COMPACTION
	"balloon_migrate",
#endif
#endif /* CONFIG_MEMORY_BALLOON */
#ifdef CONFIG_DEBUG_TLBFLUSH
	"nr_tlb_remote_flush",
	"nr_tlb_remote_flush_received",
	"nr_tlb_local_flush_all",
	"nr_tlb_local_flush_one",
#endif /* CONFIG_DEBUG_TLBFLUSH */

#ifdef CONFIG_DEBUG_VM_VMACACHE
	"vmacache_find_calls",
	"vmacache_find_hits",
#endif
#ifdef CONFIG_SWAP
	"swap_ra",
	"swap_ra_hit",
#endif
#endif /* CONFIG_VM_EVENT_COUNTERS */
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */

#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
     defined(CONFIG_PROC_FS)
static void *frag_start(struct seq_file *m, loff_t *pos)
{
	pg_data_t *pgdat;
	loff_t node = *pos;

	for (pgdat = first_online_pgdat();
	     pgdat && node;
	     pgdat = next_online_pgdat(pgdat))
		--node;

	return pgdat;
}

static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	(*pos)++;
	return next_online_pgdat(pgdat);
}

static void frag_stop(struct seq_file *m, void *arg)
{
}
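
/*
 * Walk zones in a node and print using a callback.
 * If @assert_populated is true, only use the callback for zones that are
 * populated.
 */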
static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
		bool assert_populated, bool nolock,
		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
{
	struct zone *zone;
	struct zone *node_zones = pgdat->node_zones;
	unsigned long flags;

	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
		if (assert_populated && !populated_zone(zone))
			continue;

		if (!nolock)
			spin_lock_irqsave(&zone->lock, flags);
		print(m, pgdat, zone);
		if (!nolock)
			spin_unlock_irqrestore(&zone->lock, flags);
	}
}
#endif

#ifdef CONFIG_PROC_FS
static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
						struct zone *zone)
{
	int order;

	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
	seq_putc(m, '\n');
}

/*
 * This walks the free areas for each zone in a node
 */
static int frag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, true, false, frag_show_print);
	return 0;
}

static void pagetypeinfo_showfree_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int order, mtype;

	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
		seq_printf(m, "Node %4d, zone %8s, type %12s ",
					pgdat->node_id,
					zone->name,
					migratetype_names[mtype]);
		for (order = 0; order < MAX_ORDER; ++order) {
			unsigned long freecount = 0;
			struct free_area *area;
			struct list_head *curr;

			area = &(zone->free_area[order]);

			list_for_each(curr, &area->free_list[mtype])
				freecount++;
			seq_printf(m, "%6lu ", freecount);
		}
		seq_putc(m, '\n');
	}
}

/* Print out the free pages at each order for each migratetype */
static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
{
	int order;
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* Print header */
	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
	for (order = 0; order < MAX_ORDER; ++order)
		seq_printf(m, "%6d ", order);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);

	return 0;
}

static void pagetypeinfo_showblockcount_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	int mtype;
	unsigned long pfn;
	unsigned long start_pfn = zone->zone_start_pfn;
	unsigned long end_pfn = zone_end_pfn(zone);
	unsigned long count[MIGRATE_TYPES] = { 0, };

	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		struct page *page;

		page = pfn_to_online_page(pfn);
		if (!page)
			continue;

		/* Watch for unexpected holes punched in the memmap */
		if (!memmap_valid_within(pfn, page, zone))
			continue;

		if (page_zone(page) != zone)
			continue;

		mtype = get_pageblock_migratetype(page);

		if (mtype < MIGRATE_TYPES)
			count[mtype]++;
	}

	/* Print counts */
	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12lu ", count[mtype]);
	seq_putc(m, '\n');
}

/* Print out the number of pageblocks for each migratetype */
static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
{
	int mtype;
	pg_data_t *pgdat = (pg_data_t *)arg;

	seq_printf(m, "\n%-23s", "Number of blocks type ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');
	walk_zones_in_node(m, pgdat, true, false,
		pagetypeinfo_showblockcount_print);

	return 0;
}
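
/*
 * Print out the number of pageblocks for each migratetype that contain pages
 * of other types. This gives an indication of how well fallbacks are being
 * contained by rmqueue_fallback(). It requires information from PAGE_OWNER
 * to determine what is going on.
 */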
static void pagetypeinfo_showmixedcount(struct seq_file *m, pg_data_t *pgdat)
{
#ifdef CONFIG_PAGE_OWNER
	int mtype;

	if (!static_branch_unlikely(&page_owner_inited))
		return;

	drain_all_pages(NULL);

	seq_printf(m, "\n%-23s", "Number of mixed blocks ");
	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
		seq_printf(m, "%12s ", migratetype_names[mtype]);
	seq_putc(m, '\n');

	walk_zones_in_node(m, pgdat, true, true,
		pagetypeinfo_showmixedcount_print);
#endif /* CONFIG_PAGE_OWNER */
}

/*
 * This prints out statistics in relation to grouping pages by mobility.
 * It is expensive to collect so do not constantly read the file.
 */
static int pagetypeinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	seq_printf(m, "Page block order: %d\n", pageblock_order);
	seq_printf(m, "Pages per block: %lu\n", pageblock_nr_pages);
	seq_putc(m, '\n');
	pagetypeinfo_showfree(m, pgdat);
	pagetypeinfo_showblockcount(m, pgdat);
	pagetypeinfo_showmixedcount(m, pgdat);

	return 0;
}

static const struct seq_operations fragmentation_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= frag_show,
};

static const struct seq_operations pagetypeinfo_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= pagetypeinfo_show,
};

static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
{
	int zid;

	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
		struct zone *compare = &pgdat->node_zones[zid];

		if (populated_zone(compare))
			return zone == compare;
	}

	return false;
}

static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
							struct zone *zone)
{
	int i;

	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
	if (is_zone_first_populated(pgdat, zone)) {
		seq_printf(m, "\n per-node stats");
		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
			seq_printf(m, "\n %-12s %lu",
				vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
				NR_VM_NUMA_STAT_ITEMS],
				node_page_state(pgdat, i));
		}
	}
	seq_printf(m,
		   "\n pages free %lu"
		   "\n min %lu"
		   "\n low %lu"
		   "\n high %lu"
		   "\n spanned %lu"
		   "\n present %lu"
		   "\n managed %lu",
		   zone_page_state(zone, NR_FREE_PAGES),
		   min_wmark_pages(zone),
		   low_wmark_pages(zone),
		   high_wmark_pages(zone),
		   zone->spanned_pages,
		   zone->present_pages,
		   zone_managed_pages(zone));

	seq_printf(m,
		   "\n protection: (%ld",
		   zone->lowmem_reserve[0]);
	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
		seq_printf(m, ", %ld", zone->lowmem_reserve[i]);
	seq_putc(m, ')');

	/* If unpopulated, there is nothing more to show */
	if (!populated_zone(zone)) {
		seq_putc(m, '\n');
		return;
	}

	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		seq_printf(m, "\n %-12s %lu", vmstat_text[i],
			   zone_page_state(zone, i));

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		seq_printf(m, "\n %-12s %lu",
			   vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
			   zone_numa_state_snapshot(zone, i));
#endif

	seq_printf(m, "\n pagesets");
	for_each_online_cpu(i) {
		struct per_cpu_pageset *pageset;

		pageset = per_cpu_ptr(zone->pageset, i);
		seq_printf(m,
			   "\n cpu: %i"
			   "\n count: %i"
			   "\n high: %i"
			   "\n batch: %i",
			   i,
			   pageset->pcp.count,
			   pageset->pcp.high,
			   pageset->pcp.batch);
#ifdef CONFIG_SMP
		seq_printf(m, "\n vm stats threshold: %d",
			   pageset->stat_threshold);
#endif
	}
	seq_printf(m,
		   "\n node_unreclaimable: %u"
		   "\n start_pfn: %lu",
		   pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
		   zone->zone_start_pfn);
	seq_putc(m, '\n');
}
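
/*
 * Output information about zones in @pgdat.  All zones are printed regardless
 * of whether they are populated or not: lowmem_reserve_ratio operates on the
 * range of all zones, so it should also include information about unpopulated
 * zones.
 */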
static int zoneinfo_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, false, false, zoneinfo_show_print);
	return 0;
}

static const struct seq_operations zoneinfo_op = {
	.start	= frag_start, /* iterate over all zones. The same as in
			       * fragmentation. */
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= zoneinfo_show,
};

enum writeback_stat_item {
	NR_DIRTY_THRESHOLD,
	NR_DIRTY_BG_THRESHOLD,
	NR_VM_WRITEBACK_STAT_ITEMS,
};

static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
	unsigned long *v;
	int i, stat_items_size;

	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) +
			  NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) +
			  NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) +
			  NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long);

#ifdef CONFIG_VM_EVENT_COUNTERS
	stat_items_size += sizeof(struct vm_event_state);
#endif

	BUILD_BUG_ON(stat_items_size !=
		     ARRAY_SIZE(vmstat_text) * sizeof(unsigned long));
	v = kmalloc(stat_items_size, GFP_KERNEL);
	m->private = v;
	if (!v)
		return ERR_PTR(-ENOMEM);
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
		v[i] = global_zone_page_state(i);
	v += NR_VM_ZONE_STAT_ITEMS;

#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
		v[i] = global_numa_state(i);
	v += NR_VM_NUMA_STAT_ITEMS;
#endif

	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
		v[i] = global_node_page_state(i);
	v += NR_VM_NODE_STAT_ITEMS;

	global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
			    v + NR_DIRTY_THRESHOLD);
	v += NR_VM_WRITEBACK_STAT_ITEMS;

#ifdef CONFIG_VM_EVENT_COUNTERS
	all_vm_events(v);
	v[PGPGIN] /= 2;		/* sectors -> kbytes */
	v[PGPGOUT] /= 2;
#endif
	return (unsigned long *)m->private + *pos;
}

static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
{
	(*pos)++;
	if (*pos >= ARRAY_SIZE(vmstat_text))
		return NULL;
	return (unsigned long *)m->private + *pos;
}

static int vmstat_show(struct seq_file *m, void *arg)
{
	unsigned long *l = arg;
	unsigned long off = l - (unsigned long *)m->private;

	seq_puts(m, vmstat_text[off]);
	seq_put_decimal_ull(m, " ", *l);
	seq_putc(m, '\n');
	return 0;
}

static void vmstat_stop(struct seq_file *m, void *arg)
{
	kfree(m->private);
	m->private = NULL;
}

static const struct seq_operations vmstat_op = {
	.start	= vmstat_start,
	.next	= vmstat_next,
	.stop	= vmstat_stop,
	.show	= vmstat_show,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;

#ifdef CONFIG_PROC_FS
static void refresh_vm_stats(struct work_struct *work)
{
	refresh_cpu_vm_stats(true);
}

int vmstat_refresh(struct ctl_table *table, int write,
		   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	long val;
	int err;
	int i;
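
	/*
	 * The regular update, every sysctl_stat_interval, may come later
	 * than expected: leaving a significant amount in per_cpu buckets.
	 * This is particularly misleading when checking a quantity of HUGE
	 * pages, immediately after running a test. /proc/sys/vm/stat_refresh,
	 * which can equally be echo'ed to or cat'ted from (by root),
	 * can be used to update the stats just before reading them.
	 *
	 * Oh, and since global_zone_page_state() etc. are so careful to hide
	 * transiently negative values, report an error here if any of
	 * the stats is negative, so we know to go looking for imbalance.
	 */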
	err = schedule_on_each_cpu(refresh_vm_stats);
	if (err)
		return err;
	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
		val = atomic_long_read(&vm_zone_stat[i]);
		if (val < 0) {
			pr_warn("%s: %s %ld\n",
				__func__, vmstat_text[i], val);
			err = -EINVAL;
		}
	}
#ifdef CONFIG_NUMA
	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
		val = atomic_long_read(&vm_numa_stat[i]);
		if (val < 0) {
			pr_warn("%s: %s %ld\n",
				__func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val);
			err = -EINVAL;
		}
	}
#endif
	if (err)
		return err;
	if (write)
		*ppos += *lenp;
	else
		*lenp = 0;
	return 0;
}
#endif /* CONFIG_PROC_FS */

static void vmstat_update(struct work_struct *w)
{
	if (refresh_cpu_vm_stats(true)) {
		/*
		 * Counters were updated so we expect more updates
		 * to occur in the future. Keep on running the
		 * update worker thread.
		 */
		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
				this_cpu_ptr(&vmstat_work),
				round_jiffies_relative(sysctl_stat_interval));
	}
}
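
/*
 * Check if the diffs for a certain cpu indicate that
 * an update is needed.
 */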
static bool need_update(int cpu)
{
	struct zone *zone;

	for_each_populated_zone(zone) {
		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);

		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
#ifdef CONFIG_NUMA
		BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
#endif

		/*
		 * The fast way of checking if there are any vmstat diffs.
		 */
		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
			       sizeof(p->vm_stat_diff[0])))
			return true;
#ifdef CONFIG_NUMA
		if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
			       sizeof(p->vm_numa_stat_diff[0])))
			return true;
#endif
	}
	return false;
}
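
/*
 * Switch off vmstat processing and then fold all the remaining differentials
 * until the diffs stay at zero. The function is used by NOHZ and can only be
 * invoked when tick processing is not active.
 */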
void quiet_vmstat(void)
{
	if (system_state != SYSTEM_RUNNING)
		return;

	if (!delayed_work_pending(this_cpu_ptr(&vmstat_work)))
		return;

	if (!need_update(smp_processor_id()))
		return;

	/*
	 * Just refresh counters and do not care about the pending delayed
	 * vmstat_update. It doesn't fire that often to matter and canceling
	 * it would be too expensive from this path.
	 * vmstat_shepherd will take care about that for us.
	 */
	refresh_cpu_vm_stats(false);
}
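
/*
 * Shepherd worker thread that checks the
 * differentials of processors that have their worker
 * threads for allocation stats turned off.
 */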
static void vmstat_shepherd(struct work_struct *w);

static DECLARE_DEFERRABLE_WORK(shepherd, vmstat_shepherd);

static void vmstat_shepherd(struct work_struct *w)
{
	int cpu;

	get_online_cpus();
	/* Check processors whose vmstat worker threads have been disabled */
	for_each_online_cpu(cpu) {
		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);

		if (!delayed_work_pending(dw) && need_update(cpu))
			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
	}
	put_online_cpus();

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

static void __init start_shepherd_timer(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
			vmstat_update);

	schedule_delayed_work(&shepherd,
		round_jiffies_relative(sysctl_stat_interval));
}

static void __init init_cpu_node_state(void)
{
	int node;

	for_each_online_node(node) {
		if (cpumask_weight(cpumask_of_node(node)) > 0)
			node_set_state(node, N_CPU);
	}
}

static int vmstat_cpu_online(unsigned int cpu)
{
	refresh_zone_stat_thresholds();
	node_set_state(cpu_to_node(cpu), N_CPU);
	return 0;
}

static int vmstat_cpu_down_prep(unsigned int cpu)
{
	cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
	return 0;
}

static int vmstat_cpu_dead(unsigned int cpu)
{
	const struct cpumask *node_cpus;
	int node;

	node = cpu_to_node(cpu);

	refresh_zone_stat_thresholds();
	node_cpus = cpumask_of_node(node);
	if (cpumask_weight(node_cpus) > 0)
		return 0;

	node_clear_state(node, N_CPU);
	return 0;
}

#endif /* CONFIG_SMP */

struct workqueue_struct *mm_percpu_wq;

void __init init_mm_internals(void)
{
	int ret __maybe_unused;

	mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);

#ifdef CONFIG_SMP
	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
					NULL, vmstat_cpu_dead);
	if (ret < 0)
		pr_err("vmstat: failed to register 'dead' hotplug state\n");

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmstat:online",
					vmstat_cpu_online,
					vmstat_cpu_down_prep);
	if (ret < 0)
		pr_err("vmstat: failed to register 'online' hotplug state\n");

	get_online_cpus();
	init_cpu_node_state();
	put_online_cpus();

	start_shepherd_timer();
#endif
#ifdef CONFIG_PROC_FS
	proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
	proc_create_seq("pagetypeinfo", 0444, NULL, &pagetypeinfo_op);
	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
	proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
#endif
}

#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
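
/*
 * Return an index indicating how much of the available free memory is
 * unusable for an allocation of the requested size.
 */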
static int unusable_free_index(unsigned int order,
				struct contig_page_info *info)
{
	/* No free memory is interpreted as all free memory is unusable */
	if (info->free_pages == 0)
		return 1000;

	/*
	 * Index should be a value between 0 and 1. Return a value to 3
	 * decimal places.
	 *
	 * 0 => no fragmentation
	 * 1 => high fragmentation
	 */
	return div_u64((info->free_pages - (info->free_blocks_suitable << order)) * 1000ULL, info->free_pages);
}

static void unusable_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = unusable_free_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}
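
/*
 * Display unusable free space index
 *
 * The unusable free space index measures how much of the available free
 * memory cannot be used to satisfy an allocation of a given size and is a
 * value between 0 and 1. The higher the value, the more of free memory is
 * unusable and by implication, the worse the external fragmentation is. This
 * can be expressed as a percentage by multiplying by 100.
 */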
static int unusable_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	/* check memoryless node */
	if (!node_state(pgdat->node_id, N_MEMORY))
		return 0;

	walk_zones_in_node(m, pgdat, true, false, unusable_show_print);

	return 0;
}

static const struct seq_operations unusable_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= unusable_show,
};

static int unusable_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &unusable_op);
}

static const struct file_operations unusable_file_ops = {
	.open		= unusable_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static void extfrag_show_print(struct seq_file *m,
					pg_data_t *pgdat, struct zone *zone)
{
	unsigned int order;
	int index;

	/* Alloc on stack as interrupts are disabled for zone walk */
	struct contig_page_info info;

	seq_printf(m, "Node %d, zone %8s ",
				pgdat->node_id,
				zone->name);
	for (order = 0; order < MAX_ORDER; ++order) {
		fill_contig_page_info(zone, order, &info);
		index = __fragmentation_index(order, &info);
		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
	}

	seq_putc(m, '\n');
}

/*
 * Display fragmentation index for orders that allocations will fail for
 */
static int extfrag_show(struct seq_file *m, void *arg)
{
	pg_data_t *pgdat = (pg_data_t *)arg;

	walk_zones_in_node(m, pgdat, true, false, extfrag_show_print);

	return 0;
}

static const struct seq_operations extfrag_op = {
	.start	= frag_start,
	.next	= frag_next,
	.stop	= frag_stop,
	.show	= extfrag_show,
};

static int extfrag_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &extfrag_op);
}

static const struct file_operations extfrag_file_ops = {
	.open		= extfrag_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init extfrag_debug_init(void)
{
	struct dentry *extfrag_debug_root;

	extfrag_debug_root = debugfs_create_dir("extfrag", NULL);

	debugfs_create_file("unusable_index", 0444, extfrag_debug_root, NULL,
			    &unusable_file_ops);

	debugfs_create_file("extfrag_index", 0444, extfrag_debug_root, NULL,
			    &extfrag_file_ops);

	return 0;
}

module_init(extfrag_debug_init);
#endif /* CONFIG_DEBUG_FS && CONFIG_COMPACTION */