/*
 * memcontrol.c - Memory Controller
 *
 * Accounts and limits the memory used by tasks grouped into memory
 * cgroups (memcg): page cache, anonymous memory, swap and, when
 * configured, kernel memory and TCP buffer usage.
 */
#include <linux/page_counter.h>
#include <linux/memcontrol.h>
#include <linux/cgroup.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp.h>
#include <linux/page-flags.h>
#include <linux/backing-dev.h>
#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
#include <linux/limits.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/sort.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
#include <linux/vmpressure.h>
#include <linux/mm_inline.h>
#include <linux/page_cgroup.h>
#include <linux/cpu.h>
#include <linux/oom.h>
#include "internal.h"
#include <net/sock.h>
#include <net/ip.h>
#include <net/tcp_memcontrol.h>
#include "slab.h"

#include <asm/uaccess.h>

#include <trace/events/vmscan.h>

struct cgroup_subsys mem_cgroup_subsys __read_mostly;
EXPORT_SYMBOL(mem_cgroup_subsys);

#define MEM_CGROUP_RECLAIM_RETRIES 5
static struct mem_cgroup *root_mem_cgroup __read_mostly;

#ifdef CONFIG_MEMCG_SWAP
/* Whether memory+swap accounting is active (toggled at boot time). */
int do_swap_account __read_mostly;

/* Compile-time default for do_swap_account. */
#ifdef CONFIG_MEMCG_SWAP_ENABLED
static int really_do_swap_account __initdata = 1;
#else
static int really_do_swap_account __initdata = 0;
#endif

#else
#define do_swap_account 0
#endif

/*
 * Per-memcg statistics, kept in per-CPU counters and summed up on read.
 */
enum mem_cgroup_stat_index {
	MEM_CGROUP_STAT_CACHE,		/* # of pages charged as file cache */
	MEM_CGROUP_STAT_RSS,		/* # of pages charged as anon rss */
	MEM_CGROUP_STAT_RSS_HUGE,	/* # of charged anon transparent hugepages */
	MEM_CGROUP_STAT_FILE_MAPPED,	/* # of mapped file pages */
	MEM_CGROUP_STAT_SWAP,		/* # of pages swapped out */
	MEM_CGROUP_STAT_NSTATS,
};

static const char * const mem_cgroup_stat_names[] = {
	"cache",
	"rss",
	"rss_huge",
	"mapped_file",
	"swap",
};

enum mem_cgroup_events_index {
	MEM_CGROUP_EVENTS_PGPGIN,
	MEM_CGROUP_EVENTS_PGPGOUT,
	MEM_CGROUP_EVENTS_PGFAULT,
	MEM_CGROUP_EVENTS_PGMAJFAULT,
	MEM_CGROUP_EVENTS_NSTATS,
};

static const char * const mem_cgroup_events_names[] = {
	"pgpgin",
	"pgpgout",
	"pgfault",
	"pgmajfault",
};

static const char * const mem_cgroup_lru_names[] = {
	"inactive_anon",
	"active_anon",
	"inactive_file",
	"active_file",
	"unevictable",
};

/*
 * Per-memcg "page event" targets: every time nr_page_events advances by
 * the given step, the corresponding periodic work (threshold checks,
 * soft limit tree update, NUMA scan info refresh) is run.
 */
enum mem_cgroup_events_target {
	MEM_CGROUP_TARGET_THRESH,
	MEM_CGROUP_TARGET_SOFTLIMIT,
	MEM_CGROUP_TARGET_NUMAINFO,
	MEM_CGROUP_NTARGETS,
};
#define THRESHOLDS_EVENTS_TARGET 128
#define SOFTLIMIT_EVENTS_TARGET 1024
#define NUMAINFO_EVENTS_TARGET 1024

#define MEM_CGROUP_ID_MAX USHRT_MAX

static void mem_cgroup_id_put(struct mem_cgroup *memcg);
static unsigned short mem_cgroup_id(struct mem_cgroup *memcg);

struct mem_cgroup_stat_cpu {
	long count[MEM_CGROUP_STAT_NSTATS];
	unsigned long events[MEM_CGROUP_EVENTS_NSTATS];
	unsigned long nr_page_events;
	unsigned long targets[MEM_CGROUP_NTARGETS];
};

struct mem_cgroup_reclaim_iter {
	/*
	 * Last scanned hierarchy member; only reused if last_dead_count
	 * still matches root->dead_count and css_tryget() succeeds
	 * (see mem_cgroup_iter()).
	 */
	struct mem_cgroup *last_visited;
	unsigned long last_dead_count;

	/* scan generation, shared with the per-zone reclaim cookies */
	unsigned int generation;
};

/*
 * per-zone information in memory controller.
 */
struct mem_cgroup_per_zone {
	struct lruvec		lruvec;
	unsigned long		lru_size[NR_LRU_LISTS];

	struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];

	struct rb_node		tree_node;	/* RB tree node */
	unsigned long		usage_in_excess;/* number of pages above the
						   soft limit */
	bool			on_tree;
	struct mem_cgroup	*memcg;		/* back pointer to the owning
						   memcg */
};

struct mem_cgroup_per_node {
	struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
};

struct mem_cgroup_lru_info {
	struct mem_cgroup_per_node *nodeinfo[0];
};

/*
 * Cgroups above their soft limit are maintained in a RB-Tree, independent
 * of their hierarchy representation, so that the group with the largest
 * excess can be found quickly during soft limit reclaim.
 */
struct mem_cgroup_tree_per_zone {
	struct rb_root rb_root;
	spinlock_t lock;
};

struct mem_cgroup_tree_per_node {
	struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
};

struct mem_cgroup_tree {
	struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
};

static struct mem_cgroup_tree soft_limit_tree __read_mostly;

struct mem_cgroup_threshold {
	struct eventfd_ctx *eventfd;
	unsigned long threshold;
};

/* For threshold */
struct mem_cgroup_threshold_ary {
	/* An array index pointing to the threshold just below or equal to usage */
	int current_threshold;
	/* Size of entries[] */
	unsigned int size;
	/* Array of thresholds */
	struct mem_cgroup_threshold entries[0];
};

struct mem_cgroup_thresholds {
	/* Primary thresholds array */
	struct mem_cgroup_threshold_ary *primary;
	/*
	 * Spare array, kept around so that a new (typically smaller) array
	 * can be built without allocating while notifications are being
	 * delivered.
	 */
	struct mem_cgroup_threshold_ary *spare;
};

/* for OOM */
struct mem_cgroup_eventfd_list {
	struct list_head list;
	struct eventfd_ctx *eventfd;
};

static void mem_cgroup_threshold(struct mem_cgroup *memcg);
static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);

/*
 * The memory controller data structure. The memory controller accounts
 * both page cache and anonymous (RSS) pages per cgroup; kernel memory
 * and TCP buffers can also be accounted when the corresponding counters
 * are configured.
 */
struct mem_cgroup {
	struct cgroup_subsys_state css;

	/* unique id, also recorded in swap_cgroup for swapped-out pages */
	unsigned short id;

	/* accounted memory, in pages */
	struct page_counter memory;

	unsigned long soft_limit;

	/* vmpressure notifications */
	struct vmpressure vmpressure;

	union {
		/* the counter to account for mem+swap usage */
		struct page_counter memsw;

		/*
		 * Once the memcg is dead, its memory is freed either from
		 * RCU context (rcu_freeing) or, because vfree() must run
		 * in process context, from a workqueue (work_freeing).
		 * These fields are only needed after memsw is no longer
		 * used, so they share its storage to save space.
		 */
		struct rcu_head rcu_freeing;
		struct work_struct work_freeing;
	};

	/* kernel memory accounting (slab, stacks, TCP buffers) */
	struct page_counter kmem;

	/* Should the accounting and control be hierarchical, per subtree? */
	bool use_hierarchy;
	unsigned long kmem_account_flags; /* see KMEM_ACCOUNTED_* below */

	bool		oom_lock;
	atomic_t	under_oom;
	atomic_t	oom_wakeups;

	atomic_t	refcnt;

	int	swappiness;
	/* OOM-Killer disable */
	int		oom_kill_disable;

	/* set when the memory and memsw limits are equal (no swap headroom) */
	bool		memsw_is_minimum;

	/* protect arrays of thresholds */
	struct mutex thresholds_lock;

	/* thresholds for memory usage. RCU-protected */
	struct mem_cgroup_thresholds thresholds;

	/* thresholds for mem+swap usage. RCU-protected */
	struct mem_cgroup_thresholds memsw_thresholds;

	/* For oom notifier event fd */
	struct list_head oom_notify;

	/*
	 * Should we move charges of a task when the task is moved into
	 * this mem_cgroup? And what type of charges should we move?
	 */
	unsigned long	move_charge_at_immigrate;
	/* set > 0 if pages under this cgroup are moving to another cgroup */
	atomic_t	moving_account;
	/* taken only while moving_account > 0 */
	spinlock_t	move_lock;
	/* percpu counters */
	struct mem_cgroup_stat_cpu __percpu *stat;
	/*
	 * used when a cpu is offlined or for other synchronizations,
	 * see mem_cgroup_read_stat()
	 */
	struct mem_cgroup_stat_cpu nocpu_base;
	spinlock_t pcp_counter_lock;

	atomic_t	dead_count;
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
	struct tcp_memcontrol tcp_mem;
#endif
#if defined(CONFIG_MEMCG_KMEM)
	/* analogous to slab_common's slab_caches list, but per-memcg */
	struct list_head memcg_slab_caches;
	/* not a spinlock: walking the list can take a long time */
	struct mutex slab_caches_mutex;
	/* index in the kmem_cache->memcg_params->memcg_caches array */
	int kmemcg_id;
#endif

	int last_scanned_node;
#if MAX_NUMNODES > 1
	nodemask_t	scan_nodes;
	atomic_t	numainfo_events;
	atomic_t	numainfo_updating;
#endif

	/*
	 * Per-node/zone LRU info; must be the last member because it is
	 * a variable-size array (nodeinfo[0]).
	 */
	struct mem_cgroup_lru_info info;
};

static size_t memcg_size(void)
{
	return sizeof(struct mem_cgroup) +
		nr_node_ids * sizeof(struct mem_cgroup_per_node *);
}

/* internal-only flags describing the state of kmem accounting */
enum {
	KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */
	KMEM_ACCOUNTED_ACTIVATED, /* static key has been enabled for it */
	KMEM_ACCOUNTED_DEAD, /* dead memcg with pending kmem charges */
};

/* the first two bits say whether accounting is (or was) enabled */
#define KMEM_ACCOUNTED_MASK \
	((1 << KMEM_ACCOUNTED_ACTIVE) | (1 << KMEM_ACCOUNTED_ACTIVATED))

407#ifdef CONFIG_MEMCG_KMEM
408static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
409{
410 set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
411}
412
413static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
414{
415 return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
416}
417
418static void memcg_kmem_set_activated(struct mem_cgroup *memcg)
419{
420 set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
421}
422
423static void memcg_kmem_clear_activated(struct mem_cgroup *memcg)
424{
425 clear_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
426}
427
428static void memcg_kmem_mark_dead(struct mem_cgroup *memcg)
429{
430 if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags))
431 set_bit(KMEM_ACCOUNTED_DEAD, &memcg->kmem_account_flags);
432}
433
434static bool memcg_kmem_test_and_clear_dead(struct mem_cgroup *memcg)
435{
436 return test_and_clear_bit(KMEM_ACCOUNTED_DEAD,
437 &memcg->kmem_account_flags);
438}
439#endif

/*
 * Types of charges to be moved. "move_charge_at_immigrate" is treated as
 * a bitmap of these types.
 */
enum move_type {
	MOVE_CHARGE_TYPE_ANON,	/* private anonymous pages and swap of them */
	MOVE_CHARGE_TYPE_FILE,	/* file pages (including shmem) and swap of them */
	NR_MOVE_TYPE,
};

/* "mc" and its members are protected by cgroup_mutex */
static struct move_charge_struct {
	spinlock_t	  lock; /* for from, to */
	struct mem_cgroup *from;
	struct mem_cgroup *to;
	unsigned long immigrate_flags;
	unsigned long precharge;
	unsigned long moved_charge;
	unsigned long moved_swap;
	struct task_struct *moving_task;	/* a task moving charges */
	wait_queue_head_t waitq;		/* a waitq for other context */
} mc = {
	.lock = __SPIN_LOCK_UNLOCKED(mc.lock),
	.waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
};

static bool move_anon(void)
{
	return test_bit(MOVE_CHARGE_TYPE_ANON, &mc.immigrate_flags);
}

static bool move_file(void)
{
	return test_bit(MOVE_CHARGE_TYPE_FILE, &mc.immigrate_flags);
}

/*
 * Maximum loops in reclaim, used to cap soft limit reclaim and prevent
 * infinite loops should they ever occur.
 */
#define MEM_CGROUP_MAX_RECLAIM_LOOPS 100
#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2

enum charge_type {
	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
	MEM_CGROUP_CHARGE_TYPE_ANON,
	MEM_CGROUP_CHARGE_TYPE_SWAPOUT,	/* for accounting swapcache */
	MEM_CGROUP_CHARGE_TYPE_DROP,	/* a page was unused swap cache */
	NR_CHARGE_TYPE,
};

/* for encoding cft->private value on file */
enum res_type {
	_MEM,
	_MEMSWAP,
	_OOM_TYPE,
	_KMEM,
};

#define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val))
#define MEMFILE_TYPE(val) ((val) >> 16 & 0xffff)
#define MEMFILE_ATTR(val) ((val) & 0xffff)
/* Used for OOM notifier */
#define OOM_CONTROL (0)

/*
 * Reclaim flags.
 */
#define MEM_CGROUP_RECLAIM_NOSWAP_BIT 0x0
#define MEM_CGROUP_RECLAIM_NOSWAP (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
#define MEM_CGROUP_RECLAIM_SHRINK_BIT 0x1
#define MEM_CGROUP_RECLAIM_SHRINK (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)

/*
 * The memcg_create_mutex is held whenever a new cgroup is created, so
 * any change that must not race with the appearance of new children
 * has to hold it as well.
 */
static DEFINE_MUTEX(memcg_create_mutex);

static void mem_cgroup_get(struct mem_cgroup *memcg);
static void mem_cgroup_put(struct mem_cgroup *memcg);

static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
{
	return container_of(s, struct mem_cgroup, css);
}
530
531
532struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
533{
534 if (!memcg)
535 memcg = root_mem_cgroup;
536 return &memcg->vmpressure;
537}
538
539struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
540{
541 return &container_of(vmpr, struct mem_cgroup, vmpressure)->css;
542}
543
544struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css)
545{
546 return &mem_cgroup_from_css(css)->vmpressure;
547}
548
549static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
550{
551 return (memcg == root_mem_cgroup);
552}
553
554
555#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
556
557void sock_update_memcg(struct sock *sk)
558{
559 if (mem_cgroup_sockets_enabled) {
560 struct mem_cgroup *memcg;
561 struct cg_proto *cg_proto;
562
563 BUG_ON(!sk->sk_prot->proto_cgroup);
		/*
		 * Socket cloning can throw us here with sk_cgrp already
		 * present for the cloned socket: in that case the clone
		 * only needs an extra reference on the memcg instead of
		 * a fresh lookup.
		 */
573 if (sk->sk_cgrp) {
574 BUG_ON(mem_cgroup_is_root(sk->sk_cgrp->memcg));
575 mem_cgroup_get(sk->sk_cgrp->memcg);
576 return;
577 }
578
579 rcu_read_lock();
580 memcg = mem_cgroup_from_task(current);
581 cg_proto = sk->sk_prot->proto_cgroup(memcg);
582 if (!mem_cgroup_is_root(memcg) && memcg_proto_active(cg_proto)) {
583 mem_cgroup_get(memcg);
584 sk->sk_cgrp = cg_proto;
585 }
586 rcu_read_unlock();
587 }
588}
589EXPORT_SYMBOL(sock_update_memcg);
590
591void sock_release_memcg(struct sock *sk)
592{
593 if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
594 struct mem_cgroup *memcg;
595 WARN_ON(!sk->sk_cgrp->memcg);
596 memcg = sk->sk_cgrp->memcg;
597 mem_cgroup_put(memcg);
598 }
599}
600
601struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
602{
603 if (!memcg || mem_cgroup_is_root(memcg))
604 return NULL;
605
606 return &memcg->tcp_mem.cg_proto;
607}
608EXPORT_SYMBOL(tcp_proto_cgroup);
609
610static void disarm_sock_keys(struct mem_cgroup *memcg)
611{
612 if (!memcg_proto_activated(&memcg->tcp_mem.cg_proto))
613 return;
614 static_key_slow_dec(&memcg_socket_limit_enabled);
615}
616#else
617static void disarm_sock_keys(struct mem_cgroup *memcg)
618{
619}
620#endif
621
622#ifdef CONFIG_MEMCG_KMEM
/*
 * This IDA hands out each kmem-limited memcg's index into the per-cache
 * ->memcg_params->memcg_caches array.  The current size of that array is
 * stored in memcg_limited_groups_array_size and grows as more kmem-limited
 * memcgs are created.
 */
static DEFINE_IDA(kmem_limited_groups);
int memcg_limited_groups_array_size;

/*
 * Bounds for the per-cache memcg_caches array: start small so that
 * unlimited memcgs stay cheap, and cap the array at 65535 entries.
 */
#define MEMCG_CACHES_MIN_SIZE 4
#define MEMCG_CACHES_MAX_SIZE 65535

/*
 * The kmem charging paths are hot, so they are guarded by a static key
 * that is only enabled once the first memcg gets a kmem limit.
 */
struct static_key memcg_kmem_enabled_key;
EXPORT_SYMBOL(memcg_kmem_enabled_key);
666
667static void disarm_kmem_keys(struct mem_cgroup *memcg)
668{
669 if (memcg_kmem_is_active(memcg)) {
670 static_key_slow_dec(&memcg_kmem_enabled_key);
671 ida_simple_remove(&kmem_limited_groups, memcg->kmemcg_id);
672 }
673
674
675
676
677 WARN_ON(page_counter_read(&memcg->kmem));
678}
679#else
680static void disarm_kmem_keys(struct mem_cgroup *memcg)
681{
682}
683#endif
684
685static void disarm_static_keys(struct mem_cgroup *memcg)
686{
687 disarm_sock_keys(memcg);
688 disarm_kmem_keys(memcg);
689}
690
691static void drain_all_stock_async(struct mem_cgroup *memcg);
692
693static struct mem_cgroup_per_zone *
694mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid)
695{
696 VM_BUG_ON((unsigned)nid >= nr_node_ids);
697 return &memcg->info.nodeinfo[nid]->zoneinfo[zid];
698}
699
700struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
701{
702 return &memcg->css;
703}
704
705static struct mem_cgroup_per_zone *
706page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
707{
708 int nid = page_to_nid(page);
709 int zid = page_zonenum(page);
710
711 return mem_cgroup_zoneinfo(memcg, nid, zid);
712}
713
714static struct mem_cgroup_tree_per_zone *
715soft_limit_tree_node_zone(int nid, int zid)
716{
717 return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
718}
719
720static struct mem_cgroup_tree_per_zone *
721soft_limit_tree_from_page(struct page *page)
722{
723 int nid = page_to_nid(page);
724 int zid = page_zonenum(page);
725
726 return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
727}
728
729static void
730__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
731 struct mem_cgroup_per_zone *mz,
732 struct mem_cgroup_tree_per_zone *mctz,
733 unsigned long new_usage_in_excess)
734{
735 struct rb_node **p = &mctz->rb_root.rb_node;
736 struct rb_node *parent = NULL;
737 struct mem_cgroup_per_zone *mz_node;
738
739 if (mz->on_tree)
740 return;
741
742 mz->usage_in_excess = new_usage_in_excess;
743 if (!mz->usage_in_excess)
744 return;
745 while (*p) {
746 parent = *p;
747 mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
748 tree_node);
749 if (mz->usage_in_excess < mz_node->usage_in_excess)
750 p = &(*p)->rb_left;
751
752
753
754
755 else if (mz->usage_in_excess >= mz_node->usage_in_excess)
756 p = &(*p)->rb_right;
757 }
758 rb_link_node(&mz->tree_node, parent, p);
759 rb_insert_color(&mz->tree_node, &mctz->rb_root);
760 mz->on_tree = true;
761}
762
763static void
764__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
765 struct mem_cgroup_per_zone *mz,
766 struct mem_cgroup_tree_per_zone *mctz)
767{
768 if (!mz->on_tree)
769 return;
770 rb_erase(&mz->tree_node, &mctz->rb_root);
771 mz->on_tree = false;
772}
773
774static void
775mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
776 struct mem_cgroup_per_zone *mz,
777 struct mem_cgroup_tree_per_zone *mctz)
778{
779 spin_lock(&mctz->lock);
780 __mem_cgroup_remove_exceeded(memcg, mz, mctz);
781 spin_unlock(&mctz->lock);
782}
783
784static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
785{
786 unsigned long nr_pages = page_counter_read(&memcg->memory);
787 unsigned long soft_limit = ACCESS_ONCE(memcg->soft_limit);
788 unsigned long excess = 0;
789
790 if (nr_pages > soft_limit)
791 excess = nr_pages - soft_limit;
792
793 return excess;
794}
795
796static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
797{
798 unsigned long excess;
799 struct mem_cgroup_per_zone *mz;
800 struct mem_cgroup_tree_per_zone *mctz;
801 int nid = page_to_nid(page);
802 int zid = page_zonenum(page);
803 mctz = soft_limit_tree_from_page(page);
804
805
806
807
808
809 for (; memcg; memcg = parent_mem_cgroup(memcg)) {
810 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
811 excess = soft_limit_excess(memcg);
812
813
814
815
816 if (excess || mz->on_tree) {
817 spin_lock(&mctz->lock);
818
819 if (mz->on_tree)
820 __mem_cgroup_remove_exceeded(memcg, mz, mctz);
821
822
823
824
825 __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
826 spin_unlock(&mctz->lock);
827 }
828 }
829}
830
831static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
832{
833 int node, zone;
834 struct mem_cgroup_per_zone *mz;
835 struct mem_cgroup_tree_per_zone *mctz;
836
837 for_each_node(node) {
838 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
839 mz = mem_cgroup_zoneinfo(memcg, node, zone);
840 mctz = soft_limit_tree_node_zone(node, zone);
841 mem_cgroup_remove_exceeded(memcg, mz, mctz);
842 }
843 }
844}
845
846static struct mem_cgroup_per_zone *
847__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
848{
849 struct rb_node *rightmost = NULL;
850 struct mem_cgroup_per_zone *mz;
851
852retry:
853 mz = NULL;
854 rightmost = rb_last(&mctz->rb_root);
855 if (!rightmost)
856 goto done;
857
858 mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
859
860
861
862
863
864 __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
865 if (!soft_limit_excess(mz->memcg) ||
866 !css_tryget(&mz->memcg->css))
867 goto retry;
868done:
869 return mz;
870}
871
872static struct mem_cgroup_per_zone *
873mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
874{
875 struct mem_cgroup_per_zone *mz;
876
877 spin_lock(&mctz->lock);
878 mz = __mem_cgroup_largest_soft_limit_node(mctz);
879 spin_unlock(&mctz->lock);
880 return mz;
881}
882
/*
 * Implementation note: reading per-cpu statistics for memcg.
 *
 * The per-cpu counters are summed without synchronization, so the value
 * returned can be slightly stale or even transiently negative while
 * updates are in flight on other CPUs.  This is tolerated because the
 * statistics are only used for reporting and heuristics.  Counters of
 * offlined CPUs are folded into nocpu_base by the hotplug callback.
 */
902static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
903 enum mem_cgroup_stat_index idx)
904{
905 long val = 0;
906 int cpu;
907
908 get_online_cpus();
909 for_each_online_cpu(cpu)
910 val += per_cpu(memcg->stat->count[idx], cpu);
911#ifdef CONFIG_HOTPLUG_CPU
912 spin_lock(&memcg->pcp_counter_lock);
913 val += memcg->nocpu_base.count[idx];
914 spin_unlock(&memcg->pcp_counter_lock);
915#endif
916 put_online_cpus();
917 return val;
918}
919
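/* Adjust the memcg swap statistic by +1/-1 as a swap entry is (un)charged. */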
920static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
921 bool charge)
922{
923 int val = (charge) ? 1 : -1;
924 this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
925}
926
927static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
928 enum mem_cgroup_events_index idx)
929{
930 unsigned long val = 0;
931 int cpu;
932
933 for_each_online_cpu(cpu)
934 val += per_cpu(memcg->stat->events[idx], cpu);
935#ifdef CONFIG_HOTPLUG_CPU
936 spin_lock(&memcg->pcp_counter_lock);
937 val += memcg->nocpu_base.events[idx];
938 spin_unlock(&memcg->pcp_counter_lock);
939#endif
940 return val;
941}
942
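/*
 * Update the per-cpu charge statistics and the page-event counter for a
 * (un)charge of @page; @nr_pages is negative on uncharge.
 */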
943static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
944 struct page *page,
945 bool anon, int nr_pages)
946{
947 preempt_disable();
948
949
950
951
952
953 if (anon)
954 __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
955 nr_pages);
956 else
957 __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE],
958 nr_pages);
959
960 if (PageTransHuge(page))
961 __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
962 nr_pages);
963
964
965 if (nr_pages > 0)
966 __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
967 else {
968 __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
969 nr_pages = -nr_pages;
970 }
971
972 __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
973
974 preempt_enable();
975}
976
977unsigned long
978mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
979{
980 struct mem_cgroup_per_zone *mz;
981
982 mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
983 return mz->lru_size[lru];
984}
985
986static unsigned long
987mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
988 unsigned int lru_mask)
989{
990 struct mem_cgroup_per_zone *mz;
991 enum lru_list lru;
992 unsigned long ret = 0;
993
994 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
995
996 for_each_lru(lru) {
997 if (BIT(lru) & lru_mask)
998 ret += mz->lru_size[lru];
999 }
1000 return ret;
1001}
1002
1003static unsigned long
1004mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
1005 int nid, unsigned int lru_mask)
1006{
1007 u64 total = 0;
1008 int zid;
1009
1010 for (zid = 0; zid < MAX_NR_ZONES; zid++)
1011 total += mem_cgroup_zone_nr_lru_pages(memcg,
1012 nid, zid, lru_mask);
1013
1014 return total;
1015}
1016
1017static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
1018 unsigned int lru_mask)
1019{
1020 int nid;
1021 u64 total = 0;
1022
1023 for_each_node_state(nid, N_MEMORY)
1024 total += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
1025 return total;
1026}
1027
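/*
 * Returns true when the given periodic target is due, i.e. the per-cpu
 * page-event counter has passed it, and advances the target.
 */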
1028static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
1029 enum mem_cgroup_events_target target)
1030{
1031 unsigned long val, next;
1032
1033 val = __this_cpu_read(memcg->stat->nr_page_events);
1034 next = __this_cpu_read(memcg->stat->targets[target]);
1035
1036 if ((long)next - (long)val < 0) {
1037 switch (target) {
1038 case MEM_CGROUP_TARGET_THRESH:
1039 next = val + THRESHOLDS_EVENTS_TARGET;
1040 break;
1041 case MEM_CGROUP_TARGET_SOFTLIMIT:
1042 next = val + SOFTLIMIT_EVENTS_TARGET;
1043 break;
1044 case MEM_CGROUP_TARGET_NUMAINFO:
1045 next = val + NUMAINFO_EVENTS_TARGET;
1046 break;
1047 default:
1048 break;
1049 }
1050 __this_cpu_write(memcg->stat->targets[target], next);
1051 return true;
1052 }
1053 return false;
1054}
1055
1056
1057
1058
1059
1060static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
1061{
1062 preempt_disable();
1063
1064 if (unlikely(mem_cgroup_event_ratelimit(memcg,
1065 MEM_CGROUP_TARGET_THRESH))) {
1066 bool do_softlimit;
1067 bool do_numainfo __maybe_unused;
1068
1069 do_softlimit = mem_cgroup_event_ratelimit(memcg,
1070 MEM_CGROUP_TARGET_SOFTLIMIT);
1071#if MAX_NUMNODES > 1
1072 do_numainfo = mem_cgroup_event_ratelimit(memcg,
1073 MEM_CGROUP_TARGET_NUMAINFO);
1074#endif
1075 preempt_enable();
1076
1077 mem_cgroup_threshold(memcg);
1078 if (unlikely(do_softlimit))
1079 mem_cgroup_update_tree(memcg, page);
1080#if MAX_NUMNODES > 1
1081 if (unlikely(do_numainfo))
1082 atomic_inc(&memcg->numainfo_events);
1083#endif
1084 } else
1085 preempt_enable();
1086}
1087
1088struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
1089{
1090 return mem_cgroup_from_css(
1091 cgroup_subsys_state(cont, mem_cgroup_subsys_id));
1092}
1093
1094struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
1095{
1096
1097
1098
1099
1100
1101 if (unlikely(!p))
1102 return NULL;
1103
1104 return mem_cgroup_from_css(task_subsys_state(p, mem_cgroup_subsys_id));
1105}
1106
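/*
 * Look up the memcg of mm->owner and take a css reference on it.  Uses
 * css_tryget() in a loop because the owner may be migrating between
 * cgroups while we look.
 */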
1107struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
1108{
1109 struct mem_cgroup *memcg = NULL;
1110
1111 if (!mm)
1112 return NULL;
1113
1114
1115
1116
1117
1118 rcu_read_lock();
1119 do {
1120 memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
1121 if (unlikely(!memcg))
1122 break;
1123 } while (!css_tryget(&memcg->css));
1124 rcu_read_unlock();
1125 return memcg;
1126}
1127
1128
1129
1130
1131
1132
1133
1134static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
1135 struct mem_cgroup *last_visited)
1136{
1137 struct cgroup *prev_cgroup, *next_cgroup;
1138
1139
1140
1141
1142
1143 if (!last_visited)
1144 return root;
1145
1146 prev_cgroup = (last_visited == root) ? NULL
1147 : last_visited->css.cgroup;
1148skip_node:
1149 next_cgroup = cgroup_next_descendant_pre(
1150 prev_cgroup, root->css.cgroup);
1151
1152
1153
1154
1155
1156
1157
1158
1159 if (next_cgroup) {
1160 struct mem_cgroup *mem = mem_cgroup_from_cont(
1161 next_cgroup);
1162 if (css_tryget(&mem->css))
1163 return mem;
1164 else {
1165 prev_cgroup = next_cgroup;
1166 goto skip_node;
1167 }
1168 }
1169
1170 return NULL;
1171}
1172
/**
 * mem_cgroup_iter - iterate over memory cgroup hierarchy
 * @root: hierarchy root
 * @prev: previously returned memcg, NULL on first invocation
 * @reclaim: cookie for shared reclaim walks, NULL for full walks
 *
 * Returns references to children of the hierarchy below @root, or
 * @root itself, or %NULL after a full round-trip.
 *
 * Caller must pass the return value in @prev on subsequent
 * invocations for reference counting, or use mem_cgroup_iter_break()
 * to cancel a hierarchy walk before the round-trip is complete.
 *
 * Reclaimers can specify a zone and a priority level in @reclaim to
 * divide up the memcgs in the hierarchy among all concurrent
 * reclaimers operating on the same zone and priority.
 */
1190struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
1191 struct mem_cgroup *prev,
1192 struct mem_cgroup_reclaim_cookie *reclaim)
1193{
1194 struct mem_cgroup *memcg = NULL;
1195 struct mem_cgroup *last_visited = NULL;
1196 unsigned long uninitialized_var(dead_count);
1197
1198 if (mem_cgroup_disabled())
1199 return NULL;
1200
1201 if (!root)
1202 root = root_mem_cgroup;
1203
1204 if (prev && !reclaim)
1205 last_visited = prev;
1206
1207 if (!root->use_hierarchy && root != root_mem_cgroup) {
1208 if (prev)
1209 goto out_css_put;
1210 return root;
1211 }
1212
1213 rcu_read_lock();
1214 while (!memcg) {
1215 struct mem_cgroup_reclaim_iter *uninitialized_var(iter);
1216
1217 if (reclaim) {
1218 int nid = zone_to_nid(reclaim->zone);
1219 int zid = zone_idx(reclaim->zone);
1220 struct mem_cgroup_per_zone *mz;
1221
1222 mz = mem_cgroup_zoneinfo(root, nid, zid);
1223 iter = &mz->reclaim_iter[reclaim->priority];
1224 if (prev && reclaim->generation != iter->generation) {
1225 iter->last_visited = NULL;
1226 goto out_unlock;
1227 }
			/*
			 * The cached last_visited may point to a memcg that
			 * has since been destroyed.  It is only reused if
			 * root->dead_count is unchanged since it was stored
			 * (no destruction happened in between) and
			 * css_tryget() still succeeds on it; otherwise the
			 * walk restarts from @root.
			 */
1241 dead_count = atomic_read(&root->dead_count);
1242 if (dead_count == iter->last_dead_count) {
1243 smp_rmb();
1244 last_visited = iter->last_visited;
1245 if (last_visited && last_visited != root &&
1246 !css_tryget(&last_visited->css))
1247 last_visited = NULL;
1248 }
1249 }
1250
1251 memcg = __mem_cgroup_iter_next(root, last_visited);
1252
1253 if (reclaim) {
1254 if (last_visited && last_visited != root)
1255 css_put(&last_visited->css);
1256
1257 iter->last_visited = memcg;
1258 smp_wmb();
1259 iter->last_dead_count = dead_count;
1260
1261 if (!memcg)
1262 iter->generation++;
1263 else if (!prev && memcg)
1264 reclaim->generation = iter->generation;
1265 }
1266
1267 if (prev && !memcg)
1268 goto out_unlock;
1269 }
1270out_unlock:
1271 rcu_read_unlock();
1272out_css_put:
1273 if (prev && prev != root)
1274 css_put(&prev->css);
1275
1276 return memcg;
1277}
1278
1279
1280
1281
1282
1283
1284void mem_cgroup_iter_break(struct mem_cgroup *root,
1285 struct mem_cgroup *prev)
1286{
1287 if (!root)
1288 root = root_mem_cgroup;
1289 if (prev && prev != root)
1290 css_put(&prev->css);
1291}
1292
1293
1294
1295
1296
1297
1298#define for_each_mem_cgroup_tree(iter, root) \
1299 for (iter = mem_cgroup_iter(root, NULL, NULL); \
1300 iter != NULL; \
1301 iter = mem_cgroup_iter(root, iter, NULL))
1302
1303#define for_each_mem_cgroup(iter) \
1304 for (iter = mem_cgroup_iter(NULL, NULL, NULL); \
1305 iter != NULL; \
1306 iter = mem_cgroup_iter(NULL, iter, NULL))
1307
1308void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
1309{
1310 struct mem_cgroup *memcg;
1311
1312 rcu_read_lock();
1313 memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
1314 if (unlikely(!memcg))
1315 goto out;
1316
1317 switch (idx) {
1318 case PGFAULT:
1319 this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]);
1320 break;
1321 case PGMAJFAULT:
1322 this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]);
1323 break;
1324 default:
1325 BUG();
1326 }
1327out:
1328 rcu_read_unlock();
1329}
1330EXPORT_SYMBOL(__mem_cgroup_count_vm_event);
1331
/**
 * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
 * @zone: zone of the wanted lruvec
 * @memcg: memcg of the wanted lruvec
 *
 * Returns the lru list vector holding pages for the given @zone and
 * @memcg.  This can be the global zone lruvec when the memory
 * controller is disabled.
 */
1341struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
1342 struct mem_cgroup *memcg)
1343{
1344 struct mem_cgroup_per_zone *mz;
1345 struct lruvec *lruvec;
1346
1347 if (mem_cgroup_disabled()) {
1348 lruvec = &zone->lruvec;
1349 goto out;
1350 }
1351
1352 mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone));
1353 lruvec = &mz->lruvec;
1354out:
1355
1356
1357
1358
1359
1360 if (unlikely(lruvec->zone != zone))
1361 lruvec->zone = zone;
1362 return lruvec;
1363}
1364
/**
 * mem_cgroup_page_lruvec - return lruvec for adding an lru page
 * @page: the page
 * @zone: zone of the page
 *
 * This function is only safe when following the LRU page isolation
 * and putback protocol: the LRU lock must be held, and the page must
 * either be PageLRU() or the caller must have isolated/allocated it.
 */
1384struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
1385{
1386 struct mem_cgroup_per_zone *mz;
1387 struct mem_cgroup *memcg;
1388 struct page_cgroup *pc;
1389 struct lruvec *lruvec;
1390
1391 if (mem_cgroup_disabled()) {
1392 lruvec = &zone->lruvec;
1393 goto out;
1394 }
1395
1396 pc = lookup_page_cgroup(page);
1397 memcg = pc->mem_cgroup;
	/*
	 * Surreptitiously switch any uncharged offlist page to root:
	 * an uncharged page off lru does nothing to secure its former
	 * mem_cgroup from sudden removal.  The caller holds the lru
	 * lock, so a page that is neither on the LRU nor charged can
	 * safely be reparented to root_mem_cgroup here.
	 */
1408 if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup)
1409 pc->mem_cgroup = memcg = root_mem_cgroup;
1410
1411 mz = page_cgroup_zoneinfo(memcg, page);
1412 lruvec = &mz->lruvec;
1413out:
1414
1415
1416
1417
1418
1419 if (unlikely(lruvec->zone != zone))
1420 lruvec->zone = zone;
1421 return lruvec;
1422}
1423
/**
 * mem_cgroup_update_lru_size - account for adding or removing an lru page
 * @lruvec: mem_cgroup per zone lru vector
 * @lru: index of lru list the page is sitting on
 * @nr_pages: positive when adding or negative when removing
 *
 * This function must be called when a page is added to or removed
 * from an lru list.
 */
1433void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
1434 int nr_pages)
1435{
1436 struct mem_cgroup_per_zone *mz;
1437 unsigned long *lru_size;
1438
1439 if (mem_cgroup_disabled())
1440 return;
1441
1442 mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
1443 lru_size = mz->lru_size + lru;
1444 *lru_size += nr_pages;
1445 VM_BUG_ON((long)(*lru_size) < 0);
1446}
1447
/*
 * Checks whether the given memcg is the same as root_memcg or lives in
 * its hierarchy subtree (use_hierarchy must be enabled on the root).
 */
1452bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
1453 struct mem_cgroup *memcg)
1454{
1455 if (root_memcg == memcg)
1456 return true;
1457 if (!root_memcg->use_hierarchy || !memcg)
1458 return false;
1459 return cgroup_is_descendant(memcg->css.cgroup, root_memcg->css.cgroup);
1460}
1461
1462static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
1463 struct mem_cgroup *memcg)
1464{
1465 bool ret;
1466
1467 rcu_read_lock();
1468 ret = __mem_cgroup_same_or_subtree(root_memcg, memcg);
1469 rcu_read_unlock();
1470 return ret;
1471}
1472
1473int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
1474{
1475 int ret;
1476 struct mem_cgroup *curr = NULL;
1477 struct task_struct *p;
1478
1479 p = find_lock_task_mm(task);
1480 if (p) {
1481 curr = try_get_mem_cgroup_from_mm(p->mm);
1482 task_unlock(p);
1483 } else {
1484
1485
1486
1487
1488
1489 task_lock(task);
1490 curr = mem_cgroup_from_task(task);
1491 if (curr)
1492 css_get(&curr->css);
1493 task_unlock(task);
1494 }
1495 if (!curr)
1496 return 0;
1497
1498
1499
1500
1501
1502
1503 ret = mem_cgroup_same_or_subtree(memcg, curr);
1504 css_put(&curr->css);
1505 return ret;
1506}
1507
1508int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
1509{
1510 unsigned long inactive_ratio;
1511 unsigned long inactive;
1512 unsigned long active;
1513 unsigned long gb;
1514
1515 inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON);
1516 active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON);
1517
1518 gb = (inactive + active) >> (30 - PAGE_SHIFT);
1519 if (gb)
1520 inactive_ratio = int_sqrt(10 * gb);
1521 else
1522 inactive_ratio = 1;
1523
1524 return inactive * inactive_ratio < active;
1525}
1526
1527#define mem_cgroup_from_counter(counter, member) \
1528 container_of(counter, struct mem_cgroup, member)
1529
/**
 * mem_cgroup_margin - calculate chargeable space of a memory cgroup
 * @memcg: the memory cgroup
 *
 * Returns the maximum amount of memory @memcg can be charged with, in
 * pages.
 */
1537static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
1538{
1539 unsigned long margin = 0;
1540 unsigned long count;
1541 unsigned long limit;
1542
1543 count = page_counter_read(&memcg->memory);
1544 limit = ACCESS_ONCE(memcg->memory.limit);
1545 if (count < limit)
1546 margin = limit - count;
1547
1548 if (do_swap_account) {
1549 count = page_counter_read(&memcg->memsw);
1550 limit = ACCESS_ONCE(memcg->memsw.limit);
1551 if (count <= limit)
1552 margin = min(margin, limit - count);
1553 }
1554
1555 return margin;
1556}
1557
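/* The root cgroup uses the global vm_swappiness, children use their own. */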
1558int mem_cgroup_swappiness(struct mem_cgroup *memcg)
1559{
1560 struct cgroup *cgrp = memcg->css.cgroup;
1561
1562
1563 if (cgrp->parent == NULL)
1564 return vm_swappiness;
1565
1566 return memcg->swappiness;
1567}
1568
/*
 * Writers of memcg-based page state (e.g. FILE_MAPPED) must not race
 * with the charge mover.  mem_cgroup_start_move()/mem_cgroup_end_move()
 * bracket a move: memcg_moving counts moves in flight globally, while
 * memcg->moving_account counts moves involving that memcg.  Readers in
 * the page-stat update path use these to decide whether the per-memcg
 * move_lock must be taken.
 */
1585atomic_t memcg_moving __read_mostly;
1586
1587static void mem_cgroup_start_move(struct mem_cgroup *memcg)
1588{
1589 atomic_inc(&memcg_moving);
1590 atomic_inc(&memcg->moving_account);
1591 synchronize_rcu();
1592}
1593
1594static void mem_cgroup_end_move(struct mem_cgroup *memcg)
1595{
1596
1597
1598
1599
1600 if (memcg) {
1601 atomic_dec(&memcg_moving);
1602 atomic_dec(&memcg->moving_account);
1603 }
1604}
1605
/*
 * Two routines for checking whether "memcg" is under move_account():
 *
 * mem_cgroup_stolen() checks whether any charge moving is in progress
 * for this memcg; the page-stat update path uses it to decide whether
 * it needs the move_lock.
 *
 * mem_cgroup_under_move() checks whether memcg is mc.from, mc.to or an
 * ancestor of either; it is used to decide whether a charging task
 * should wait for the move to finish.
 */
1618static bool mem_cgroup_stolen(struct mem_cgroup *memcg)
1619{
1620 VM_BUG_ON(!rcu_read_lock_held());
1621 return atomic_read(&memcg->moving_account) > 0;
1622}
1623
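/*
 * Is @memcg the source or destination of the charge move in progress,
 * or an ancestor of either?
 */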
1624static bool mem_cgroup_under_move(struct mem_cgroup *memcg)
1625{
1626 struct mem_cgroup *from;
1627 struct mem_cgroup *to;
1628 bool ret = false;
1629
1630
1631
1632
1633 spin_lock(&mc.lock);
1634 from = mc.from;
1635 to = mc.to;
1636 if (!from)
1637 goto unlock;
1638
1639 ret = mem_cgroup_same_or_subtree(memcg, from)
1640 || mem_cgroup_same_or_subtree(memcg, to);
1641unlock:
1642 spin_unlock(&mc.lock);
1643 return ret;
1644}
1645
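/*
 * If another task is moving charges that involve @memcg, sleep until
 * the move completes.  Returns true if we waited.
 */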
1646static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
1647{
1648 if (mc.moving_task && current != mc.moving_task) {
1649 if (mem_cgroup_under_move(memcg)) {
1650 DEFINE_WAIT(wait);
1651 prepare_to_wait(&mc.waitq, &wait, TASK_INTERRUPTIBLE);
1652
1653 if (mc.moving_task)
1654 schedule();
1655 finish_wait(&mc.waitq, &wait);
1656 return true;
1657 }
1658 }
1659 return false;
1660}
1661
1662
1663
1664
1665
1666
1667
1668static void move_lock_mem_cgroup(struct mem_cgroup *memcg,
1669 unsigned long *flags)
1670{
1671 spin_lock_irqsave(&memcg->move_lock, *flags);
1672}
1673
1674static void move_unlock_mem_cgroup(struct mem_cgroup *memcg,
1675 unsigned long *flags)
1676{
1677 spin_unlock_irqrestore(&memcg->move_lock, *flags);
1678}
1679
1680#define K(x) ((x) << (PAGE_SHIFT-10))
1681
/**
 * mem_cgroup_print_oom_info: Print OOM information relevant to memory controller.
 * @memcg: The memory cgroup that went over limit
 * @p: Task that is going to be killed
 *
 * NOTE: @memcg and @p's mem_cgroup can be different when hierarchy is
 * enabled.
 */
1689void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
1690{
1691 struct cgroup *task_cgrp;
1692 struct cgroup *mem_cgrp;
1693
1694
1695
1696
1697
1698 static char memcg_name[PATH_MAX];
1699 int ret;
1700 struct mem_cgroup *iter;
1701 unsigned int i;
1702
1703 if (!p)
1704 return;
1705
1706 rcu_read_lock();
1707
1708 mem_cgrp = memcg->css.cgroup;
1709 task_cgrp = task_cgroup(p, mem_cgroup_subsys_id);
1710
1711 ret = cgroup_path(task_cgrp, memcg_name, PATH_MAX);
1712 if (ret < 0) {
1713
1714
1715
1716
1717 rcu_read_unlock();
1718 goto done;
1719 }
1720 rcu_read_unlock();
1721
1722 pr_info("Task in %s killed", memcg_name);
1723
1724 rcu_read_lock();
1725 ret = cgroup_path(mem_cgrp, memcg_name, PATH_MAX);
1726 if (ret < 0) {
1727 rcu_read_unlock();
1728 goto done;
1729 }
1730 rcu_read_unlock();
1731
1732
1733
1734
1735 pr_cont(" as a result of limit of %s\n", memcg_name);
1736done:
1737
1738 pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n",
1739 K((u64)page_counter_read(&memcg->memory)),
1740 K((u64)memcg->memory.limit), memcg->memory.failcnt);
1741 pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %lu\n",
1742 K((u64)page_counter_read(&memcg->memsw)),
1743 K((u64)memcg->memsw.limit), memcg->memsw.failcnt);
1744 pr_info("kmem: usage %llukB, limit %llukB, failcnt %lu\n",
1745 K((u64)page_counter_read(&memcg->kmem)),
1746 K((u64)memcg->kmem.limit), memcg->kmem.failcnt);
1747
1748 for_each_mem_cgroup_tree(iter, memcg) {
1749 pr_info("Memory cgroup stats");
1750
1751 rcu_read_lock();
1752 ret = cgroup_path(iter->css.cgroup, memcg_name, PATH_MAX);
1753 if (!ret)
1754 pr_cont(" for %s", memcg_name);
1755 rcu_read_unlock();
1756 pr_cont(":");
1757
1758 for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
1759 if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
1760 continue;
1761 pr_cont(" %s:%ldKB", mem_cgroup_stat_names[i],
1762 K(mem_cgroup_read_stat(iter, i)));
1763 }
1764
1765 for (i = 0; i < NR_LRU_LISTS; i++)
1766 pr_cont(" %s:%luKB", mem_cgroup_lru_names[i],
1767 K(mem_cgroup_nr_lru_pages(iter, BIT(i))));
1768
1769 pr_cont("\n");
1770 }
1771}
1772
1773
1774
1775
1776
1777static int mem_cgroup_count_children(struct mem_cgroup *memcg)
1778{
1779 int num = 0;
1780 struct mem_cgroup *iter;
1781
1782 for_each_mem_cgroup_tree(iter, memcg)
1783 num++;
1784 return num;
1785}
1786
1787
1788
1789
1790static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
1791{
1792 unsigned long limit;
1793
1794 limit = memcg->memory.limit;
1795 if (mem_cgroup_swappiness(memcg)) {
1796 unsigned long memsw_limit;
1797
1798 memsw_limit = memcg->memsw.limit;
1799 limit = min(limit + total_swap_pages, memsw_limit);
1800 }
1801 return limit;
1802}
1803
1804static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
1805 int order)
1806{
1807 struct mem_cgroup *iter;
1808 unsigned long chosen_points = 0;
1809 unsigned long totalpages;
1810 unsigned int points = 0;
1811 struct task_struct *chosen = NULL;
1812
1813
1814
1815
1816
1817
1818 if (fatal_signal_pending(current) || current->flags & PF_EXITING) {
1819 set_thread_flag(TIF_MEMDIE);
1820 return;
1821 }
1822
1823 check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL);
1824 totalpages = mem_cgroup_get_limit(memcg) ? : 1;
1825 for_each_mem_cgroup_tree(iter, memcg) {
1826 struct cgroup *cgroup = iter->css.cgroup;
1827 struct cgroup_iter it;
1828 struct task_struct *task;
1829
1830 cgroup_iter_start(cgroup, &it);
1831 while ((task = cgroup_iter_next(cgroup, &it))) {
1832 switch (oom_scan_process_thread(task, totalpages, NULL,
1833 false)) {
1834 case OOM_SCAN_SELECT:
1835 if (chosen)
1836 put_task_struct(chosen);
1837 chosen = task;
1838 chosen_points = ULONG_MAX;
1839 get_task_struct(chosen);
				/* fall through */
1841 case OOM_SCAN_CONTINUE:
1842 continue;
1843 case OOM_SCAN_ABORT:
1844 cgroup_iter_end(cgroup, &it);
1845 mem_cgroup_iter_break(memcg, iter);
1846 if (chosen)
1847 put_task_struct(chosen);
1848 return;
1849 case OOM_SCAN_OK:
1850 break;
1851 };
1852 points = oom_badness(task, memcg, NULL, totalpages);
1853 if (points > chosen_points) {
1854 if (chosen)
1855 put_task_struct(chosen);
1856 chosen = task;
1857 chosen_points = points;
1858 get_task_struct(chosen);
1859 }
1860 }
1861 cgroup_iter_end(cgroup, &it);
1862 }
1863
1864 if (!chosen)
1865 return;
1866 points = chosen_points * 1000 / totalpages;
1867 oom_kill_process(chosen, gfp_mask, order, points, totalpages, memcg,
1868 NULL, "Memory cgroup out of memory");
1869}
1870
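/*
 * Reclaim pages from @memcg's hierarchy until some charge margin is
 * available again or the retry limit is hit.  Returns the number of
 * pages reclaimed.
 */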
1871static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
1872 gfp_t gfp_mask,
1873 unsigned long flags)
1874{
1875 unsigned long total = 0;
1876 bool noswap = false;
1877 int loop;
1878
1879 if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
1880 noswap = true;
1881 if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum)
1882 noswap = true;
1883
1884 for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
1885 if (loop)
1886 drain_all_stock_async(memcg);
1887 total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
1888
1889
1890
1891
1892
1893 if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
1894 break;
1895 if (mem_cgroup_margin(memcg))
1896 break;
1897
1898
1899
1900
1901 if (loop && !total)
1902 break;
1903 }
1904 return total;
1905}
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
1918 int nid, bool noswap)
1919{
1920 if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_FILE))
1921 return true;
1922 if (noswap || !total_swap_pages)
1923 return false;
1924 if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_ANON))
1925 return true;
1926 return false;
1927
1928}
1929#if MAX_NUMNODES > 1
1930
1931
1932
1933
1934
1935
1936
1937static void mem_cgroup_may_update_nodemask(struct mem_cgroup *memcg)
1938{
1939 int nid;
1940
1941
1942
1943
1944 if (!atomic_read(&memcg->numainfo_events))
1945 return;
1946 if (atomic_inc_return(&memcg->numainfo_updating) > 1)
1947 return;
1948
1949
1950 memcg->scan_nodes = node_states[N_MEMORY];
1951
1952 for_each_node_mask(nid, node_states[N_MEMORY]) {
1953
1954 if (!test_mem_cgroup_node_reclaimable(memcg, nid, false))
1955 node_clear(nid, memcg->scan_nodes);
1956 }
1957
1958 atomic_set(&memcg->numainfo_events, 0);
1959 atomic_set(&memcg->numainfo_updating, 0);
1960}
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
1975{
1976 int node;
1977
1978 mem_cgroup_may_update_nodemask(memcg);
1979 node = memcg->last_scanned_node;
1980
1981 node = next_node(node, memcg->scan_nodes);
1982 if (node == MAX_NUMNODES)
1983 node = first_node(memcg->scan_nodes);
1984
1985
1986
1987
1988
1989
1990 if (unlikely(node == MAX_NUMNODES))
1991 node = numa_node_id();
1992
1993 memcg->last_scanned_node = node;
1994 return node;
1995}
1996
1997
1998
1999
2000
2001
2002
2003static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
2004{
2005 int nid;
2006
2007
2008
2009
2010
2011 if (!nodes_empty(memcg->scan_nodes)) {
2012 for (nid = first_node(memcg->scan_nodes);
2013 nid < MAX_NUMNODES;
2014 nid = next_node(nid, memcg->scan_nodes)) {
2015
2016 if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
2017 return true;
2018 }
2019 }
2020
2021
2022
2023 for_each_node_state(nid, N_MEMORY) {
2024 if (node_isset(nid, memcg->scan_nodes))
2025 continue;
2026 if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
2027 return true;
2028 }
2029 return false;
2030}
2031
2032#else
2033int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
2034{
2035 return 0;
2036}
2037
2038static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
2039{
2040 return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
2041}
2042#endif
2043
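/*
 * Reclaim from the hierarchy below @root_memcg on behalf of soft limit
 * reclaim for @zone, sharing the per-zone iterator so that concurrent
 * reclaimers spread out over the children.  Returns pages reclaimed.
 */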
2044static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
2045 struct zone *zone,
2046 gfp_t gfp_mask,
2047 unsigned long *total_scanned)
2048{
2049 struct mem_cgroup *victim = NULL;
2050 int total = 0;
2051 int loop = 0;
2052 unsigned long excess;
2053 unsigned long nr_scanned;
2054 struct mem_cgroup_reclaim_cookie reclaim = {
2055 .zone = zone,
2056 .priority = 0,
2057 };
2058
2059 excess = soft_limit_excess(root_memcg);
2060
2061 while (1) {
2062 victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
2063 if (!victim) {
2064 loop++;
2065 if (loop >= 2) {
2066
2067
2068
2069
2070
2071 if (!total)
2072 break;
2073
2074
2075
2076
2077
2078
2079 if (total >= (excess >> 2) ||
2080 (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
2081 break;
2082 }
2083 continue;
2084 }
2085 if (!mem_cgroup_reclaimable(victim, false))
2086 continue;
2087 total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
2088 zone, &nr_scanned);
2089 *total_scanned += nr_scanned;
2090 if (!soft_limit_excess(root_memcg))
2091 break;
2092 }
2093 mem_cgroup_iter_break(root_memcg, victim);
2094 return total;
2095}
2096
2097static DEFINE_SPINLOCK(memcg_oom_lock);
2098
2099
2100
2101
2102
2103static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg)
2104{
2105 struct mem_cgroup *iter, *failed = NULL;
2106
2107 spin_lock(&memcg_oom_lock);
2108
2109 for_each_mem_cgroup_tree(iter, memcg) {
2110 if (iter->oom_lock) {
2111
2112
2113
2114
2115 failed = iter;
2116 mem_cgroup_iter_break(memcg, iter);
2117 break;
2118 } else
2119 iter->oom_lock = true;
2120 }
2121
2122 if (failed) {
2123
2124
2125
2126
2127 for_each_mem_cgroup_tree(iter, memcg) {
2128 if (iter == failed) {
2129 mem_cgroup_iter_break(memcg, iter);
2130 break;
2131 }
2132 iter->oom_lock = false;
2133 }
2134 }
2135
2136 spin_unlock(&memcg_oom_lock);
2137
2138 return !failed;
2139}
2140
2141static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
2142{
2143 struct mem_cgroup *iter;
2144
2145 spin_lock(&memcg_oom_lock);
2146 for_each_mem_cgroup_tree(iter, memcg)
2147 iter->oom_lock = false;
2148 spin_unlock(&memcg_oom_lock);
2149}
2150
2151static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg)
2152{
2153 struct mem_cgroup *iter;
2154
2155 for_each_mem_cgroup_tree(iter, memcg)
2156 atomic_inc(&iter->under_oom);
2157}
2158
2159static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg)
2160{
2161 struct mem_cgroup *iter;
2162
2163
2164
2165
2166
2167
2168 for_each_mem_cgroup_tree(iter, memcg)
2169 atomic_add_unless(&iter->under_oom, -1, 0);
2170}
2171
2172static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
2173
2174struct oom_wait_info {
2175 struct mem_cgroup *memcg;
2176 wait_queue_t wait;
2177};
2178
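/*
 * Wake an OOM waiter only if the woken memcg and the waiter's memcg are
 * in the same hierarchy branch (one is an ancestor of the other).
 */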
2179static int memcg_oom_wake_function(wait_queue_t *wait,
2180 unsigned mode, int sync, void *arg)
2181{
2182 struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg;
2183 struct mem_cgroup *oom_wait_memcg;
2184 struct oom_wait_info *oom_wait_info;
2185
2186 oom_wait_info = container_of(wait, struct oom_wait_info, wait);
2187 oom_wait_memcg = oom_wait_info->memcg;
2188
2189
2190
2191
2192
2193 if (!mem_cgroup_same_or_subtree(oom_wait_memcg, wake_memcg)
2194 && !mem_cgroup_same_or_subtree(wake_memcg, oom_wait_memcg))
2195 return 0;
2196 return autoremove_wake_function(wait, mode, sync, arg);
2197}
2198
2199static void memcg_wakeup_oom(struct mem_cgroup *memcg)
2200{
2201 atomic_inc(&memcg->oom_wakeups);
2202
2203 __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
2204}
2205
2206static void memcg_oom_recover(struct mem_cgroup *memcg)
2207{
2208 if (memcg && atomic_read(&memcg->under_oom))
2209 memcg_wakeup_oom(memcg);
2210}
2211
2212static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
2213{
2214 if (!current->memcg_oom.may_oom)
2215 return;
2216
	/*
	 * We are in the middle of the charge context here, so we don't want
	 * to block when potentially sitting on a callstack that holds all
	 * kinds of filesystem and mm locks.
	 *
	 * Just record the OOM context; the OOM killer is invoked, if still
	 * necessary, from mem_cgroup_oom_synchronize() at the end of the
	 * page fault, when the stack has been unwound.
	 */
2230 css_get(&memcg->css);
2231 current->memcg_oom.memcg = memcg;
2232 current->memcg_oom.gfp_mask = mask;
2233 current->memcg_oom.order = order;
2234}
2235
/**
 * mem_cgroup_oom_synchronize - complete memcg OOM handling
 * @handle: actually kill/wait or just clean up the OOM state
 *
 * This has to be called at the end of a page fault if the memcg OOM
 * handler was enabled during the charge attempt.
 *
 * Depending on whether this task won the OOM lock and whether OOM
 * killing is enabled for the memcg, it either invokes the OOM killer
 * or sleeps on the memcg OOM waitqueue until woken by an uncharge or
 * by a userspace OOM handler.
 *
 * Returns %true if an ongoing memcg OOM situation was detected and
 * completed, %false otherwise.
 */
2253bool mem_cgroup_oom_synchronize(bool handle)
2254{
2255 struct mem_cgroup *memcg = current->memcg_oom.memcg;
2256 struct oom_wait_info owait;
2257 bool locked;
2258
2259
2260 if (!memcg)
2261 return false;
2262
2263 if (!handle)
2264 goto cleanup;
2265
2266 owait.memcg = memcg;
2267 owait.wait.flags = 0;
2268 owait.wait.func = memcg_oom_wake_function;
2269 owait.wait.private = current;
2270 INIT_LIST_HEAD(&owait.wait.task_list);
2271
2272 prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
2273 mem_cgroup_mark_under_oom(memcg);
2274
2275 locked = mem_cgroup_oom_trylock(memcg);
2276
2277 if (locked)
2278 mem_cgroup_oom_notify(memcg);
2279
2280 if (locked && !memcg->oom_kill_disable) {
2281 mem_cgroup_unmark_under_oom(memcg);
2282 finish_wait(&memcg_oom_waitq, &owait.wait);
2283 mem_cgroup_out_of_memory(memcg, current->memcg_oom.gfp_mask,
2284 current->memcg_oom.order);
2285 } else {
2286 schedule();
2287 mem_cgroup_unmark_under_oom(memcg);
2288 finish_wait(&memcg_oom_waitq, &owait.wait);
2289 }
2290
2291 if (locked) {
2292 mem_cgroup_oom_unlock(memcg);
2293
2294
2295
2296
2297
2298 memcg_oom_recover(memcg);
2299 }
2300cleanup:
2301 current->memcg_oom.memcg = NULL;
2302 css_put(&memcg->css);
2303 return true;
2304}
2305
/*
 * Used when updating mapped-file or similar page state kept in the memcg.
 *
 * Such updates must not race with the page being moved to another memcg.
 * The fast path only checks whether a move might be in progress
 * (mem_cgroup_stolen()); when one is, the per-memcg move_lock is taken
 * and held until __mem_cgroup_end_update_page_stat(), so the page's
 * memcg cannot change under the updater.  Callers go through the
 * mem_cgroup_begin/end_update_page_stat() wrappers and must not sleep
 * in between.
 */
2330void __mem_cgroup_begin_update_page_stat(struct page *page,
2331 bool *locked, unsigned long *flags)
2332{
2333 struct mem_cgroup *memcg;
2334 struct page_cgroup *pc;
2335
2336 pc = lookup_page_cgroup(page);
2337again:
2338 memcg = pc->mem_cgroup;
2339 if (unlikely(!memcg || !PageCgroupUsed(pc)))
2340 return;
2341
2342
2343
2344
2345
2346
2347 if (!mem_cgroup_stolen(memcg))
2348 return;
2349
2350 move_lock_mem_cgroup(memcg, flags);
2351 if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
2352 move_unlock_mem_cgroup(memcg, flags);
2353 goto again;
2354 }
2355 *locked = true;
2356}
2357
2358void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
2359{
2360 struct page_cgroup *pc = lookup_page_cgroup(page);
2361
2362
2363
2364
2365
2366
2367 move_unlock_mem_cgroup(pc->mem_cgroup, flags);
2368}
2369
2370void mem_cgroup_update_page_stat(struct page *page,
2371 enum mem_cgroup_page_stat_item idx, int val)
2372{
2373 struct mem_cgroup *memcg;
2374 struct page_cgroup *pc = lookup_page_cgroup(page);
2375 unsigned long uninitialized_var(flags);
2376
2377 if (mem_cgroup_disabled())
2378 return;
2379
2380 memcg = pc->mem_cgroup;
2381 if (unlikely(!memcg || !PageCgroupUsed(pc)))
2382 return;
2383
2384 switch (idx) {
2385 case MEMCG_NR_FILE_MAPPED:
2386 idx = MEM_CGROUP_STAT_FILE_MAPPED;
2387 break;
2388 default:
2389 BUG();
2390 }
2391
2392 this_cpu_add(memcg->stat->count[idx], val);
2393}
2394
2395
2396
2397
2398
2399#define CHARGE_BATCH 32U
2400struct memcg_stock_pcp {
2401 struct mem_cgroup *cached;
2402 unsigned int nr_pages;
2403 struct work_struct work;
2404 unsigned long flags;
2405#define FLUSHING_CACHED_CHARGE 0
2406};
2407static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
2408static DEFINE_MUTEX(percpu_charge_mutex);
2409
/**
 * consume_stock: Try to consume stocked charge on this cpu.
 * @memcg: memcg to consume from.
 * @nr_pages: how many pages to charge.
 *
 * The charges include both memory and memsw and are drawn from the
 * per-cpu stock cached for @memcg.  Returns %true if @nr_pages could be
 * satisfied from the stock, %false otherwise (the caller then falls
 * back to the page counters).  Requests larger than the batch size are
 * never served from the stock.
 */
2421static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
2422{
2423 struct memcg_stock_pcp *stock;
2424 bool ret = false;
2425
2426 if (nr_pages > CHARGE_BATCH)
2427 return ret;
2428
2429 stock = &get_cpu_var(memcg_stock);
2430 if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
2431 stock->nr_pages -= nr_pages;
2432 ret = true;
2433 }
2434 put_cpu_var(memcg_stock);
2435 return ret;
2436}
2437
2438
2439
2440
2441static void drain_stock(struct memcg_stock_pcp *stock)
2442{
2443 struct mem_cgroup *old = stock->cached;
2444
2445 if (stock->nr_pages) {
2446 page_counter_uncharge(&old->memory, stock->nr_pages);
2447 if (do_swap_account)
2448 page_counter_uncharge(&old->memsw, stock->nr_pages);
2449 stock->nr_pages = 0;
2450 }
2451 stock->cached = NULL;
2452}
2453
2454
2455
2456
2457
2458static void drain_local_stock(struct work_struct *dummy)
2459{
2460 struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
2461 drain_stock(stock);
2462 clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
2463}
2464
2465static void __init memcg_stock_init(void)
2466{
2467 int cpu;
2468
2469 for_each_possible_cpu(cpu) {
2470 struct memcg_stock_pcp *stock =
2471 &per_cpu(memcg_stock, cpu);
2472 INIT_WORK(&stock->work, drain_local_stock);
2473 }
2474}
2475
2476
2477
2478
2479
2480static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
2481{
2482 struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
2483
2484 if (stock->cached != memcg) {
2485 drain_stock(stock);
2486 stock->cached = memcg;
2487 }
2488 stock->nr_pages += nr_pages;
2489 put_cpu_var(memcg_stock);
2490}
2491
2492
2493
2494
2495
2496
2497static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
2498{
2499 int cpu, curcpu;
2500
2501
2502 get_online_cpus();
2503 curcpu = get_cpu();
2504 for_each_online_cpu(cpu) {
2505 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
2506 struct mem_cgroup *memcg;
2507
2508 memcg = stock->cached;
2509 if (!memcg || !stock->nr_pages)
2510 continue;
2511 if (!mem_cgroup_same_or_subtree(root_memcg, memcg))
2512 continue;
2513 if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
2514 if (cpu == curcpu)
2515 drain_local_stock(&stock->work);
2516 else
2517 schedule_work_on(cpu, &stock->work);
2518 }
2519 }
2520 put_cpu();
2521
2522 if (!sync)
2523 goto out;
2524
2525 for_each_online_cpu(cpu) {
2526 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
2527 if (test_bit(FLUSHING_CACHED_CHARGE, &stock->flags))
2528 flush_work(&stock->work);
2529 }
2530out:
2531 put_online_cpus();
2532}
2533
2534
2535
2536
2537
2538
2539static void drain_all_stock_async(struct mem_cgroup *root_memcg)
2540{
2541
2542
2543
2544 if (!mutex_trylock(&percpu_charge_mutex))
2545 return;
2546 drain_all_stock(root_memcg, false);
2547 mutex_unlock(&percpu_charge_mutex);
2548}
2549
2550
2551static void drain_all_stock_sync(struct mem_cgroup *root_memcg)
2552{
2553
2554 mutex_lock(&percpu_charge_mutex);
2555 drain_all_stock(root_memcg, true);
2556 mutex_unlock(&percpu_charge_mutex);
2557}
2558
2559
2560
2561
2562
2563static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
2564{
2565 int i;
2566
2567 spin_lock(&memcg->pcp_counter_lock);
2568 for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
2569 long x = per_cpu(memcg->stat->count[i], cpu);
2570
2571 per_cpu(memcg->stat->count[i], cpu) = 0;
2572 memcg->nocpu_base.count[i] += x;
2573 }
2574 for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
2575 unsigned long x = per_cpu(memcg->stat->events[i], cpu);
2576
2577 per_cpu(memcg->stat->events[i], cpu) = 0;
2578 memcg->nocpu_base.events[i] += x;
2579 }
2580 spin_unlock(&memcg->pcp_counter_lock);
2581}
2582
2583static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
2584 unsigned long action,
2585 void *hcpu)
2586{
2587 int cpu = (unsigned long)hcpu;
2588 struct memcg_stock_pcp *stock;
2589 struct mem_cgroup *iter;
2590
2591 if (action == CPU_ONLINE)
2592 return NOTIFY_OK;
2593
2594 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
2595 return NOTIFY_OK;
2596
2597 for_each_mem_cgroup(iter)
2598 mem_cgroup_drain_pcp_counter(iter, cpu);
2599
2600 stock = &per_cpu(memcg_stock, cpu);
2601 drain_stock(stock);
2602 return NOTIFY_OK;
2603}
2604
2605
2606
2607enum {
2608 CHARGE_OK,
2609 CHARGE_RETRY,
2610 CHARGE_NOMEM,
2611 CHARGE_WOULDBLOCK,
2612};
2613
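/*
 * Try to charge @nr_pages to @memcg's memory (and memsw) counters,
 * reclaiming from the cgroup that hit its limit when the charge fails.
 * Returns one of the CHARGE_* codes above for __mem_cgroup_try_charge()
 * to act on.
 */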
2614static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
2615 unsigned int nr_pages, unsigned int min_pages,
2616 bool invoke_oom)
2617{
2618 struct mem_cgroup *mem_over_limit;
2619 struct page_counter *counter;
2620 unsigned long flags = 0;
2621 int ret;
2622
2623 ret = page_counter_try_charge(&memcg->memory, nr_pages, &counter);
2624
2625 if (likely(!ret)) {
2626 if (!do_swap_account)
2627 return CHARGE_OK;
2628 ret = page_counter_try_charge(&memcg->memsw, nr_pages, &counter);
2629 if (likely(!ret))
2630 return CHARGE_OK;
2631
2632 page_counter_uncharge(&memcg->memory, nr_pages);
2633 mem_over_limit = mem_cgroup_from_counter(counter, memsw);
2634 flags |= MEM_CGROUP_RECLAIM_NOSWAP;
2635 } else
2636 mem_over_limit = mem_cgroup_from_counter(counter, memory);
2637
2638
2639
2640
2641 if (nr_pages > min_pages)
2642 return CHARGE_RETRY;
2643
2644 if (!(gfp_mask & __GFP_WAIT))
2645 return CHARGE_WOULDBLOCK;
2646
2647 if (gfp_mask & __GFP_NORETRY)
2648 return CHARGE_NOMEM;
2649
2650 ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
2651 if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
2652 return CHARGE_RETRY;
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662 if (nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER) && ret)
2663 return CHARGE_RETRY;
2664
2665
2666
2667
2668
2669 if (mem_cgroup_wait_acct_move(mem_over_limit))
2670 return CHARGE_RETRY;
2671
2672 if (invoke_oom)
2673 mem_cgroup_oom(mem_over_limit, gfp_mask,
2674 get_order(nr_pages * PAGE_SIZE));
2675
2676 return CHARGE_NOMEM;
2677}
2678
/*
 * __mem_cgroup_try_charge() does
 * 1. detect the memcg to charge against from the passed *mm and *ptr,
 * 2. update the page counter(s),
 * 3. call page reclaim, and finally the OOM handler, if necessary.
 *
 * Normally returns 0 with *ptr set to the charged memcg.  If the task
 * is fatally signalled or already exiting, the charge is bypassed: the
 * function returns -EINTR with *ptr set to root_mem_cgroup, and the
 * caller is expected to treat the charge as made against root.
 * -ENOMEM is returned when the charge cannot be satisfied.
 */
2700static int __mem_cgroup_try_charge(struct mm_struct *mm,
2701 gfp_t gfp_mask,
2702 unsigned int nr_pages,
2703 struct mem_cgroup **ptr,
2704 bool oom)
2705{
2706 unsigned int batch = max(CHARGE_BATCH, nr_pages);
2707 int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
2708 struct mem_cgroup *memcg = NULL;
2709 int ret;
2710
2711
2712
2713
2714
2715
2716 if (unlikely(test_thread_flag(TIF_MEMDIE)
2717 || fatal_signal_pending(current)))
2718 goto bypass;
2719
2720
2721
2722
2723
2724
2725
2726 if (unlikely(current->flags & PF_MEMALLOC))
2727 goto bypass;
2728
2729 if (unlikely(task_in_memcg_oom(current)))
2730 goto nomem;
2731
2732 if (gfp_mask & __GFP_NOFAIL)
2733 oom = false;
2734
2735
2736
2737
2738
2739
2740
2741 if (!*ptr && !mm)
2742 *ptr = root_mem_cgroup;
2743again:
2744 if (*ptr) {
2745 memcg = *ptr;
2746 if (mem_cgroup_is_root(memcg))
2747 goto done;
2748 if (consume_stock(memcg, nr_pages))
2749 goto done;
2750 css_get(&memcg->css);
2751 } else {
2752 struct task_struct *p;
2753
2754 rcu_read_lock();
2755 p = rcu_dereference(mm->owner);
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766 memcg = mem_cgroup_from_task(p);
2767 if (!memcg)
2768 memcg = root_mem_cgroup;
2769 if (mem_cgroup_is_root(memcg)) {
2770 rcu_read_unlock();
2771 goto done;
2772 }
2773 if (consume_stock(memcg, nr_pages)) {
2774
2775
2776
2777
2778
2779
2780
2781
2782 rcu_read_unlock();
2783 goto done;
2784 }
2785
2786 if (!css_tryget(&memcg->css)) {
2787 rcu_read_unlock();
2788 goto again;
2789 }
2790 rcu_read_unlock();
2791 }
2792
2793 do {
2794 bool invoke_oom = oom && !nr_oom_retries;
2795
2796
2797 if (fatal_signal_pending(current)) {
2798 css_put(&memcg->css);
2799 goto bypass;
2800 }
2801
2802 ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
2803 nr_pages, invoke_oom);
2804 switch (ret) {
2805 case CHARGE_OK:
2806 break;
2807 case CHARGE_RETRY:
2808 batch = nr_pages;
2809 css_put(&memcg->css);
2810 memcg = NULL;
2811 goto again;
2812 case CHARGE_WOULDBLOCK:
2813 css_put(&memcg->css);
2814 goto nomem;
2815 case CHARGE_NOMEM:
2816 if (!oom || invoke_oom) {
2817 css_put(&memcg->css);
2818 goto nomem;
2819 }
2820 nr_oom_retries--;
2821 break;
2822 }
2823 } while (ret != CHARGE_OK);
2824
2825 if (batch > nr_pages)
2826 refill_stock(memcg, batch - nr_pages);
2827 css_put(&memcg->css);
2828done:
2829 *ptr = memcg;
2830 return 0;
2831nomem:
2832 if (!(gfp_mask & __GFP_NOFAIL)) {
2833 *ptr = NULL;
2834 return -ENOMEM;
2835 }
2836bypass:
2837 *ptr = root_mem_cgroup;
2838 return -EINTR;
2839}
2840
2841
2842
2843
2844
2845
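/*
 * Cancel a charge taken by __mem_cgroup_try_charge() that was never
 * committed to a page: give the pages back to the memory counter and,
 * with swap accounting enabled, to the memsw counter as well.  The
 * root cgroup is not tracked by the counters and is skipped.
 */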
2846static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
2847 unsigned int nr_pages)
2848{
2849 if (!mem_cgroup_is_root(memcg)) {
2850 page_counter_uncharge(&memcg->memory, nr_pages);
2851 if (do_swap_account)
2852 page_counter_uncharge(&memcg->memsw, nr_pages);
2853 }
2854}
2855
2856struct mem_cgroup *mem_cgroup_from_id(unsigned short id);
2857
2858
2859
2860
2861
2862
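/*
 * Look up a memory cgroup by its swap_cgroup id.  An id of 0 means
 * "no cgroup recorded" and yields NULL.  The caller is responsible
 * for keeping the returned cgroup alive, e.g. by holding
 * rcu_read_lock() and taking a css reference.
 */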
2863static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
2864{
2865
2866 if (!id)
2867 return NULL;
2868 return mem_cgroup_from_id(id);
2869}
2870
2871struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2872{
2873 struct mem_cgroup *memcg = NULL;
2874 struct page_cgroup *pc;
2875 unsigned short id;
2876 swp_entry_t ent;
2877
2878 VM_BUG_ON_PAGE(!PageLocked(page), page);
2879
2880 pc = lookup_page_cgroup(page);
2881 lock_page_cgroup(pc);
2882 if (PageCgroupUsed(pc)) {
2883 memcg = pc->mem_cgroup;
2884 if (memcg && !css_tryget(&memcg->css))
2885 memcg = NULL;
2886 } else if (PageSwapCache(page)) {
2887 ent.val = page_private(page);
2888 id = lookup_swap_cgroup_id(ent);
2889 rcu_read_lock();
2890 memcg = mem_cgroup_lookup(id);
2891 if (memcg && !css_tryget(&memcg->css))
2892 memcg = NULL;
2893 rcu_read_unlock();
2894 }
2895 unlock_page_cgroup(pc);
2896 return memcg;
2897}
2898
2899static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2900 struct page *page,
2901 unsigned int nr_pages,
2902 enum charge_type ctype,
2903 bool lrucare)
2904{
2905 struct page_cgroup *pc = lookup_page_cgroup(page);
2906 struct zone *uninitialized_var(zone);
2907 struct lruvec *lruvec;
2908 bool was_on_lru = false;
2909 bool anon;
2910
2911 lock_page_cgroup(pc);
2912 VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922 if (lrucare) {
2923 zone = page_zone(page);
2924 spin_lock_irq(&zone->lru_lock);
2925 if (PageLRU(page)) {
2926 lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
2927 ClearPageLRU(page);
2928 del_page_from_lru_list(page, lruvec, page_lru(page));
2929 was_on_lru = true;
2930 }
2931 }
2932
2933 pc->mem_cgroup = memcg;
2934
2935
2936
2937
2938
2939
2940
2941 smp_wmb();
2942 SetPageCgroupUsed(pc);
2943
2944 if (lrucare) {
2945 if (was_on_lru) {
2946 lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
2947 VM_BUG_ON_PAGE(PageLRU(page), page);
2948 SetPageLRU(page);
2949 add_page_to_lru_list(page, lruvec, page_lru(page));
2950 }
2951 spin_unlock_irq(&zone->lru_lock);
2952 }
2953
2954 if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON)
2955 anon = true;
2956 else
2957 anon = false;
2958
2959 mem_cgroup_charge_statistics(memcg, page, anon, nr_pages);
2960 unlock_page_cgroup(pc);
2961
2962
2963
2964
2965
2966
2967 memcg_check_events(memcg, page);
2968}
2969
2970#ifdef CONFIG_MEMCG_KMEM
2971static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
2972{
2973 return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
2974 (memcg->kmem_account_flags & KMEM_ACCOUNTED_MASK);
2975}
2976
2977
2978
2979
2980
2981static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
2982{
2983 struct kmem_cache *cachep;
2984
2985 VM_BUG_ON(p->is_root_cache);
2986 cachep = p->root_cache;
2987 return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)];
2988}
2989
2990#ifdef CONFIG_SLABINFO
2991static int mem_cgroup_slabinfo_read(struct cgroup *cont, struct cftype *cft,
2992 struct seq_file *m)
2993{
2994 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
2995 struct memcg_cache_params *params;
2996
2997 if (!memcg_can_account_kmem(memcg))
2998 return -EIO;
2999
3000 print_slabinfo_header(m);
3001
3002 mutex_lock(&memcg->slab_caches_mutex);
3003 list_for_each_entry(params, &memcg->memcg_slab_caches, list)
3004 cache_show(memcg_params_to_cache(params), m);
3005 mutex_unlock(&memcg->slab_caches_mutex);
3006
3007 return 0;
3008}
3009#endif
3010
3011static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
3012 unsigned long nr_pages)
3013{
3014 struct page_counter *counter;
3015 struct mem_cgroup *_memcg;
3016 int ret = 0;
3017 bool may_oom;
3018
3019 ret = page_counter_try_charge(&memcg->kmem, nr_pages, &counter);
3020 if (ret < 0)
3021 return ret;
3022
3023
3024
3025
3026
3027 may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
3028
3029 _memcg = memcg;
3030 ret = __mem_cgroup_try_charge(NULL, gfp, nr_pages, &_memcg, may_oom);
3031
3032 if (ret == -EINTR) {
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048 page_counter_charge(&memcg->memory, nr_pages);
3049 if (do_swap_account)
3050 page_counter_charge(&memcg->memsw, nr_pages);
3051 ret = 0;
3052 } else if (ret)
3053 page_counter_uncharge(&memcg->kmem, nr_pages);
3054
3055 return ret;
3056}
3057
3058static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
3059 unsigned long nr_pages)
3060{
3061 page_counter_uncharge(&memcg->memory, nr_pages);
3062 if (do_swap_account)
3063 page_counter_uncharge(&memcg->memsw, nr_pages);
3064
3065
3066 if (page_counter_uncharge(&memcg->kmem, nr_pages))
3067 return;
3068
3069 if (memcg_kmem_test_and_clear_dead(memcg))
3070 mem_cgroup_put(memcg);
3071}
3072
3073
3074
3075
3076
3077
3078int memcg_cache_id(struct mem_cgroup *memcg)
3079{
3080 return memcg ? memcg->kmemcg_id : -1;
3081}
3082
3083
3084
3085
3086
3087
3088
3089
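/*
 * Activate kmem accounting for @memcg: allocate a kmemcg id from the
 * ida, make sure every root cache has room for it in its memcg_caches
 * array (via memcg_update_all_caches()), and initialize the per-memcg
 * slab cache list.  Both callers hold memcg_limit_mutex.
 */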
3090int memcg_update_cache_sizes(struct mem_cgroup *memcg)
3091{
3092 int num, ret;
3093
3094 num = ida_simple_get(&kmem_limited_groups,
3095 0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
3096 if (num < 0)
3097 return num;
3098
3099
3100
3101
3102
3103
3104
3105 memcg_kmem_set_activated(memcg);
3106
3107 ret = memcg_update_all_caches(num+1);
3108 if (ret) {
3109 ida_simple_remove(&kmem_limited_groups, num);
3110 memcg_kmem_clear_activated(memcg);
3111 return ret;
3112 }
3113
3114 memcg->kmemcg_id = num;
3115 INIT_LIST_HEAD(&memcg->memcg_slab_caches);
3116 mutex_init(&memcg->slab_caches_mutex);
3117 return 0;
3118}
3119
3120static size_t memcg_caches_array_size(int num_groups)
3121{
3122 ssize_t size;
3123 if (num_groups <= 0)
3124 return 0;
3125
3126 size = 2 * num_groups;
3127 if (size < MEMCG_CACHES_MIN_SIZE)
3128 size = MEMCG_CACHES_MIN_SIZE;
3129 else if (size > MEMCG_CACHES_MAX_SIZE)
3130 size = MEMCG_CACHES_MAX_SIZE;
3131
3132 return size;
3133}
3134
3135
3136
3137
3138
3139
3140void memcg_update_array_size(int num)
3141{
3142 if (num > memcg_limited_groups_array_size)
3143 memcg_limited_groups_array_size = memcg_caches_array_size(num);
3144}
3145
3146static void kmem_cache_destroy_work_func(struct work_struct *w);
3147
3148int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
3149{
3150 struct memcg_cache_params *cur_params = s->memcg_params;
3151
3152 VM_BUG_ON(s->memcg_params && !s->memcg_params->is_root_cache);
3153
3154
3155
3156
3157
3158 if (num_groups > memcg_limited_groups_array_size || !cur_params) {
3159 int i;
3160 ssize_t size = memcg_caches_array_size(num_groups);
3161
3162 size *= sizeof(void *);
3163 size += sizeof(struct memcg_cache_params);
3164
3165 s->memcg_params = kzalloc(size, GFP_KERNEL);
3166 if (!s->memcg_params) {
3167 s->memcg_params = cur_params;
3168 return -ENOMEM;
3169 }
3170
3171 s->memcg_params->is_root_cache = true;
3172
3173
3174 if (!cur_params)
3175 return 0;
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186 for (i = 0; i < memcg_limited_groups_array_size; i++) {
3187 if (!cur_params->memcg_caches[i])
3188 continue;
3189 s->memcg_params->memcg_caches[i] =
3190 cur_params->memcg_caches[i];
3191 }
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202 kfree(cur_params);
3203 }
3204 return 0;
3205}
3206
3207int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
3208 struct kmem_cache *root_cache)
3209{
3210 size_t size = sizeof(struct memcg_cache_params);
3211
3212 if (!memcg_kmem_enabled())
3213 return 0;
3214
3215 if (!memcg)
3216 size += memcg_limited_groups_array_size * sizeof(void *);
3217
3218 s->memcg_params = kzalloc(size, GFP_KERNEL);
3219 if (!s->memcg_params)
3220 return -ENOMEM;
3221
3222 if (memcg) {
3223 s->memcg_params->memcg = memcg;
3224 s->memcg_params->root_cache = root_cache;
3225 INIT_WORK(&s->memcg_params->destroy,
3226 kmem_cache_destroy_work_func);
3227 } else
3228 s->memcg_params->is_root_cache = true;
3229
3230 return 0;
3231}
3232
3233void memcg_free_cache_params(struct kmem_cache *s)
3234{
3235 kfree(s->memcg_params);
3236}
3237
3238void memcg_register_cache(struct kmem_cache *s)
3239{
3240 struct kmem_cache *root;
3241 struct mem_cgroup *memcg;
3242 int id;
3243
3244 if (is_root_cache(s))
3245 return;
3246
3247
3248
3249
3250
3251 lockdep_assert_held(&slab_mutex);
3252
3253 root = s->memcg_params->root_cache;
3254 memcg = s->memcg_params->memcg;
3255 id = memcg_cache_id(memcg);
3256
3257 mutex_lock(&memcg->slab_caches_mutex);
3258 list_add(&s->memcg_params->list, &memcg->memcg_slab_caches);
3259 mutex_unlock(&memcg->slab_caches_mutex);
3260
3261 VM_BUG_ON(root->memcg_params->memcg_caches[id]);
3262 root->memcg_params->memcg_caches[id] = s;
3263
3264
3265
3266
3267 wmb();
3268}
3269
3270void memcg_unregister_cache(struct kmem_cache *s)
3271{
3272 struct kmem_cache *root;
3273 struct mem_cgroup *memcg;
3274 int id;
3275
3276
3277
3278
3279
3280 if (!s->memcg_params)
3281 return;
3282
3283 if (s->memcg_params->is_root_cache)
3284 return;
3285
3286
3287
3288
3289
3290 lockdep_assert_held(&slab_mutex);
3291
3292 memcg = s->memcg_params->memcg;
3293 id = memcg_cache_id(memcg);
3294
3295 root = s->memcg_params->root_cache;
3296 VM_BUG_ON(!root->memcg_params->memcg_caches[id]);
3297 root->memcg_params->memcg_caches[id] = NULL;
3298
3299 mutex_lock(&memcg->slab_caches_mutex);
3300 list_del(&s->memcg_params->list);
3301 mutex_unlock(&memcg->slab_caches_mutex);
3302
3303 mem_cgroup_put(memcg);
3304}
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
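/*
 * Creating a per-memcg kmem cache itself allocates memory, and routing
 * those allocations back through __memcg_kmem_get_cache() could recurse
 * into cache creation.  The helpers below bump and drop
 * current->memcg_kmem_skip_account so that such allocations are served
 * from the root caches instead; memcg_create_cache_enqueue() wraps the
 * enqueue in this pair.
 */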
3325static inline void memcg_stop_kmem_account(void)
3326{
3327 VM_BUG_ON(!current->mm);
3328 current->memcg_kmem_skip_account++;
3329}
3330
3331static inline void memcg_resume_kmem_account(void)
3332{
3333 VM_BUG_ON(!current->mm);
3334 current->memcg_kmem_skip_account--;
3335}
3336
3337static void kmem_cache_destroy_work_func(struct work_struct *w)
3338{
3339 struct kmem_cache *cachep;
3340 struct memcg_cache_params *p;
3341
3342 p = container_of(w, struct memcg_cache_params, destroy);
3343
3344 cachep = memcg_params_to_cache(p);
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362 if (atomic_read(&cachep->memcg_params->nr_pages) != 0) {
3363 kmem_cache_shrink(cachep);
3364 if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
3365 return;
3366 } else
3367 kmem_cache_destroy(cachep);
3368}
3369
3370void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
3371{
3372 if (!cachep->memcg_params->dead)
3373 return;
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393 if (work_pending(&cachep->memcg_params->destroy))
3394 return;
3395
3396
3397
3398
3399 schedule_work(&cachep->memcg_params->destroy);
3400}
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410static DEFINE_MUTEX(memcg_cache_mutex);
3411
3412
3413
3414
3415static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
3416 struct kmem_cache *s)
3417{
3418 struct kmem_cache *new;
3419 static char *tmp_name = NULL;
3420
3421 lockdep_assert_held(&memcg_cache_mutex);
3422
3423
3424
3425
3426
3427
3428
3429 if (!tmp_name) {
3430 tmp_name = kmalloc(PATH_MAX, GFP_KERNEL);
3431 if (!tmp_name)
3432 return NULL;
3433 }
3434
3435 rcu_read_lock();
3436 snprintf(tmp_name, PATH_MAX, "%s(%d:%s)", s->name,
3437 memcg_cache_id(memcg), cgroup_name(memcg->css.cgroup));
3438 rcu_read_unlock();
3439
3440 new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align,
3441 (s->flags & ~SLAB_PANIC), s->ctor, s);
3442
3443 if (new)
3444 new->allocflags |= __GFP_KMEMCG;
3445
3446 return new;
3447}
3448
3449static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
3450 struct kmem_cache *cachep)
3451{
3452 struct kmem_cache *new_cachep;
3453
3454 BUG_ON(!memcg_can_account_kmem(memcg));
3455
3456 mutex_lock(&memcg_cache_mutex);
3457
3458 new_cachep = kmem_cache_dup(memcg, cachep);
3459 if (new_cachep == NULL) {
3460 new_cachep = cachep;
3461 goto out;
3462 }
3463
3464 mem_cgroup_get(memcg);
3465out:
3466 mutex_unlock(&memcg_cache_mutex);
3467 return new_cachep;
3468}
3469
3470static DEFINE_MUTEX(memcg_limit_mutex);
3471
3472int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
3473{
3474 struct kmem_cache *c;
3475 int i, failed = 0;
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486 mutex_lock(&memcg_limit_mutex);
3487 for (i = 0; i < memcg_limited_groups_array_size; i++) {
3488 c = s->memcg_params->memcg_caches[i];
3489 if (!c)
3490 continue;
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505 c->memcg_params->dead = false;
3506 cancel_work_sync(&c->memcg_params->destroy);
3507 kmem_cache_destroy(c);
3508
3509 if (cache_from_memcg(s, i))
3510 failed++;
3511 }
3512 mutex_unlock(&memcg_limit_mutex);
3513 return failed;
3514}
3515
3516struct create_work {
3517 struct mem_cgroup *memcg;
3518 struct kmem_cache *cachep;
3519 struct work_struct work;
3520};
3521
3522static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
3523{
3524 struct kmem_cache *cachep;
3525 struct memcg_cache_params *params;
3526
3527 if (!memcg_kmem_is_active(memcg))
3528 return;
3529
3530 mutex_lock(&memcg->slab_caches_mutex);
3531 list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
3532 cachep = memcg_params_to_cache(params);
3533 cachep->memcg_params->dead = true;
3534 schedule_work(&cachep->memcg_params->destroy);
3535 }
3536 mutex_unlock(&memcg->slab_caches_mutex);
3537}
3538
3539static void memcg_create_cache_work_func(struct work_struct *w)
3540{
3541 struct create_work *cw;
3542
3543 cw = container_of(w, struct create_work, work);
3544 memcg_create_kmem_cache(cw->memcg, cw->cachep);
3545
3546 css_put(&cw->memcg->css);
3547 kfree(cw);
3548}
3549
3550
3551
3552
3553static void __memcg_create_cache_enqueue(struct mem_cgroup *memcg,
3554 struct kmem_cache *cachep)
3555{
3556 struct create_work *cw;
3557
3558 cw = kmalloc(sizeof(struct create_work), GFP_NOWAIT);
3559 if (cw == NULL) {
3560 css_put(&memcg->css);
3561 return;
3562 }
3563
3564 cw->memcg = memcg;
3565 cw->cachep = cachep;
3566
3567 INIT_WORK(&cw->work, memcg_create_cache_work_func);
3568 schedule_work(&cw->work);
3569}
3570
3571static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
3572 struct kmem_cache *cachep)
3573{
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585 memcg_stop_kmem_account();
3586 __memcg_create_cache_enqueue(memcg, cachep);
3587 memcg_resume_kmem_account();
3588}
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
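/*
 * Pick the kmem_cache an allocation from @cachep should actually use:
 * if the current task runs in a kmem-accounted memcg and a per-memcg
 * copy of @cachep already exists, return that copy.  Otherwise return
 * the root cache and, if needed, schedule asynchronous creation of the
 * per-memcg copy via memcg_create_cache_enqueue().
 */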
3602struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
3603 gfp_t gfp)
3604{
3605 struct mem_cgroup *memcg;
3606 int idx;
3607
3608 VM_BUG_ON(!cachep->memcg_params);
3609 VM_BUG_ON(!cachep->memcg_params->is_root_cache);
3610
3611 if (!current->mm || current->memcg_kmem_skip_account)
3612 return cachep;
3613
3614 rcu_read_lock();
3615 memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
3616
3617 if (!memcg_can_account_kmem(memcg))
3618 goto out;
3619
3620 idx = memcg_cache_id(memcg);
3621
3622
3623
3624
3625
3626 read_barrier_depends();
3627 if (likely(cachep->memcg_params->memcg_caches[idx])) {
3628 cachep = cachep->memcg_params->memcg_caches[idx];
3629 goto out;
3630 }
3631
3632
3633 if (!css_tryget(&memcg->css))
3634 goto out;
3635 rcu_read_unlock();
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654 memcg_create_cache_enqueue(memcg, cachep);
3655 return cachep;
3656out:
3657 rcu_read_unlock();
3658 return cachep;
3659}
3660EXPORT_SYMBOL(__memcg_kmem_get_cache);
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
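/*
 * Charge a kernel page allocation of 1 << @order pages against the
 * current task's memcg.  Returns true when the allocation may proceed;
 * *_memcg is set to the cgroup that must later be committed with
 * __memcg_kmem_commit_charge(), or left NULL when the allocation is
 * not accounted (no memcg, or kmem accounting disabled for it).
 */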
3676bool
3677__memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
3678{
3679 struct mem_cgroup *memcg;
3680 int ret;
3681
3682 *_memcg = NULL;
3683 memcg = try_get_mem_cgroup_from_mm(current->mm);
3684
3685
3686
3687
3688
3689
3690 if (unlikely(!memcg))
3691 return true;
3692
3693 if (!memcg_can_account_kmem(memcg)) {
3694 css_put(&memcg->css);
3695 return true;
3696 }
3697
3698 ret = memcg_charge_kmem(memcg, gfp, 1 << order);
3699 if (!ret)
3700 *_memcg = memcg;
3701
3702 css_put(&memcg->css);
3703 return (ret == 0);
3704}
3705
3706void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
3707 int order)
3708{
3709 struct page_cgroup *pc;
3710
3711 VM_BUG_ON(mem_cgroup_is_root(memcg));
3712
3713
3714 if (!page) {
3715 memcg_uncharge_kmem(memcg, 1 << order);
3716 return;
3717 }
3718
3719 pc = lookup_page_cgroup(page);
3720 lock_page_cgroup(pc);
3721 pc->mem_cgroup = memcg;
3722 SetPageCgroupUsed(pc);
3723 unlock_page_cgroup(pc);
3724}
3725
3726void __memcg_kmem_uncharge_pages(struct page *page, int order)
3727{
3728 struct mem_cgroup *memcg = NULL;
3729 struct page_cgroup *pc;
3730
3731
3732 pc = lookup_page_cgroup(page);
3733
3734
3735
3736
3737 if (!PageCgroupUsed(pc))
3738 return;
3739
3740 lock_page_cgroup(pc);
3741 if (PageCgroupUsed(pc)) {
3742 memcg = pc->mem_cgroup;
3743 ClearPageCgroupUsed(pc);
3744 }
3745 unlock_page_cgroup(pc);
3746
3747
3748
3749
3750
3751 if (!memcg)
3752 return;
3753
3754 VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
3755 memcg_uncharge_kmem(memcg, 1 << order);
3756}
3757#else
3758static inline void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
3759{
3760}
3761#endif
3762
3763#ifdef CONFIG_TRANSPARENT_HUGEPAGE
3764
3765#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
3766
3767
3768
3769
3770
3771
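/*
 * Called when a transparent huge page is split: copy the head page's
 * memcg to every tail page_cgroup (minus the flags that must not be
 * copied across the split) and subtract the huge page's worth of pages
 * from that memcg's RSS_HUGE statistic.
 */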
3772void mem_cgroup_split_huge_fixup(struct page *head)
3773{
3774 struct page_cgroup *head_pc = lookup_page_cgroup(head);
3775 struct page_cgroup *pc;
3776 struct mem_cgroup *memcg;
3777 int i;
3778
3779 if (mem_cgroup_disabled())
3780 return;
3781
3782 memcg = head_pc->mem_cgroup;
3783 for (i = 1; i < HPAGE_PMD_NR; i++) {
3784 pc = head_pc + i;
3785 pc->mem_cgroup = memcg;
3786 smp_wmb();
3787 pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
3788 }
3789 __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
3790 HPAGE_PMD_NR);
3791}
3792#endif
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
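/*
 * Move the accounting of @page (and its statistics) from @from to @to.
 * The page must already be isolated from the LRU; for a huge page the
 * caller must hold the compound lock and pass the full number of
 * pages.  Returns 0 on success, -EINVAL if the page is not charged to
 * @from, or -EBUSY if a multi-page move is attempted on a page that is
 * no longer transparent huge.
 */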
3809static int mem_cgroup_move_account(struct page *page,
3810 unsigned int nr_pages,
3811 struct page_cgroup *pc,
3812 struct mem_cgroup *from,
3813 struct mem_cgroup *to)
3814{
3815 unsigned long flags;
3816 int ret;
3817 bool anon = PageAnon(page);
3818
3819 VM_BUG_ON(from == to);
3820 VM_BUG_ON_PAGE(PageLRU(page), page);
3821
3822
3823
3824
3825
3826
3827 ret = -EBUSY;
3828 if (nr_pages > 1 && !PageTransHuge(page))
3829 goto out;
3830
3831 lock_page_cgroup(pc);
3832
3833 ret = -EINVAL;
3834 if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
3835 goto unlock;
3836
3837 move_lock_mem_cgroup(from, &flags);
3838
3839 if (!anon && page_mapped(page)) {
3840
3841 preempt_disable();
3842 __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
3843 __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
3844 preempt_enable();
3845 }
3846 mem_cgroup_charge_statistics(from, page, anon, -nr_pages);
3847
3848
3849 pc->mem_cgroup = to;
3850 mem_cgroup_charge_statistics(to, page, anon, nr_pages);
3851 move_unlock_mem_cgroup(from, &flags);
3852 ret = 0;
3853unlock:
3854 unlock_page_cgroup(pc);
3855
3856
3857
3858 memcg_check_events(to, page);
3859 memcg_check_events(from, page);
3860out:
3861 return ret;
3862}
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
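/*
 * Reparent the charge of @page from @child to its parent (or to the
 * root cgroup when @child has no parent).  The page is taken off the
 * LRU for the duration of the move; on success only the child's own
 * memory (and memsw) counter is lowered, since the hierarchical charge
 * stays with the parent.
 */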
3885static int mem_cgroup_move_parent(struct page *page,
3886 struct page_cgroup *pc,
3887 struct mem_cgroup *child)
3888{
3889 struct mem_cgroup *parent;
3890 unsigned int nr_pages;
3891 unsigned long uninitialized_var(flags);
3892 int ret;
3893
3894 VM_BUG_ON(mem_cgroup_is_root(child));
3895
3896 ret = -EBUSY;
3897 if (!get_page_unless_zero(page))
3898 goto out;
3899 if (isolate_lru_page(page))
3900 goto put;
3901
3902 nr_pages = hpage_nr_pages(page);
3903
3904 parent = parent_mem_cgroup(child);
3905
3906
3907
3908 if (!parent)
3909 parent = root_mem_cgroup;
3910
3911 if (nr_pages > 1) {
3912 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
3913 flags = compound_lock_irqsave(page);
3914 }
3915
3916 ret = mem_cgroup_move_account(page, nr_pages,
3917 pc, child, parent);
3918 if (!ret) {
3919
3920 page_counter_cancel(&child->memory, nr_pages);
3921 if (do_swap_account)
3922 page_counter_cancel(&child->memsw, nr_pages);
3923 }
3924
3925 if (nr_pages > 1)
3926 compound_unlock_irqrestore(page, flags);
3927 putback_lru_page(page);
3928put:
3929 put_page(page);
3930out:
3931 return ret;
3932}
3933
3934
3935
3936
3937
3938
3939
3940static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
3941 gfp_t gfp_mask, enum charge_type ctype)
3942{
3943 struct mem_cgroup *memcg = NULL;
3944 unsigned int nr_pages = 1;
3945 bool oom = true;
3946 int ret;
3947
3948 if (PageTransHuge(page)) {
3949 nr_pages <<= compound_order(page);
3950 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
3951
3952
3953
3954
3955 oom = false;
3956 }
3957
3958 ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
3959 if (ret == -ENOMEM)
3960 return ret;
3961 __mem_cgroup_commit_charge(memcg, page, nr_pages, ctype, false);
3962 return 0;
3963}
3964
3965int mem_cgroup_newpage_charge(struct page *page,
3966 struct mm_struct *mm, gfp_t gfp_mask)
3967{
3968 if (mem_cgroup_disabled())
3969 return 0;
3970 VM_BUG_ON_PAGE(page_mapped(page), page);
3971 VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
3972 VM_BUG_ON(!mm);
3973 return mem_cgroup_charge_common(page, mm, gfp_mask,
3974 MEM_CGROUP_CHARGE_TYPE_ANON);
3975}
3976
3977
3978
3979
3980
3981
3982
3983static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
3984 struct page *page,
3985 gfp_t mask,
3986 struct mem_cgroup **memcgp)
3987{
3988 struct mem_cgroup *memcg;
3989 struct page_cgroup *pc;
3990 int ret;
3991
3992 pc = lookup_page_cgroup(page);
3993
3994
3995
3996
3997
3998
3999
4000 if (PageCgroupUsed(pc))
4001 return 0;
4002 if (!do_swap_account)
4003 goto charge_cur_mm;
4004 memcg = try_get_mem_cgroup_from_page(page);
4005 if (!memcg)
4006 goto charge_cur_mm;
4007 *memcgp = memcg;
4008 ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
4009 css_put(&memcg->css);
4010 if (ret == -EINTR)
4011 ret = 0;
4012 return ret;
4013charge_cur_mm:
4014 ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
4015 if (ret == -EINTR)
4016 ret = 0;
4017 return ret;
4018}
4019
4020int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
4021 gfp_t gfp_mask, struct mem_cgroup **memcgp)
4022{
4023 *memcgp = NULL;
4024 if (mem_cgroup_disabled())
4025 return 0;
4026
4027
4028
4029
4030
4031
4032 if (!PageSwapCache(page)) {
4033 int ret;
4034
4035 ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, memcgp, true);
4036 if (ret == -EINTR)
4037 ret = 0;
4038 return ret;
4039 }
4040 return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
4041}
4042
4043void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
4044{
4045 if (mem_cgroup_disabled())
4046 return;
4047 if (!memcg)
4048 return;
4049 __mem_cgroup_cancel_charge(memcg, 1);
4050}
4051
4052static void
4053__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
4054 enum charge_type ctype)
4055{
4056 if (mem_cgroup_disabled())
4057 return;
4058 if (!memcg)
4059 return;
4060
4061 __mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
4062
4063
4064
4065
4066
4067
4068
4069 if (do_swap_account && PageSwapCache(page)) {
4070 swp_entry_t ent = {.val = page_private(page)};
4071 mem_cgroup_uncharge_swap(ent);
4072 }
4073}
4074
4075void mem_cgroup_commit_charge_swapin(struct page *page,
4076 struct mem_cgroup *memcg)
4077{
4078 __mem_cgroup_commit_charge_swapin(page, memcg,
4079 MEM_CGROUP_CHARGE_TYPE_ANON);
4080}
4081
4082int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
4083 gfp_t gfp_mask)
4084{
4085 struct mem_cgroup *memcg = NULL;
4086 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
4087 int ret;
4088
4089 if (mem_cgroup_disabled())
4090 return 0;
4091 if (PageCompound(page))
4092 return 0;
4093
4094 if (!PageSwapCache(page))
4095 ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
4096 else {
4097 ret = __mem_cgroup_try_charge_swapin(mm, page,
4098 gfp_mask, &memcg);
4099 if (!ret)
4100 __mem_cgroup_commit_charge_swapin(page, memcg, type);
4101 }
4102 return ret;
4103}
4104
4105static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
4106 unsigned int nr_pages,
4107 const enum charge_type ctype)
4108{
4109 struct memcg_batch_info *batch = NULL;
4110 bool uncharge_memsw = true;
4111
4112
4113 if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
4114 uncharge_memsw = false;
4115
4116 batch = &current->memcg_batch;
4117
4118
4119
4120
4121
4122 if (!batch->memcg)
4123 batch->memcg = memcg;
4124
4125
4126
4127
4128
4129
4130
4131
4132 if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
4133 goto direct_uncharge;
4134
4135 if (nr_pages > 1)
4136 goto direct_uncharge;
4137
4138
4139
4140
4141
4142
4143 if (batch->memcg != memcg)
4144 goto direct_uncharge;
4145
4146 batch->nr_pages++;
4147 if (uncharge_memsw)
4148 batch->memsw_nr_pages++;
4149 return;
4150direct_uncharge:
4151 page_counter_uncharge(&memcg->memory, nr_pages);
4152 if (uncharge_memsw)
4153 page_counter_uncharge(&memcg->memsw, nr_pages);
4154 if (unlikely(batch->memcg != memcg))
4155 memcg_oom_recover(memcg);
4156}
4157
4158
4159
4160
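/*
 * Common uncharge path for anonymous, page-cache and swap-out pages.
 * Clears the PageCgroupUsed bit, updates statistics and, unless this
 * is the end of a migration or the root cgroup, gives the pages back
 * to the counters.  For swap-out the cgroup also gains a reference and
 * a swap statistic so the memsw charge can follow the swap entry.
 * Returns the cgroup the page was charged to, or NULL if nothing was
 * uncharged.
 */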
4161static struct mem_cgroup *
4162__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
4163 bool end_migration)
4164{
4165 struct mem_cgroup *memcg = NULL;
4166 unsigned int nr_pages = 1;
4167 struct page_cgroup *pc;
4168 bool anon;
4169
4170 if (mem_cgroup_disabled())
4171 return NULL;
4172
4173 if (PageTransHuge(page)) {
4174 nr_pages <<= compound_order(page);
4175 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
4176 }
4177
4178
4179
4180 pc = lookup_page_cgroup(page);
4181 if (unlikely(!PageCgroupUsed(pc)))
4182 return NULL;
4183
4184 lock_page_cgroup(pc);
4185
4186 memcg = pc->mem_cgroup;
4187
4188 if (!PageCgroupUsed(pc))
4189 goto unlock_out;
4190
4191 anon = PageAnon(page);
4192
4193 switch (ctype) {
4194 case MEM_CGROUP_CHARGE_TYPE_ANON:
4195
4196
4197
4198
4199
4200 anon = true;
4201
4202 case MEM_CGROUP_CHARGE_TYPE_DROP:
4203
4204 if (page_mapped(page))
4205 goto unlock_out;
4206
4207
4208
4209
4210
4211
4212
4213 if (!end_migration && PageCgroupMigration(pc))
4214 goto unlock_out;
4215 break;
4216 case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
4217 if (!PageAnon(page)) {
4218 if (page->mapping && !page_is_file_cache(page))
4219 goto unlock_out;
4220 } else if (page_mapped(page))
4221 goto unlock_out;
4222 break;
4223 default:
4224 break;
4225 }
4226
4227 mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages);
4228
4229 ClearPageCgroupUsed(pc);
4230
4231
4232
4233
4234
4235
4236
4237 unlock_page_cgroup(pc);
4238
4239
4240
4241
4242 memcg_check_events(memcg, page);
4243 if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
4244 mem_cgroup_swap_statistics(memcg, true);
4245 mem_cgroup_get(memcg);
4246 }
4247
4248
4249
4250
4251
4252 if (!end_migration && !mem_cgroup_is_root(memcg))
4253 mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
4254
4255 return memcg;
4256
4257unlock_out:
4258 unlock_page_cgroup(pc);
4259 return NULL;
4260}
4261
4262void mem_cgroup_uncharge_page(struct page *page)
4263{
4264
4265 if (page_mapped(page))
4266 return;
4267 VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280 if (PageSwapCache(page))
4281 return;
4282 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
4283}
4284
4285void mem_cgroup_uncharge_cache_page(struct page *page)
4286{
4287 VM_BUG_ON_PAGE(page_mapped(page), page);
4288 VM_BUG_ON_PAGE(page->mapping, page);
4289 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
4290}
4291
4292
4293
4294
4295
4296
4297
4298
4299
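/*
 * Batched uncharge.  Between mem_cgroup_uncharge_start() and
 * mem_cgroup_uncharge_end(), single-page uncharges against one memcg
 * are accumulated in current->memcg_batch and applied to the page
 * counters in one go when the outermost _end() runs.  Nesting is
 * allowed; only the outermost pair flushes the batch.
 */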
4300void mem_cgroup_uncharge_start(void)
4301{
4302 current->memcg_batch.do_batch++;
4303
4304 if (current->memcg_batch.do_batch == 1) {
4305 current->memcg_batch.memcg = NULL;
4306 current->memcg_batch.nr_pages = 0;
4307 current->memcg_batch.memsw_nr_pages = 0;
4308 }
4309}
4310
4311void mem_cgroup_uncharge_end(void)
4312{
4313 struct memcg_batch_info *batch = &current->memcg_batch;
4314
4315 if (!batch->do_batch)
4316 return;
4317
4318 batch->do_batch--;
4319 if (batch->do_batch)
4320 return;
4321
4322 if (!batch->memcg)
4323 return;
4324
4325
4326
4327
4328 if (batch->nr_pages)
4329 page_counter_uncharge(&batch->memcg->memory, batch->nr_pages);
4330 if (batch->memsw_nr_pages)
4331 page_counter_uncharge(&batch->memcg->memsw, batch->memsw_nr_pages);
4332 memcg_oom_recover(batch->memcg);
4333
4334 batch->memcg = NULL;
4335}
4336
4337#ifdef CONFIG_SWAP
4338
4339
4340
4341
4342void
4343mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
4344{
4345 struct mem_cgroup *memcg;
4346 int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
4347
4348 if (!swapout)
4349 ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
4350
4351 memcg = __mem_cgroup_uncharge_common(page, ctype, false);
4352
4353
4354
4355
4356
4357 if (do_swap_account && swapout && memcg)
4358 swap_cgroup_record(ent, mem_cgroup_id(memcg));
4359}
4360#endif
4361
4362#ifdef CONFIG_MEMCG_SWAP
4363
4364
4365
4366
4367void mem_cgroup_uncharge_swap(swp_entry_t ent)
4368{
4369 struct mem_cgroup *memcg;
4370 unsigned short id;
4371
4372 if (!do_swap_account)
4373 return;
4374
4375 id = swap_cgroup_record(ent, 0);
4376 rcu_read_lock();
4377 memcg = mem_cgroup_lookup(id);
4378 if (memcg) {
4379
4380
4381
4382
4383 if (!mem_cgroup_is_root(memcg))
4384 page_counter_uncharge(&memcg->memsw, 1);
4385 mem_cgroup_swap_statistics(memcg, false);
4386 mem_cgroup_put(memcg);
4387 }
4388 rcu_read_unlock();
4389}
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
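/*
 * Atomically switch the owner recorded for swap @entry from @from to
 * @to.  On success the swap statistics of both groups are updated and
 * a reference is taken on @to to mirror the one a swap record keeps on
 * its owning cgroup.  Returns -EINVAL if the record no longer belongs
 * to @from.
 */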
4405static int mem_cgroup_move_swap_account(swp_entry_t entry,
4406 struct mem_cgroup *from, struct mem_cgroup *to)
4407{
4408 unsigned short old_id, new_id;
4409
4410 old_id = mem_cgroup_id(from);
4411 new_id = mem_cgroup_id(to);
4412
4413 if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
4414 mem_cgroup_swap_statistics(from, false);
4415 mem_cgroup_swap_statistics(to, true);
4416
4417
4418
4419
4420
4421
4422
4423
4424 mem_cgroup_get(to);
4425 return 0;
4426 }
4427 return -EINVAL;
4428}
4429#else
4430static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
4431 struct mem_cgroup *from, struct mem_cgroup *to)
4432{
4433 return -EINVAL;
4434}
4435#endif
4436
4437
4438
4439
4440
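/*
 * Called before migrating @page to @newpage.  If the old page is
 * charged, take a reference on its memcg, mark anonymous pages as
 * under migration (so a racing unmap does not uncharge them), and
 * commit an equivalent charge to @newpage up front.  *memcgp tells
 * mem_cgroup_end_migration() whether there is anything to finish.
 */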
4441void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
4442 struct mem_cgroup **memcgp)
4443{
4444 struct mem_cgroup *memcg = NULL;
4445 unsigned int nr_pages = 1;
4446 struct page_cgroup *pc;
4447 enum charge_type ctype;
4448
4449 *memcgp = NULL;
4450
4451 if (mem_cgroup_disabled())
4452 return;
4453
4454 if (PageTransHuge(page))
4455 nr_pages <<= compound_order(page);
4456
4457 pc = lookup_page_cgroup(page);
4458 lock_page_cgroup(pc);
4459 if (PageCgroupUsed(pc)) {
4460 memcg = pc->mem_cgroup;
4461 css_get(&memcg->css);
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491 if (PageAnon(page))
4492 SetPageCgroupMigration(pc);
4493 }
4494 unlock_page_cgroup(pc);
4495
4496
4497
4498
4499 if (!memcg)
4500 return;
4501
4502 *memcgp = memcg;
4503
4504
4505
4506
4507
4508
4509 if (PageAnon(page))
4510 ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
4511 else
4512 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
4513
4514
4515
4516
4517
4518 __mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
4519}
4520
4521
4522void mem_cgroup_end_migration(struct mem_cgroup *memcg,
4523 struct page *oldpage, struct page *newpage, bool migration_ok)
4524{
4525 struct page *used, *unused;
4526 struct page_cgroup *pc;
4527 bool anon;
4528
4529 if (!memcg)
4530 return;
4531
4532 if (!migration_ok) {
4533 used = oldpage;
4534 unused = newpage;
4535 } else {
4536 used = newpage;
4537 unused = oldpage;
4538 }
4539 anon = PageAnon(used);
4540 __mem_cgroup_uncharge_common(unused,
4541 anon ? MEM_CGROUP_CHARGE_TYPE_ANON
4542 : MEM_CGROUP_CHARGE_TYPE_CACHE,
4543 true);
4544 css_put(&memcg->css);
4545
4546
4547
4548
4549
4550 pc = lookup_page_cgroup(oldpage);
4551 lock_page_cgroup(pc);
4552 ClearPageCgroupMigration(pc);
4553 unlock_page_cgroup(pc);
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563 if (anon)
4564 mem_cgroup_uncharge_page(used);
4565}
4566
4567
4568
4569
4570
4571
4572void mem_cgroup_replace_page_cache(struct page *oldpage,
4573 struct page *newpage)
4574{
4575 struct mem_cgroup *memcg = NULL;
4576 struct page_cgroup *pc;
4577 enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
4578
4579 if (mem_cgroup_disabled())
4580 return;
4581
4582 pc = lookup_page_cgroup(oldpage);
4583
4584 lock_page_cgroup(pc);
4585 if (PageCgroupUsed(pc)) {
4586 memcg = pc->mem_cgroup;
4587 mem_cgroup_charge_statistics(memcg, oldpage, false, -1);
4588 ClearPageCgroupUsed(pc);
4589 }
4590 unlock_page_cgroup(pc);
4591
4592
4593
4594
4595
4596 if (!memcg)
4597 return;
4598
4599
4600
4601
4602
4603 __mem_cgroup_commit_charge(memcg, newpage, 1, type, true);
4604}
4605
4606#ifdef CONFIG_DEBUG_VM
4607static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
4608{
4609 struct page_cgroup *pc;
4610
4611 pc = lookup_page_cgroup(page);
4612
4613
4614
4615
4616
4617 if (likely(pc) && PageCgroupUsed(pc))
4618 return pc;
4619 return NULL;
4620}
4621
4622bool mem_cgroup_bad_page_check(struct page *page)
4623{
4624 if (mem_cgroup_disabled())
4625 return false;
4626
4627 return lookup_page_cgroup_used(page) != NULL;
4628}
4629
4630void mem_cgroup_print_bad_page(struct page *page)
4631{
4632 struct page_cgroup *pc;
4633
4634 pc = lookup_page_cgroup_used(page);
4635 if (pc) {
4636 pr_alert("pc:%p pc->flags:%lx pc->mem_cgroup:%p\n",
4637 pc, pc->flags, pc->mem_cgroup);
4638 }
4639}
4640#endif
4641
4642static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
4643 unsigned long limit)
4644{
4645 unsigned long curusage;
4646 unsigned long oldusage;
4647 unsigned long memswlimit;
4648 bool enlarge = false;
4649 int retry_count;
4650 int ret;
4651
4652
4653
4654
4655
4656
4657 retry_count = MEM_CGROUP_RECLAIM_RETRIES *
4658 mem_cgroup_count_children(memcg);
4659
4660 oldusage = page_counter_read(&memcg->memory);
4661
4662 do {
4663 if (signal_pending(current)) {
4664 ret = -EINTR;
4665 break;
4666 }
4667 mutex_lock(&memcg_limit_mutex);
4668 memswlimit = memcg->memsw.limit;
4669 if (limit > memswlimit) {
4670 mutex_unlock(&memcg_limit_mutex);
4671 ret = -EINVAL;
4672 break;
4673 }
4674
4675 if (limit > memcg->memory.limit)
4676 enlarge = true;
4677
4678 ret = page_counter_limit(&memcg->memory, limit);
4679 if (!ret) {
4680 if (memswlimit == limit)
4681 memcg->memsw_is_minimum = true;
4682 else
4683 memcg->memsw_is_minimum = false;
4684 }
4685 mutex_unlock(&memcg_limit_mutex);
4686
4687 if (!ret)
4688 break;
4689
4690 mem_cgroup_reclaim(memcg, GFP_KERNEL,
4691 MEM_CGROUP_RECLAIM_SHRINK);
4692 curusage = page_counter_read(&memcg->memory);
4693
4694 if (curusage >= oldusage)
4695 retry_count--;
4696 else
4697 oldusage = curusage;
4698 } while (retry_count);
4699
4700 if (!ret && enlarge)
4701 memcg_oom_recover(memcg);
4702
4703 return ret;
4704}
4705
4706static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
4707 unsigned long limit)
4708{
4709 unsigned long curusage;
4710 unsigned long oldusage;
4711 unsigned long memlimit, memswlimit;
4712 bool enlarge = false;
4713 int retry_count;
4714 int ret;
4715
4716
4717 retry_count = MEM_CGROUP_RECLAIM_RETRIES *
4718 mem_cgroup_count_children(memcg);
4719
4720 oldusage = page_counter_read(&memcg->memsw);
4721
4722 do {
4723 if (signal_pending(current)) {
4724 ret = -EINTR;
4725 break;
4726 }
4727 mutex_lock(&memcg_limit_mutex);
4728 memlimit = memcg->memory.limit;
4729 if (limit < memlimit) {
4730 mutex_unlock(&memcg_limit_mutex);
4731 ret = -EINVAL;
4732 break;
4733 }
4734 memswlimit = memcg->memsw.limit;
4735 if (limit > memswlimit)
4736 enlarge = true;
4737 ret = page_counter_limit(&memcg->memsw, limit);
4738 if (!ret) {
4739 if (memlimit == limit)
4740 memcg->memsw_is_minimum = true;
4741 else
4742 memcg->memsw_is_minimum = false;
4743 }
4744 mutex_unlock(&memcg_limit_mutex);
4745
4746 if (!ret)
4747 break;
4748
4749 mem_cgroup_reclaim(memcg, GFP_KERNEL,
4750 MEM_CGROUP_RECLAIM_NOSWAP |
4751 MEM_CGROUP_RECLAIM_SHRINK);
4752 curusage = page_counter_read(&memcg->memsw);
4753
4754 if (curusage >= oldusage)
4755 retry_count--;
4756 else
4757 oldusage = curusage;
4758 } while (retry_count);
4759
4760 if (!ret && enlarge)
4761 memcg_oom_recover(memcg);
4762 return ret;
4763}
4764
4765unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
4766 gfp_t gfp_mask,
4767 unsigned long *total_scanned)
4768{
4769 unsigned long nr_reclaimed = 0;
4770 struct mem_cgroup_per_zone *mz, *next_mz = NULL;
4771 unsigned long reclaimed;
4772 int loop = 0;
4773 struct mem_cgroup_tree_per_zone *mctz;
4774 unsigned long excess;
4775 unsigned long nr_scanned;
4776
4777 if (order > 0)
4778 return 0;
4779
4780 mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
4781
4782
4783
4784
4785
4786 do {
4787 if (next_mz)
4788 mz = next_mz;
4789 else
4790 mz = mem_cgroup_largest_soft_limit_node(mctz);
4791 if (!mz)
4792 break;
4793
4794 nr_scanned = 0;
4795 reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
4796 gfp_mask, &nr_scanned);
4797 nr_reclaimed += reclaimed;
4798 *total_scanned += nr_scanned;
4799 spin_lock(&mctz->lock);
4800
4801
4802
4803
4804
4805 next_mz = NULL;
4806 if (!reclaimed) {
4807 do {
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819 next_mz =
4820 __mem_cgroup_largest_soft_limit_node(mctz);
4821 if (next_mz == mz)
4822 css_put(&next_mz->memcg->css);
4823 else
4824 break;
4825 } while (1);
4826 }
4827 __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
4828 excess = soft_limit_excess(mz->memcg);
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838 __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
4839 spin_unlock(&mctz->lock);
4840 css_put(&mz->memcg->css);
4841 loop++;
4842
4843
4844
4845
4846
4847 if (!nr_reclaimed &&
4848 (next_mz == NULL ||
4849 loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
4850 break;
4851 } while (!nr_reclaimed);
4852 if (next_mz)
4853 css_put(&next_mz->memcg->css);
4854 return nr_reclaimed;
4855}
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867
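/*
 * Drain one LRU list of @memcg in the given node/zone by moving every
 * page's charge to the parent cgroup.  A page that cannot be moved at
 * the moment is rotated to the other end of the list and retried, so
 * the loop only ends once the list is empty.
 */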
4868static void mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
4869 int node, int zid, enum lru_list lru)
4870{
4871 struct lruvec *lruvec;
4872 unsigned long flags;
4873 struct list_head *list;
4874 struct page *busy;
4875 struct zone *zone;
4876
4877 zone = &NODE_DATA(node)->node_zones[zid];
4878 lruvec = mem_cgroup_zone_lruvec(zone, memcg);
4879 list = &lruvec->lists[lru];
4880
4881 busy = NULL;
4882 do {
4883 struct page_cgroup *pc;
4884 struct page *page;
4885
4886 spin_lock_irqsave(&zone->lru_lock, flags);
4887 if (list_empty(list)) {
4888 spin_unlock_irqrestore(&zone->lru_lock, flags);
4889 break;
4890 }
4891 page = list_entry(list->prev, struct page, lru);
4892 if (busy == page) {
4893 list_move(&page->lru, list);
4894 busy = NULL;
4895 spin_unlock_irqrestore(&zone->lru_lock, flags);
4896 continue;
4897 }
4898 spin_unlock_irqrestore(&zone->lru_lock, flags);
4899
4900 pc = lookup_page_cgroup(page);
4901
4902 if (mem_cgroup_move_parent(page, pc, memcg)) {
4903
4904 busy = page;
4905 cond_resched();
4906 } else
4907 busy = NULL;
4908 } while (!list_empty(list));
4909}
4910
4911
4912
4913
4914
4915
4916
4917
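/*
 * Move every remaining charge of @memcg to its parent.  LRU and
 * per-cpu stocks are drained and the per-zone LRU lists are walked
 * repeatedly until the user memory usage (total usage minus kmem,
 * which cannot be reparented here) drops to zero.
 */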
4918static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg)
4919{
4920 int node, zid;
4921
4922 do {
4923
4924 lru_add_drain_all();
4925 drain_all_stock_sync(memcg);
4926 mem_cgroup_start_move(memcg);
4927 for_each_node_state(node, N_MEMORY) {
4928 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
4929 enum lru_list lru;
4930 for_each_lru(lru) {
4931 mem_cgroup_force_empty_list(memcg,
4932 node, zid, lru);
4933 }
4934 }
4935 }
4936 mem_cgroup_end_move(memcg);
4937 memcg_oom_recover(memcg);
4938 cond_resched();
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952 } while (page_counter_read(&memcg->memory) -
4953 page_counter_read(&memcg->kmem) > 0);
4954}
4955
4956
4957
4958
4959
4960
4961static inline bool __memcg_has_children(struct mem_cgroup *memcg)
4962{
4963 struct cgroup *pos;
4964
4965
4966 cgroup_for_each_child(pos, memcg->css.cgroup)
4967 return true;
4968 return false;
4969}
4970
4971
4972
4973
4974
4975
4976
4977
4978static inline bool memcg_has_children(struct mem_cgroup *memcg)
4979{
4980 return memcg->use_hierarchy && __memcg_has_children(memcg);
4981}
4982
4983
4984
4985
4986
4987
4988
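/*
 * Implementation of memory.force_empty: refuse with -EBUSY while the
 * cgroup still has tasks or children, then try to reclaim its pages
 * (retrying a bounded number of times and waiting for congestion when
 * no progress is made) and finally reparent whatever could not be
 * freed.  Returns -EINTR if interrupted by a signal.
 */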
4989static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
4990{
4991 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
4992 struct cgroup *cgrp = memcg->css.cgroup;
4993
4994
4995 if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
4996 return -EBUSY;
4997
4998
4999 lru_add_drain_all();
5000
5001 while (nr_retries && page_counter_read(&memcg->memory)) {
5002 int progress;
5003
5004 if (signal_pending(current))
5005 return -EINTR;
5006
5007 progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL,
5008 false);
5009 if (!progress) {
5010 nr_retries--;
5011
5012 congestion_wait(BLK_RW_ASYNC, HZ/10);
5013 }
5014
5015 }
5016 lru_add_drain();
5017 mem_cgroup_reparent_charges(memcg);
5018
5019 return 0;
5020}
5021
5022static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
5023{
5024 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
5025 int ret;
5026
5027 if (mem_cgroup_is_root(memcg))
5028 return -EINVAL;
5029 css_get(&memcg->css);
5030 ret = mem_cgroup_force_empty(memcg);
5031 css_put(&memcg->css);
5032
5033 return ret;
5034}
5035
5036
5037static u64 mem_cgroup_hierarchy_read(struct cgroup *cont, struct cftype *cft)
5038{
5039 return mem_cgroup_from_cont(cont)->use_hierarchy;
5040}
5041
5042static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
5043 u64 val)
5044{
5045 int retval = 0;
5046 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
5047 struct cgroup *parent = cont->parent;
5048 struct mem_cgroup *parent_memcg = NULL;
5049
5050 if (parent)
5051 parent_memcg = mem_cgroup_from_cont(parent);
5052
5053 mutex_lock(&memcg_create_mutex);
5054
5055 if (memcg->use_hierarchy == val)
5056 goto out;
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066 if ((!parent_memcg || !parent_memcg->use_hierarchy) &&
5067 (val == 1 || val == 0)) {
5068 if (!__memcg_has_children(memcg))
5069 memcg->use_hierarchy = val;
5070 else
5071 retval = -EBUSY;
5072 } else
5073 retval = -EINVAL;
5074
5075out:
5076 mutex_unlock(&memcg_create_mutex);
5077
5078 return retval;
5079}
5080
5081
5082static unsigned long tree_stat(struct mem_cgroup *memcg,
5083 enum mem_cgroup_stat_index idx)
5084{
5085 struct mem_cgroup *iter;
5086 long val = 0;
5087
5088
5089 for_each_mem_cgroup_tree(iter, memcg)
5090 val += mem_cgroup_read_stat(iter, idx);
5091
5092 if (val < 0)
5093 val = 0;
5094 return val;
5095}
5096
5097static inline unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
5098{
5099 unsigned long val;
5100
5101 if (mem_cgroup_is_root(memcg)) {
5102 val = tree_stat(memcg, MEM_CGROUP_STAT_CACHE);
5103 val += tree_stat(memcg, MEM_CGROUP_STAT_RSS);
5104 if (swap)
5105 val += tree_stat(memcg, MEM_CGROUP_STAT_SWAP);
5106 } else {
5107 if (!swap)
5108 val = page_counter_read(&memcg->memory);
5109 else
5110 val = page_counter_read(&memcg->memsw);
5111 }
5112 return val;
5113}
5114
5115enum {
5116 RES_USAGE,
5117 RES_LIMIT,
5118 RES_MAX_USAGE,
5119 RES_FAILCNT,
5120 RES_SOFT_LIMIT,
5121};
5122
5123static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
5124 struct file *file, char __user *buf,
5125 size_t nbytes, loff_t *ppos)
5126{
5127 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
5128 char str[64];
5129 u64 val;
5130 int len;
5131 struct page_counter *counter;
5132
5133 switch (MEMFILE_TYPE(cft->private)) {
5134 case _MEM:
5135 counter = &memcg->memory;
5136 break;
5137 case _MEMSWAP:
5138 counter = &memcg->memsw;
5139 break;
5140 case _KMEM:
5141 counter = &memcg->kmem;
5142 break;
5143 default:
5144 BUG();
5145 }
5146
5147 switch (MEMFILE_ATTR(cft->private)) {
5148 case RES_USAGE:
5149 if (counter == &memcg->memory)
5150 val = (u64)mem_cgroup_usage(memcg, false) * PAGE_SIZE;
5151 else if (counter == &memcg->memsw)
5152 val = (u64)mem_cgroup_usage(memcg, true) * PAGE_SIZE;
5153 else
5154 val = (u64)page_counter_read(counter) * PAGE_SIZE;
5155 break;
5156 case RES_LIMIT:
5157 val = (u64)counter->limit * PAGE_SIZE;
5158 break;
5159 case RES_MAX_USAGE:
5160 val = (u64)counter->watermark * PAGE_SIZE;
5161 break;
5162 case RES_FAILCNT:
5163 val = (u64)counter->failcnt;
5164 break;
5165 case RES_SOFT_LIMIT:
5166 val = (u64)memcg->soft_limit * PAGE_SIZE;
5167 break;
5168 default:
5169 BUG();
5170 }
5171
5172 len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val);
5173 return simple_read_from_buffer(buf, nbytes, ppos, str, len);
5174}
5175
5176static int memcg_update_kmem_limit(struct cgroup *cont, unsigned long limit)
5177{
5178 int ret = -EINVAL;
5179#ifdef CONFIG_MEMCG_KMEM
5180 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193 mutex_lock(&memcg_create_mutex);
5194 mutex_lock(&memcg_limit_mutex);
5195 if (!memcg->kmem_account_flags && limit != PAGE_COUNTER_MAX) {
5196 if (cgroup_task_count(cont) || memcg_has_children(memcg)) {
5197 ret = -EBUSY;
5198 goto out;
5199 }
5200 ret = page_counter_limit(&memcg->kmem, limit);
5201 VM_BUG_ON(ret);
5202
5203 ret = memcg_update_cache_sizes(memcg);
5204 if (ret) {
5205 page_counter_limit(&memcg->kmem, PAGE_COUNTER_MAX);
5206 goto out;
5207 }
5208 static_key_slow_inc(&memcg_kmem_enabled_key);
5209
5210
5211
5212
5213 memcg_kmem_set_active(memcg);
5214
5215
5216
5217
5218
5219
5220
5221 mem_cgroup_get(memcg);
5222 } else
5223 ret = page_counter_limit(&memcg->kmem, limit);
5224out:
5225 mutex_unlock(&memcg_limit_mutex);
5226 mutex_unlock(&memcg_create_mutex);
5227#endif
5228 return ret;
5229}
5230
5231#ifdef CONFIG_MEMCG_KMEM
5232static int memcg_propagate_kmem(struct mem_cgroup *memcg)
5233{
5234 int ret = 0;
5235 struct mem_cgroup *parent = parent_mem_cgroup(memcg);
5236 if (!parent)
5237 goto out;
5238
5239 memcg->kmem_account_flags = parent->kmem_account_flags;
5240
5241
5242
5243
5244
5245
5246
5247
5248
5249
5250 if (!memcg_kmem_is_active(memcg))
5251 goto out;
5252
5253
5254
5255
5256
5257
5258
5259 mem_cgroup_get(memcg);
5260 static_key_slow_inc(&memcg_kmem_enabled_key);
5261
5262 mutex_lock(&memcg_limit_mutex);
5263 ret = memcg_update_cache_sizes(memcg);
5264 mutex_unlock(&memcg_limit_mutex);
5265out:
5266 return ret;
5267}
5268#endif
5269
5270
5271
5272
5273
5274static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
5275 const char *buffer)
5276{
5277 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
5278 unsigned long nr_pages;
5279 int ret;
5280
5281 ret = page_counter_memparse(buffer, &nr_pages);
5282 if (ret)
5283 return ret;
5284
5285 switch (MEMFILE_ATTR(cft->private)) {
5286 case RES_LIMIT:
5287 if (mem_cgroup_is_root(memcg)) {
5288 ret = -EINVAL;
5289 break;
5290 }
5291 switch (MEMFILE_TYPE(cft->private)) {
5292 case _MEM:
5293 ret = mem_cgroup_resize_limit(memcg, nr_pages);
5294 break;
5295 case _MEMSWAP:
5296 ret = mem_cgroup_resize_memsw_limit(memcg, nr_pages);
5297 break;
5298 case _KMEM:
5299 ret = memcg_update_kmem_limit(cont, nr_pages);
5300 break;
5301 }
5302 break;
5303 case RES_SOFT_LIMIT:
5304 memcg->soft_limit = nr_pages;
5305 ret = 0;
5306 break;
5307 }
5308 return ret;
5309}
5310
5311static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
5312{
5313 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
5314 struct page_counter *counter;
5315
5316 switch (MEMFILE_TYPE(event)) {
5317 case _MEM:
5318 counter = &memcg->memory;
5319 break;
5320 case _MEMSWAP:
5321 counter = &memcg->memsw;
5322 break;
5323 case _KMEM:
5324 counter = &memcg->kmem;
5325 break;
5326 default:
5327 BUG();
5328 }
5329
5330 switch (MEMFILE_ATTR(event)) {
5331 case RES_MAX_USAGE:
5332 page_counter_reset_watermark(counter);
5333 break;
5334 case RES_FAILCNT:
5335 counter->failcnt = 0;
5336 break;
5337 default:
5338 BUG();
5339 }
5340
5341 return 0;
5342}
5343
5344static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
5345 struct cftype *cft)
5346{
5347 return mem_cgroup_from_cont(cgrp)->move_charge_at_immigrate;
5348}
5349
5350#ifdef CONFIG_MMU
5351static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
5352 struct cftype *cft, u64 val)
5353{
5354 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
5355
5356 if (val >= (1 << NR_MOVE_TYPE))
5357 return -EINVAL;
5358
5359
5360
5361
5362
5363
5364
5365 memcg->move_charge_at_immigrate = val;
5366 return 0;
5367}
5368#else
5369static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
5370 struct cftype *cft, u64 val)
5371{
5372 return -ENOSYS;
5373}
5374#endif
5375
5376#ifdef CONFIG_NUMA
5377static int memcg_numa_stat_show(struct cgroup *cont, struct cftype *cft,
5378 struct seq_file *m)
5379{
5380 int nid;
5381 unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
5382 unsigned long node_nr;
5383 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
5384
5385 total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL);
5386 seq_printf(m, "total=%lu", total_nr);
5387 for_each_node_state(nid, N_MEMORY) {
5388 node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL);
5389 seq_printf(m, " N%d=%lu", nid, node_nr);
5390 }
5391 seq_putc(m, '\n');
5392
5393 file_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_FILE);
5394 seq_printf(m, "file=%lu", file_nr);
5395 for_each_node_state(nid, N_MEMORY) {
5396 node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
5397 LRU_ALL_FILE);
5398 seq_printf(m, " N%d=%lu", nid, node_nr);
5399 }
5400 seq_putc(m, '\n');
5401
5402 anon_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_ANON);
5403 seq_printf(m, "anon=%lu", anon_nr);
5404 for_each_node_state(nid, N_MEMORY) {
5405 node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
5406 LRU_ALL_ANON);
5407 seq_printf(m, " N%d=%lu", nid, node_nr);
5408 }
5409 seq_putc(m, '\n');
5410
5411 unevictable_nr = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE));
5412 seq_printf(m, "unevictable=%lu", unevictable_nr);
5413 for_each_node_state(nid, N_MEMORY) {
5414 node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
5415 BIT(LRU_UNEVICTABLE));
5416 seq_printf(m, " N%d=%lu", nid, node_nr);
5417 }
5418 seq_putc(m, '\n');
5419 return 0;
5420}
5421#endif
5422
5423static inline void mem_cgroup_lru_names_not_uptodate(void)
5424{
5425 BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
5426}
5427
5428static int memcg_stat_show(struct cgroup *cont, struct cftype *cft,
5429 struct seq_file *m)
5430{
5431 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
5432 unsigned long memory, memsw;
5433 struct mem_cgroup *mi;
5434 unsigned int i;
5435
5436 for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
5437 if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
5438 continue;
5439 seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
5440 mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
5441 }
5442
5443 for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++)
5444 seq_printf(m, "%s %lu\n", mem_cgroup_events_names[i],
5445 mem_cgroup_read_events(memcg, i));
5446
5447 for (i = 0; i < NR_LRU_LISTS; i++)
5448 seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i],
5449 mem_cgroup_nr_lru_pages(memcg, BIT(i)) * PAGE_SIZE);
5450
5451
5452 memory = memsw = PAGE_COUNTER_MAX;
5453 for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) {
5454 memory = min(memory, mi->memory.limit);
5455 memsw = min(memsw, mi->memsw.limit);
5456 }
5457 seq_printf(m, "hierarchical_memory_limit %llu\n",
5458 (u64)memory * PAGE_SIZE);
5459 if (do_swap_account)
5460 seq_printf(m, "hierarchical_memsw_limit %llu\n",
5461 (u64)memsw * PAGE_SIZE);
5462
5463 for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
5464 long long val = 0;
5465
5466 if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
5467 continue;
5468 for_each_mem_cgroup_tree(mi, memcg)
5469 val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
5470 seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val);
5471 }
5472
5473 for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
5474 unsigned long long val = 0;
5475
5476 for_each_mem_cgroup_tree(mi, memcg)
5477 val += mem_cgroup_read_events(mi, i);
5478 seq_printf(m, "total_%s %llu\n",
5479 mem_cgroup_events_names[i], val);
5480 }
5481
5482 for (i = 0; i < NR_LRU_LISTS; i++) {
5483 unsigned long long val = 0;
5484
5485 for_each_mem_cgroup_tree(mi, memcg)
5486 val += mem_cgroup_nr_lru_pages(mi, BIT(i)) * PAGE_SIZE;
5487 seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], val);
5488 }
5489
5490#ifdef CONFIG_DEBUG_VM
5491 {
5492 int nid, zid;
5493 struct mem_cgroup_per_zone *mz;
5494 struct zone_reclaim_stat *rstat;
5495 unsigned long recent_rotated[2] = {0, 0};
5496 unsigned long recent_scanned[2] = {0, 0};
5497
5498 for_each_online_node(nid)
5499 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
5500 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
5501 rstat = &mz->lruvec.reclaim_stat;
5502
5503 recent_rotated[0] += rstat->recent_rotated[0];
5504 recent_rotated[1] += rstat->recent_rotated[1];
5505 recent_scanned[0] += rstat->recent_scanned[0];
5506 recent_scanned[1] += rstat->recent_scanned[1];
5507 }
5508 seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]);
5509 seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]);
5510 seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]);
5511 seq_printf(m, "recent_scanned_file %lu\n", recent_scanned[1]);
5512 }
5513#endif
5514
5515 return 0;
5516}
5517
5518static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft)
5519{
5520 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
5521
5522 return mem_cgroup_swappiness(memcg);
5523}
5524
5525static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
5526 u64 val)
5527{
5528 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
5529
5530 if (val > 100)
5531 return -EINVAL;
5532
5533 if (cgrp->parent)
5534 memcg->swappiness = val;
5535 else
5536 vm_swappiness = val;
5537
5538 return 0;
5539}
5540
5541static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
5542{
5543 struct mem_cgroup_threshold_ary *t;
5544 unsigned long usage;
5545 int i;
5546
5547 rcu_read_lock();
5548 if (!swap)
5549 t = rcu_dereference(memcg->thresholds.primary);
5550 else
5551 t = rcu_dereference(memcg->memsw_thresholds.primary);
5552
5553 if (!t)
5554 goto unlock;
5555
5556 usage = mem_cgroup_usage(memcg, swap);
5557
5558
5559
5560
5561
5562
5563 i = t->current_threshold;
5564
5565
5566
5567
5568
5569
5570
5571 for (; i >= 0 && unlikely(t->entries[i].threshold > usage); i--)
5572 eventfd_signal(t->entries[i].eventfd, 1);
5573
5574
5575 i++;
5576
5577
5578
5579
5580
5581
5582
5583 for (; i < t->size && unlikely(t->entries[i].threshold <= usage); i++)
5584 eventfd_signal(t->entries[i].eventfd, 1);
5585
5586
5587 t->current_threshold = i - 1;
5588unlock:
5589 rcu_read_unlock();
5590}
5591
5592static void mem_cgroup_threshold(struct mem_cgroup *memcg)
5593{
5594 while (memcg) {
5595 __mem_cgroup_threshold(memcg, false);
5596 if (do_swap_account)
5597 __mem_cgroup_threshold(memcg, true);
5598
5599 memcg = parent_mem_cgroup(memcg);
5600 }
5601}
5602
5603static int compare_thresholds(const void *a, const void *b)
5604{
5605 const struct mem_cgroup_threshold *_a = a;
5606 const struct mem_cgroup_threshold *_b = b;
5607
5608 if (_a->threshold > _b->threshold)
5609 return 1;
5610
5611 if (_a->threshold < _b->threshold)
5612 return -1;
5613
5614 return 0;
5615}
5616
5617static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
5618{
5619 struct mem_cgroup_eventfd_list *ev;
5620
5621 spin_lock(&memcg_oom_lock);
5622
5623 list_for_each_entry(ev, &memcg->oom_notify, list)
5624 eventfd_signal(ev->eventfd, 1);
5625
5626 spin_unlock(&memcg_oom_lock);
5627 return 0;
5628}
5629
5630static void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
5631{
5632 struct mem_cgroup *iter;
5633
5634 for_each_mem_cgroup_tree(iter, memcg)
5635 mem_cgroup_oom_notify_cb(iter);
5636}
5637
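/*
 * Register @eventfd to be signalled when memory (or memory+swap) usage
 * crosses the threshold encoded in @args; a new, sorted threshold array
 * is built and published to readers via RCU.
 */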
5638static int mem_cgroup_usage_register_event(struct cgroup *cgrp,
5639 struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
5640{
5641 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
5642 struct mem_cgroup_thresholds *thresholds;
5643 struct mem_cgroup_threshold_ary *new;
5644 enum res_type type = MEMFILE_TYPE(cft->private);
5645 unsigned long threshold;
5646 unsigned long usage;
5647 int i, size, ret;
5648
5649 ret = page_counter_memparse(args, &threshold);
5650 if (ret)
5651 return ret;
5652
5653 mutex_lock(&memcg->thresholds_lock);
5654
5655 if (type == _MEM)
5656 thresholds = &memcg->thresholds;
5657 else if (type == _MEMSWAP)
5658 thresholds = &memcg->memsw_thresholds;
5659 else
5660 BUG();
5661
5662 usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
5663
5664 /* Check if a threshold was crossed before adding the new one */
5665 if (thresholds->primary)
5666 __mem_cgroup_threshold(memcg, type == _MEMSWAP);
5667
5668 size = thresholds->primary ? thresholds->primary->size + 1 : 1;
5669
5670 /* Allocate memory for the new array of thresholds */
5671 new = kmalloc(sizeof(*new) + size * sizeof(struct mem_cgroup_threshold),
5672 GFP_KERNEL);
5673 if (!new) {
5674 ret = -ENOMEM;
5675 goto unlock;
5676 }
5677 new->size = size;
5678
5679 /* Copy the existing thresholds (if any) into the new array */
5680 if (thresholds->primary) {
5681 memcpy(new->entries, thresholds->primary->entries, (size - 1) *
5682 sizeof(struct mem_cgroup_threshold));
5683 }
5684
5685 /* Add the new threshold */
5686 new->entries[size - 1].eventfd = eventfd;
5687 new->entries[size - 1].threshold = threshold;
5688
5689 /* Sort the thresholds; registering a new one is not time-critical */
5690 sort(new->entries, size, sizeof(struct mem_cgroup_threshold),
5691 compare_thresholds, NULL);
5692
5693 /* Find the current threshold */
5694 new->current_threshold = -1;
5695 for (i = 0; i < size; i++) {
5696 if (new->entries[i].threshold <= usage) {
5697 /*
5698 * new->current_threshold will not be used until
5699 * rcu_assign_pointer(), so it's safe to increment
5700 * it here.
5701 */
5702 ++new->current_threshold;
5703 } else
5704 break;
5705 }
5706
5707 /* Free the old spare buffer and keep the old primary buffer as spare */
5708 kfree(thresholds->spare);
5709 thresholds->spare = thresholds->primary;
5710
5711 rcu_assign_pointer(thresholds->primary, new);
5712
5713 /* Make sure nobody still uses the old thresholds array */
5714 synchronize_rcu();
5715
5716unlock:
5717 mutex_unlock(&memcg->thresholds_lock);
5718
5719 return ret;
5720}
5721
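/*
 * Remove every threshold registered for @eventfd.  The surviving entries
 * are copied into the spare array, which is then swapped in as the new
 * primary array under RCU.
 */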
5722static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp,
5723 struct cftype *cft, struct eventfd_ctx *eventfd)
5724{
5725 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
5726 struct mem_cgroup_thresholds *thresholds;
5727 struct mem_cgroup_threshold_ary *new;
5728 enum res_type type = MEMFILE_TYPE(cft->private);
5729 unsigned long usage;
5730 int i, j, size;
5731
5732 mutex_lock(&memcg->thresholds_lock);
5733 if (type == _MEM)
5734 thresholds = &memcg->thresholds;
5735 else if (type == _MEMSWAP)
5736 thresholds = &memcg->memsw_thresholds;
5737 else
5738 BUG();
5739
5740 if (!thresholds->primary)
5741 goto unlock;
5742
5743 usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
5744
5745 /* Check if a threshold was crossed before removing */
5746 __mem_cgroup_threshold(memcg, type == _MEMSWAP);
5747
5748 /* Calculate the new number of thresholds */
5749 size = 0;
5750 for (i = 0; i < thresholds->primary->size; i++) {
5751 if (thresholds->primary->entries[i].eventfd != eventfd)
5752 size++;
5753 }
5754
5755 new = thresholds->spare;
5756
5757 /* If no thresholds remain, the primary array simply becomes NULL */
5758 if (!size) {
5759 kfree(new);
5760 new = NULL;
5761 goto swap_buffers;
5762 }
5763
5764 new->size = size;
5765
5766 /* Copy the remaining thresholds and find the current threshold */
5767 new->current_threshold = -1;
5768 for (i = 0, j = 0; i < thresholds->primary->size; i++) {
5769 if (thresholds->primary->entries[i].eventfd == eventfd)
5770 continue;
5771
5772 new->entries[j] = thresholds->primary->entries[i];
5773 if (new->entries[j].threshold <= usage) {
5774 /*
5775 * new->current_threshold will not be used
5776 * until rcu_assign_pointer(), so it's safe to increment
5777 * it here.
5778 */
5779 ++new->current_threshold;
5780 }
5781 j++;
5782 }
5783
5784swap_buffers:
5785 /* Swap the primary and spare arrays */
5786 thresholds->spare = thresholds->primary;
5787 /* If all events are unregistered, free the spare array */
5788 if (!new) {
5789 kfree(thresholds->spare);
5790 thresholds->spare = NULL;
5791 }
5792
5793 rcu_assign_pointer(thresholds->primary, new);
5794
5795 /* Make sure nobody still uses the old thresholds array */
5796 synchronize_rcu();
5797unlock:
5798 mutex_unlock(&memcg->thresholds_lock);
5799}
5800
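/*
 * Register @eventfd on the OOM notification list of this memcg; it is
 * signalled immediately if the group is already under OOM.
 */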
5801static int mem_cgroup_oom_register_event(struct cgroup *cgrp,
5802 struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
5803{
5804 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
5805 struct mem_cgroup_eventfd_list *event;
5806 enum res_type type = MEMFILE_TYPE(cft->private);
5807
5808 BUG_ON(type != _OOM_TYPE);
5809 event = kmalloc(sizeof(*event), GFP_KERNEL);
5810 if (!event)
5811 return -ENOMEM;
5812
5813 spin_lock(&memcg_oom_lock);
5814
5815 event->eventfd = eventfd;
5816 list_add(&event->list, &memcg->oom_notify);
5817
5818 /* signal right away if the group is already under OOM */
5819 if (atomic_read(&memcg->under_oom))
5820 eventfd_signal(eventfd, 1);
5821 spin_unlock(&memcg_oom_lock);
5822
5823 return 0;
5824}
5825
5826static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
5827 struct cftype *cft, struct eventfd_ctx *eventfd)
5828{
5829 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
5830 struct mem_cgroup_eventfd_list *ev, *tmp;
5831 enum res_type type = MEMFILE_TYPE(cft->private);
5832
5833 BUG_ON(type != _OOM_TYPE);
5834
5835 spin_lock(&memcg_oom_lock);
5836
5837 list_for_each_entry_safe(ev, tmp, &memcg->oom_notify, list) {
5838 if (ev->eventfd == eventfd) {
5839 list_del(&ev->list);
5840 kfree(ev);
5841 }
5842 }
5843
5844 spin_unlock(&memcg_oom_lock);
5845}
5846
5847static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
5848 struct cftype *cft, struct cgroup_map_cb *cb)
5849{
5850 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
5851
5852 cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable);
5853
5854 if (atomic_read(&memcg->under_oom))
5855 cb->fill(cb, "under_oom", 1);
5856 else
5857 cb->fill(cb, "under_oom", 0);
5858 return 0;
5859}
5860
5861static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
5862 struct cftype *cft, u64 val)
5863{
5864 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
5865
5866 /* cannot be set on the root cgroup; only 0 and 1 are allowed */
5867 if (!cgrp->parent || !((val == 0) || (val == 1)))
5868 return -EINVAL;
5869
5870 memcg->oom_kill_disable = val;
5871 if (!val)
5872 memcg_oom_recover(memcg);
5873
5874 return 0;
5875}
5876
5877#ifdef CONFIG_MEMCG_KMEM
5878static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
5879{
5880 int ret;
5881
5882 memcg->kmemcg_id = -1;
5883 ret = memcg_propagate_kmem(memcg);
5884 if (ret)
5885 return ret;
5886
5887 return mem_cgroup_sockets_init(memcg, ss);
5888}
5889
5890static void kmem_cgroup_destroy(struct mem_cgroup *memcg)
5891{
5892 mem_cgroup_sockets_destroy(memcg);
5893
5894 memcg_kmem_mark_dead(memcg);
5895
5896 if (page_counter_read(&memcg->kmem))
5897 return;
5898
5899 /*
5900 * Charges are already down to 0, so drop the reference that kmem
5901 * accounting held on this memcg.  memcg_kmem_test_and_clear_dead()
5902 * makes sure that only one of this path and the final uncharge
5903 * path actually performs the put.
5904 */
5905 if (memcg_kmem_test_and_clear_dead(memcg))
5906 mem_cgroup_put(memcg);
5907}
5908#else
5909static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
5910{
5911 return 0;
5912}
5913
5914static void kmem_cgroup_destroy(struct mem_cgroup *memcg)
5915{
5916}
5917#endif
5918
5919static struct cftype mem_cgroup_files[] = {
5920 {
5921 .name = "usage_in_bytes",
5922 .private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
5923 .read = mem_cgroup_read,
5924 .register_event = mem_cgroup_usage_register_event,
5925 .unregister_event = mem_cgroup_usage_unregister_event,
5926 },
5927 {
5928 .name = "max_usage_in_bytes",
5929 .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE),
5930 .trigger = mem_cgroup_reset,
5931 .read = mem_cgroup_read,
5932 },
5933 {
5934 .name = "limit_in_bytes",
5935 .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),
5936 .write_string = mem_cgroup_write,
5937 .read = mem_cgroup_read,
5938 },
5939 {
5940 .name = "soft_limit_in_bytes",
5941 .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
5942 .write_string = mem_cgroup_write,
5943 .read = mem_cgroup_read,
5944 },
5945 {
5946 .name = "failcnt",
5947 .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
5948 .trigger = mem_cgroup_reset,
5949 .read = mem_cgroup_read,
5950 },
5951 {
5952 .name = "stat",
5953 .read_seq_string = memcg_stat_show,
5954 },
5955 {
5956 .name = "force_empty",
5957 .trigger = mem_cgroup_force_empty_write,
5958 },
5959 {
5960 .name = "use_hierarchy",
5961 .flags = CFTYPE_INSANE,
5962 .write_u64 = mem_cgroup_hierarchy_write,
5963 .read_u64 = mem_cgroup_hierarchy_read,
5964 },
5965 {
5966 .name = "swappiness",
5967 .read_u64 = mem_cgroup_swappiness_read,
5968 .write_u64 = mem_cgroup_swappiness_write,
5969 },
5970 {
5971 .name = "move_charge_at_immigrate",
5972 .read_u64 = mem_cgroup_move_charge_read,
5973 .write_u64 = mem_cgroup_move_charge_write,
5974 },
5975 {
5976 .name = "oom_control",
5977 .read_map = mem_cgroup_oom_control_read,
5978 .write_u64 = mem_cgroup_oom_control_write,
5979 .register_event = mem_cgroup_oom_register_event,
5980 .unregister_event = mem_cgroup_oom_unregister_event,
5981 .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
5982 },
5983 {
5984 .name = "pressure_level",
5985 .register_event = vmpressure_register_event,
5986 .unregister_event = vmpressure_unregister_event,
5987 },
5988#ifdef CONFIG_NUMA
5989 {
5990 .name = "numa_stat",
5991 .read_seq_string = memcg_numa_stat_show,
5992 },
5993#endif
5994#ifdef CONFIG_MEMCG_KMEM
5995 {
5996 .name = "kmem.limit_in_bytes",
5997 .private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
5998 .write_string = mem_cgroup_write,
5999 .read = mem_cgroup_read,
6000 },
6001 {
6002 .name = "kmem.usage_in_bytes",
6003 .private = MEMFILE_PRIVATE(_KMEM, RES_USAGE),
6004 .read = mem_cgroup_read,
6005 },
6006 {
6007 .name = "kmem.failcnt",
6008 .private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT),
6009 .trigger = mem_cgroup_reset,
6010 .read = mem_cgroup_read,
6011 },
6012 {
6013 .name = "kmem.max_usage_in_bytes",
6014 .private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE),
6015 .trigger = mem_cgroup_reset,
6016 .read = mem_cgroup_read,
6017 },
6018#ifdef CONFIG_SLABINFO
6019 {
6020 .name = "kmem.slabinfo",
6021 .read_seq_string = mem_cgroup_slabinfo_read,
6022 },
6023#endif
6024#endif
6025 { },
6026};
6027
6028#ifdef CONFIG_MEMCG_SWAP
6029static struct cftype memsw_cgroup_files[] = {
6030 {
6031 .name = "memsw.usage_in_bytes",
6032 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
6033 .read = mem_cgroup_read,
6034 .register_event = mem_cgroup_usage_register_event,
6035 .unregister_event = mem_cgroup_usage_unregister_event,
6036 },
6037 {
6038 .name = "memsw.max_usage_in_bytes",
6039 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
6040 .trigger = mem_cgroup_reset,
6041 .read = mem_cgroup_read,
6042 },
6043 {
6044 .name = "memsw.limit_in_bytes",
6045 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
6046 .write_string = mem_cgroup_write,
6047 .read = mem_cgroup_read,
6048 },
6049 {
6050 .name = "memsw.failcnt",
6051 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
6052 .trigger = mem_cgroup_reset,
6053 .read = mem_cgroup_read,
6054 },
6055 { },
6056};
6057#endif
6058
6059
6060/*
6061 * Private memory cgroup IDs
6062 *
6063 * Swap-out records reference memory cgroups by a compact 16-bit id
6064 * rather than by pointer, so every memcg is assigned an id in the
6065 * range 1..MEM_CGROUP_ID_MAX from mem_cgroup_idr.
6066 *
6067 * The id is allocated in mem_cgroup_alloc() (first with a NULL entry,
6068 * which is replaced by the memcg pointer once the structure is fully
6069 * initialized) and released again in mem_cgroup_id_put().  Lookups go
6070 * through mem_cgroup_from_id() and must be done under rcu_read_lock();
6071 * the synchronize_rcu() calls on the update side make sure a reader
6072 * never sees a half-initialized or already-freed memcg.
6073 */
6083static DEFINE_IDR(mem_cgroup_idr);
6084
6085static unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
6086{
6087 return memcg->id;
6088}
6089
6090static void mem_cgroup_id_put(struct mem_cgroup *memcg)
6091{
6092 idr_remove(&mem_cgroup_idr, memcg->id);
6093 memcg->id = 0;
6094 synchronize_rcu();
6095}
6096
6097
6098/**
6099 * mem_cgroup_from_id - look up a memcg from an id
6100 * @id: the id to look up
6101 * Caller must hold rcu_read_lock().
6102 */
6103struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
6104{
6105 WARN_ON_ONCE(!rcu_read_lock_held());
6106 return idr_find(&mem_cgroup_idr, id);
6107}
6108
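/*
 * Allocate the per-node array of per-zone lruvec and soft-limit-tree state
 * for @memcg; the allocation falls back to any node when @node has no
 * normal memory.
 */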
6109static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
6110{
6111 struct mem_cgroup_per_node *pn;
6112 struct mem_cgroup_per_zone *mz;
6113 int zone, tmp = node;
6114
6115 /*
6116 * This routine is called against possible nodes.
6117 * But it's BUG to call kmalloc() against an offline node.
6118 *
6119 * TODO: this routine can waste much memory for nodes which will
6120 *       never be onlined. It's better to use a memory hotplug callback.
6121 */
6122 if (!node_state(node, N_NORMAL_MEMORY))
6123 tmp = -1;
6124 pn = kzalloc_node(sizeof(*pn), GFP_KERNEL, tmp);
6125 if (!pn)
6126 return 1;
6127
6128 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
6129 mz = &pn->zoneinfo[zone];
6130 lruvec_init(&mz->lruvec);
6131 mz->usage_in_excess = 0;
6132 mz->on_tree = false;
6133 mz->memcg = memcg;
6134 }
6135 memcg->info.nodeinfo[node] = pn;
6136 return 0;
6137}
6138
6139static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
6140{
6141 kfree(memcg->info.nodeinfo[node]);
6142}
6143
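/*
 * Allocate the mem_cgroup structure (kmalloc for small sizes, vmalloc
 * otherwise), assign it a private id and set up its per-cpu statistics.
 */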
6144static struct mem_cgroup *mem_cgroup_alloc(void)
6145{
6146 struct mem_cgroup *memcg;
6147 size_t size = memcg_size();
6148 int id;
6149
6150 /* memcg_size() can be very big if nr_node_ids is very big */
6151 if (size < PAGE_SIZE)
6152 memcg = kzalloc(size, GFP_KERNEL);
6153 else
6154 memcg = vzalloc(size);
6155
6156 if (!memcg)
6157 return NULL;
6158
6159 id = idr_alloc(&mem_cgroup_idr, NULL,
6160 1, MEM_CGROUP_ID_MAX,
6161 GFP_KERNEL);
6162 if (id < 0)
6163 goto fail;
6164
6165 memcg->id = id;
6166
6167 memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
6168 if (!memcg->stat)
6169 goto out_free;
6170 spin_lock_init(&memcg->pcp_counter_lock);
6171 idr_replace(&mem_cgroup_idr, memcg, memcg->id);
6172 synchronize_rcu();
6173 return memcg;
6174
6175out_free:
6176 if (memcg->id > 0) {
6177 idr_remove(&mem_cgroup_idr, memcg->id);
6178 synchronize_rcu();
6179 }
6180fail:
6181 if (size < PAGE_SIZE)
6182 kfree(memcg);
6183 else
6184 vfree(memcg);
6185 return NULL;
6186}
6187
6188/*
6189 * At destroying mem_cgroup, references from swap_cgroup can remain.
6190 * (scanning all references at force_empty is too costly...)
6191 *
6192 * Instead of clearing all references at force_empty, we remember
6193 * the number of references from swap_cgroup and free the mem_cgroup
6194 * when it goes down to 0.
6195 *
6196 * Removal of the cgroup itself succeeds regardless of refs from swap.
6197 * The memcg structure is only freed once the last reference is put.
6198 */
6199static void __mem_cgroup_free(struct mem_cgroup *memcg)
6200{
6201 int node;
6202 size_t size = memcg_size();
6203
6204 mem_cgroup_remove_from_trees(memcg);
6205
6206 mem_cgroup_id_put(memcg);
6207
6208 for_each_node(node)
6209 free_mem_cgroup_per_zone_info(memcg, node);
6210
6211 free_percpu(memcg->stat);
6212
6213 /*
6214 * We need to make sure that (at least for now), the jump label
6215 * destruction code runs outside of the cgroup lock. This is because
6216 * get_online_cpus(), which is called from the static_key update, can't
6217 * be called inside the cgroup_lock. cpusets are the ones enforcing
6218 * this dependency, so if they ever change, we might as well.
6219 *
6220 * schedule_work() will guarantee this happens. Be careful if you need
6221 * to move this code around, and make sure it is outside
6222 * the cgroup_lock.
6223 */
6224 disarm_static_keys(memcg);
6225 if (size < PAGE_SIZE)
6226 kfree(memcg);
6227 else
6228 vfree(memcg);
6229}
6230
6231/*
6232 * Helpers for freeing a kmalloc()ed/vmalloc()ed mem_cgroup by RCU,
6233 * but in process context, since __mem_cgroup_free() must not run
6234 * from softirq.  The work_freeing structure is overlaid on the
6235 * rcu_freeing structure, which itself is overlaid on memsw.
6236 */
6237static void free_work(struct work_struct *work)
6238{
6239 struct mem_cgroup *memcg;
6240
6241 memcg = container_of(work, struct mem_cgroup, work_freeing);
6242 __mem_cgroup_free(memcg);
6243}
6244
6245static void free_rcu(struct rcu_head *rcu_head)
6246{
6247 struct mem_cgroup *memcg;
6248
6249 memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing);
6250 INIT_WORK(&memcg->work_freeing, free_work);
6251 schedule_work(&memcg->work_freeing);
6252}
6253
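/* Reference counting for the mem_cgroup structure itself. */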
6254static void mem_cgroup_get(struct mem_cgroup *memcg)
6255{
6256 atomic_inc(&memcg->refcnt);
6257}
6258
6259static void __mem_cgroup_put(struct mem_cgroup *memcg, int count)
6260{
6261 if (atomic_sub_and_test(count, &memcg->refcnt)) {
6262 struct mem_cgroup *parent = parent_mem_cgroup(memcg);
6263 call_rcu(&memcg->rcu_freeing, free_rcu);
6264 if (parent)
6265 mem_cgroup_put(parent);
6266 }
6267}
6268
6269static void mem_cgroup_put(struct mem_cgroup *memcg)
6270{
6271 __mem_cgroup_put(memcg, 1);
6272}
6273
6274/*
6275 * Returns the parent mem_cgroup in the memcg hierarchy with hierarchy enabled.
6276 */
6277struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
6278{
6279 if (!memcg->memory.parent)
6280 return NULL;
6281 return mem_cgroup_from_counter(memcg->memory.parent, memory);
6282}
6283EXPORT_SYMBOL(parent_mem_cgroup);
6284
6285static void __init mem_cgroup_soft_limit_tree_init(void)
6286{
6287 struct mem_cgroup_tree_per_node *rtpn;
6288 struct mem_cgroup_tree_per_zone *rtpz;
6289 int tmp, node, zone;
6290
6291 for_each_node(node) {
6292 tmp = node;
6293 if (!node_state(node, N_NORMAL_MEMORY))
6294 tmp = -1;
6295 rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
6296 BUG_ON(!rtpn);
6297
6298 soft_limit_tree.rb_tree_per_node[node] = rtpn;
6299
6300 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
6301 rtpz = &rtpn->rb_tree_per_zone[zone];
6302 rtpz->rb_root = RB_ROOT;
6303 spin_lock_init(&rtpz->lock);
6304 }
6305 }
6306}
6307
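/* Called by the cgroup core to create a new memory cgroup. */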
6308static struct cgroup_subsys_state * __ref
6309mem_cgroup_css_alloc(struct cgroup *cont)
6310{
6311 struct mem_cgroup *memcg;
6312 long error = -ENOMEM;
6313 int node;
6314
6315 memcg = mem_cgroup_alloc();
6316 if (!memcg)
6317 return ERR_PTR(error);
6318
6319 for_each_node(node)
6320 if (alloc_mem_cgroup_per_zone_info(memcg, node))
6321 goto free_out;
6322
6323 /* the root cgroup: its counters have no parent */
6324 if (cont->parent == NULL) {
6325 root_mem_cgroup = memcg;
6326 page_counter_init(&memcg->memory, NULL);
6327 memcg->soft_limit = PAGE_COUNTER_MAX;
6328 page_counter_init(&memcg->memsw, NULL);
6329 page_counter_init(&memcg->kmem, NULL);
6330 }
6331
6332 memcg->last_scanned_node = MAX_NUMNODES;
6333 INIT_LIST_HEAD(&memcg->oom_notify);
6334 atomic_set(&memcg->refcnt, 1);
6335 memcg->move_charge_at_immigrate = 0;
6336 mutex_init(&memcg->thresholds_lock);
6337 spin_lock_init(&memcg->move_lock);
6338 vmpressure_init(&memcg->vmpressure);
6339
6340 return &memcg->css;
6341
6342free_out:
6343 __mem_cgroup_free(memcg);
6344 return ERR_PTR(error);
6345}
6346
6347static int
6348mem_cgroup_css_online(struct cgroup *cont)
6349{
6350 struct mem_cgroup *memcg, *parent;
6351 int error = 0;
6352
6353 if (!cont->parent)
6354 return 0;
6355
6356 mutex_lock(&memcg_create_mutex);
6357 memcg = mem_cgroup_from_cont(cont);
6358 parent = mem_cgroup_from_cont(cont->parent);
6359
6360 memcg->use_hierarchy = parent->use_hierarchy;
6361 memcg->oom_kill_disable = parent->oom_kill_disable;
6362 memcg->swappiness = mem_cgroup_swappiness(parent);
6363
6364 if (parent->use_hierarchy) {
6365 page_counter_init(&memcg->memory, &parent->memory);
6366 memcg->soft_limit = PAGE_COUNTER_MAX;
6367 page_counter_init(&memcg->memsw, &parent->memsw);
6368 page_counter_init(&memcg->kmem, &parent->kmem);
6369
6370 /*
6371 * We take a reference on the parent to ensure that we can
6372 * safely access it on page_counter charge/uncharge.
6373 * This reference will be dropped when freeing this
6374 * mem_cgroup (see mem_cgroup_put()).
6375 */
6376 mem_cgroup_get(parent);
6377 } else {
6378 page_counter_init(&memcg->memory, NULL);
6379 memcg->soft_limit = PAGE_COUNTER_MAX;
6380 page_counter_init(&memcg->memsw, NULL);
6381 page_counter_init(&memcg->kmem, NULL);
6382
6383 /*
6384 * A deeper hierarchy with use_hierarchy == false doesn't make much
6385 * sense, so let the cgroup subsystem know about this unfortunate state.
6386 */
6387 if (parent != root_mem_cgroup)
6388 mem_cgroup_subsys.broken_hierarchy = true;
6389 }
6390
6391 error = memcg_init_kmem(memcg, &mem_cgroup_subsys);
6392 mutex_unlock(&memcg_create_mutex);
6393 return error;
6394}
6395
6396/*
6397 * Announce to all parents that a group from their hierarchy is gone.
6398 */
6399static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
6400{
6401 struct mem_cgroup *parent = memcg;
6402
6403 while ((parent = parent_mem_cgroup(parent)))
6404 atomic_inc(&parent->dead_count);
6405
6406 /*
6407 * if the root memcg is not hierarchical we have to check it
6408 * explicitly.
6409 */
6410 if (!root_mem_cgroup->use_hierarchy)
6411 atomic_inc(&root_mem_cgroup->dead_count);
6412}
6413
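/* Called by the cgroup core when the group is being taken offline. */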
6414static void mem_cgroup_css_offline(struct cgroup *cont)
6415{
6416 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
6417 struct cgroup *iter;
6418
6419 mem_cgroup_invalidate_reclaim_iterators(memcg);
6420
6421 /*
6422 * Reparent the charges of all descendants bottom-up before this group
6423 * itself; drop the RCU lock around each step since reparenting sleeps.
6424 */
6425 rcu_read_lock();
6426 cgroup_for_each_descendant_post(iter, cont) {
6427 rcu_read_unlock();
6428 mem_cgroup_reparent_charges(mem_cgroup_from_cont(iter));
6429 rcu_read_lock();
6430 }
6431 rcu_read_unlock();
6432 mem_cgroup_reparent_charges(memcg);
6433
6434 mem_cgroup_destroy_all_caches(memcg);
6435}
6436
6437static void mem_cgroup_css_free(struct cgroup *cont)
6438{
6439 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
6440
6441 kmem_cgroup_destroy(memcg);
6442
6443 mem_cgroup_put(memcg);
6444}
6445
6446#ifdef CONFIG_MMU
6447/* Handlers for move charge at task migration. */
6448#define PRECHARGE_COUNT_AT_ONCE 256
6449static int mem_cgroup_do_precharge(unsigned long count)
6450{
6451 int ret = 0;
6452 int batch_count = PRECHARGE_COUNT_AT_ONCE;
6453 struct mem_cgroup *memcg = mc.to;
6454
6455 if (mem_cgroup_is_root(memcg)) {
6456 mc.precharge += count;
6457 /* root is never charged, so the precharge trivially succeeds */
6458 return ret;
6459 }
6460
6461 if (count > 1) {
6462 struct page_counter *dummy;
6463
6464 /*
6465 * Try to charge the whole batch against memory (and memsw, if
6466 * swap accounting is enabled) in one go; if either charge fails,
6467 * fall back to charging one page at a time below.
6468 */
6469 if (page_counter_try_charge(&memcg->memory, count, &dummy))
6470 goto one_by_one;
6471 if (do_swap_account &&
6472 page_counter_try_charge(&memcg->memsw, count, &dummy)) {
6473 page_counter_uncharge(&memcg->memory, count);
6474 goto one_by_one;
6475 }
6476 mc.precharge += count;
6477 return ret;
6478 }
6479one_by_one:
6480 /* fall back to charging one page at a time */
6481 while (count--) {
6482 if (signal_pending(current)) {
6483 ret = -EINTR;
6484 break;
6485 }
6486 if (!batch_count--) {
6487 batch_count = PRECHARGE_COUNT_AT_ONCE;
6488 cond_resched();
6489 }
6490 ret = __mem_cgroup_try_charge(NULL,
6491 GFP_KERNEL, 1, &memcg, false);
6492 if (ret)
6493 /* mem_cgroup_clear_mc() will do the uncharge later */
6494 return ret;
6495 mc.precharge++;
6496 }
6497 return ret;
6498}
6499
6500/**
6501 * get_mctgt_type - get target type of moving charge
6502 * @vma: the vma the pte to be checked belongs to
6503 * @addr: the address corresponding to the pte to be checked
6504 * @ptent: the pte to be checked
6505 * @target: the pointer the target page or swap ent will be stored in (can be NULL)
6506 *
6507 * Returns
6508 *   MC_TARGET_NONE: if the pte is not a target for move charge.
6509 *   MC_TARGET_PAGE: if the page corresponding to this pte is a target for
6510 *     move charge. if @target is not NULL, the page is stored in target->page
6511 *     with an extra refcount taken (callers should handle it).
6512 *   MC_TARGET_SWAP: if the swap entry corresponding to this pte is a
6513 *     target for charge migration. if @target is not NULL, the entry is
6514 *     stored in target->ent.
6515 *
6516 * Called with pte lock held.
6517 */
6518union mc_target {
6519 struct page *page;
6520 swp_entry_t ent;
6521};
6522
6523enum mc_target_type {
6524 MC_TARGET_NONE = 0,
6525 MC_TARGET_PAGE,
6526 MC_TARGET_SWAP,
6527};
6528
6529static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
6530 unsigned long addr, pte_t ptent)
6531{
6532 struct page *page = vm_normal_page(vma, addr, ptent);
6533
6534 if (!page || !page_mapped(page))
6535 return NULL;
6536 if (PageAnon(page)) {
6537 /* anon pages are only moved when anon charge moving is enabled */
6538 if (!move_anon())
6539 return NULL;
6540 } else if (!move_file())
6541 /* file pages are only moved when file charge moving is enabled */
6542 return NULL;
6543 if (!get_page_unless_zero(page))
6544 return NULL;
6545
6546 return page;
6547}
6548
6549#ifdef CONFIG_SWAP
6550static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
6551 unsigned long addr, pte_t ptent, swp_entry_t *entry)
6552{
6553 struct page *page = NULL;
6554 swp_entry_t ent = pte_to_swp_entry(ptent);
6555
6556 if (!move_anon() || non_swap_entry(ent))
6557 return NULL;
6558 /*
6559 * Because lookup_swap_cache() updates some statistics counters,
6560 * we call find_get_page() with swapper_space directly.
6561 */
6562 page = find_get_page(swap_address_space(ent), ent.val);
6563 if (do_swap_account)
6564 entry->val = ent.val;
6565
6566 return page;
6567}
6568#else
6569static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
6570 unsigned long addr, pte_t ptent, swp_entry_t *entry)
6571{
6572 return NULL;
6573}
6574#endif
6575
6576static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
6577 unsigned long addr, pte_t ptent, swp_entry_t *entry)
6578{
6579 struct page *page = NULL;
6580 struct address_space *mapping;
6581 pgoff_t pgoff;
6582
6583 if (!vma->vm_file)
6584 return NULL;
6585 if (!move_file())
6586 return NULL;
6587
6588 mapping = vma->vm_file->f_mapping;
6589 if (pte_none(ptent))
6590 pgoff = linear_page_index(vma, addr);
6591 else
6592 pgoff = pte_to_pgoff(ptent);
6593
6594 /* page is moved even if it's not RSS of this task (page-faulted). */
6595#ifdef CONFIG_SWAP
6596 /* shmem/tmpfs may report page out on swap: account for that too. */
6597 if (shmem_mapping(mapping)) {
6598 page = __find_get_page(mapping, pgoff);
6599 if (radix_tree_exceptional_entry(page)) {
6600 swp_entry_t swp = radix_to_swp_entry(page);
6601 if (do_swap_account)
6602 *entry = swp;
6603 page = find_get_page(swap_address_space(swp), swp.val);
6604 }
6605 } else
6606 page = find_get_page(mapping, pgoff);
6607#else
6608 page = find_get_page(mapping, pgoff);
6609#endif
6610 return page;
6611}
6612
6613static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
6614 unsigned long addr, pte_t ptent, union mc_target *target)
6615{
6616 struct page *page = NULL;
6617 struct page_cgroup *pc;
6618 enum mc_target_type ret = MC_TARGET_NONE;
6619 swp_entry_t ent = { .val = 0 };
6620
6621 if (pte_present(ptent))
6622 page = mc_handle_present_pte(vma, addr, ptent);
6623 else if (is_swap_pte(ptent))
6624 page = mc_handle_swap_pte(vma, addr, ptent, &ent);
6625 else if (pte_none(ptent) || pte_file(ptent))
6626 page = mc_handle_file_pte(vma, addr, ptent, &ent);
6627
6628 if (!page && !ent.val)
6629 return ret;
6630 if (page) {
6631 pc = lookup_page_cgroup(page);
6632 /*
6633 * Do only a loose check w/o taking the page_cgroup lock.
6634 * mem_cgroup_move_account() checks whether the pc is valid under
6635 * the lock.
6636 */
6637 if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
6638 ret = MC_TARGET_PAGE;
6639 if (target)
6640 target->page = page;
6641 }
6642 if (!ret || !target)
6643 put_page(page);
6644 }
6645
6646 if (ent.val && !ret &&
6647 mem_cgroup_id(mc.from) == lookup_swap_cgroup_id(ent)) {
6648 ret = MC_TARGET_SWAP;
6649 if (target)
6650 target->ent = ent;
6651 }
6652 return ret;
6653}
6654
6655#ifdef CONFIG_TRANSPARENT_HUGEPAGE
6656/*
6657 * We don't consider swapping or file mapped pages because THP does not
6658 * support them for now.
6659 * Caller should make sure that pmd_trans_huge(pmd) is true.
6660 */
6661static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
6662 unsigned long addr, pmd_t pmd, union mc_target *target)
6663{
6664 struct page *page = NULL;
6665 struct page_cgroup *pc;
6666 enum mc_target_type ret = MC_TARGET_NONE;
6667
6668 page = pmd_page(pmd);
6669 VM_BUG_ON_PAGE(!page || !PageHead(page), page);
6670 if (!move_anon())
6671 return ret;
6672 pc = lookup_page_cgroup(page);
6673 if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
6674 ret = MC_TARGET_PAGE;
6675 if (target) {
6676 get_page(page);
6677 target->page = page;
6678 }
6679 }
6680 return ret;
6681}
6682#else
6683static inline enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
6684 unsigned long addr, pmd_t pmd, union mc_target *target)
6685{
6686 return MC_TARGET_NONE;
6687}
6688#endif
6689
6690static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
6691 unsigned long addr, unsigned long end,
6692 struct mm_walk *walk)
6693{
6694 struct vm_area_struct *vma = walk->private;
6695 pte_t *pte;
6696 spinlock_t *ptl;
6697
6698 if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
6699 if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
6700 mc.precharge += HPAGE_PMD_NR;
6701 spin_unlock(ptl);
6702 return 0;
6703 }
6704
6705 if (pmd_trans_unstable(pmd))
6706 return 0;
6707 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
6708 for (; addr != end; pte++, addr += PAGE_SIZE)
6709 if (get_mctgt_type(vma, addr, *pte, NULL))
6710 mc.precharge++;
6711 pte_unmap_unlock(pte - 1, ptl);
6712 cond_resched();
6713
6714 return 0;
6715}
6716
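/*
 * Walk the whole address space of @mm and count how many charges would
 * have to be moved if the task migrated; the result is used to precharge
 * mc.to before the actual move.
 */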
6717static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
6718{
6719 unsigned long precharge;
6720 struct vm_area_struct *vma;
6721
6722 down_read(&mm->mmap_sem);
6723 for (vma = mm->mmap; vma; vma = vma->vm_next) {
6724 struct mm_walk mem_cgroup_count_precharge_walk = {
6725 .pmd_entry = mem_cgroup_count_precharge_pte_range,
6726 .mm = mm,
6727 .private = vma,
6728 };
6729 if (is_vm_hugetlb_page(vma))
6730 continue;
6731 walk_page_range(vma->vm_start, vma->vm_end,
6732 &mem_cgroup_count_precharge_walk);
6733 }
6734 up_read(&mm->mmap_sem);
6735
6736 precharge = mc.precharge;
6737 mc.precharge = 0;
6738
6739 return precharge;
6740}
6741
6742static int mem_cgroup_precharge_mc(struct mm_struct *mm)
6743{
6744 unsigned long precharge = mem_cgroup_count_precharge(mm);
6745
6746 VM_BUG_ON(mc.moving_task);
6747 mc.moving_task = current;
6748 return mem_cgroup_do_precharge(precharge);
6749}
6750
6751/* cancels all extra charges on mc.from and mc.to, and wakes up all waiters. */
6752static void __mem_cgroup_clear_mc(void)
6753{
6754 struct mem_cgroup *from = mc.from;
6755 struct mem_cgroup *to = mc.to;
6756
6757 /* we must uncharge all the leftover precharges from mc.to */
6758 if (mc.precharge) {
6759 __mem_cgroup_cancel_charge(mc.to, mc.precharge);
6760 mc.precharge = 0;
6761 }
6762 /*
6763 * we didn't uncharge from mc.from at mem_cgroup_move_account(), so
6764 * we must uncharge here.
6765 */
6766 if (mc.moved_charge) {
6767 __mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
6768 mc.moved_charge = 0;
6769 }
6770 /* we must fix up refcnts and charges */
6771 if (mc.moved_swap) {
6772 /* uncharge the swap account from the old cgroup */
6773 if (!mem_cgroup_is_root(mc.from))
6774 page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
6775
6776 if (!mem_cgroup_is_root(mc.to)) {
6777 /*
6778 * we charged both to->memory and to->memsw, so we should
6779 * uncharge to->memory.
6780 */
6781 page_counter_uncharge(&mc.to->memory, mc.moved_swap);
6782 }
6783 __mem_cgroup_put(mc.from, mc.moved_swap);
6784
6785 /* we've already done mem_cgroup_get(mc.to) */
6786 mc.moved_swap = 0;
6787 }
6788 memcg_oom_recover(from);
6789 memcg_oom_recover(to);
6790 wake_up_all(&mc.waitq);
6791}
6792
6793static void mem_cgroup_clear_mc(void)
6794{
6795 struct mem_cgroup *from = mc.from;
6796
6797 /*
6798 * we must clear moving_task before waking up waiters at the end of
6799 * task migration.
6800 */
6801 mc.moving_task = NULL;
6802 __mem_cgroup_clear_mc();
6803 spin_lock(&mc.lock);
6804 mc.from = NULL;
6805 mc.to = NULL;
6806 spin_unlock(&mc.lock);
6807 mem_cgroup_end_move(from);
6808}
6809
6810static int mem_cgroup_can_attach(struct cgroup *cgroup,
6811 struct cgroup_taskset *tset)
6812{
6813 struct task_struct *p = cgroup_taskset_first(tset);
6814 int ret = 0;
6815 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
6816 unsigned long move_charge_at_immigrate;
6817
6818 /*
6819 * We are now committed to this value whatever it is. Changes in this
6820 * tunable will only affect upcoming migrations, not the current one.
6821 * So we need to save it, and keep it going.
6822 */
6823 move_charge_at_immigrate = memcg->move_charge_at_immigrate;
6824 if (move_charge_at_immigrate) {
6825 struct mm_struct *mm;
6826 struct mem_cgroup *from = mem_cgroup_from_task(p);
6827
6828 VM_BUG_ON(from == memcg);
6829
6830 mm = get_task_mm(p);
6831 if (!mm)
6832 return 0;
6833
6834 if (mm->owner == p) {
6835 VM_BUG_ON(mc.from);
6836 VM_BUG_ON(mc.to);
6837 VM_BUG_ON(mc.precharge);
6838 VM_BUG_ON(mc.moved_charge);
6839 VM_BUG_ON(mc.moved_swap);
6840 mem_cgroup_start_move(from);
6841 spin_lock(&mc.lock);
6842 mc.from = from;
6843 mc.to = memcg;
6844 mc.immigrate_flags = move_charge_at_immigrate;
6845 spin_unlock(&mc.lock);
6846 /* We set mc.moving_task later */
6847
6848 ret = mem_cgroup_precharge_mc(mm);
6849 if (ret)
6850 mem_cgroup_clear_mc();
6851 }
6852 mmput(mm);
6853 }
6854 return ret;
6855}
6856
6857static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
6858 struct cgroup_taskset *tset)
6859{
6860 mem_cgroup_clear_mc();
6861}
6862
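/*
 * Second pass of the charge-moving walk: for each pte (or THP pmd) still
 * charged to mc.from, move the charge to mc.to, consuming the precharge
 * taken earlier.
 */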
6863static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
6864 unsigned long addr, unsigned long end,
6865 struct mm_walk *walk)
6866{
6867 int ret = 0;
6868 struct vm_area_struct *vma = walk->private;
6869 pte_t *pte;
6870 spinlock_t *ptl;
6871 enum mc_target_type target_type;
6872 union mc_target target;
6873 struct page *page;
6874 struct page_cgroup *pc;
6875
6876 /*
6877 * We don't take compound_lock() here but no race with splitting thp
6878 * happens because:
6879 *  - if pmd_trans_huge_lock() returns 1, the relevant thp is not
6880 *    under splitting, which means there's no concurrent thp split,
6881 *  - if another thread runs into split_huge_page() just after we
6882 *    entered this if-block, the thread must wait for the page table lock
6883 *    to be unlocked in __split_huge_page_splitting(), where the main
6884 *    part of the thp split is not executed yet.
6885 */
6886 if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
6887 if (mc.precharge < HPAGE_PMD_NR) {
6888 spin_unlock(ptl);
6889 return 0;
6890 }
6891 target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
6892 if (target_type == MC_TARGET_PAGE) {
6893 page = target.page;
6894 if (!isolate_lru_page(page)) {
6895 pc = lookup_page_cgroup(page);
6896 if (!mem_cgroup_move_account(page, HPAGE_PMD_NR,
6897 pc, mc.from, mc.to)) {
6898 mc.precharge -= HPAGE_PMD_NR;
6899 mc.moved_charge += HPAGE_PMD_NR;
6900 }
6901 putback_lru_page(page);
6902 }
6903 put_page(page);
6904 }
6905 spin_unlock(ptl);
6906 return 0;
6907 }
6908
6909 if (pmd_trans_unstable(pmd))
6910 return 0;
6911retry:
6912 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
6913 for (; addr != end; addr += PAGE_SIZE) {
6914 pte_t ptent = *(pte++);
6915 swp_entry_t ent;
6916
6917 if (!mc.precharge)
6918 break;
6919
6920 switch (get_mctgt_type(vma, addr, ptent, &target)) {
6921 case MC_TARGET_PAGE:
6922 page = target.page;
6923 if (isolate_lru_page(page))
6924 goto put;
6925 pc = lookup_page_cgroup(page);
6926 if (!mem_cgroup_move_account(page, 1, pc,
6927 mc.from, mc.to)) {
6928 mc.precharge--;
6929 /* we uncharge from mc.from later */
6930 mc.moved_charge++;
6931 }
6932 putback_lru_page(page);
6933put:
6934 put_page(page);
6935 break;
6936 case MC_TARGET_SWAP:
6937 ent = target.ent;
6938 if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to)) {
6939 mc.precharge--;
6940 /* we fix up the refcnts and charges later */
6941 mc.moved_swap++;
6942 }
6943 break;
6944 default:
6945 break;
6946 }
6947 }
6948 pte_unmap_unlock(pte - 1, ptl);
6949 cond_resched();
6950
6951 if (addr != end) {
6952 /*
6953 * We have consumed all the precharges we got in can_attach().
6954 * We try charging one by one, but don't do any additional
6955 * charges to mc.to if we have failed a charge once already in
6956 * the attach() phase.
6957 */
6958 ret = mem_cgroup_do_precharge(1);
6959 if (!ret)
6960 goto retry;
6961 }
6962
6963 return ret;
6964}
6965
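/*
 * Walk all VMAs of @mm and move charges from mc.from to mc.to.  mmap_sem
 * is taken with a trylock; on contention the extra charges are cancelled,
 * waiters are woken, and the walk is retried.
 */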
6966static void mem_cgroup_move_charge(struct mm_struct *mm)
6967{
6968 struct vm_area_struct *vma;
6969
6970 lru_add_drain_all();
6971retry:
6972 if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
6973 /*
6974 * Someone who is holding the mmap_sem might be waiting in
6975 * waitq. So we cancel all extra charges, wake up all waiters,
6976 * and retry. Because we cancel precharges, we might not be able
6977 * to move enough charges, but moving charge is a best-effort
6978 * feature anyway, so it wouldn't be a big problem.
6979 */
6980 __mem_cgroup_clear_mc();
6981 cond_resched();
6982 goto retry;
6983 }
6984 for (vma = mm->mmap; vma; vma = vma->vm_next) {
6985 int ret;
6986 struct mm_walk mem_cgroup_move_charge_walk = {
6987 .pmd_entry = mem_cgroup_move_charge_pte_range,
6988 .mm = mm,
6989 .private = vma,
6990 };
6991 if (is_vm_hugetlb_page(vma))
6992 continue;
6993 ret = walk_page_range(vma->vm_start, vma->vm_end,
6994 &mem_cgroup_move_charge_walk);
6995 if (ret)
6996 /*
6997 * means we have consumed all precharges and failed in
6998 * doing additional charge. Just abandon here.
6999 */
7000 break;
7001 }
7002 up_read(&mm->mmap_sem);
7003}
7004
7005static void mem_cgroup_move_task(struct cgroup *cont,
7006 struct cgroup_taskset *tset)
7007{
7008 struct task_struct *p = cgroup_taskset_first(tset);
7009 struct mm_struct *mm = get_task_mm(p);
7010
7011 if (mm) {
7012 if (mc.to)
7013 mem_cgroup_move_charge(mm);
7014 mmput(mm);
7015 }
7016 if (mc.to)
7017 mem_cgroup_clear_mc();
7018}
7019#else
7020static int mem_cgroup_can_attach(struct cgroup *cgroup,
7021 struct cgroup_taskset *tset)
7022{
7023 return 0;
7024}
7025static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
7026 struct cgroup_taskset *tset)
7027{
7028}
7029static void mem_cgroup_move_task(struct cgroup *cont,
7030 struct cgroup_taskset *tset)
7031{
7032}
7033#endif
7034
7035/*
7036 * Cgroup retains root cgroups across [un]mount cycles making it necessary
7037 * to verify the sane_behavior flag on each mount attempt.
7038 */
7039static void mem_cgroup_bind(struct cgroup *root)
7040{
7041 /*
7042 * use_hierarchy is forced with sane_behavior.  cgroup core
7043 * guarantees that @root doesn't have any children, so turning it
7044 * on for the root memcg is enough.
7045 */
7046 if (cgroup_sane_behavior(root))
7047 mem_cgroup_from_cont(root)->use_hierarchy = true;
7048}
7049
7050struct cgroup_subsys mem_cgroup_subsys = {
7051 .name = "memory",
7052 .subsys_id = mem_cgroup_subsys_id,
7053 .css_alloc = mem_cgroup_css_alloc,
7054 .css_online = mem_cgroup_css_online,
7055 .css_offline = mem_cgroup_css_offline,
7056 .css_free = mem_cgroup_css_free,
7057 .can_attach = mem_cgroup_can_attach,
7058 .cancel_attach = mem_cgroup_cancel_attach,
7059 .attach = mem_cgroup_move_task,
7060 .bind = mem_cgroup_bind,
7061 .base_cftypes = mem_cgroup_files,
7062 .early_init = 0,
7063};
7064
7065#ifdef CONFIG_MEMCG_SWAP
7066static int __init enable_swap_account(char *s)
7067{
7068 /* swapaccount=1 enables, swapaccount=0 disables memsw accounting */
7069 if (!strcmp(s, "1"))
7070 really_do_swap_account = 1;
7071 else if (!strcmp(s, "0"))
7072 really_do_swap_account = 0;
7073 return 1;
7074}
7075__setup("swapaccount=", enable_swap_account);
7076
7077static void __init memsw_file_init(void)
7078{
7079 WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, memsw_cgroup_files));
7080}
7081
7082static void __init enable_swap_cgroup(void)
7083{
7084 if (!mem_cgroup_disabled() && really_do_swap_account) {
7085 do_swap_account = 1;
7086 memsw_file_init();
7087 }
7088}
7089
7090#else
7091static void __init enable_swap_cgroup(void)
7092{
7093}
7094#endif
7095
7096/*
7097 * subsys_initcall() for the memory controller.
7098 *
7099 * Some parts like hotcpu_notifier() have to be initialized from this context
7100 * because of lock dependencies (cgroup_lock -> cpu hotplug) but basically
7101 * everything that doesn't depend on a specific locking policy should be put
7102 * in this context.
7103 */
7104static int __init mem_cgroup_init(void)
7105{
7106 hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
7107 enable_swap_cgroup();
7108 mem_cgroup_soft_limit_tree_init();
7109 memcg_stock_init();
7110 return 0;
7111}
7112subsys_initcall(mem_cgroup_init);
7113