1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/cred.h>
31#include <linux/ctype.h>
32#include <linux/errno.h>
33#include <linux/fs.h>
34#include <linux/init_task.h>
35#include <linux/kernel.h>
36#include <linux/list.h>
37#include <linux/mm.h>
38#include <linux/mutex.h>
39#include <linux/mount.h>
40#include <linux/pagemap.h>
41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
43#include <linux/sched.h>
44#include <linux/backing-dev.h>
45#include <linux/seq_file.h>
46#include <linux/slab.h>
47#include <linux/magic.h>
48#include <linux/spinlock.h>
49#include <linux/string.h>
50#include <linux/sort.h>
51#include <linux/kmod.h>
52#include <linux/module.h>
53#include <linux/delayacct.h>
54#include <linux/cgroupstats.h>
55#include <linux/hash.h>
56#include <linux/namei.h>
57#include <linux/pid_namespace.h>
58#include <linux/idr.h>
59#include <linux/vmalloc.h>
60#include <linux/eventfd.h>
61#include <linux/poll.h>
62#include <linux/flex_array.h>
63#include <linux/kthread.h>
64
65#include <linux/atomic.h>
66
67
/*
 * A cgroup_subsys_state's base refcount has this bias added while the css
 * is being deactivated; css_unbias_refcnt() strips it off again.
 */
#define CSS_DEACT_BIAS INT_MIN
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
/*
 * cgroup_mutex is the master lock guarding cgroup hierarchies and task
 * attachment.  cgroup_root_mutex nests inside it and protects the
 * per-root option state (flags, name, release_agent path).
 */
static DEFINE_MUTEX(cgroup_mutex);
static DEFINE_MUTEX(cgroup_root_mutex);
88
89
90
91
92
93
94
/*
 * Generate an array of built-in subsystem pointers, indexed by subsystem id.
 * Entries may be NULL at runtime (the code below checks for it), presumably
 * for subsystems registered later from modules — confirm against
 * cgroup_load_subsys() elsewhere in this file.
 */
#define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys,
#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};
100
/* Size of cgroupfs_root.name, including the terminating NUL. */
#define MAX_CGROUP_ROOT_NAMELEN 64
102
103
104
105
106
107
/*
 * A cgroupfs_root represents the root of a cgroup hierarchy, and may be
 * associated with a superblock to form an active hierarchy.
 */
struct cgroupfs_root {
	struct super_block *sb;

	/*
	 * The bitmask of subsystems intended to be attached to this
	 * hierarchy
	 */
	unsigned long subsys_mask;

	/* Unique id for this hierarchy. */
	int hierarchy_id;

	/* The bitmask of subsystems currently attached to this hierarchy */
	unsigned long actual_subsys_mask;

	/* A list running through the attached subsystems */
	struct list_head subsys_list;

	/* The root cgroup for this hierarchy */
	struct cgroup top_cgroup;

	/* Tracks how many cgroups are currently defined in hierarchy. */
	int number_of_cgroups;

	/* A list running through the active hierarchies */
	struct list_head root_list;

	/* All cgroups on this root, cgroup_mutex protected */
	struct list_head allcg_list;

	/* Hierarchy-specific flags (ROOT_* bits) */
	unsigned long flags;

	/* IDs for cgroups in this hierarchy */
	struct ida cgroup_ida;

	/* The path to use for release notifications. */
	char release_agent_path[PATH_MAX];

	/* The name for this hierarchy - may be empty */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};
150
151
152
153
154
155
/*
 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
 * subsystems that are otherwise unattached - it never has more than a
 * single cgroup, and all tasks are part of that cgroup.
 */
static struct cgroupfs_root rootnode;
157
158
159
160
/*
 * cgroupfs file entry, pointed to from leaf dentry->d_fsdata.  Links a
 * control file's dentry to its cftype and to the owning cgroup's file list.
 */
struct cfent {
	struct list_head node;
	struct dentry *dentry;
	struct cftype *type;
};
166
167
168
169
170
/*
 * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). Used only when
 * cgroup_subsys->use_id != 0.
 */
#define CSS_ID_MAX (65535)
struct css_id {
	/*
	 * The css to which this ID points. This pointer is set to valid value
	 * after cgroup is populated. If cgroup is removed, this will be NULL.
	 * This pointer is expected to be RCU-safe because destroy()
	 * is called after synchronize_rcu(). But for safe use, css_tryget()
	 * should be used for avoiding race.
	 */
	struct cgroup_subsys_state __rcu *css;
	/*
	 * ID of this css.
	 */
	unsigned short id;
	/*
	 * Depth in hierarchy which this ID belongs to.
	 */
	unsigned short depth;
	/*
	 * ID is freed by RCU. (and lookup routine is RCU safe.)
	 */
	struct rcu_head rcu_head;
	/*
	 * Hierarchy of CSS ID belongs to.
	 */
	unsigned short stack[0]; /* Array of length (depth+1) */
};
198
199
200
201
/*
 * cgroup_event represents events which userspace want to receive.
 */
struct cgroup_event {
	/*
	 * Cgroup which the event belongs to.
	 */
	struct cgroup *cgrp;
	/*
	 * Control file which the event associated.
	 */
	struct cftype *cft;
	/*
	 * eventfd to signal userspace about the event.
	 */
	struct eventfd_ctx *eventfd;
	/*
	 * Each of these stored in a list by the cgroup.
	 */
	struct list_head list;
	/*
	 * All fields below needed to unregister event when
	 * userspace closes eventfd.
	 */
	poll_table pt;
	wait_queue_head_t *wqh;
	wait_queue_t wait;
	struct work_struct remove;
};
228
229
230
/* The list of hierarchy roots */
static LIST_HEAD(roots);
static int root_count;

/* Hierarchy ID allocation; hierarchy_id_lock serialises the ida and cursor */
static DEFINE_IDA(hierarchy_ida);
static int next_hierarchy_id;
static DEFINE_SPINLOCK(hierarchy_id_lock);

/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
#define dummytop (&rootnode.top_cgroup)

/*
 * This flag indicates whether tasks in the fork and exit paths should
 * check for fork/exit handlers to call. This avoids us having to do
 * extra work in the fork/exit path if none of the subsystems need to
 * be called.
 */
static int need_forkexit_callback __read_mostly;

static int cgroup_destroy_locked(struct cgroup *cgrp);
static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
			      struct cftype cfts[], bool is_add);
251
#ifdef CONFIG_PROVE_LOCKING
/* With lockdep, report whether cgroup_mutex is held by the current context. */
int cgroup_lock_is_held(void)
{
	return lockdep_is_held(&cgroup_mutex);
}
#else
/* Without lockdep we can only tell whether the mutex is locked at all. */
int cgroup_lock_is_held(void)
{
	return mutex_is_locked(&cgroup_mutex);
}
#endif

EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
265
266static int css_unbias_refcnt(int refcnt)
267{
268 return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
269}
270
271
272static int css_refcnt(struct cgroup_subsys_state *css)
273{
274 int v = atomic_read(&css->refcnt);
275
276 return css_unbias_refcnt(v);
277}
278
279
/* Test whether @cgrp has been marked removed (CGRP_REMOVED flag). */
inline int cgroup_is_removed(const struct cgroup *cgrp)
{
	return test_bit(CGRP_REMOVED, &cgrp->flags);
}
284
285
/* bits in struct cgroupfs_root flags field */
enum {
	ROOT_NOPREFIX,	/* mounted subsystems have no named prefix */
	ROOT_XATTR,	/* supports extended attributes */
};
290
291static int cgroup_is_releasable(const struct cgroup *cgrp)
292{
293 const int bits =
294 (1 << CGRP_RELEASABLE) |
295 (1 << CGRP_NOTIFY_ON_RELEASE);
296 return (cgrp->flags & bits) == bits;
297}
298
/* Test whether release-agent notification is enabled for @cgrp. */
static int notify_on_release(const struct cgroup *cgrp)
{
	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}
303
304
305
306
307
/*
 * for_each_subsys() allows you to iterate on each subsystem attached to
 * an active hierarchy
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &_root->subsys_list, sibling)

/* for_each_active_root() allows you to iterate across the active hierarchies */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)
314
/* A directory dentry's d_fsdata is the cgroup it represents. */
static inline struct cgroup *__d_cgrp(struct dentry *dentry)
{
	return dentry->d_fsdata;
}

/* A file dentry's d_fsdata is its cfent. */
static inline struct cfent *__d_cfe(struct dentry *dentry)
{
	return dentry->d_fsdata;
}

/* Shorthand for the cftype behind a file dentry. */
static inline struct cftype *__d_cft(struct dentry *dentry)
{
	return __d_cfe(dentry)->type;
}
329
330
331
/*
 * The list of cgroups needing release-agent notification, protected by
 * release_list_lock; check_for_release() queues cgroups here and the
 * release_agent_work item invokes cgroup_release_agent() to drain it.
 */
static LIST_HEAD(release_list);
static DEFINE_RAW_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
337
338
/* Link structure for associating css_set objects with cgroups */
struct cg_cgroup_link {
	/*
	 * List running through cg_cgroup_links associated with a
	 * cgroup, anchored on cgroup->css_sets
	 */
	struct list_head cgrp_link_list;
	struct cgroup *cgrp;
	/*
	 * List running through cg_cgroup_links pointing at a
	 * single css_set object, anchored on css_set->cg_links
	 */
	struct list_head cg_link_list;
	struct css_set *cg;
};
353
354
355
356
357
358
359
360
/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted.  init_css_set_link ties it to the dummy
 * hierarchy's top cgroup.
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

static int cgroup_init_idr(struct cgroup_subsys *ss,
			   struct cgroup_subsys_state *css);
366
367
368
369
/*
 * css_set_lock protects the list of css_set objects, and the chain of
 * tasks off each css_set.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;

/*
 * hash table for cgroup groups. This improves the performance to find
 * an existing css_set. This hash doesn't (currently) take into
 * account cgroups in empty hierarchies.
 */
#define CSS_SET_HASH_BITS 7
#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
381
382static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
383{
384 int i;
385 int index;
386 unsigned long tmp = 0UL;
387
388 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
389 tmp += (unsigned long)css[i];
390 tmp = (tmp >> 16) ^ tmp;
391
392 index = hash_long(tmp, CSS_SET_HASH_BITS);
393
394 return &css_set_table[index];
395}
396
397
398
399
400
/*
 * Set once per-task css_set links are in use; presumably flipped by the
 * first cgroup mount/attach to avoid link maintenance cost before then —
 * confirm against the code that writes it elsewhere in this file.
 */
static int use_task_css_set_links __read_mostly;
402
/*
 * Drop a reference on @cg.  On the final put, unlink it from the hash
 * table and its cgroup links under css_set_lock and free it via RCU.
 * @taskexit is non-zero when called from the task-exit path, in which
 * case emptied cgroups are marked releasable.
 */
static void __put_css_set(struct css_set *cg, int taskexit)
{
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	/*
	 * Ensure that the refcount doesn't hit zero while any readers
	 * can see it. Similar to atomic_dec_and_lock(), but for an
	 * rwlock
	 */
	if (atomic_add_unless(&cg->refcount, -1, 1))
		return;
	write_lock(&css_set_lock);
	if (!atomic_dec_and_test(&cg->refcount)) {
		write_unlock(&css_set_lock);
		return;
	}

	/* This css_set is dead. unlink it and release cgroup refcounts */
	hlist_del(&cg->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
				 cg_link_list) {
		struct cgroup *cgrp = link->cgrp;
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		if (atomic_dec_and_test(&cgrp->count) &&
		    notify_on_release(cgrp)) {
			if (taskexit)
				set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}

		kfree(link);
	}

	write_unlock(&css_set_lock);
	kfree_rcu(cg, rcu_head);
}
442
443
444
445
/* Take a reference on a css_set. */
static inline void get_css_set(struct css_set *cg)
{
	atomic_inc(&cg->refcount);
}

/* Drop a reference taken outside the task-exit path. */
static inline void put_css_set(struct css_set *cg)
{
	__put_css_set(cg, 0);
}

/* Drop a reference from the task-exit path (may mark cgroups releasable). */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	__put_css_set(cg, 1);
}
460
461
462
463
464
465
466
467
468
469
470
/*
 * compare_css_sets - helper function for find_existing_css_set().
 * @cg: candidate css_set being tested
 * @old_cg: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if "cg" matches "old_cg" except for the hierarchy
 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
 */
static bool compare_css_sets(struct css_set *cg,
			     struct css_set *old_cg,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct list_head *l1, *l2;

	if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
		/* Not all subsystems matched */
		return false;
	}

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies with no subsystems. We
	 * could get by with just this check alone (and skip the
	 * memcmp above) but on most setups the memcmp check will
	 * avoid the need for this more expensive check on almost all
	 * candidates.
	 */
	l1 = &cg->cg_links;
	l2 = &old_cg->cg_links;
	while (1) {
		struct cg_cgroup_link *cgl1, *cgl2;
		struct cgroup *cg1, *cg2;

		l1 = l1->next;
		l2 = l2->next;
		/* See if we reached the end - both lists are equal length. */
		if (l1 == &cg->cg_links) {
			BUG_ON(l2 != &old_cg->cg_links);
			break;
		} else {
			BUG_ON(l2 == &old_cg->cg_links);
		}
		/* Locate the cgroups associated with these links. */
		cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
		cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
		cg1 = cgl1->cgrp;
		cg2 = cgl2->cgrp;
		/* Hierarchies should be linked in the same order. */
		BUG_ON(cg1->root != cg2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cg1->root == new_cgrp->root) {
			if (cg1 != new_cgrp)
				return false;
		} else {
			if (cg1 != cg2)
				return false;
		}
	}
	return true;
}
532
533
534
535
536
537
538
539
540
541
542
543
544
545
/*
 * find_existing_css_set() is a helper for find_css_set(), and checks to
 * see whether an existing css_set is suitable.
 *
 * oldcg: the cgroup group that we're using before the cgroup transition
 * cgrp: the cgroup that we're moving into
 * template: location in which to build the desired set of subsystem
 * state objects for the new cgroup group
 */
static struct css_set *find_existing_css_set(
	struct css_set *oldcg,
	struct cgroup *cgrp,
	struct cgroup_subsys_state *template[])
{
	int i;
	struct cgroupfs_root *root = cgrp->root;
	struct hlist_head *hhead;
	struct hlist_node *node;
	struct css_set *cg;

	/*
	 * Build the set of subsystem state objects that we want to see in the
	 * new css_set. while subsystems can change globally, the entries here
	 * won't change, so no need for locking.
	 */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		if (root->subsys_mask & (1UL << i)) {
			/* Subsystem is in this hierarchy. So we want
			 * the subsystem state from the new
			 * cgroup */
			template[i] = cgrp->subsys[i];
		} else {
			/* Subsystem is not in this hierarchy, so we
			 * don't want to change the subsystem state */
			template[i] = oldcg->subsys[i];
		}
	}

	hhead = css_set_hash(template);
	hlist_for_each_entry(cg, node, hhead, hlist) {
		if (!compare_css_sets(cg, oldcg, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cg;
	}

	/* No existing cgroup group matched */
	return NULL;
}
587
588static void free_cg_links(struct list_head *tmp)
589{
590 struct cg_cgroup_link *link;
591 struct cg_cgroup_link *saved_link;
592
593 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
594 list_del(&link->cgrp_link_list);
595 kfree(link);
596 }
597}
598
599
600
601
602
603
604static int allocate_cg_links(int count, struct list_head *tmp)
605{
606 struct cg_cgroup_link *link;
607 int i;
608 INIT_LIST_HEAD(tmp);
609 for (i = 0; i < count; i++) {
610 link = kmalloc(sizeof(*link), GFP_KERNEL);
611 if (!link) {
612 free_cg_links(tmp);
613 return -ENOMEM;
614 }
615 list_add(&link->cgrp_link_list, tmp);
616 }
617 return 0;
618}
619
620
621
622
623
624
625
/**
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links()
 * @cg: the css_set to be linked
 * @cgrp: the destination cgroup
 */
static void link_css_set(struct list_head *tmp_cg_links,
			 struct css_set *cg, struct cgroup *cgrp)
{
	struct cg_cgroup_link *link;

	BUG_ON(list_empty(tmp_cg_links));
	link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
				cgrp_link_list);
	link->cg = cg;
	link->cgrp = cgrp;
	atomic_inc(&cgrp->count);
	list_move(&link->cgrp_link_list, &cgrp->css_sets);
	/*
	 * Always add links to the tail of the list so that the list
	 * is sorted by order of hierarchy creation
	 */
	list_add_tail(&link->cg_link_list, &cg->cg_links);
}
644
645
646
647
648
649
650
651
/*
 * find_css_set() takes an existing cgroup group and a
 * cgroup object, and returns a css_set object that's
 * equivalent to the old group, but with the given cgroup
 * substituted into the appropriate hierarchy. Must be called with
 * cgroup_mutex held
 */
static struct css_set *find_css_set(
	struct css_set *oldcg, struct cgroup *cgrp)
{
	struct css_set *res;
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];

	struct list_head tmp_cg_links;

	struct hlist_head *hhead;
	struct cg_cgroup_link *link;

	/* First see if we already have a cgroup group that matches
	 * the desired set */
	read_lock(&css_set_lock);
	res = find_existing_css_set(oldcg, cgrp, template);
	if (res)
		get_css_set(res);
	read_unlock(&css_set_lock);

	if (res)
		return res;

	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return NULL;

	/* Allocate all the cg_cgroup_link objects that we'll need */
	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
		kfree(res);
		return NULL;
	}

	atomic_set(&res->refcount, 1);
	INIT_LIST_HEAD(&res->cg_links);
	INIT_LIST_HEAD(&res->tasks);
	INIT_HLIST_NODE(&res->hlist);

	/* Copy the set of subsystem state objects generated in
	 * find_existing_css_set() */
	memcpy(res->subsys, template, sizeof(res->subsys));

	write_lock(&css_set_lock);
	/* Add reference counts and links from the new css_set. */
	list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
		struct cgroup *c = link->cgrp;
		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_cg_links, res, c);
	}

	BUG_ON(!list_empty(&tmp_cg_links));

	css_set_count++;

	/* Add this cgroup group to the hash table */
	hhead = css_set_hash(res->subsys);
	hlist_add_head(&res->hlist, hhead);

	write_unlock(&css_set_lock);

	return res;
}
714
715
716
717
718
/*
 * Return the cgroup for "task" from the given hierarchy. Must be
 * called with cgroup_mutex held.
 */
static struct cgroup *task_cgroup_from_root(struct task_struct *task,
					    struct cgroupfs_root *root)
{
	struct css_set *css;
	struct cgroup *res = NULL;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	read_lock(&css_set_lock);
	/*
	 * No need to lock the task - since we hold cgroup_mutex the
	 * task can't change groups, so the only thing that can happen
	 * is that it exits and its css is set back to init_css_set.
	 */
	css = task->cgroups;
	if (css == &init_css_set) {
		res = &root->top_cgroup;
	} else {
		struct cg_cgroup_link *link;
		list_for_each_entry(link, &css->cg_links, cg_link_list) {
			struct cgroup *c = link->cgrp;
			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	read_unlock(&css_set_lock);
	BUG_ON(!res);
	return res;
}
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
/**
 * cgroup_lock - lock out any changes to cgroup structures
 */
void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_lock);
809
810
811
812
813
814
/**
 * cgroup_unlock - release lock on cgroup changes
 *
 * Undo the lock taken in a previous cgroup_lock() call.
 */
void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unlock);
820
821
822
823
824
825
826
827
/*
 * Forward declarations for the cgroupfs inode/file machinery defined
 * later in this file.
 */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
			       unsigned long subsys_mask);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;

/* cgroup files do no dirty/writeback accounting */
static struct backing_dev_info cgroup_backing_dev_info = {
	.name = "cgroup",
	.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
843
844static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
845{
846 struct inode *inode = new_inode(sb);
847
848 if (inode) {
849 inode->i_ino = get_next_ino();
850 inode->i_mode = mode;
851 inode->i_uid = current_fsuid();
852 inode->i_gid = current_fsgid();
853 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
854 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
855 }
856 return inode;
857}
858
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
	/* is dentry a directory ? if so, kfree() associated cgroup */
	if (S_ISDIR(inode->i_mode)) {
		struct cgroup *cgrp = dentry->d_fsdata;
		struct cgroup_subsys *ss;
		BUG_ON(!(cgroup_is_removed(cgrp)));
		/* It's possible for external users to be holding css
		 * reference counts on a cgroup; css_put() needs to
		 * be able to access the cgroup after decrementing
		 * the reference count in order to know if it needs to
		 * queue the cgroup to be handled by the release
		 * agent */
		synchronize_rcu();

		mutex_lock(&cgroup_mutex);
		/*
		 * Release the subsystem state objects.
		 */
		for_each_subsys(cgrp->root, ss)
			ss->css_free(cgrp);

		cgrp->root->number_of_cgroups--;
		mutex_unlock(&cgroup_mutex);

		/*
		 * Drop the active superblock reference that we took when we
		 * created the cgroup
		 */
		deactivate_super(cgrp->root->sb);

		/*
		 * if we're getting rid of the cgroup, refcount should ensure
		 * that there are no pidlists left.
		 */
		BUG_ON(!list_empty(&cgrp->pidlists));

		simple_xattrs_free(&cgrp->xattrs);

		ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
		kfree_rcu(cgrp, rcu_head);
	} else {
		struct cfent *cfe = __d_cfe(dentry);
		struct cgroup *cgrp = dentry->d_parent->d_fsdata;
		struct cftype *cft = cfe->type;

		WARN_ONCE(!list_empty(&cfe->node) &&
			  cgrp != &cgrp->root->top_cgroup,
			  "cfe still linked for %s\n", cfe->type->name);
		kfree(cfe);
		simple_xattrs_free(&cft->xattrs);
	}
	iput(inode);
}
913
/*
 * d_delete() callback: returning 1 tells the dcache to delete cgroup
 * dentries as soon as they become unused instead of caching them.
 */
static int cgroup_delete(const struct dentry *d)
{
	return 1;
}
918
/*
 * Delete a cgroup directory dentry and remove it from its parent inode,
 * pinning the parent across the operation.
 */
static void remove_dir(struct dentry *d)
{
	struct dentry *parent = dget(d->d_parent);

	d_delete(d);
	simple_rmdir(parent->d_inode, d);
	dput(parent);
}
927
/*
 * Remove from @cgrp the file matching @cft, or the first file on the
 * list if @cft is NULL.  Caller must hold both the directory i_mutex
 * and cgroup_mutex.  Returns 0 on success, -ENOENT if nothing matched.
 */
static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
{
	struct cfent *cfe;

	lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
	lockdep_assert_held(&cgroup_mutex);

	list_for_each_entry(cfe, &cgrp->files, node) {
		struct dentry *d = cfe->dentry;

		if (cft && cfe->type != cft)
			continue;

		/* pin the dentry so it survives d_delete/simple_unlink */
		dget(d);
		d_delete(d);
		simple_unlink(cgrp->dentry->d_inode, d);
		list_del_init(&cfe->node);
		dput(d);

		return 0;
	}
	return -ENOENT;
}
951
952
953
954
955
956
957
/**
 * cgroup_clear_directory - selective removal of base and subsystem files
 * @dir: directory containing the files
 * @base_files: true if the base files should be removed
 * @subsys_mask: mask of the subsystem ids whose files should be removed
 */
static void cgroup_clear_directory(struct dentry *dir, bool base_files,
				   unsigned long subsys_mask)
{
	struct cgroup *cgrp = __d_cgrp(dir);
	struct cgroup_subsys *ss;

	for_each_subsys(cgrp->root, ss) {
		struct cftype_set *set;
		if (!test_bit(ss->subsys_id, &subsys_mask))
			continue;
		list_for_each_entry(set, &ss->cftsets, node)
			cgroup_addrm_files(cgrp, NULL, set->cfts, false);
	}
	if (base_files) {
		while (!list_empty(&cgrp->files))
			cgroup_rm_file(cgrp, NULL);
	}
}
976
977
978
979
/*
 * NOTE : the dentry must have been dget()'ed
 */
static void cgroup_d_remove_dir(struct dentry *dentry)
{
	struct dentry *parent;
	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;

	cgroup_clear_directory(dentry, true, root->subsys_mask);

	/* unhash from the parent's child list under both d_locks */
	parent = dentry->d_parent;
	spin_lock(&parent->d_lock);
	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	list_del_init(&dentry->d_u.d_child);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&parent->d_lock);
	remove_dir(dentry);
}
995
996
997
998
999
1000
/*
 * Call with cgroup_mutex held. Drops reference counts on modules, including
 * any duplicate ones that parse_cgroupfs_options took. If this function
 * returns an error, no reference counts are touched.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			     unsigned long final_subsys_mask)
{
	unsigned long added_mask, removed_mask;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	BUG_ON(!mutex_is_locked(&cgroup_root_mutex));

	removed_mask = root->actual_subsys_mask & ~final_subsys_mask;
	added_mask = final_subsys_mask & ~root->actual_subsys_mask;
	/* Check that any added subsystems are currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_mask))
			continue;
		/*
		 * Nobody should tell us to do a subsys that doesn't exist:
		 * parse_cgroupfs_options should catch that case and refcounts
		 * ensure that subsystems won't disappear once selected.
		 */
		BUG_ON(ss == NULL);
		if (ss->root != &rootnode) {
			/* Subsystem isn't free */
			return -EBUSY;
		}
	}

	/* Currently we don't handle adding/removing subsystems when
	 * any child cgroups exist. This is theoretically supportable
	 * but involves complex error handling, so it's being left until
	 * later */
	if (root->number_of_cgroups > 1)
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_mask) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			if (ss->bind)
				ss->bind(cgrp);
			/* refcount was already taken, and we're keeping it */
		} else if (bit & removed_mask) {
			/* We're removing this subsystem */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			if (ss->bind)
				ss->bind(dummytop);
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			subsys[i]->root = &rootnode;
			list_move(&ss->sibling, &rootnode.subsys_list);
			/* subsystem is now free - drop reference on module */
			module_put(ss->module);
		} else if (bit & final_subsys_mask) {
			/* Subsystem state should already exist */
			BUG_ON(ss == NULL);
			BUG_ON(!cgrp->subsys[i]);
			/*
			 * a refcount was taken, but we already had one, so
			 * drop the extra reference.
			 */
			module_put(ss->module);
#ifdef CONFIG_MODULE_UNLOAD
			BUG_ON(ss->module && !module_refcount(ss->module));
#endif
		} else {
			/* Subsystem state shouldn't exist */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_mask = root->actual_subsys_mask = final_subsys_mask;
	synchronize_rcu();

	return 0;
}
1090
/*
 * Emit this hierarchy's mount options for /proc/mounts.
 *
 * NOTE(review): root->name and release_agent_path are printed without
 * escaping; values containing ',' would corrupt the output — upstream
 * later switched to seq_show_option(). Worth confirming/backporting.
 */
static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
{
	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
	struct cgroup_subsys *ss;

	mutex_lock(&cgroup_root_mutex);
	for_each_subsys(root, ss)
		seq_printf(seq, ",%s", ss->name);
	if (test_bit(ROOT_NOPREFIX, &root->flags))
		seq_puts(seq, ",noprefix");
	if (test_bit(ROOT_XATTR, &root->flags))
		seq_puts(seq, ",xattr");
	if (strlen(root->release_agent_path))
		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
		seq_puts(seq, ",clone_children");
	if (strlen(root->name))
		seq_printf(seq, ",name=%s", root->name);
	mutex_unlock(&cgroup_root_mutex);
	return 0;
}
1112
/* Parsed mount options, filled in by parse_cgroupfs_options(). */
struct cgroup_sb_opts {
	unsigned long subsys_mask;
	unsigned long flags;
	char *release_agent;
	bool cpuset_clone_children;
	char *name;
	/* User explicitly requested empty subsystem */
	bool none;

	struct cgroupfs_root *new_root;

};
1125
1126
1127
1128
1129
1130
1131
/*
 * Convert a hierarchy specifier into a bitmask of subsystems and
 * flags. Call with cgroup_mutex held to protect the subsys[] array.
 * This function may sleep.
 */
static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
{
	char *token, *o = data;
	bool all_ss = false, one_ss = false;
	unsigned long mask = (unsigned long)-1;
	int i;
	bool module_pin_failed = false;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));

#ifdef CONFIG_CPUSETS
	mask = ~(1UL << cpuset_subsys_id);
#endif

	memset(opts, 0, sizeof(*opts));

	while ((token = strsep(&o, ",")) != NULL) {
		if (!*token)
			return -EINVAL;
		if (!strcmp(token, "none")) {
			/* Explicitly have no subsystems */
			opts->none = true;
			continue;
		}
		if (!strcmp(token, "all")) {
			/* Mutually exclusive option 'all' + subsystem name */
			if (one_ss)
				return -EINVAL;
			all_ss = true;
			continue;
		}
		if (!strcmp(token, "noprefix")) {
			set_bit(ROOT_NOPREFIX, &opts->flags);
			continue;
		}
		if (!strcmp(token, "clone_children")) {
			opts->cpuset_clone_children = true;
			continue;
		}
		if (!strcmp(token, "xattr")) {
			set_bit(ROOT_XATTR, &opts->flags);
			continue;
		}
		if (!strncmp(token, "release_agent=", 14)) {
			/* Specifying two release agents is forbidden */
			if (opts->release_agent)
				return -EINVAL;
			opts->release_agent =
				kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
			if (!opts->release_agent)
				return -ENOMEM;
			continue;
		}
		if (!strncmp(token, "name=", 5)) {
			const char *name = token + 5;
			/* Can't specify an empty name */
			if (!strlen(name))
				return -EINVAL;
			/* Must match [\w.-]+ */
			for (i = 0; i < strlen(name); i++) {
				char c = name[i];
				if (isalnum(c))
					continue;
				if ((c == '.') || (c == '-') || (c == '_'))
					continue;
				return -EINVAL;
			}
			/* Specifying two names is forbidden */
			if (opts->name)
				return -EINVAL;
			opts->name = kstrndup(name,
					      MAX_CGROUP_ROOT_NAMELEN - 1,
					      GFP_KERNEL);
			if (!opts->name)
				return -ENOMEM;

			continue;
		}

		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss == NULL)
				continue;
			if (strcmp(token, ss->name))
				continue;
			if (ss->disabled)
				continue;

			/* Mutually exclusive option 'all' + subsystem name */
			if (all_ss)
				return -EINVAL;
			set_bit(i, &opts->subsys_mask);
			one_ss = true;

			break;
		}
		if (i == CGROUP_SUBSYS_COUNT)
			return -ENOENT;
	}

	/*
	 * If the 'all' option was specified select all the subsystems,
	 * otherwise if 'none', 'name=' and a subsystem name options
	 * were not specified, let's default the same behavior as using
	 * 'all' option.
	 */
	if (all_ss || (!one_ss && !opts->none && !opts->name)) {
		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss == NULL)
				continue;
			if (ss->disabled)
				continue;
			set_bit(i, &opts->subsys_mask);
		}
	}

	/* Consistency checks */

	/*
	 * Option noprefix was introduced just for backward compatibility
	 * with the old cpuset, so we allow noprefix only if mounting just
	 * the cpuset subsystem.
	 */
	if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
	    (opts->subsys_mask & mask))
		return -EINVAL;


	/* Can't specify "none" and some subsystems */
	if (opts->subsys_mask && opts->none)
		return -EINVAL;

	/*
	 * We either have to specify by name or by subsystems. (So all
	 * empty hierarchies must have a name).
	 */
	if (!opts->subsys_mask && !opts->name)
		return -EINVAL;

	/*
	 * Grab references on all the modules we'll need, so the subsystems
	 * don't dance around before rebind_subsystems attaches them. This may
	 * take duplicate reference counts on a subsystem that's already used,
	 * but rebind_subsystems handles this case.
	 */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;

		if (!(bit & opts->subsys_mask))
			continue;
		if (!try_module_get(subsys[i]->module)) {
			module_pin_failed = true;
			break;
		}
	}
	if (module_pin_failed) {
		/*
		 * oops, one of the modules was going away. this means that we
		 * raced with a module_delete call, and to the user this is
		 * essentially a "subsystem doesn't exist" error.
		 */
		for (i--; i >= 0; i--) {
			/* drop refcounts only on the ones we took */
			unsigned long bit = 1UL << i;

			if (!(bit & opts->subsys_mask))
				continue;
			module_put(subsys[i]->module);
		}
		return -ENOENT;
	}

	return 0;
}
1306
1307static void drop_parsed_module_refcounts(unsigned long subsys_mask)
1308{
1309 int i;
1310 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1311 unsigned long bit = 1UL << i;
1312
1313 if (!(bit & subsys_mask))
1314 continue;
1315 module_put(subsys[i]->module);
1316 }
1317}
1318
/*
 * Remount a cgroup hierarchy.  Only subsystem membership and the
 * release_agent may change; flags and name are fixed at mount time.
 */
static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{
	int ret = 0;
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	struct cgroup_sb_opts opts;
	unsigned long added_mask, removed_mask;

	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
	mutex_lock(&cgroup_mutex);
	mutex_lock(&cgroup_root_mutex);

	/* See what subsystems are wanted */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

	if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent)
		pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
			   task_tgid_nr(current), current->comm);

	added_mask = opts.subsys_mask & ~root->subsys_mask;
	removed_mask = root->subsys_mask & ~opts.subsys_mask;

	/* Don't allow flags or name to change at remount */
	if (opts.flags != root->flags ||
	    (opts.name && strcmp(opts.name, root->name))) {
		ret = -EINVAL;
		drop_parsed_module_refcounts(opts.subsys_mask);
		goto out_unlock;
	}

	/*
	 * Clear out the files of subsystems that should be removed, do
	 * this before rebind_subsystems, since rebind_subsystems may
	 * change this hierarchy's subsys_list.
	 */
	cgroup_clear_directory(cgrp->dentry, false, removed_mask);

	ret = rebind_subsystems(root, opts.subsys_mask);
	if (ret) {
		/* rebind_subsystems failed, re-populate the removed files */
		cgroup_populate_dir(cgrp, false, removed_mask);
		drop_parsed_module_refcounts(opts.subsys_mask);
		goto out_unlock;
	}

	/* re-populate subsystem files */
	cgroup_populate_dir(cgrp, false, added_mask);

	if (opts.release_agent)
		strcpy(root->release_agent_path, opts.release_agent);
 out_unlock:
	kfree(opts.release_agent);
	kfree(opts.name);
	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
	return ret;
}
1379
/* super_operations for cgroupfs superblocks */
static const struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
1386
/* Initialise the lists, locks and xattrs embedded in a new cgroup. */
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	INIT_LIST_HEAD(&cgrp->sibling);
	INIT_LIST_HEAD(&cgrp->children);
	INIT_LIST_HEAD(&cgrp->files);
	INIT_LIST_HEAD(&cgrp->css_sets);
	INIT_LIST_HEAD(&cgrp->allcg_node);
	INIT_LIST_HEAD(&cgrp->release_list);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	INIT_LIST_HEAD(&cgrp->event_list);
	spin_lock_init(&cgrp->event_list_lock);
	simple_xattrs_init(&cgrp->xattrs);
}
1401
/* Initialise a freshly allocated cgroupfs_root and its top cgroup. */
static void init_cgroup_root(struct cgroupfs_root *root)
{
	struct cgroup *cgrp = &root->top_cgroup;

	INIT_LIST_HEAD(&root->subsys_list);
	INIT_LIST_HEAD(&root->root_list);
	INIT_LIST_HEAD(&root->allcg_list);
	root->number_of_cgroups = 1;	/* the top cgroup itself */
	cgrp->root = root;
	cgrp->top_cgroup = cgrp;
	init_cgroup_housekeeping(cgrp);
	list_add_tail(&cgrp->allcg_node, &root->allcg_list);
}
1415
/*
 * Allocate a hierarchy id for @root from hierarchy_ida, preferring ids
 * at or above next_hierarchy_id and wrapping to 0 on exhaustion.
 * Returns false only if memory for the ida could not be reserved.
 */
static bool init_root_id(struct cgroupfs_root *root)
{
	int ret = 0;

	do {
		if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
			return false;
		spin_lock(&hierarchy_id_lock);
		/* Try to allocate the next unused ID */
		ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
					&root->hierarchy_id);
		if (ret == -ENOSPC)
			/* Try again starting from 0 */
			ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
		if (!ret) {
			next_hierarchy_id = root->hierarchy_id + 1;
		} else if (ret != -EAGAIN) {
			/* Can only get here if the 31-bit IDR is full ... */
			BUG_ON(ret);
		}
		spin_unlock(&hierarchy_id_lock);
	} while (ret);
	return true;
}
1440
1441static int cgroup_test_super(struct super_block *sb, void *data)
1442{
1443 struct cgroup_sb_opts *opts = data;
1444 struct cgroupfs_root *root = sb->s_fs_info;
1445
1446
1447 if (opts->name && strcmp(opts->name, root->name))
1448 return 0;
1449
1450
1451
1452
1453
1454 if ((opts->subsys_mask || opts->none)
1455 && (opts->subsys_mask != root->subsys_mask))
1456 return 0;
1457
1458 return 1;
1459}
1460
/*
 * Allocate and initialise a new cgroupfs_root from parsed mount options.
 * Returns NULL when the options name an existing hierarchy by name only
 * (no subsystems and no "none"), or ERR_PTR(-ENOMEM) on allocation failure.
 */
static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
{
	struct cgroupfs_root *root;

	if (!opts->subsys_mask && !opts->none)
		return NULL;

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return ERR_PTR(-ENOMEM);

	if (!init_root_id(root)) {
		kfree(root);
		return ERR_PTR(-ENOMEM);
	}
	init_cgroup_root(root);

	root->subsys_mask = opts->subsys_mask;
	root->flags = opts->flags;
	ida_init(&root->cgroup_ida);
	if (opts->release_agent)
		strcpy(root->release_agent_path, opts->release_agent);
	if (opts->name)
		strcpy(root->name, opts->name);
	if (opts->cpuset_clone_children)
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags);
	return root;
}
1489
/* Release a root's hierarchy id and free it.  NULL is a no-op. */
static void cgroup_drop_root(struct cgroupfs_root *root)
{
	if (!root)
		return;

	BUG_ON(!root->hierarchy_id);
	spin_lock(&hierarchy_id_lock);
	ida_remove(&hierarchy_ida, root->hierarchy_id);
	spin_unlock(&hierarchy_id_lock);
	ida_destroy(&root->cgroup_ida);
	kfree(root);
}
1502
/*
 * sget() "set" callback: bind a freshly allocated superblock to the
 * new root carried in @data and fill in the basic sb fields.
 */
static int cgroup_set_super(struct super_block *sb, void *data)
{
	int ret;
	struct cgroup_sb_opts *opts = data;

	/* If we don't have a new root, we can't set up a new sb */
	if (!opts->new_root)
		return -EINVAL;

	BUG_ON(!opts->subsys_mask && !opts->none);

	ret = set_anon_super(sb, NULL);
	if (ret)
		return ret;

	sb->s_fs_info = opts->new_root;
	opts->new_root->sb = sb;

	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = CGROUP_SUPER_MAGIC;
	sb->s_op = &cgroup_ops;

	return 0;
}
1528
/* Create the root directory inode/dentry for a new cgroup superblock. */
static int cgroup_get_rootdir(struct super_block *sb)
{
	static const struct dentry_operations cgroup_dops = {
		.d_iput = cgroup_diput,
		.d_delete = cgroup_delete,
	};

	struct inode *inode =
		cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);

	if (!inode)
		return -ENOMEM;

	inode->i_fop = &simple_dir_operations;
	inode->i_op = &cgroup_dir_inode_operations;
	/* directories start off with i_nlink == 2 (for "." entry) */
	inc_nlink(inode);
	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		return -ENOMEM;
	/* for everything else we want ->d_op set */
	sb->s_d_op = &cgroup_dops;
	return 0;
}
1553
/*
 * Mount entry point: either activate a brand-new hierarchy or reuse an
 * existing superblock whose options match.
 */
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
				   int flags, const char *unused_dev_name,
				   void *data)
{
	struct cgroup_sb_opts opts;
	struct cgroupfs_root *root;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *new_root;
	struct inode *inode;

	/* First find the desired set of subsystems */
	mutex_lock(&cgroup_mutex);
	ret = parse_cgroupfs_options(data, &opts);
	mutex_unlock(&cgroup_mutex);
	if (ret)
		goto out_err;

	/*
	 * Allocate a new cgroup root. We may not need it if we're
	 * reusing an existing hierarchy.
	 */
	new_root = cgroup_root_from_opts(&opts);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto drop_modules;
	}
	opts.new_root = new_root;

	/* Locate an existing or new sb for this hierarchy */
	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		cgroup_drop_root(opts.new_root);
		goto drop_modules;
	}

	root = sb->s_fs_info;
	BUG_ON(!root);
	if (root == opts.new_root) {
		/* We used the new root structure, so this is a new hierarchy */
		struct list_head tmp_cg_links;
		struct cgroup *root_cgrp = &root->top_cgroup;
		struct cgroupfs_root *existing_root;
		const struct cred *cred;
		int i;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);
		mutex_lock(&cgroup_root_mutex);

		/* Check for name clashes with existing mounts */
		ret = -EBUSY;
		if (strlen(root->name))
			for_each_active_root(existing_root)
				if (!strcmp(existing_root->name, root->name))
					goto unlock_drop;

		/*
		 * We're accessing css_set_count without locking
		 * css_set_lock here, but that's OK - it can only be
		 * increased by someone holding cgroup_lock, and
		 * that's us. The worst that can happen is that we
		 * have some link structures left over
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret)
			goto unlock_drop;

		ret = rebind_subsystems(root, root->subsys_mask);
		if (ret == -EBUSY) {
			free_cg_links(&tmp_cg_links);
			goto unlock_drop;
		}
		/*
		 * There must be no failure case after here, since rebinding
		 * takes care of subsystems' refcounts, which are explicitly
		 * dropped in the failure exit path.
		 */

		/* EBUSY should be the only error here */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = root_cgrp;
		root->top_cgroup.dentry = sb->s_root;

		/* Link the top cgroup in this hierarchy into all
		 * the css_set objects */
		write_lock(&css_set_lock);
		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
			struct hlist_head *hhead = &css_set_table[i];
			struct hlist_node *node;
			struct css_set *cg;

			hlist_for_each_entry(cg, node, hhead, hlist)
				link_css_set(&tmp_cg_links, cg, root_cgrp);
		}
		write_unlock(&css_set_lock);

		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&root_cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		cred = override_creds(&init_cred);
		cgroup_populate_dir(root_cgrp, true, root->subsys_mask);
		revert_creds(cred);
		mutex_unlock(&cgroup_root_mutex);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);
	} else {
		/*
		 * We re-used an existing hierarchy - the new root (if
		 * any) is not needed
		 */
		cgroup_drop_root(opts.new_root);
		/* no subsys rebinding, so refcounts don't change */
		drop_parsed_module_refcounts(opts.subsys_mask);
	}

	kfree(opts.release_agent);
	kfree(opts.name);
	return dget(sb->s_root);

 unlock_drop:
	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&inode->i_mutex);
 drop_new_super:
	deactivate_locked_super(sb);
 drop_modules:
	drop_parsed_module_refcounts(opts.subsys_mask);
 out_err:
	kfree(opts.release_agent);
	kfree(opts.name);
	return ERR_PTR(ret);
}
1701
/*
 * Tear down a cgroup hierarchy when its superblock is unmounted.  All
 * child cgroups must already be gone (only the top cgroup may remain).
 */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	int ret;
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	BUG_ON(!root);

	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));

	mutex_lock(&cgroup_mutex);
	mutex_lock(&cgroup_root_mutex);

	/* detach all subsystems from this hierarchy */
	ret = rebind_subsystems(root, 0);
	/* rebinding back to the default set must not fail */
	BUG_ON(ret);

	/*
	 * Release every cg_cgroup_link tying a css_set to this
	 * hierarchy's top cgroup.
	 */
	write_lock(&css_set_lock);

	list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
				 cgrp_link_list) {
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	/* the root may or may not have made it onto the global list */
	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		root_count--;
	}

	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);

	simple_xattrs_free(&cgrp->xattrs);

	kill_litter_super(sb);
	cgroup_drop_root(root);
}
1749
/* filesystem type registration for "cgroup" mounts */
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
};

/* sysfs kobject under which the cgroup filesystem is announced */
static struct kobject *cgroup_kobj;
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
/**
 * cgroup_path - build the hierarchy path of @cgrp into @buf
 * @cgrp: the cgroup whose path is wanted
 * @buf: output buffer
 * @buflen: size of @buf
 *
 * The path is assembled right-to-left starting at the end of @buf and
 * moved to the front when complete.  Returns 0 on success or
 * -ENAMETOOLONG if @buflen is too small.  Caller must hold either the
 * RCU read lock or cgroup_mutex (asserted below).
 */
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
	struct dentry *dentry = cgrp->dentry;
	char *start;

	rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
			   "cgroup_path() called without proper locking");

	if (!dentry || cgrp == dummytop) {
		/*
		 * No dentry (or the dummy top cgroup): report the root
		 * path.
		 */
		strcpy(buf, "/");
		return 0;
	}

	start = buf + buflen - 1;

	*start = '\0';
	for (;;) {
		int len = dentry->d_name.len;

		/* prepend this component, failing if it would underflow */
		if ((start -= len) < buf)
			return -ENAMETOOLONG;
		memcpy(start, dentry->d_name.name, len);
		cgrp = cgrp->parent;
		if (!cgrp)
			break;

		dentry = cgrp->dentry;
		/* skip the separator before the hierarchy root's own name */
		if (!cgrp->parent)
			continue;
		if (--start < buf)
			return -ENAMETOOLONG;
		*start = '/';
	}
	/* shift the assembled path (incl. NUL) to the buffer start */
	memmove(buf, start, buf + buflen - start);
	return 0;
}
EXPORT_SYMBOL_GPL(cgroup_path);
1809
1810
1811
1812
/* one migration entry: a task and the cgroup it currently belongs to */
struct task_and_cgroup {
	struct task_struct	*task;
	struct cgroup		*cgrp;
	struct css_set		*cg;	/* destination css_set, filled later */
};

/*
 * Set of tasks being migrated, handed to subsystem can_attach/attach
 * callbacks.  Either a single entry (@single) or a flex_array of
 * entries (@tc_array) is used; @cur_cgrp tracks the source cgroup of
 * the entry last returned by the iterator.
 */
struct cgroup_taskset {
	struct task_and_cgroup	single;
	struct flex_array	*tc_array;
	int			tc_array_len;
	int			idx;
	struct cgroup		*cur_cgrp;
};
1826
1827
1828
1829
1830
1831
1832
1833struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
1834{
1835 if (tset->tc_array) {
1836 tset->idx = 0;
1837 return cgroup_taskset_next(tset);
1838 } else {
1839 tset->cur_cgrp = tset->single.cgrp;
1840 return tset->single.task;
1841 }
1842}
1843EXPORT_SYMBOL_GPL(cgroup_taskset_first);
1844
1845
1846
1847
1848
1849
1850
1851
1852struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
1853{
1854 struct task_and_cgroup *tc;
1855
1856 if (!tset->tc_array || tset->idx >= tset->tc_array_len)
1857 return NULL;
1858
1859 tc = flex_array_get(tset->tc_array, tset->idx++);
1860 tset->cur_cgrp = tc->cgrp;
1861 return tc->task;
1862}
1863EXPORT_SYMBOL_GPL(cgroup_taskset_next);
1864
1865
1866
1867
1868
1869
1870
1871
1872
/*
 * Return the source cgroup of the task most recently produced by the
 * taskset iterator (set by cgroup_taskset_first/next).
 */
struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset)
{
	return tset->cur_cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup);
1878
1879
1880
1881
1882
1883int cgroup_taskset_size(struct cgroup_taskset *tset)
1884{
1885 return tset->tc_array ? tset->tc_array_len : 1;
1886}
1887EXPORT_SYMBOL_GPL(cgroup_taskset_size);
1888
1889
1890
1891
1892
1893
1894
/*
 * Switch @tsk from its old css_set to @newcg as part of attaching it to
 * @cgrp.  The caller supplies @newcg with a reference already taken; the
 * old css_set's reference is dropped here.
 */
static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
				struct task_struct *tsk, struct css_set *newcg)
{
	struct css_set *oldcg;

	/*
	 * The caller is responsible for ensuring @tsk stays alive and is
	 * not exiting while we swap its css_set pointer.
	 */
	WARN_ON_ONCE(tsk->flags & PF_EXITING);
	oldcg = tsk->cgroups;

	/* swap the css_set pointer under task_lock */
	task_lock(tsk);
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* move the task onto the new css_set's task list, if it is linked */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list))
		list_move(&tsk->cg_list, &newcg->tasks);
	write_unlock(&css_set_lock);

	/*
	 * Mark the old cgroup releasable and drop the old css_set
	 * reference the task held.
	 */
	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	put_css_set(oldcg);
}
1926
1927
1928
1929
1930
1931
1932
1933
1934
/**
 * cgroup_attach_task - attach a single task to a cgroup
 * @cgrp: destination cgroup
 * @tsk: task to move
 *
 * Runs every subsystem's can_attach() callback, migrates the task's
 * css_set, then runs the attach() callbacks.  On can_attach() failure,
 * cancel_attach() is invoked on the subsystems that had already agreed.
 * Returns 0 on success or a negative errno.
 */
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval = 0;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	struct cgroup *oldcgrp;
	struct cgroupfs_root *root = cgrp->root;
	struct cgroup_taskset tset = { };
	struct css_set *newcg;

	/* exiting tasks cannot be moved */
	if (tsk->flags & PF_EXITING)
		return -ESRCH;

	/* nothing to do if already there */
	oldcgrp = task_cgroup_from_root(tsk, root);
	if (cgrp == oldcgrp)
		return 0;

	tset.single.task = tsk;
	tset.single.cgrp = oldcgrp;

	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(cgrp, &tset);
			if (retval) {
				/*
				 * Remember which subsystem vetoed so the
				 * rollback below only cancels the ones that
				 * had already said yes.
				 */
				failed_ss = ss;
				goto out;
			}
		}
	}

	newcg = find_css_set(tsk->cgroups, cgrp);
	if (!newcg) {
		retval = -ENOMEM;
		goto out;
	}

	cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg);

	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(cgrp, &tset);
	}

	synchronize_rcu();
out:
	if (retval) {
		for_each_subsys(root, ss) {
			if (ss == failed_ss)
				/*
				 * The failing subsystem and those after it
				 * never saw can_attach() succeed, so they
				 * must not be cancelled.
				 */
				break;
			if (ss->cancel_attach)
				ss->cancel_attach(cgrp, &tset);
		}
	}
	return retval;
}
2003
2004
2005
2006
2007
2008
2009int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
2010{
2011 struct cgroupfs_root *root;
2012 int retval = 0;
2013
2014 cgroup_lock();
2015 for_each_active_root(root) {
2016 struct cgroup *from_cg = task_cgroup_from_root(from, root);
2017
2018 retval = cgroup_attach_task(from_cg, tsk);
2019 if (retval)
2020 break;
2021 }
2022 cgroup_unlock();
2023
2024 return retval;
2025}
2026EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
2027
2028
2029
2030
2031
2032
2033
2034
2035
/*
 * Attach every thread of @leader's thread group to @cgrp.  The caller
 * must hold threadgroup_lock() on @leader so the group membership is
 * stable.  Works in phases: snapshot the threads into a flex_array,
 * run can_attach() callbacks, pre-allocate all destination css_sets,
 * migrate, then run attach() callbacks — with rollback on failure.
 */
static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
{
	int retval, i, group_size;
	struct cgroup_subsys *ss, *failed_ss = NULL;

	struct cgroupfs_root *root = cgrp->root;

	struct task_struct *tsk;
	struct task_and_cgroup *tc;
	struct flex_array *group;
	struct cgroup_taskset tset = { };

	/*
	 * Upper bound on the number of entries we need; the group cannot
	 * grow while the caller holds threadgroup_lock().
	 */
	group_size = get_nr_threads(leader);
	/* flex_array supports very large thread groups */
	group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
	if (!group)
		return -ENOMEM;
	/* pre-allocate so flex_array_put() below cannot fail */
	retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
	if (retval)
		goto out_free_group_list;

	tsk = leader;
	i = 0;
	/*
	 * Snapshot the thread group under RCU; entries are stored by
	 * value since tasks may exit after we drop the RCU lock.
	 */
	rcu_read_lock();
	do {
		struct task_and_cgroup ent;

		/* skip threads that are already exiting */
		if (tsk->flags & PF_EXITING)
			continue;

		/* as above, the group size cannot change under us */
		BUG_ON(i >= group_size);
		ent.task = tsk;
		ent.cgrp = task_cgroup_from_root(tsk, root);
		/* nothing to do for threads already in the target cgroup */
		if (ent.cgrp == cgrp)
			continue;

		/*
		 * Cannot fail thanks to the prealloc above; GFP_ATOMIC
		 * because we are inside the RCU read-side section.
		 */
		retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
		BUG_ON(retval != 0);
		i++;
	} while_each_thread(leader, tsk);
	rcu_read_unlock();
	/* remember the number of threads in the array for later */
	group_size = i;
	tset.tc_array = group;
	tset.tc_array_len = group_size;

	/* methods shouldn't be called if no task is actually migrating */
	retval = 0;
	if (!group_size)
		goto out_free_group_list;

	/*
	 * Phase 1: ask every subsystem whether the migration may proceed.
	 */
	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(cgrp, &tset);
			if (retval) {
				failed_ss = ss;
				goto out_cancel_attach;
			}
		}
	}

	/*
	 * Phase 2: pre-allocate a destination css_set for every task so
	 * the migration phase cannot fail partway on -ENOMEM.
	 */
	for (i = 0; i < group_size; i++) {
		tc = flex_array_get(group, i);
		tc->cg = find_css_set(tc->task->cgroups, cgrp);
		if (!tc->cg) {
			retval = -ENOMEM;
			goto out_put_css_set_refs;
		}
	}

	/*
	 * Phase 3: migrate.  No failures allowed past this point, as the
	 * attach() callbacks below are not revertible.
	 */
	for (i = 0; i < group_size; i++) {
		tc = flex_array_get(group, i);
		cgroup_task_migrate(cgrp, tc->cgrp, tc->task, tc->cg);
	}

	/*
	 * Phase 4: notify subsystems that the migration happened.
	 */
	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(cgrp, &tset);
	}

	/*
	 * success: let RCU readers settle before returning.
	 */
	synchronize_rcu();
	retval = 0;
out_put_css_set_refs:
	if (retval) {
		/* drop the css_set refs taken in phase 2 */
		for (i = 0; i < group_size; i++) {
			tc = flex_array_get(group, i);
			if (!tc->cg)
				break;
			put_css_set(tc->cg);
		}
	}
out_cancel_attach:
	if (retval) {
		/* undo can_attach() on subsystems that had agreed */
		for_each_subsys(root, ss) {
			if (ss == failed_ss)
				break;
			if (ss->cancel_attach)
				ss->cancel_attach(cgrp, &tset);
		}
	}
out_free_group_list:
	flex_array_free(group);
	return retval;
}
2178
2179
2180
2181
2182
2183
/*
 * Find the task (or thread group) identified by @pid, check permissions,
 * and attach it to @cgrp.  @threadgroup selects whole-group attach via
 * cgroup_attach_proc() vs single-task attach.  A @pid of 0 means the
 * calling task.
 */
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
{
	struct task_struct *tsk;
	const struct cred *cred = current_cred(), *tcred;
	int ret;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

retry_find_task:
	rcu_read_lock();
	if (pid) {
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
			rcu_read_unlock();
			ret= -ESRCH;
			goto out_unlock_cgroup;
		}
		/*
		 * Moving another task requires root, or matching the
		 * target's uid or saved uid.
		 */
		tcred = __task_cred(tsk);
		if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
		    !uid_eq(cred->euid, tcred->uid) &&
		    !uid_eq(cred->euid, tcred->suid)) {
			rcu_read_unlock();
			ret = -EACCES;
			goto out_unlock_cgroup;
		}
	} else
		tsk = current;

	if (threadgroup)
		tsk = tsk->group_leader;

	/*
	 * Workqueue threads may acquire PF_THREAD_BOUND and become
	 * trapped in a cpuset; refuse them (and kthreadd) here.
	 */
	if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) {
		ret = -EINVAL;
		rcu_read_unlock();
		goto out_unlock_cgroup;
	}

	get_task_struct(tsk);
	rcu_read_unlock();

	threadgroup_lock(tsk);
	if (threadgroup) {
		if (!thread_group_leader(tsk)) {
			/*
			 * A de_thread() race replaced the leader while we
			 * were looking it up; retry from the top.  The new
			 * leader keeps the same pid, so re-lookup is safe.
			 */
			threadgroup_unlock(tsk);
			put_task_struct(tsk);
			goto retry_find_task;
		}
		ret = cgroup_attach_proc(cgrp, tsk);
	} else
		ret = cgroup_attach_task(cgrp, tsk);
	threadgroup_unlock(tsk);

	put_task_struct(tsk);
out_unlock_cgroup:
	cgroup_unlock();
	return ret;
}
2258
/* "tasks" file: attach a single task by pid. */
static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
{
	return attach_task_by_pid(cgrp, pid, false);
}

/* "cgroup.procs" file: attach a whole thread group by tgid. */
static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
{
	return attach_task_by_pid(cgrp, tgid, true);
}
2268
2269
2270
2271
2272
2273
2274
2275
2276bool cgroup_lock_live_group(struct cgroup *cgrp)
2277{
2278 mutex_lock(&cgroup_mutex);
2279 if (cgroup_is_removed(cgrp)) {
2280 mutex_unlock(&cgroup_mutex);
2281 return false;
2282 }
2283 return true;
2284}
2285EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
2286
/*
 * Write handler for "release_agent": store the new agent path on the
 * hierarchy root.  The buffer is bounded by PATH_MAX (checked at build
 * time against the destination field).
 */
static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
				      const char *buffer)
{
	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
	if (strlen(buffer) >= PATH_MAX)
		return -EINVAL;
	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;
	/* release_agent_path is protected by cgroup_root_mutex */
	mutex_lock(&cgroup_root_mutex);
	strcpy(cgrp->root->release_agent_path, buffer);
	mutex_unlock(&cgroup_root_mutex);
	cgroup_unlock();
	return 0;
}
2301
/* Read handler for "release_agent": print the stored path plus newline. */
static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
				     struct seq_file *seq)
{
	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;
	seq_puts(seq, cgrp->root->release_agent_path);
	seq_putc(seq, '\n');
	cgroup_unlock();
	return 0;
}
2312
2313
2314#define CGROUP_LOCAL_BUFFER_SIZE 64
2315
2316static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
2317 struct file *file,
2318 const char __user *userbuf,
2319 size_t nbytes, loff_t *unused_ppos)
2320{
2321 char buffer[CGROUP_LOCAL_BUFFER_SIZE];
2322 int retval = 0;
2323 char *end;
2324
2325 if (!nbytes)
2326 return -EINVAL;
2327 if (nbytes >= sizeof(buffer))
2328 return -E2BIG;
2329 if (copy_from_user(buffer, userbuf, nbytes))
2330 return -EFAULT;
2331
2332 buffer[nbytes] = 0;
2333 if (cft->write_u64) {
2334 u64 val = simple_strtoull(strstrip(buffer), &end, 0);
2335 if (*end)
2336 return -EINVAL;
2337 retval = cft->write_u64(cgrp, cft, val);
2338 } else {
2339 s64 val = simple_strtoll(strstrip(buffer), &end, 0);
2340 if (*end)
2341 return -EINVAL;
2342 retval = cft->write_s64(cgrp, cft, val);
2343 }
2344 if (!retval)
2345 retval = nbytes;
2346 return retval;
2347}
2348
/*
 * Write handler for string control files.  Small writes use an on-stack
 * buffer; writes longer than CGROUP_LOCAL_BUFFER_SIZE-1 allocate a heap
 * buffer, bounded by the cftype's max_write_len.  The stripped string is
 * passed to the write_string callback.
 */
static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
				   struct file *file,
				   const char __user *userbuf,
				   size_t nbytes, loff_t *unused_ppos)
{
	char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
	int retval = 0;
	size_t max_bytes = cft->max_write_len;
	char *buffer = local_buffer;

	/* 0 means "default limit": the local buffer size */
	if (!max_bytes)
		max_bytes = sizeof(local_buffer) - 1;
	if (nbytes >= max_bytes)
		return -E2BIG;
	/* Allocate a dynamic buffer if we need one */
	if (nbytes >= sizeof(local_buffer)) {
		buffer = kmalloc(nbytes + 1, GFP_KERNEL);
		if (buffer == NULL)
			return -ENOMEM;
	}
	if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
		retval = -EFAULT;
		goto out;
	}

	buffer[nbytes] = 0;	/* nul-terminate */
	retval = cft->write_string(cgrp, cft, strstrip(buffer));
	if (!retval)
		retval = nbytes;
out:
	if (buffer != local_buffer)
		kfree(buffer);
	return retval;
}
2383
/*
 * Dispatch a write on a cgroup control file to the cftype's handler.
 * Handlers are tried in priority order: raw write, numeric, string,
 * trigger.  Fails with -ENODEV if the cgroup has been removed.
 */
static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
				 size_t nbytes, loff_t *ppos)
{
	struct cftype *cft = __d_cft(file->f_dentry);
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);

	if (cgroup_is_removed(cgrp))
		return -ENODEV;
	if (cft->write)
		return cft->write(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->write_u64 || cft->write_s64)
		return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->write_string)
		return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->trigger) {
		/* a trigger ignores the written data entirely */
		int ret = cft->trigger(cgrp, (unsigned int)cft->private);
		return ret ? ret : nbytes;
	}
	return -EINVAL;
}
2404
2405static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
2406 struct file *file,
2407 char __user *buf, size_t nbytes,
2408 loff_t *ppos)
2409{
2410 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2411 u64 val = cft->read_u64(cgrp, cft);
2412 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
2413
2414 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2415}
2416
2417static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
2418 struct file *file,
2419 char __user *buf, size_t nbytes,
2420 loff_t *ppos)
2421{
2422 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2423 s64 val = cft->read_s64(cgrp, cft);
2424 int len = sprintf(tmp, "%lld\n", (long long) val);
2425
2426 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2427}
2428
/*
 * Dispatch a read on a cgroup control file to the cftype's handler,
 * tried in priority order: raw read, u64, s64.  (read_map and
 * read_seq_string are served through the seq_file path set up in
 * cgroup_file_open.)  Fails with -ENODEV if the cgroup was removed.
 */
static ssize_t cgroup_file_read(struct file *file, char __user *buf,
				size_t nbytes, loff_t *ppos)
{
	struct cftype *cft = __d_cft(file->f_dentry);
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);

	if (cgroup_is_removed(cgrp))
		return -ENODEV;

	if (cft->read)
		return cft->read(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->read_u64)
		return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->read_s64)
		return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
	return -EINVAL;
}
2446
2447
2448
2449
2450
2451
/* per-open state for seq_file backed control files */
struct cgroup_seqfile_state {
	struct cftype *cft;
	struct cgroup *cgroup;
};

/* cgroup_map_cb fill callback: emit one "key value" line. */
static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
{
	struct seq_file *sf = cb->state;
	return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
}
2462
/*
 * seq_file show callback: route to the cftype's read_map (with a
 * cgroup_map_cb wrapper) or read_seq_string handler.
 */
static int cgroup_seqfile_show(struct seq_file *m, void *arg)
{
	struct cgroup_seqfile_state *state = m->private;
	struct cftype *cft = state->cft;
	if (cft->read_map) {
		struct cgroup_map_cb cb = {
			.fill = cgroup_map_add,
			.state = m,
		};
		return cft->read_map(state->cgroup, cft, &cb);
	}
	return cft->read_seq_string(state->cgroup, cft, m);
}
2476
2477static int cgroup_seqfile_release(struct inode *inode, struct file *file)
2478{
2479 struct seq_file *seq = file->private_data;
2480 kfree(seq->private);
2481 return single_release(inode, file);
2482}
2483
/* file_operations for control files served through seq_file */
static const struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.write = cgroup_file_write,
	.llseek = seq_lseek,
	.release = cgroup_seqfile_release,
};
2490
/*
 * Open a cgroup control file.  Files with read_map/read_seq_string
 * handlers are switched to the seq_file operations with a freshly
 * allocated cgroup_seqfile_state; otherwise the cftype's own open
 * callback (if any) runs.
 */
static int cgroup_file_open(struct inode *inode, struct file *file)
{
	int err;
	struct cftype *cft;

	err = generic_file_open(inode, file);
	if (err)
		return err;
	cft = __d_cft(file->f_dentry);

	if (cft->read_map || cft->read_seq_string) {
		struct cgroup_seqfile_state *state =
			kzalloc(sizeof(*state), GFP_USER);
		if (!state)
			return -ENOMEM;
		state->cft = cft;
		state->cgroup = __d_cgrp(file->f_dentry->d_parent);
		/* reroute further ops through the seq_file handlers */
		file->f_op = &cgroup_seqfile_operations;
		err = single_open(file, cgroup_seqfile_show, state);
		if (err < 0)
			kfree(state);
	} else if (cft->open)
		err = cft->open(inode, file);
	else
		err = 0;

	return err;
}
2519
2520static int cgroup_file_release(struct inode *inode, struct file *file)
2521{
2522 struct cftype *cft = __d_cft(file->f_dentry);
2523 if (cft->release)
2524 return cft->release(inode, file);
2525 return 0;
2526}
2527
2528
2529
2530
/*
 * Rename a cgroup: only directories (cgroups themselves, not control
 * files) may be renamed, only within the same parent, and not onto an
 * existing name.
 */
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
{
	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		return -ENOTDIR;
	if (new_dentry->d_inode)
		return -EEXIST;
	if (old_dir != new_dir)
		return -EIO;
	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
}
2542
2543static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
2544{
2545 if (S_ISDIR(dentry->d_inode->i_mode))
2546 return &__d_cgrp(dentry)->xattrs;
2547 else
2548 return &__d_cft(dentry)->xattrs;
2549}
2550
/* True when this hierarchy was mounted with the xattr root flag set. */
static inline int xattr_enabled(struct dentry *dentry)
{
	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
	return test_bit(ROOT_XATTR, &root->flags);
}
2556
2557static bool is_valid_xattr(const char *name)
2558{
2559 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
2560 !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
2561 return true;
2562 return false;
2563}
2564
/* Set an xattr; mount-option and namespace checks first (in that order). */
static int cgroup_setxattr(struct dentry *dentry, const char *name,
			   const void *val, size_t size, int flags)
{
	if (!xattr_enabled(dentry))
		return -EOPNOTSUPP;
	if (!is_valid_xattr(name))
		return -EINVAL;
	return simple_xattr_set(__d_xattrs(dentry), name, val, size, flags);
}
2574
/* Remove an xattr; same precondition checks as cgroup_setxattr(). */
static int cgroup_removexattr(struct dentry *dentry, const char *name)
{
	if (!xattr_enabled(dentry))
		return -EOPNOTSUPP;
	if (!is_valid_xattr(name))
		return -EINVAL;
	return simple_xattr_remove(__d_xattrs(dentry), name);
}
2583
/* Read an xattr value; same precondition checks as cgroup_setxattr(). */
static ssize_t cgroup_getxattr(struct dentry *dentry, const char *name,
			       void *buf, size_t size)
{
	if (!xattr_enabled(dentry))
		return -EOPNOTSUPP;
	if (!is_valid_xattr(name))
		return -EINVAL;
	return simple_xattr_get(__d_xattrs(dentry), name, buf, size);
}
2593
2594static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size)
2595{
2596 if (!xattr_enabled(dentry))
2597 return -EOPNOTSUPP;
2598 return simple_xattr_list(__d_xattrs(dentry), buf, size);
2599}
2600
/* default file_operations for cgroup control files */
static const struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};

/* inode_operations for control files: xattrs only */
static const struct inode_operations cgroup_file_inode_operations = {
	.setxattr = cgroup_setxattr,
	.getxattr = cgroup_getxattr,
	.listxattr = cgroup_listxattr,
	.removexattr = cgroup_removexattr,
};

/* inode_operations for cgroup directories */
static const struct inode_operations cgroup_dir_inode_operations = {
	.lookup = cgroup_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
	.setxattr = cgroup_setxattr,
	.getxattr = cgroup_getxattr,
	.listxattr = cgroup_listxattr,
	.removexattr = cgroup_removexattr,
};
2626
/*
 * Directory lookup: cgroupfs entries are always instantiated eagerly,
 * so any name not already in the dcache gets a negative dentry.
 */
static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	if (dentry->d_name.len > NAME_MAX)
		return ERR_PTR(-ENAMETOOLONG);
	d_add(dentry, NULL);
	return NULL;
}
2634
2635
2636
2637
/*
 * Return the cftype backing @file, or -EINVAL if the file is not a
 * cgroup control file (checked via its file_operations).
 */
static inline struct cftype *__file_cft(struct file *file)
{
	if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
		return ERR_PTR(-EINVAL);
	return __d_cft(file->f_dentry);
}
2644
/*
 * Create and instantiate an inode for @dentry with @mode on @sb.
 * Directories get the cgroup dir operations and bump link counts;
 * regular files get the cgroup control-file operations.
 */
static int cgroup_create_file(struct dentry *dentry, umode_t mode,
			      struct super_block *sb)
{
	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* directories start off with i_nlink == 2 (for "." entry) */
		inc_nlink(inode);
		inc_nlink(dentry->d_parent->d_inode);

		/*
		 * Grab the new directory's i_mutex (expected to be
		 * uncontended on a fresh inode, hence trylock) so the
		 * VFS sees the same locking state as after a normal
		 * mkdir.
		 */
		WARN_ON_ONCE(!mutex_trylock(&inode->i_mutex));
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
		inode->i_op = &cgroup_file_inode_operations;
	}
	d_instantiate(dentry, inode);
	dget(dentry);	/* Extra count - pin the dentry in core */
	return 0;
}
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695static umode_t cgroup_file_mode(const struct cftype *cft)
2696{
2697 umode_t mode = 0;
2698
2699 if (cft->mode)
2700 return cft->mode;
2701
2702 if (cft->read || cft->read_u64 || cft->read_s64 ||
2703 cft->read_map || cft->read_seq_string)
2704 mode |= S_IRUGO;
2705
2706 if (cft->write || cft->write_u64 || cft->write_s64 ||
2707 cft->write_string || cft->trigger)
2708 mode |= S_IWUSR;
2709
2710 return mode;
2711}
2712
/*
 * Create one control file for @cft in @cgrp's directory.  The file name
 * is "<subsys>.<name>" unless @subsys is NULL or the hierarchy was
 * mounted with the noprefix flag.  A cfent tracks the dentry on the
 * cgroup's file list.  Caller must hold the directory's i_mutex.
 */
static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
			   struct cftype *cft)
{
	struct dentry *dir = cgrp->dentry;
	struct cgroup *parent = __d_cgrp(dir);
	struct dentry *dentry;
	struct cfent *cfe;
	int error;
	umode_t mode;
	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };

	simple_xattrs_init(&cft->xattrs);

	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
		strcpy(name, subsys->name);
		strcat(name, ".");
	}
	strcat(name, cft->name);

	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));

	cfe = kzalloc(sizeof(*cfe), GFP_KERNEL);
	if (!cfe)
		return -ENOMEM;

	dentry = lookup_one_len(name, dir, strlen(name));
	if (IS_ERR(dentry)) {
		error = PTR_ERR(dentry);
		goto out;
	}

	mode = cgroup_file_mode(cft);
	error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
	if (!error) {
		cfe->type = (void *)cft;
		cfe->dentry = dentry;
		dentry->d_fsdata = cfe;
		list_add_tail(&cfe->node, &parent->files);
		cfe = NULL;	/* ownership transferred to the list */
	}
	dput(dentry);
out:
	kfree(cfe);	/* no-op on success (cfe == NULL) */
	return error;
}
2758
2759static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2760 struct cftype cfts[], bool is_add)
2761{
2762 struct cftype *cft;
2763 int err, ret = 0;
2764
2765 for (cft = cfts; cft->name[0] != '\0'; cft++) {
2766
2767 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
2768 continue;
2769 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
2770 continue;
2771
2772 if (is_add)
2773 err = cgroup_add_file(cgrp, subsys, cft);
2774 else
2775 err = cgroup_rm_file(cgrp, cft);
2776 if (err) {
2777 pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n",
2778 is_add ? "add" : "remove", cft->name, err);
2779 ret = err;
2780 }
2781 }
2782 return ret;
2783}
2784
/* serializes cftype set additions/removals against commit */
static DEFINE_MUTEX(cgroup_cft_mutex);

/*
 * Begin a cftype add/remove transaction: take cgroup_cft_mutex and
 * cgroup_mutex.  Must be paired with cgroup_cfts_commit(), which
 * releases both.
 */
static void cgroup_cfts_prepare(void)
	__acquires(&cgroup_cft_mutex) __acquires(&cgroup_mutex)
{
	/*
	 * Holding both mutexes keeps the set of live cgroups stable
	 * while the caller mutates the subsystem's cftype lists;
	 * cgroup_mutex is dropped in commit before touching per-cgroup
	 * directories.
	 */
	mutex_lock(&cgroup_cft_mutex);
	mutex_lock(&cgroup_mutex);
}
2800
/*
 * Finish a cftype transaction started by cgroup_cfts_prepare(): apply
 * @cfts (add when @is_add, else remove) to every cgroup on @ss's
 * hierarchy, then drop both mutexes.  Passing @cfts == NULL skips the
 * per-cgroup work and just unlocks.
 */
static void cgroup_cfts_commit(struct cgroup_subsys *ss,
			       struct cftype *cfts, bool is_add)
	__releases(&cgroup_mutex) __releases(&cgroup_cft_mutex)
{
	LIST_HEAD(pending);
	struct cgroup *cgrp, *n;

	/* nothing to touch if the subsystem sits on the default root */
	if (cfts && ss->root != &rootnode) {
		/* pin each cgroup's dentry so it survives the unlock below */
		list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) {
			dget(cgrp->dentry);
			list_add_tail(&cgrp->cft_q_node, &pending);
		}
	}

	mutex_unlock(&cgroup_mutex);

	/*
	 * Per-cgroup file updates need each directory's i_mutex, which
	 * must be taken before cgroup_mutex — hence the drop/retake
	 * dance; cgroup_cft_mutex keeps the pending list stable.
	 */
	list_for_each_entry_safe(cgrp, n, &pending, cft_q_node) {
		struct inode *inode = cgrp->dentry->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);
		if (!cgroup_is_removed(cgrp))
			cgroup_addrm_files(cgrp, ss, cfts, is_add);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);

		list_del_init(&cgrp->cft_q_node);
		dput(cgrp->dentry);
	}

	mutex_unlock(&cgroup_cft_mutex);
}
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
2854{
2855 struct cftype_set *set;
2856
2857 set = kzalloc(sizeof(*set), GFP_KERNEL);
2858 if (!set)
2859 return -ENOMEM;
2860
2861 cgroup_cfts_prepare();
2862 set->cfts = cfts;
2863 list_add_tail(&set->node, &ss->cftsets);
2864 cgroup_cfts_commit(ss, cfts, true);
2865
2866 return 0;
2867}
2868EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
2884{
2885 struct cftype_set *set;
2886
2887 cgroup_cfts_prepare();
2888
2889 list_for_each_entry(set, &ss->cftsets, node) {
2890 if (set->cfts == cfts) {
2891 list_del_init(&set->node);
2892 cgroup_cfts_commit(ss, cfts, false);
2893 return 0;
2894 }
2895 }
2896
2897 cgroup_cfts_commit(ss, NULL, false);
2898 return -ENOENT;
2899}
2900
2901
2902
2903
2904
2905
2906
2907int cgroup_task_count(const struct cgroup *cgrp)
2908{
2909 int count = 0;
2910 struct cg_cgroup_link *link;
2911
2912 read_lock(&css_set_lock);
2913 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
2914 count += atomic_read(&link->cg->refcount);
2915 }
2916 read_unlock(&css_set_lock);
2917 return count;
2918}
2919
2920
2921
2922
2923
/*
 * Advance @it to the next css_set on @cgrp's list that has at least one
 * task, positioning it->task at that set's first task.  Sets
 * it->cg_link to NULL when the list is exhausted.  Called with
 * css_set_lock held (see cgroup_iter_start/cgroup_iter_next).
 */
static void cgroup_advance_iter(struct cgroup *cgrp,
				struct cgroup_iter *it)
{
	struct list_head *l = it->cg_link;
	struct cg_cgroup_link *link;
	struct css_set *cg;

	/* Advance to the next non-empty css_set */
	do {
		l = l->next;
		if (l == &cgrp->css_sets) {
			it->cg_link = NULL;
			return;
		}
		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
		cg = link->cg;
	} while (list_empty(&cg->tasks));
	it->cg_link = l;
	it->task = cg->tasks.next;
}
2944
2945
2946
2947
2948
2949
2950
/*
 * One-time switch that turns on per-css_set task lists: sets
 * use_task_css_set_links and retroactively links every existing task
 * onto its css_set's task list.  Until this runs, fork skips the
 * linking to keep the common case cheap.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;
	write_lock(&css_set_lock);
	use_task_css_set_links = 1;
	/*
	 * Walk every existing task.  tasklist_lock guards the list;
	 * tasks created after the flag was set above link themselves.
	 */
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		task_lock(p);
		/*
		 * Skip tasks already linked (raced with fork) and tasks
		 * that are exiting (their cg_list handling is done on
		 * the exit path).
		 */
		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
			list_add(&p->cg_list, &p->cgroups->tasks);
		task_unlock(p);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
	write_unlock(&css_set_lock);
}
2978
2979
2980
2981
2982
2983
2984
2985
2986
/**
 * cgroup_next_descendant_pre - find the next descendant in pre-order
 * @pos: current position (NULL to start the walk)
 * @cgroup: root of the subtree being walked
 *
 * Returns the next descendant of @cgroup after @pos in pre-order, or
 * NULL when the walk is complete.  Must be called under
 * rcu_read_lock().
 */
struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
					  struct cgroup *cgroup)
{
	struct cgroup *next;

	WARN_ON_ONCE(!rcu_read_lock_held());

	/* if first iteration, pretend we just visited @cgroup */
	if (!pos) {
		if (list_empty(&cgroup->children))
			return NULL;
		pos = cgroup;
	}

	/* visit the first child if exists */
	next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling);
	if (next)
		return next;

	/* no child, visit my or the closest ancestor's next sibling */
	do {
		next = list_entry_rcu(pos->sibling.next, struct cgroup,
				      sibling);
		if (&next->sibling != &pos->parent->children)
			return next;

		pos = pos->parent;
	} while (pos != cgroup);

	return NULL;
}
EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
3019
3020static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
3021{
3022 struct cgroup *last;
3023
3024 do {
3025 last = pos;
3026 pos = list_first_or_null_rcu(&pos->children, struct cgroup,
3027 sibling);
3028 } while (pos);
3029
3030 return last;
3031}
3032
3033
3034
3035
3036
3037
3038
3039
3040
/**
 * cgroup_next_descendant_post - find the next descendant in post-order
 * @pos: current position (NULL to start the walk)
 * @cgroup: root of the subtree being walked
 *
 * Returns the next descendant of @cgroup after @pos in post-order, or
 * NULL when the walk is complete.  Must be called under
 * rcu_read_lock().
 */
struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
					   struct cgroup *cgroup)
{
	struct cgroup *next;

	WARN_ON_ONCE(!rcu_read_lock_held());

	/* if first iteration, visit the leftmost descendant */
	if (!pos) {
		next = cgroup_leftmost_descendant(cgroup);
		return next != cgroup ? next : NULL;
	}

	/* if there's an unvisited sibling, visit its leftmost descendant */
	next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
	if (&next->sibling != &pos->parent->children)
		return cgroup_leftmost_descendant(next);

	/* no sibling left, visit parent */
	next = pos->parent;
	return next != cgroup ? next : NULL;
}
EXPORT_SYMBOL_GPL(cgroup_next_descendant_post);
3064
/*
 * Begin iterating the tasks of @cgrp.  Takes css_set_lock (released by
 * cgroup_iter_end()) and positions @it at the first task.  Lazily
 * enables the per-css_set task lists on first use.
 */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
	__acquires(css_set_lock)
{
	/*
	 * The first time anyone iterates tasks, flip the lazy
	 * use_task_css_set_links switch so tasks are linked to their
	 * css_sets from here on.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	read_lock(&css_set_lock);
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}
3080
/*
 * Return the task at @it's current position and advance to the next
 * one, moving on to the next css_set when the current one's task list
 * is exhausted.  Returns NULL at the end of the iteration.
 */
struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
				     struct cgroup_iter *it)
{
	struct task_struct *res;
	struct list_head *l = it->task;
	struct cg_cgroup_link *link;

	/* If the iterator cg is NULL, we have no tasks */
	if (!it->cg_link)
		return NULL;
	res = list_entry(l, struct task_struct, cg_list);
	/* Advance iterator to find next entry */
	l = l->next;
	link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
	if (l == &link->cg->tasks) {
		/*
		 * We reached the end of this task list - move on to the
		 * next cg_cgroup_link.
		 */
		cgroup_advance_iter(cgrp, it);
	} else {
		it->task = l;
	}
	return res;
}
3104
/* finish an iteration: drops the read lock taken by cgroup_iter_start() */
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
	__releases(css_set_lock)
{
	read_unlock(&css_set_lock);
}
3110
3111static inline int started_after_time(struct task_struct *t1,
3112 struct timespec *time,
3113 struct task_struct *t2)
3114{
3115 int start_diff = timespec_compare(&t1->start_time, time);
3116 if (start_diff > 0) {
3117 return 1;
3118 } else if (start_diff < 0) {
3119 return 0;
3120 } else {
3121
3122
3123
3124
3125
3126
3127
3128
3129 return t1 > t2;
3130 }
3131}
3132
3133
3134
3135
3136
3137
3138static inline int started_after(void *p1, void *p2)
3139{
3140 struct task_struct *t1 = p1;
3141 struct task_struct *t2 = p2;
3142 return started_after_time(t1, &t2->start_time, t2);
3143}
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
/**
 * cgroup_scan_tasks - iterate though all the tasks in a cgroup
 * @scan: struct cgroup_scanner containing arguments for the scan
 *
 * For each task in @scan->cg passing @scan->test_task() (a NULL test_task
 * selects all tasks), call @scan->process_task() without css_set_lock held.
 * Candidate tasks are gathered into a heap ordered by start time so that
 * repeated passes only consider tasks started after the latest task of the
 * previous pass - this guarantees forward progress without missing tasks.
 * If @scan->heap is non-NULL it is used (its "gt" member is overwritten);
 * otherwise a temporary heap is allocated, which may cause this function
 * to return -ENOMEM from heap_init().
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;
	/* Never dereference latest_task, since it's not refcounted */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* The caller supplied our heap and pre-allocated its memory */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		/* We need to allocate our own heap memory */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* cannot allocate the heap */
			return retval;
	}

 again:
	/*
	 * Scan tasks in the cgroup, using the scanner's "test_task" callback
	 * to determine which are of interest. Since we don't want to hold any
	 * locks during the task updates, gather tasks to be processed in a
	 * heap structure sorted by descending task start time.  If the
	 * statically-sized heap fills up, tasks that started later overflow;
	 * future iterations only consider tasks that started after the latest
	 * task in the previous pass.
	 */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		/*
		 * Only affect tasks that qualify per the caller's callback,
		 * if he provided one
		 */
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/*
		 * Only process tasks that started after the last task
		 * we processed
		 */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/*
			 * The new task was inserted; the heap wasn't
			 * previously full
			 */
			get_task_struct(p);
		} else if (dropped != p) {
			/*
			 * The new task was inserted, and pushed out a
			 * different task
			 */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/*
		 * Else the new task was newer than anything already in
		 * the heap and wasn't inserted
		 */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				latest_time = q->start_time;
				latest_task = q;
			}
			/* Process the task per the caller's callback */
			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/*
		 * If we had to process any tasks at all, scan again
		 * in case some of them were in the middle of forking
		 * children that didn't see the whole scan.
		 */
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
/* which pidlist file are we talking about? */
enum cgroup_filetype {
	CGROUP_FILE_PROCS,
	CGROUP_FILE_TASKS,
};
3286
3287
3288
3289
3290
3291
3292
/*
 * A pidlist is a list of pids that virtually represents the contents of one
 * of the cgroup files ("procs" or "tasks").  We keep a list of such pidlists,
 * one pair (procs, tasks) for each pid namespace that's relevant to the
 * cgroup.
 */
struct cgroup_pidlist {
	/*
	 * used to find which pidlist is wanted. doesn't change as long as
	 * this particular list stays in the list.
	 */
	struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
	/* array of pids (tgids for the procs file) */
	pid_t *list;
	/* how many elements the above list has */
	int length;
	/* how many files are using the current array */
	int use_count;
	/* each of these stored in a list by its cgroup */
	struct list_head links;
	/* pointer to the cgroup we belong to, for list removal */
	struct cgroup *owner;
	/* protects the other fields */
	struct rw_semaphore mutex;
};
3312
3313
3314
3315
3316
3317
/*
 * The following two functions "fix" the issue where there are more pids
 * than kmalloc will comfortably give memory for; in such cases, we use
 * vmalloc/vfree.
 */
#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
static void *pidlist_allocate(int count)
{
	if (PIDLIST_TOO_LARGE(count))
		return vmalloc(count * sizeof(pid_t));
	else
		return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
}
/* free a buffer obtained from pidlist_allocate(), whichever allocator
 * actually produced it */
static void pidlist_free(void *p)
{
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		kfree(p);
}
/* resize a pidlist_allocate()d buffer to @newcount entries */
static void *pidlist_resize(void *p, int newcount)
{
	void *newlist;
	/* note: if new alloc fails, old p will still be valid either way */
	if (is_vmalloc_addr(p)) {
		newlist = vmalloc(newcount * sizeof(pid_t));
		if (!newlist)
			return NULL;
		/*
		 * NOTE(review): only @newcount entries are copied, so this
		 * path is only correct for shrinking (the sole caller,
		 * pidlist_uniq(), always shrinks) - confirm before reusing
		 * this helper to grow a list.
		 */
		memcpy(newlist, p, newcount * sizeof(pid_t));
		vfree(p);
	} else {
		newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
	}
	return newlist;
}
3348
3349
3350
3351
3352
3353
3354
3355
/*
 * pidlist_uniq - given a sorted list, strip out all duplicate entries.
 * If the stripped list is sufficiently smaller (by at least a page) and
 * there's enough memory to allocate a new buffer, let go of the unneeded
 * memory.  Returns the length of the new list.
 */
#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
static int pidlist_uniq(pid_t **p, int length)
{
	int src, dest = 1;
	pid_t *list = *p;
	pid_t *newlist;

	/*
	 * we presume the 0th element is unique, so dest starts at 1. trivial
	 * edge cases first; no work needs to be done for either
	 */
	if (length == 0 || length == 1)
		return length;
	/* src and dest walk down the list; dest counts unique elements */
	for (src = 1; src < length; src++) {
		/* find next unique element */
		while (list[src] == list[src-1]) {
			src++;
			if (src == length)
				goto after;
		}
		/* dest always points to where the next unique element goes */
		list[dest] = list[src];
		dest++;
	}
after:
	/*
	 * if the length difference is large enough, we want to allocate a
	 * smaller buffer to save memory. if this fails due to out of memory,
	 * we'll just stay with what we've got.
	 */
	if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
		newlist = pidlist_resize(list, dest);
		if (newlist)
			*p = newlist;
	}
	return dest;
}
3394
/*
 * Three-way comparator used by sort() to order a pid array ascending.
 *
 * The previous "*a - *b" form depends on the subtraction not overflowing;
 * an explicit comparison is overflow-proof for any pid_t values and still
 * yields <0, 0 or >0 as sort() requires.
 */
static int cmppid(const void *a, const void *b)
{
	pid_t pa = *(const pid_t *)a;
	pid_t pb = *(const pid_t *)b;

	return (pa > pb) - (pa < pb);
}
3399
3400
3401
3402
3403
3404
3405
/*
 * find the appropriate pidlist for our purpose (given procs vs tasks),
 * creating one if necessary.  Returns with the write lock on that pidlist
 * already held, or NULL with no locks held if we're out of memory.
 */
static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
						  enum cgroup_filetype type)
{
	struct cgroup_pidlist *l;
	/* don't need task_nsproxy() if we're looking at ourself */
	struct pid_namespace *ns = task_active_pid_ns(current);

	/*
	 * We can't drop the pidlist_mutex before taking the l->mutex in case
	 * the last ref-holder is trying to remove l from the list at the same
	 * time. Holding the pidlist_mutex precludes somebody taking whichever
	 * list we find out from under us - a task could do a fork, then read
	 * the list, then simultaneously exit, dropping the count to 0.
	 */
	mutex_lock(&cgrp->pidlist_mutex);
	list_for_each_entry(l, &cgrp->pidlists, links) {
		if (l->key.type == type && l->key.ns == ns) {
			/* make sure l doesn't vanish out from under us */
			down_write(&l->mutex);
			mutex_unlock(&cgrp->pidlist_mutex);
			return l;
		}
	}
	/* entry not found; create a new one */
	l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
	if (!l) {
		mutex_unlock(&cgrp->pidlist_mutex);
		return l;
	}
	init_rwsem(&l->mutex);
	down_write(&l->mutex);
	l->key.type = type;
	l->key.ns = get_pid_ns(ns);
	l->use_count = 0; /* don't increment here - the caller does that */
	l->list = NULL;
	l->owner = cgrp;
	list_add(&l->links, &cgrp->pidlists);
	mutex_unlock(&cgrp->pidlist_mutex);
	return l;
}
3445
3446
3447
3448
/*
 * Load a cgroup's pidarray with either procs' tgids or tasks' pids
 */
static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
			      struct cgroup_pidlist **lp)
{
	pid_t *array;
	int length;
	int pid, n = 0; /* used for populating the array */
	struct cgroup_iter it;
	struct task_struct *tsk;
	struct cgroup_pidlist *l;

	/*
	 * If cgroup gets more users after we read count, we won't have
	 * enough space - tough.  This race is indistinguishable to the
	 * caller from the case that the additional cgroup users didn't
	 * show up until sometime later on.
	 */
	length = cgroup_task_count(cgrp);
	array = pidlist_allocate(length);
	if (!array)
		return -ENOMEM;
	/* now, populate the array */
	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		if (unlikely(n == length))
			break;
		/* get tgid or pid for procs or tasks file respectively */
		if (type == CGROUP_FILE_PROCS)
			pid = task_tgid_vnr(tsk);
		else
			pid = task_pid_vnr(tsk);
		if (pid > 0) /* make sure to only use valid results */
			array[n++] = pid;
	}
	cgroup_iter_end(cgrp, &it);
	length = n;
	/* now sort & (if procs) strip out duplicate tgids */
	sort(array, length, sizeof(pid_t), cmppid, NULL);
	if (type == CGROUP_FILE_PROCS)
		length = pidlist_uniq(&array, length);
	l = cgroup_pidlist_find(cgrp, type);
	if (!l) {
		pidlist_free(array);
		return -ENOMEM;
	}
	/* store array, freeing old if necessary - l->mutex already held */
	pidlist_free(l->list);
	l->list = array;
	l->length = length;
	l->use_count++;
	up_write(&l->mutex);
	*lp = l;
	return 0;
}
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
/**
 * cgroupstats_build - build and fill cgroupstats
 * @stats: cgroupstats to fill information into
 * @dentry: A dentry entry belonging to the cgroup for which stats have
 * been requested.
 *
 * Counts the tasks in the cgroup by scheduler state so that taskstats can
 * export the numbers to user space.  Returns -EINVAL if @dentry is not a
 * cgroup directory.
 */
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
	int ret = -EINVAL;
	struct cgroup *cgrp;
	struct cgroup_iter it;
	struct task_struct *tsk;

	/*
	 * Validate dentry by checking the superblock operations,
	 * and make sure it's a directory.
	 */
	if (dentry->d_sb->s_op != &cgroup_ops ||
	    !S_ISDIR(dentry->d_inode->i_mode))
		goto err;

	ret = 0;
	cgrp = dentry->d_fsdata;

	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		switch (tsk->state) {
		case TASK_RUNNING:
			stats->nr_running++;
			break;
		case TASK_INTERRUPTIBLE:
			stats->nr_sleeping++;
			break;
		case TASK_UNINTERRUPTIBLE:
			stats->nr_uninterruptible++;
			break;
		case TASK_STOPPED:
			stats->nr_stopped++;
			break;
		default:
			if (delayacct_is_task_waiting_on_io(tsk))
				stats->nr_io_wait++;
			break;
		}
	}
	cgroup_iter_end(cgrp, &it);

err:
	return ret;
}
3556
3557
3558
3559
3560
3561
3562
3563
/*
 * seq_file methods for the tasks/procs files. The seq_file position is the
 * next pid to display; the seq_file iterator is a pointer to the pid
 * in the cgroup->l->list array.
 */
static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
{
	/*
	 * Initially we receive a position value that corresponds to
	 * one more than the last pid shown (or 0 on the first call or
	 * after a seek to the start). Use a binary-search to find the
	 * next pid to display, if any
	 */
	struct cgroup_pidlist *l = s->private;
	int index = 0, pid = *pos;
	int *iter;

	down_read(&l->mutex);
	if (pid) {
		int end = l->length;

		while (index < end) {
			int mid = (index + end) / 2;
			if (l->list[mid] == pid) {
				index = mid;
				break;
			} else if (l->list[mid] <= pid)
				index = mid + 1;
			else
				end = mid;
		}
	}
	/* If we're off the end of the array, we're done */
	if (index >= l->length)
		return NULL;
	/* Update the abstract position to be the actual pid that we found */
	iter = l->list + index;
	*pos = *iter;
	return iter;
}
3599
/* seq_file ->stop(): drop the read lock taken in cgroup_pidlist_start() */
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
	struct cgroup_pidlist *l = s->private;
	up_read(&l->mutex);
}
3605
3606static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
3607{
3608 struct cgroup_pidlist *l = s->private;
3609 pid_t *p = v;
3610 pid_t *end = l->list + l->length;
3611
3612
3613
3614
3615 p++;
3616 if (p >= end) {
3617 return NULL;
3618 } else {
3619 *pos = *p;
3620 return p;
3621 }
3622}
3623
/* seq_file ->show(): emit the current pid, one per line */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	return seq_printf(s, "%d\n", *(int *)v);
}
3628
3629
3630
3631
3632
/*
 * seq_operations functions for iterating on pidlists through seq_file -
 * independent of whether it's tasks or procs
 */
static const struct seq_operations cgroup_pidlist_seq_operations = {
	.start = cgroup_pidlist_start,
	.stop = cgroup_pidlist_stop,
	.next = cgroup_pidlist_next,
	.show = cgroup_pidlist_show,
};
3639
/* drop a use_count reference on @l, freeing it when the count hits zero */
static void cgroup_release_pid_array(struct cgroup_pidlist *l)
{
	/*
	 * the case where we're the last user of this particular pidlist will
	 * have us remove it from the cgroup's list, which entails taking the
	 * mutex. since in pidlist_find the pidlist->lock depends on cgroup->
	 * pidlist_mutex, we have to take pidlist_mutex first.
	 */
	mutex_lock(&l->owner->pidlist_mutex);
	down_write(&l->mutex);
	BUG_ON(!l->use_count);
	if (!--l->use_count) {
		/* we're the last user if refcount is 0; remove and free */
		list_del(&l->links);
		mutex_unlock(&l->owner->pidlist_mutex);
		pidlist_free(l->list);
		put_pid_ns(l->key.ns);
		up_write(&l->mutex);
		kfree(l);
		return;
	}
	mutex_unlock(&l->owner->pidlist_mutex);
	up_write(&l->mutex);
}
3664
3665static int cgroup_pidlist_release(struct inode *inode, struct file *file)
3666{
3667 struct cgroup_pidlist *l;
3668 if (!(file->f_mode & FMODE_READ))
3669 return 0;
3670
3671
3672
3673
3674 l = ((struct seq_file *)file->private_data)->private;
3675 cgroup_release_pid_array(l);
3676 return seq_release(inode, file);
3677}
3678
/* file_operations installed by cgroup_pidlist_open() for readable
 * tasks/procs files */
static const struct file_operations cgroup_pidlist_operations = {
	.read = seq_read,
	.llseek = seq_lseek,
	.write = cgroup_file_write,
	.release = cgroup_pidlist_release,
};
3685
3686
3687
3688
3689
3690
3691
/*
 * The following functions handle opens on a file that displays a pidlist
 * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
 * in the cgroup.
 */
/* helper function for the two below it */
static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
{
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
	struct cgroup_pidlist *l;
	int retval;

	/* Nothing to do for write-only files */
	if (!(file->f_mode & FMODE_READ))
		return 0;

	/* have the array populated */
	retval = pidlist_array_load(cgrp, type, &l);
	if (retval)
		return retval;
	/* configure file information: processes show up just like tasks */
	file->f_op = &cgroup_pidlist_operations;

	retval = seq_open(file, &cgroup_pidlist_seq_operations);
	if (retval) {
		cgroup_release_pid_array(l);
		return retval;
	}
	((struct seq_file *)file->private_data)->private = l;
	return 0;
}
/* ->open() for the "tasks" file: one pid per thread in the cgroup */
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
}
/* ->open() for the "cgroup.procs" file: one tgid per process */
static int cgroup_procs_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
}
3725
/* read handler for the notify_on_release control file */
static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
					    struct cftype *cft)
{
	return notify_on_release(cgrp);
}
3731
3732static int cgroup_write_notify_on_release(struct cgroup *cgrp,
3733 struct cftype *cft,
3734 u64 val)
3735{
3736 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
3737 if (val)
3738 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3739 else
3740 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3741 return 0;
3742}
3743
3744
3745
3746
3747
3748
/*
 * Unregister event and free resources.
 *
 * Gets called from workqueue.
 */
static void cgroup_event_remove(struct work_struct *work)
{
	struct cgroup_event *event = container_of(work, struct cgroup_event,
			remove);
	struct cgroup *cgrp = event->cgrp;

	event->cft->unregister_event(cgrp, event->cft, event->eventfd);

	eventfd_ctx_put(event->eventfd);
	kfree(event);
	/* drop the dentry ref taken when the event was registered */
	dput(cgrp->dentry);
}
3761
3762
3763
3764
3765
3766
/*
 * Gets called on POLLHUP on eventfd when user closes it.
 *
 * Called with wqh->lock held and interrupts disabled.
 */
static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
		int sync, void *key)
{
	struct cgroup_event *event = container_of(wait,
			struct cgroup_event, wait);
	struct cgroup *cgrp = event->cgrp;
	unsigned long flags = (unsigned long)key;

	if (flags & POLLHUP) {
		__remove_wait_queue(event->wqh, &event->wait);
		spin_lock(&cgrp->event_list_lock);
		list_del_init(&event->list);
		spin_unlock(&cgrp->event_list_lock);
		/*
		 * We are in atomic context, but cgroup_event_remove() may
		 * sleep, so we have to call it in workqueue.
		 */
		schedule_work(&event->remove);
	}

	return 0;
}
3789
/* poll_table callback: remember the wait queue head and queue our wait
 * entry on it so cgroup_event_wake() fires when the eventfd hangs up */
static void cgroup_event_ptable_queue_proc(struct file *file,
		wait_queue_head_t *wqh, poll_table *pt)
{
	struct cgroup_event *event = container_of(pt,
			struct cgroup_event, pt);

	event->wqh = wqh;
	add_wait_queue(wqh, &event->wait);
}
3799
3800
3801
3802
3803
3804
3805
3806static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3807 const char *buffer)
3808{
3809 struct cgroup_event *event = NULL;
3810 unsigned int efd, cfd;
3811 struct file *efile = NULL;
3812 struct file *cfile = NULL;
3813 char *endp;
3814 int ret;
3815
3816 efd = simple_strtoul(buffer, &endp, 10);
3817 if (*endp != ' ')
3818 return -EINVAL;
3819 buffer = endp + 1;
3820
3821 cfd = simple_strtoul(buffer, &endp, 10);
3822 if ((*endp != ' ') && (*endp != '\0'))
3823 return -EINVAL;
3824 buffer = endp + 1;
3825
3826 event = kzalloc(sizeof(*event), GFP_KERNEL);
3827 if (!event)
3828 return -ENOMEM;
3829 event->cgrp = cgrp;
3830 INIT_LIST_HEAD(&event->list);
3831 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
3832 init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
3833 INIT_WORK(&event->remove, cgroup_event_remove);
3834
3835 efile = eventfd_fget(efd);
3836 if (IS_ERR(efile)) {
3837 ret = PTR_ERR(efile);
3838 goto fail;
3839 }
3840
3841 event->eventfd = eventfd_ctx_fileget(efile);
3842 if (IS_ERR(event->eventfd)) {
3843 ret = PTR_ERR(event->eventfd);
3844 goto fail;
3845 }
3846
3847 cfile = fget(cfd);
3848 if (!cfile) {
3849 ret = -EBADF;
3850 goto fail;
3851 }
3852
3853
3854
3855 ret = inode_permission(cfile->f_path.dentry->d_inode, MAY_READ);
3856 if (ret < 0)
3857 goto fail;
3858
3859 event->cft = __file_cft(cfile);
3860 if (IS_ERR(event->cft)) {
3861 ret = PTR_ERR(event->cft);
3862 goto fail;
3863 }
3864
3865 if (!event->cft->register_event || !event->cft->unregister_event) {
3866 ret = -EINVAL;
3867 goto fail;
3868 }
3869
3870 ret = event->cft->register_event(cgrp, event->cft,
3871 event->eventfd, buffer);
3872 if (ret)
3873 goto fail;
3874
3875 if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
3876 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3877 ret = 0;
3878 goto fail;
3879 }
3880
3881
3882
3883
3884
3885
3886 dget(cgrp->dentry);
3887
3888 spin_lock(&cgrp->event_list_lock);
3889 list_add(&event->list, &cgrp->event_list);
3890 spin_unlock(&cgrp->event_list_lock);
3891
3892 fput(cfile);
3893 fput(efile);
3894
3895 return 0;
3896
3897fail:
3898 if (cfile)
3899 fput(cfile);
3900
3901 if (event && event->eventfd && !IS_ERR(event->eventfd))
3902 eventfd_ctx_put(event->eventfd);
3903
3904 if (!IS_ERR_OR_NULL(efile))
3905 fput(efile);
3906
3907 kfree(event);
3908
3909 return ret;
3910}
3911
/* read handler for cgroup.clone_children: 1 iff the flag is set */
static u64 cgroup_clone_children_read(struct cgroup *cgrp,
					    struct cftype *cft)
{
	return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
}
3917
3918static int cgroup_clone_children_write(struct cgroup *cgrp,
3919 struct cftype *cft,
3920 u64 val)
3921{
3922 if (val)
3923 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
3924 else
3925 clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
3926 return 0;
3927}
3928
3929
3930
3931
3932
3933#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
/* base control files present in every cgroup directory */
static struct cftype files[] = {
	{
		.name = "tasks",
		.open = cgroup_tasks_open,
		.write_u64 = cgroup_tasks_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
		.open = cgroup_procs_open,
		.write_u64 = cgroup_procs_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
	},
	{
		.name = CGROUP_FILE_GENERIC_PREFIX "event_control",
		.write_string = cgroup_write_event_control,
		.mode = S_IWUGO,
	},
	{
		.name = "cgroup.clone_children",
		.read_u64 = cgroup_clone_children_read,
		.write_u64 = cgroup_clone_children_write,
	},
	{
		/* only shown in the root cgroup directory */
		.name = "release_agent",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.read_seq_string = cgroup_release_agent_show,
		.write_string = cgroup_release_agent_write,
		.max_write_len = PATH_MAX,
	},
	{ }	/* terminate */
};
3973
3974
3975
3976
3977
3978
3979
/**
 * cgroup_populate_dir - selectively create files in a directory
 * @cgrp: target cgroup
 * @base_files: true if the base files should be added
 * @subsys_mask: mask of the subsystem ids whose files should be added
 */
static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
			       unsigned long subsys_mask)
{
	int err;
	struct cgroup_subsys *ss;

	if (base_files) {
		err = cgroup_addrm_files(cgrp, NULL, files, true);
		if (err < 0)
			return err;
	}

	/* process cftsets of each subsystem */
	for_each_subsys(cgrp->root, ss) {
		struct cftype_set *set;
		if (!test_bit(ss->subsys_id, &subsys_mask))
			continue;

		list_for_each_entry(set, &ss->cftsets, node)
			cgroup_addrm_files(cgrp, ss, set->cfts, true);
	}

	/* This cgroup is ready now */
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		/*
		 * Update id->css pointer and make this css visible from
		 * CSS ID functions. This pointer will be dereferenced
		 * from RCU-read-side without locks.
		 */
		if (css->id)
			rcu_assign_pointer(css->id->css, css);
	}

	return 0;
}
4016
/*
 * Work function used by css_put() to drop the css's extra dentry ref from
 * process context (see the comment in init_cgroup_css()).  The superblock
 * is pinned across the dput() so it can't go away underneath us.
 */
static void css_dput_fn(struct work_struct *work)
{
	struct cgroup_subsys_state *css =
		container_of(work, struct cgroup_subsys_state, dput_work);
	struct dentry *dentry = css->cgroup->dentry;
	struct super_block *sb = dentry->d_sb;

	atomic_inc(&sb->s_active);
	dput(dentry);
	deactivate_super(sb);
}
4028
/* initialize @css and attach it to @cgrp's per-subsystem slot */
static void init_cgroup_css(struct cgroup_subsys_state *css,
			       struct cgroup_subsys *ss,
			       struct cgroup *cgrp)
{
	css->cgroup = cgrp;
	atomic_set(&css->refcnt, 1);
	css->flags = 0;
	css->id = NULL;
	if (cgrp == dummytop)
		css->flags |= CSS_ROOT;
	BUG_ON(cgrp->subsys[ss->subsys_id]);
	cgrp->subsys[ss->subsys_id] = css;

	/*
	 * css holds an extra ref to @cgrp->dentry which is put on the last
	 * css_put().  dput() requires process context which css_put() may
	 * be called without.  @css->dput_work will be used to invoke
	 * dput() asynchronously from css_put().
	 */
	INIT_WORK(&css->dput_work, css_dput_fn);
}
4050
4051
4052static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
4053{
4054 int ret = 0;
4055
4056 lockdep_assert_held(&cgroup_mutex);
4057
4058 if (ss->css_online)
4059 ret = ss->css_online(cgrp);
4060 if (!ret)
4061 cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
4062 return ret;
4063}
4064
4065
/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
{
	struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];

	lockdep_assert_held(&cgroup_mutex);

	if (!(css->flags & CSS_ONLINE))
		return;

	/*
	 * ->css_offline() is invoked with cgroup_mutex dropped and
	 * reacquired around the call, so any caller must tolerate the
	 * mutex being released here.
	 */
	if (ss->css_offline) {
		mutex_unlock(&cgroup_mutex);
		ss->css_offline(cgrp);
		mutex_lock(&cgroup_mutex);
	}

	cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE;
}
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4100 umode_t mode)
4101{
4102 struct cgroup *cgrp;
4103 struct cgroupfs_root *root = parent->root;
4104 int err = 0;
4105 struct cgroup_subsys *ss;
4106 struct super_block *sb = root->sb;
4107
4108
4109 cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
4110 if (!cgrp)
4111 return -ENOMEM;
4112
4113 cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
4114 if (cgrp->id < 0)
4115 goto err_free_cgrp;
4116
4117
4118
4119
4120
4121
4122
4123
4124 if (!cgroup_lock_live_group(parent)) {
4125 err = -ENODEV;
4126 goto err_free_id;
4127 }
4128
4129
4130
4131
4132
4133
4134 atomic_inc(&sb->s_active);
4135
4136 init_cgroup_housekeeping(cgrp);
4137
4138 cgrp->parent = parent;
4139 cgrp->root = parent->root;
4140 cgrp->top_cgroup = parent->top_cgroup;
4141
4142 if (notify_on_release(parent))
4143 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
4144
4145 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
4146 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
4147
4148 for_each_subsys(root, ss) {
4149 struct cgroup_subsys_state *css;
4150
4151 css = ss->css_alloc(cgrp);
4152 if (IS_ERR(css)) {
4153 err = PTR_ERR(css);
4154 goto err_free_all;
4155 }
4156 init_cgroup_css(css, ss, cgrp);
4157 if (ss->use_id) {
4158 err = alloc_css_id(ss, parent, cgrp);
4159 if (err)
4160 goto err_free_all;
4161 }
4162 }
4163
4164
4165
4166
4167
4168
4169 err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
4170 if (err < 0)
4171 goto err_free_all;
4172 lockdep_assert_held(&dentry->d_inode->i_mutex);
4173
4174
4175 dentry->d_fsdata = cgrp;
4176 cgrp->dentry = dentry;
4177 list_add_tail(&cgrp->allcg_node, &root->allcg_list);
4178 list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
4179 root->number_of_cgroups++;
4180
4181
4182 for_each_subsys(root, ss)
4183 dget(dentry);
4184
4185
4186 for_each_subsys(root, ss) {
4187 err = online_css(ss, cgrp);
4188 if (err)
4189 goto err_destroy;
4190
4191 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
4192 parent->parent) {
4193 pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
4194 current->comm, current->pid, ss->name);
4195 if (!strcmp(ss->name, "memory"))
4196 pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
4197 ss->warned_broken_hierarchy = true;
4198 }
4199 }
4200
4201 err = cgroup_populate_dir(cgrp, true, root->subsys_mask);
4202 if (err)
4203 goto err_destroy;
4204
4205 mutex_unlock(&cgroup_mutex);
4206 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
4207
4208 return 0;
4209
4210err_free_all:
4211 for_each_subsys(root, ss) {
4212 if (cgrp->subsys[ss->subsys_id])
4213 ss->css_free(cgrp);
4214 }
4215 mutex_unlock(&cgroup_mutex);
4216
4217 deactivate_super(sb);
4218err_free_id:
4219 ida_simple_remove(&root->cgroup_ida, cgrp->id);
4220err_free_cgrp:
4221 kfree(cgrp);
4222 return err;
4223
4224err_destroy:
4225 cgroup_destroy_locked(cgrp);
4226 mutex_unlock(&cgroup_mutex);
4227 mutex_unlock(&dentry->d_inode->i_mutex);
4228 return err;
4229}
4230
4231static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4232{
4233 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
4234
4235
4236 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
4237}
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
/*
 * Check the reference count on each subsystem's css for @cgrp.  A css
 * refcount above 1 means somebody beyond the cgroup itself holds a
 * reference, so the cgroup is not safe to release.  We scan across all
 * subsystems rather than the per-hierarchy list because this can be
 * reached via check_for_release() with no synchronization other than RCU.
 */
static int cgroup_has_css_refs(struct cgroup *cgrp)
{
	int i;

	/*
	 * We won't need to lock the subsys array, because the subsystems
	 * we're concerned about aren't going anywhere since we have
	 * subsystems mounted.
	 */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		struct cgroup_subsys_state *css;

		/* Skip subsystems not present or not in this hierarchy */
		if (ss == NULL || ss->root != cgrp->root)
			continue;

		css = cgrp->subsys[ss->subsys_id];
		/*
		 * When called from check_for_release() it's possible
		 * that by this point the cgroup has been removed
		 * and the css deleted. But a false-positive doesn't
		 * matter, since it can only happen if the cgroup
		 * has been deleted and hence no longer needs the
		 * release agent to be called anyway.
		 */
		if (css && css_refcnt(css) > 1)
			return 1;
	}
	return 0;
}
4279
/* remove @cgrp; caller holds cgroup_mutex and the directory's i_mutex */
static int cgroup_destroy_locked(struct cgroup *cgrp)
	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
{
	struct dentry *d = cgrp->dentry;
	struct cgroup *parent = cgrp->parent;
	DEFINE_WAIT(wait);
	struct cgroup_event *event, *tmp;
	struct cgroup_subsys *ss;
	LIST_HEAD(tmp_list);

	lockdep_assert_held(&d->d_inode->i_mutex);
	lockdep_assert_held(&cgroup_mutex);

	/* cgroup must be empty of tasks and children to be removed */
	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children))
		return -EBUSY;

	/*
	 * Deactivate each css refcnt by adding CSS_DEACT_BIAS and mark
	 * @cgrp removed, so that further reference attempts and child
	 * creation fail while we tear things down.
	 */
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];

		WARN_ON(atomic_read(&css->refcnt) < 0);
		atomic_add(CSS_DEACT_BIAS, &css->refcnt);
	}
	set_bit(CGRP_REMOVED, &cgrp->flags);

	/* tell subsystems to initiate destruction */
	for_each_subsys(cgrp->root, ss)
		offline_css(ss, cgrp);

	/*
	 * Put the base reference of each css.  Each css holds an extra
	 * reference to the cgroup's dentry (taken in cgroup_create());
	 * that ref is dropped via css_dput_fn() whenever the last css_put
	 * happens, so dentry destruction waits for all css releases.
	 */
	for_each_subsys(cgrp->root, ss)
		css_put(cgrp->subsys[ss->subsys_id]);

	raw_spin_lock(&release_list_lock);
	if (!list_empty(&cgrp->release_list))
		list_del_init(&cgrp->release_list);
	raw_spin_unlock(&release_list_lock);

	/* delete this cgroup from parent->children */
	list_del_rcu(&cgrp->sibling);
	list_del_init(&cgrp->allcg_node);

	dget(d);
	cgroup_d_remove_dir(d);
	dput(d);

	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	/*
	 * Unregister events and notify userspace.  Use a temporary list to
	 * avoid a deadlock with cgroup_event_wake(): cgroup_event_wake() is
	 * called with the wait queue head locked, so remove_wait_queue()
	 * cannot be called while holding event_list_lock.
	 */
	spin_lock(&cgrp->event_list_lock);
	list_splice_init(&cgrp->event_list, &tmp_list);
	spin_unlock(&cgrp->event_list_lock);
	list_for_each_entry_safe(event, tmp, &tmp_list, list) {
		list_del_init(&event->list);
		remove_wait_queue(event->wqh, &event->wait);
		eventfd_signal(event->eventfd, 1);
		schedule_work(&event->remove);
	}

	return 0;
}
4360
4361static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
4362{
4363 int ret;
4364
4365 mutex_lock(&cgroup_mutex);
4366 ret = cgroup_destroy_locked(dentry->d_fsdata);
4367 mutex_unlock(&cgroup_mutex);
4368
4369 return ret;
4370}
4371
/* set up @ss's cftype set list, seeding it with the base cftypes if any */
static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
{
	INIT_LIST_HEAD(&ss->cftsets);

	/*
	 * base_cftset is embedded in subsys itself, no need to worry about
	 * deregistration.
	 */
	if (ss->base_cftypes) {
		ss->base_cftset.cfts = ss->base_cftypes;
		list_add_tail(&ss->base_cftset.node, &ss->cftsets);
	}
}
4385
/* boot-time initialization of a built-in subsystem */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	mutex_lock(&cgroup_mutex);

	/* init base cftset */
	cgroup_init_cftsets(ss);

	/* Create the top cgroup state for this subsystem */
	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;
	css = ss->css_alloc(dummytop);
	/* We don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_cgroup_css(css, ss, dummytop);

	/* Update the init_css_set to contain a subsys
	 * pointer to this state - since the subsystem is
	 * newly registered, all tasks and hence the
	 * init_css_set is in the subsystem's top cgroup. */
	init_css_set.subsys[ss->subsys_id] = css;

	need_forkexit_callback |= ss->fork || ss->exit;

	/* At system boot, before all subsystems have been
	 * registered, no tasks have been forked, so we don't
	 * need to invoke fork callbacks here. */
	BUG_ON(!list_empty(&init_task.tasks));

	ss->active = 1;
	BUG_ON(online_css(ss, dummytop));

	mutex_unlock(&cgroup_mutex);

	/* this function shouldn't be used with modular subsystems, since they
	 * need to register a subsys_id, among other things */
	BUG_ON(ss->module);
}
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
/**
 * cgroup_load_subsys: load and register a modular subsystem at runtime
 * @ss: the subsystem to load
 *
 * This function should be called in a modular subsystem's initcall.  If the
 * subsystem is built-in anyway, work is delegated to the boot-time
 * registration and nothing is done here.
 */
int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;
	int i, ret;

	/* check name and function validity */
	if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
	    ss->css_alloc == NULL || ss->css_free == NULL)
		return -EINVAL;

	/*
	 * we don't support callbacks in modular subsystems. this check is
	 * before the ss->module check for consistency; a subsystem that could
	 * be a module should still have no callbacks even if the user isn't
	 * compiling it as one.
	 */
	if (ss->fork || ss->exit)
		return -EINVAL;

	/*
	 * an optionally modular subsystem is built-in: we want to do nothing,
	 * since cgroup_init_subsys will have already taken care of it.
	 */
	if (ss->module == NULL) {
		/* a sanity check */
		BUG_ON(subsys[ss->subsys_id] != ss);
		return 0;
	}

	/* init base cftset */
	cgroup_init_cftsets(ss);

	mutex_lock(&cgroup_mutex);
	subsys[ss->subsys_id] = ss;

	/*
	 * no ss->css_alloc seems to need anything important in the ss
	 * struct, so this can happen first (i.e. before the rootnode
	 * attachment).
	 */
	css = ss->css_alloc(dummytop);
	if (IS_ERR(css)) {
		/* failure case - need to deassign the subsys[] slot. */
		subsys[ss->subsys_id] = NULL;
		mutex_unlock(&cgroup_mutex);
		return PTR_ERR(css);
	}

	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;

	/* our new subsystem will be attached to the dummy hierarchy. */
	init_cgroup_css(css, ss, dummytop);

	if (ss->use_id) {
		ret = cgroup_init_idr(ss, css);
		if (ret)
			goto err_unload;
	}

	/*
	 * Now we need to entangle the css into the existing css_sets. unlike
	 * in cgroup_init_subsys, there are now multiple css_sets, so each one
	 * will need a new pointer to it; done by iterating the css_set_table.
	 * furthermore, modifying the existing css_sets will corrupt the hash
	 * table state, so each changed css_set will need its hash recomputed.
	 * this is all done under the css_set_lock.
	 */
	write_lock(&css_set_lock);
	for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
		struct css_set *cg;
		struct hlist_node *node, *tmp;
		struct hlist_head *bucket = &css_set_table[i], *new_bucket;

		hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
			/* skip entries that already have the new css set */
			if (cg->subsys[ss->subsys_id])
				continue;
			/* remove existing entry */
			hlist_del(&cg->hlist);
			/* set new value */
			cg->subsys[ss->subsys_id] = css;
			/* recompute hash and restore entry */
			new_bucket = css_set_hash(cg->subsys);
			hlist_add_head(&cg->hlist, new_bucket);
		}
	}
	write_unlock(&css_set_lock);

	ss->active = 1;
	ret = online_css(ss, dummytop);
	if (ret)
		goto err_unload;

	/* success! */
	mutex_unlock(&cgroup_mutex);
	return 0;

err_unload:
	mutex_unlock(&cgroup_mutex);
	/* @ss can't be mounted here as try_module_get() would fail */
	cgroup_unload_subsys(ss);
	return ret;
}
EXPORT_SYMBOL_GPL(cgroup_load_subsys);
4542
4543
4544
4545
4546
4547
4548
4549
4550
/**
 * cgroup_unload_subsys: unload a modular subsystem
 * @ss: the subsystem to unload
 *
 * This function should be called in a modular subsystem's exitcall. When
 * this function is invoked, the subsystem must not be attached to any
 * hierarchy other than the dummy root.
 */
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
	struct cg_cgroup_link *link;
	struct hlist_head *hhead;

	BUG_ON(ss->module == NULL);

	/*
	 * we shouldn't be called if the subsystem is in use, and the use of
	 * try_module_get in parse_cgroupfs_options should ensure that it
	 * doesn't start being used while we're killing it off.
	 */
	BUG_ON(ss->root != &rootnode);

	mutex_lock(&cgroup_mutex);

	offline_css(ss, dummytop);
	ss->active = 0;

	if (ss->use_id) {
		idr_remove_all(&ss->idr);
		idr_destroy(&ss->idr);
	}

	/* deassign the subsys_id */
	subsys[ss->subsys_id] = NULL;

	/* remove subsystem from rootnode's list of subsystems */
	list_del_init(&ss->sibling);

	/*
	 * disentangle the css from all css_sets attached to the dummytop. as
	 * in loading, we need to pay our respects to the hashtable gods.
	 */
	write_lock(&css_set_lock);
	list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
		struct css_set *cg = link->cg;

		hlist_del(&cg->hlist);
		cg->subsys[ss->subsys_id] = NULL;
		hhead = css_set_hash(cg->subsys);
		hlist_add_head(&cg->hlist, hhead);
	}
	write_unlock(&css_set_lock);

	/*
	 * remove subsystem's css from the dummytop and free it - need to
	 * free before clearing the pointer because ss->css_free needs the
	 * cgrp->subsys pointer to find its state.
	 */
	ss->css_free(dummytop);
	dummytop->subsys[ss->subsys_id] = NULL;

	mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
4608
4609
4610
4611
4612
4613
4614
/**
 * cgroup_init_early - cgroup initialization at system boot
 *
 * Initialize cgroups at system boot, and initialize any
 * subsystems that request early init.
 */
int __init cgroup_init_early(void)
{
	int i;
	atomic_set(&init_css_set.refcount, 1);
	INIT_LIST_HEAD(&init_css_set.cg_links);
	INIT_LIST_HEAD(&init_css_set.tasks);
	INIT_HLIST_NODE(&init_css_set.hlist);
	css_set_count = 1;
	init_cgroup_root(&rootnode);
	root_count = 1;
	init_task.cgroups = &init_css_set;

	/* link the initial css_set to the dummy top cgroup */
	init_css_set_link.cg = &init_css_set;
	init_css_set_link.cgrp = dummytop;
	list_add(&init_css_set_link.cgrp_link_list,
		 &rootnode.top_cgroup.css_sets);
	list_add(&init_css_set_link.cg_link_list,
		 &init_css_set.cg_links);

	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&css_set_table[i]);

	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		/* at bootup time, we don't worry about modular subsystems */
		if (!ss || ss->module)
			continue;

		BUG_ON(!ss->name);
		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
		BUG_ON(!ss->css_alloc);
		BUG_ON(!ss->css_free);
		if (ss->subsys_id != i) {
			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
			       ss->name, ss->subsys_id);
			BUG();
		}

		if (ss->early_init)
			cgroup_init_subsys(ss);
	}
	return 0;
}
4659
4660
4661
4662
4663
4664
4665
/**
 * cgroup_init - cgroup initialization
 *
 * Register cgroup filesystem and /proc file, and initialize
 * any subsystems that didn't request early init.
 */
int __init cgroup_init(void)
{
	int err;
	int i;
	struct hlist_head *hhead;

	err = bdi_init(&cgroup_backing_dev_info);
	if (err)
		return err;

	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		/* at bootup time, we don't worry about modular subsystems */
		if (!ss || ss->module)
			continue;
		if (!ss->early_init)
			cgroup_init_subsys(ss);
		if (ss->use_id)
			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
	}

	/* Add init_css_set to the hash table */
	hhead = css_set_hash(init_css_set.subsys);
	hlist_add_head(&init_css_set.hlist, hhead);
	BUG_ON(!init_root_id(&rootnode));

	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
	if (!cgroup_kobj) {
		err = -ENOMEM;
		goto out;
	}

	err = register_filesystem(&cgroup_fs_type);
	if (err < 0) {
		kobject_put(cgroup_kobj);
		goto out;
	}

	proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

out:
	if (err)
		bdi_destroy(&cgroup_backing_dev_info);

	return err;
}
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
/*
 * proc_cgroup_show()
 *  - Print task's cgroup paths into seq_file, one line for each hierarchy
 *  - Used for /proc/<pid>/cgroup.
 *  - Holds cgroup_mutex across the walk so the task can't be migrated and
 *    hierarchies can't be rebound while we print.
 */
static int proc_cgroup_show(struct seq_file *m, void *v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	int retval;
	struct cgroupfs_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		goto out;

	retval = -ESRCH;
	pid = m->private;
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk)
		goto out_free;

	retval = 0;

	mutex_lock(&cgroup_mutex);

	for_each_active_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int count = 0;

		/* Format: hierarchy-id:comma-separated-subsystems:path */
		seq_printf(m, "%d:", root->hierarchy_id);
		for_each_subsys(root, ss)
			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		seq_putc(m, ':');
		cgrp = task_cgroup_from_root(tsk, root);
		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
		if (retval < 0)
			goto out_unlock;
		seq_puts(m, buf);
		seq_putc(m, '\n');
	}

out_unlock:
	mutex_unlock(&cgroup_mutex);
	put_task_struct(tsk);
out_free:
	kfree(buf);
out:
	return retval;
}
4778
4779static int cgroup_open(struct inode *inode, struct file *file)
4780{
4781 struct pid *pid = PROC_I(inode)->pid;
4782 return single_open(file, proc_cgroup_show, pid);
4783}
4784
/* File operations for /proc/<pid>/cgroup, backed by proc_cgroup_show(). */
const struct file_operations proc_cgroup_operations = {
	.open		= cgroup_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
4791
4792
4793static int proc_cgroupstats_show(struct seq_file *m, void *v)
4794{
4795 int i;
4796
4797 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
4798
4799
4800
4801
4802
4803 mutex_lock(&cgroup_mutex);
4804 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4805 struct cgroup_subsys *ss = subsys[i];
4806 if (ss == NULL)
4807 continue;
4808 seq_printf(m, "%s\t%d\t%d\t%d\n",
4809 ss->name, ss->root->hierarchy_id,
4810 ss->root->number_of_cgroups, !ss->disabled);
4811 }
4812 mutex_unlock(&cgroup_mutex);
4813 return 0;
4814}
4815
/* Open handler for /proc/cgroups; no per-open private data needed. */
static int cgroupstats_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_cgroupstats_show, NULL);
}
4820
/* File operations for /proc/cgroups, backed by proc_cgroupstats_show(). */
static const struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
4827
4828
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
/**
 * cgroup_fork - attach newly forked task to its parents cgroup.
 * @child: pointer to task_struct of forking parent process.
 *
 * The child inherits the parent's css_set.  task_lock(current) pins
 * current->cgroups against a concurrent migration while we copy the
 * pointer and take a reference on it.
 */
void cgroup_fork(struct task_struct *child)
{
	task_lock(current);
	child->cgroups = current->cgroups;
	get_css_set(child->cgroups);
	task_unlock(current);
	/* Not on any css_set task list yet; cgroup_post_fork() links it. */
	INIT_LIST_HEAD(&child->cg_list);
}
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
/**
 * cgroup_post_fork - called on a new task after adding it to the task list
 * @child: the task in question
 *
 * Adds the task to the list running through its css_set if necessary and
 * calls the subsystem fork() callbacks.  Has to run after the task is
 * visible on the task list so that a concurrent cgroup iteration is
 * guaranteed to see the new task.
 */
void cgroup_post_fork(struct task_struct *child)
{
	int i;

	/*
	 * use_task_css_set_links is set before cgroup_enable_task_cg_lists()
	 * walks the tasklist under tasklist_lock, and we read it here after
	 * the child was added to the tasklist under the same lock.  The
	 * paired LOCK/UNLOCK of tasklist_lock therefore guarantees that
	 * either the enabling walk saw the child or we see the flag here,
	 * so the child cannot be missed by both.  The list_empty() check
	 * under css_set_lock avoids double-adding in the overlap case.
	 */
	if (use_task_css_set_links) {
		write_lock(&css_set_lock);
		task_lock(child);
		if (list_empty(&child->cg_list))
			list_add(&child->cg_list, &child->cgroups->tasks);
		task_unlock(child);
		write_unlock(&css_set_lock);
	}

	/*
	 * Call ss->fork().  This must happen after @child is linked on
	 * its css_set (above) so the callbacks see a fully attached task.
	 */
	if (need_forkexit_callback) {
		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];

			/*
			 * fork/exit callbacks are supported only for
			 * builtin subsystems; modular ones are skipped,
			 * so no synchronization against unload is needed.
			 */
			if (!ss || ss->module)
				continue;

			if (ss->fork)
				ss->fork(child);
		}
	}
}
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
/**
 * cgroup_exit - detach cgroup from exiting task
 * @tsk: pointer to task_struct of exiting process
 * @run_callbacks: run the subsystem exit callbacks?
 *
 * Detach the task from its css_set, repoint it at init_css_set, run the
 * builtin subsystems' ->exit() callbacks, and drop the old css_set
 * reference via put_css_set_taskexit().
 */
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
	struct css_set *cg;
	int i;

	/*
	 * Unlink from the css_set task list if necessary.  The unlocked
	 * list_empty() is an optimistic fast path; it is re-checked under
	 * css_set_lock before the actual removal.
	 */
	if (!list_empty(&tsk->cg_list)) {
		write_lock(&css_set_lock);
		if (!list_empty(&tsk->cg_list))
			list_del_init(&tsk->cg_list);
		write_unlock(&css_set_lock);
	}

	/* Reassign the task to init_css_set. */
	task_lock(tsk);
	cg = tsk->cgroups;
	tsk->cgroups = &init_css_set;

	if (run_callbacks && need_forkexit_callback) {
		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];

			/* modular subsystems can't use exit callbacks */
			if (!ss || ss->module)
				continue;

			if (ss->exit) {
				/*
				 * raw deref is OK: we still hold a ref on
				 * @cg, dropped only after task_unlock().
				 */
				struct cgroup *old_cgrp =
					rcu_dereference_raw(cg->subsys[i])->cgroup;
				struct cgroup *cgrp = task_cgroup(tsk, i);
				ss->exit(cgrp, old_cgrp, tsk);
			}
		}
	}
	task_unlock(tsk);

	if (cg)
		put_css_set_taskexit(cg);
}
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
5003{
5004 int ret;
5005 struct cgroup *target;
5006
5007 if (cgrp == dummytop)
5008 return 1;
5009
5010 target = task_cgroup_from_root(task, cgrp->root);
5011 while (cgrp != target && cgrp!= cgrp->top_cgroup)
5012 cgrp = cgrp->parent;
5013 ret = (cgrp == target);
5014 return ret;
5015}
5016
/*
 * Queue @cgrp for a release-agent notification if it has become
 * releasable: notify_on_release semantics, zero refcount, no children
 * and no remaining css references.
 */
static void check_for_release(struct cgroup *cgrp)
{
	/* All of these checks rely on RCU or cgroup_mutex */
	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
	    && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
		/*
		 * The cgroup is currently removeable.  If it's not already
		 * queued for a userspace notification, queue it now and
		 * kick the workqueue outside the spinlock.
		 */
		int need_schedule_work = 0;
		raw_spin_lock(&release_list_lock);
		if (!cgroup_is_removed(cgrp) &&
		    list_empty(&cgrp->release_list)) {
			list_add(&cgrp->release_list, &release_list);
			need_schedule_work = 1;
		}
		raw_spin_unlock(&release_list_lock);
		if (need_schedule_work)
			schedule_work(&release_agent_work);
	}
}
5038
5039
/* Caller must verify that the css is not for the root cgroup */
bool __css_tryget(struct cgroup_subsys_state *css)
{
	while (true) {
		int t, v;

		/*
		 * Lock-free increment: re-read the refcount and cmpxchg
		 * in v+1.  A negative observed value means CSS_DEACT_BIAS
		 * (INT_MIN) has been applied, i.e. the css is being
		 * deactivated, so the tryget fails.
		 */
		v = css_refcnt(css);
		t = atomic_cmpxchg(&css->refcnt, v, v + 1);
		if (likely(t == v))
			return true;
		else if (t < 0)
			return false;
		/* Lost a race with a concurrent update; retry. */
		cpu_relax();
	}
}
EXPORT_SYMBOL_GPL(__css_tryget);
5055
5056
/* Caller must verify that the css is not for the root cgroup */
void __css_put(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	int v;

	rcu_read_lock();
	/* Strip CSS_DEACT_BIAS (if applied) from the decremented count. */
	v = css_unbias_refcnt(atomic_dec_return(&css->refcnt));

	switch (v) {
	case 1:
		/* Only the base reference remains: cgroup may be releasable. */
		if (notify_on_release(cgrp)) {
			set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}
		break;
	case 0:
		/* Last reference gone; defer the dentry put to a workqueue. */
		schedule_work(&css->dput_work);
		break;
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(__css_put);
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
/*
 * Notify userspace when a cgroup is released, by running the configured
 * release agent with the cgroup's path (relative to the cgroup fs root)
 * as its argument.
 *
 * This races with tasks being attached or child cgroups being created
 * before the agent's presumed 'rmdir' runs; that's fine — the rmdir just
 * fails quietly and the cgroup will be re-notified on its next release.
 *
 * UMH_WAIT_EXEC means we only wait until the helper is successfully
 * execve()'d; its exit status is of no interest to us.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	raw_spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf = NULL, *agentbuf = NULL;
		struct cgroup *cgrp = list_entry(release_list.next,
						 struct cgroup,
						 release_list);
		list_del_init(&cgrp->release_list);
		/* Drop the spinlock: the allocations below may sleep. */
		raw_spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf)
			goto continue_free;
		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
			goto continue_free;
		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
		if (!agentbuf)
			goto continue_free;

		i = 0;
		argv[i++] = agentbuf;
		argv[i++] = pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal command environment */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/*
		 * Drop the lock while we invoke the usermode helper, since
		 * the exec could involve hitting disk and hence be slow.
		 */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		mutex_lock(&cgroup_mutex);
 continue_free:
		kfree(pathbuf);
		kfree(agentbuf);
		raw_spin_lock(&release_list_lock);
	}
	raw_spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
5151
5152static int __init cgroup_disable(char *str)
5153{
5154 int i;
5155 char *token;
5156
5157 while ((token = strsep(&str, ",")) != NULL) {
5158 if (!*token)
5159 continue;
5160 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
5161 struct cgroup_subsys *ss = subsys[i];
5162
5163
5164
5165
5166
5167
5168 if (!ss || ss->module)
5169 continue;
5170
5171 if (!strcmp(token, ss->name)) {
5172 ss->disabled = 1;
5173 printk(KERN_INFO "Disabling %s control group"
5174 " subsystem\n", ss->name);
5175 break;
5176 }
5177 }
5178 }
5179 return 1;
5180}
5181__setup("cgroup_disable=", cgroup_disable);
5182
5183
5184
5185
5186
5187
5188
5189
/*
 * Return the css's id, or 0 if no id has been attached.
 */
unsigned short css_id(struct cgroup_subsys_state *css)
{
	struct css_id *cssid;

	/*
	 * This css_id() returns a correct value when the caller holds a
	 * refcnt on the css or is inside rcu_read_lock().  Once css->id
	 * is allocated, it is unchanged until freed.
	 */
	cssid = rcu_dereference_check(css->id, css_refcnt(css));

	if (cssid)
		return cssid->id;
	return 0;
}
EXPORT_SYMBOL_GPL(css_id);
5206
5207unsigned short css_depth(struct cgroup_subsys_state *css)
5208{
5209 struct css_id *cssid;
5210
5211 cssid = rcu_dereference_check(css->id, css_refcnt(css));
5212
5213 if (cssid)
5214 return cssid->depth;
5215 return 0;
5216}
5217EXPORT_SYMBOL_GPL(css_depth);
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232bool css_is_ancestor(struct cgroup_subsys_state *child,
5233 const struct cgroup_subsys_state *root)
5234{
5235 struct css_id *child_id;
5236 struct css_id *root_id;
5237
5238 child_id = rcu_dereference(child->id);
5239 if (!child_id)
5240 return false;
5241 root_id = rcu_dereference(root->id);
5242 if (!root_id)
5243 return false;
5244 if (child_id->depth < root_id->depth)
5245 return false;
5246 if (child_id->stack[root_id->depth] != root_id->id)
5247 return false;
5248 return true;
5249}
5250
/* Release the css's id back to the subsystem's idr. */
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
	struct css_id *id = css->id;
	/* When this is called before css_id initialization, id can be NULL */
	if (!id)
		return;

	BUG_ON(!ss->use_id);

	/*
	 * Clear the RCU-protected pointers first so concurrent readers
	 * see NULL, then remove the idr entry and free after a grace
	 * period via kfree_rcu().
	 */
	rcu_assign_pointer(id->css, NULL);
	rcu_assign_pointer(css->id, NULL);
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, id->id);
	spin_unlock(&ss->id_lock);
	kfree_rcu(id, rcu_head);
}
EXPORT_SYMBOL_GPL(free_css_id);
5268
5269
5270
5271
5272
5273
/*
 * Allocate a new css_id with room for an ancestor-id stack of @depth + 1
 * entries.  Called from init or create paths; calls are presumably
 * serialized by cgroup_mutex at create time — TODO confirm at call sites.
 */
static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
{
	struct css_id *newid;
	int myid, error, size;

	BUG_ON(!ss->use_id);

	/* Trailing array holds ids for levels 0..depth inclusive. */
	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
	newid = kzalloc(size, GFP_KERNEL);
	if (!newid)
		return ERR_PTR(-ENOMEM);

	/* Preload the idr outside the spinlock (may sleep). */
	if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
		error = -ENOMEM;
		goto err_out;
	}
	spin_lock(&ss->id_lock);
	/* Don't use 0: ids start from 1. */
	error = idr_get_new_above(&ss->idr, newid, 1, &myid);
	spin_unlock(&ss->id_lock);

	/* idr_get_new_above() fails when there are no free ids. */
	if (error) {
		error = -ENOSPC;
		goto err_out;
	}
	if (myid > CSS_ID_MAX)
		goto remove_idr;

	newid->id = myid;
	newid->depth = depth;
	return newid;
remove_idr:
	/* Got an id beyond the supported range; undo the idr allocation. */
	error = -ENOSPC;
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, myid);
	spin_unlock(&ss->id_lock);
err_out:
	kfree(newid);
	return ERR_PTR(error);

}
5316
5317static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
5318 struct cgroup_subsys_state *rootcss)
5319{
5320 struct css_id *newid;
5321
5322 spin_lock_init(&ss->id_lock);
5323 idr_init(&ss->idr);
5324
5325 newid = get_new_cssid(ss, 0);
5326 if (IS_ERR(newid))
5327 return PTR_ERR(newid);
5328
5329 newid->stack[0] = newid->id;
5330 newid->css = rootcss;
5331 rootcss->id = newid;
5332 return 0;
5333}
5334
/*
 * Allocate a css_id for @child's css in @ss, copying @parent's ancestor
 * stack and appending the child's own id so ancestry checks are O(1).
 */
static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
			struct cgroup *child)
{
	int subsys_id, i, depth = 0;
	struct cgroup_subsys_state *parent_css, *child_css;
	struct css_id *child_id, *parent_id;

	subsys_id = ss->subsys_id;
	parent_css = parent->subsys[subsys_id];
	child_css = child->subsys[subsys_id];
	parent_id = parent_css->id;
	depth = parent_id->depth + 1;

	child_id = get_new_cssid(ss, depth);
	if (IS_ERR(child_id))
		return PTR_ERR(child_id);

	/* Inherit the parent's ancestor stack, then append ourselves. */
	for (i = 0; i < depth; i++)
		child_id->stack[i] = parent_id->stack[i];
	child_id->stack[depth] = child_id->id;
	/*
	 * child_id->css is left NULL here; it is presumably filled in once
	 * the cgroup becomes visible — TODO confirm where css is assigned.
	 */
	rcu_assign_pointer(child_css->id, child_id);

	return 0;
}
5363
5364
5365
5366
5367
5368
5369
5370
5371
5372struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
5373{
5374 struct css_id *cssid = NULL;
5375
5376 BUG_ON(!ss->use_id);
5377 cssid = idr_find(&ss->idr, id);
5378
5379 if (unlikely(!cssid))
5380 return NULL;
5381
5382 return rcu_dereference(cssid->css);
5383}
5384EXPORT_SYMBOL_GPL(css_lookup);
5385
5386
5387
5388
5389
5390
5391
5392
5393
5394
5395
/**
 * css_get_next - lookup next cgroup under specified hierarchy.
 * @ss: pointer to subsystem
 * @id: current position of iteration.
 * @root: pointer to css; the search covers the tree under this css.
 * @foundid: position of found object, written on success.
 *
 * Search the next css at or after @id under @root's hierarchy.  Must be
 * called under rcu_read_lock().  Returns NULL at the end of the scan.
 */
struct cgroup_subsys_state *
css_get_next(struct cgroup_subsys *ss, int id,
	     struct cgroup_subsys_state *root, int *foundid)
{
	struct cgroup_subsys_state *ret = NULL;
	struct css_id *tmp;
	int tmpid;
	int rootid = css_id(root);
	int depth = css_depth(root);

	/* A zero id means @root has no id attached; nothing to iterate. */
	if (!rootid)
		return NULL;

	BUG_ON(!ss->use_id);
	WARN_ON_ONCE(!rcu_read_lock_held());

	/* fill start point for scan */
	tmpid = id;
	while (1) {
		/*
		 * Scan the next allocated entry from the idr; tmpid is
		 * updated in place by idr_get_next().
		 */
		tmp = idr_get_next(&ss->idr, &tmpid);
		if (!tmp)
			break;
		/* Accept only entries whose ancestor at @depth is @root. */
		if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
			ret = rcu_dereference(tmp->css);
			if (ret) {
				*foundid = tmpid;
				break;
			}
		}
		/* continue to scan from the next id */
		tmpid = tmpid + 1;
	}
	return ret;
}
5434
5435
5436
5437
5438struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
5439{
5440 struct cgroup *cgrp;
5441 struct inode *inode;
5442 struct cgroup_subsys_state *css;
5443
5444 inode = f->f_dentry->d_inode;
5445
5446 if (inode->i_op != &cgroup_dir_inode_operations)
5447 return ERR_PTR(-EBADF);
5448
5449 if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
5450 return ERR_PTR(-EINVAL);
5451
5452
5453 cgrp = __d_cgrp(f->f_dentry);
5454 css = cgrp->subsys[id];
5455 return css ? css : ERR_PTR(-ENOENT);
5456}
5457
5458#ifdef CONFIG_CGROUP_DEBUG
5459static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cont)
5460{
5461 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
5462
5463 if (!css)
5464 return ERR_PTR(-ENOMEM);
5465
5466 return css;
5467}
5468
/* Free the css allocated by debug_css_alloc(). */
static void debug_css_free(struct cgroup *cont)
{
	kfree(cont->subsys[debug_subsys_id]);
}
5473
5474static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
5475{
5476 return atomic_read(&cont->count);
5477}
5478
5479static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
5480{
5481 return cgroup_task_count(cont);
5482}
5483
5484static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
5485{
5486 return (u64)(unsigned long)current->cgroups;
5487}
5488
5489static u64 current_css_set_refcount_read(struct cgroup *cont,
5490 struct cftype *cft)
5491{
5492 u64 count;
5493
5494 rcu_read_lock();
5495 count = atomic_read(¤t->cgroups->refcount);
5496 rcu_read_unlock();
5497 return count;
5498}
5499
/*
 * For each hierarchy, print which cgroup the current task's css_set is
 * linked to (debug file "current_css_set_cg_links").
 */
static int current_css_set_cg_links_read(struct cgroup *cont,
					 struct cftype *cft,
					 struct seq_file *seq)
{
	struct cg_cgroup_link *link;
	struct css_set *cg;

	read_lock(&css_set_lock);
	rcu_read_lock();
	cg = rcu_dereference(current->cgroups);
	list_for_each_entry(link, &cg->cg_links, cg_link_list) {
		struct cgroup *c = link->cgrp;
		const char *name;

		/* Fall back to "?" when the cgroup has no dentry attached. */
		if (c->dentry)
			name = c->dentry->d_name.name;
		else
			name = "?";
		seq_printf(seq, "Root %d group %s\n",
			   c->root->hierarchy_id, name);
	}
	rcu_read_unlock();
	read_unlock(&css_set_lock);
	return 0;
}
5525
5526#define MAX_TASKS_SHOWN_PER_CSS 25
5527static int cgroup_css_links_read(struct cgroup *cont,
5528 struct cftype *cft,
5529 struct seq_file *seq)
5530{
5531 struct cg_cgroup_link *link;
5532
5533 read_lock(&css_set_lock);
5534 list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
5535 struct css_set *cg = link->cg;
5536 struct task_struct *task;
5537 int count = 0;
5538 seq_printf(seq, "css_set %p\n", cg);
5539 list_for_each_entry(task, &cg->tasks, cg_list) {
5540 if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
5541 seq_puts(seq, " ...\n");
5542 break;
5543 } else {
5544 seq_printf(seq, " task %d\n",
5545 task_pid_vnr(task));
5546 }
5547 }
5548 }
5549 read_unlock(&css_set_lock);
5550 return 0;
5551}
5552
5553static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
5554{
5555 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
5556}
5557
5558static struct cftype debug_files[] = {
5559 {
5560 .name = "cgroup_refcount",
5561 .read_u64 = cgroup_refcount_read,
5562 },
5563 {
5564 .name = "taskcount",
5565 .read_u64 = debug_taskcount_read,
5566 },
5567
5568 {
5569 .name = "current_css_set",
5570 .read_u64 = current_css_set_read,
5571 },
5572
5573 {
5574 .name = "current_css_set_refcount",
5575 .read_u64 = current_css_set_refcount_read,
5576 },
5577
5578 {
5579 .name = "current_css_set_cg_links",
5580 .read_seq_string = current_css_set_cg_links_read,
5581 },
5582
5583 {
5584 .name = "cgroup_css_links",
5585 .read_seq_string = cgroup_css_links_read,
5586 },
5587
5588 {
5589 .name = "releasable",
5590 .read_u64 = releasable_read,
5591 },
5592
5593 { }
5594};
5595
/* Builtin "debug" subsystem: exposes internal cgroup state via debug_files. */
struct cgroup_subsys debug_subsys = {
	.name = "debug",
	.css_alloc = debug_css_alloc,
	.css_free = debug_css_free,
	.subsys_id = debug_subsys_id,
	.base_cftypes = debug_files,
};
5603#endif
5604