1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/cred.h>
31#include <linux/ctype.h>
32#include <linux/errno.h>
33#include <linux/init_task.h>
34#include <linux/kernel.h>
35#include <linux/list.h>
36#include <linux/mm.h>
37#include <linux/mutex.h>
38#include <linux/mount.h>
39#include <linux/pagemap.h>
40#include <linux/proc_fs.h>
41#include <linux/rcupdate.h>
42#include <linux/sched.h>
43#include <linux/backing-dev.h>
44#include <linux/seq_file.h>
45#include <linux/slab.h>
46#include <linux/magic.h>
47#include <linux/spinlock.h>
48#include <linux/string.h>
49#include <linux/sort.h>
50#include <linux/kmod.h>
51#include <linux/module.h>
52#include <linux/delayacct.h>
53#include <linux/cgroupstats.h>
54#include <linux/hashtable.h>
55#include <linux/namei.h>
56#include <linux/pid_namespace.h>
57#include <linux/idr.h>
58#include <linux/vmalloc.h>
59#include <linux/eventfd.h>
60#include <linux/poll.h>
61#include <linux/flex_array.h>
62#include <linux/kthread.h>
63#include <linux/file.h>
64
65#include <linux/atomic.h>
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/*
 * cgroup_mutex is the master lock.  Any modification to cgroup or its
 * hierarchy must be performed while holding it.
 *
 * Expose the mutex under CONFIG_PROVE_RCU so that controllers can use it
 * in their rcu_dereference_check() lockdep conditions.
 */
#ifdef CONFIG_PROVE_RCU
DEFINE_MUTEX(cgroup_mutex);
EXPORT_SYMBOL_GPL(cgroup_mutex);	/* only for lockdep */
#else
static DEFINE_MUTEX(cgroup_mutex);
#endif

/* protects hierarchy-root level state; nests inside cgroup_mutex */
static DEFINE_MUTEX(cgroup_root_mutex);

/*
 * Generate an array of cgroup subsystem pointers.  At boot time, this is
 * populated with the built-in subsystems; modular subsystems are
 * registered afterwards.  The mutable section of this array is protected
 * by cgroup_mutex.
 */
#define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys,
#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
static struct cgroup_subsys *cgroup_subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};
103
104
105
106
107
108
/*
 * The dummy hierarchy, reserved for the subsystems that are otherwise
 * unattached - it never has more than a single cgroup, and all tasks are
 * part of that cgroup.
 */
static struct cgroupfs_root cgroup_dummy_root;

/* dummy_top is a shorthand for the dummy hierarchy's top cgroup */
static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup;
113
114
115
116
/*
 * cgroupfs file entry, pointed to from leaf dentry->d_fsdata.  Linked on
 * cgroup->files via ->node.
 */
struct cfent {
	struct list_head node;
	struct dentry *dentry;
	struct cftype *type;
	struct cgroup_subsys_state *css;

	/* file xattrs */
	struct simple_xattrs xattrs;
};
126
127
128
129
130
/* css_id: identifies a css (cgroup_subsys_state) within a hierarchy */
#define CSS_ID_MAX (65535)
struct css_id {
	/*
	 * The css to which this ID points.  Set to a valid value after the
	 * cgroup is populated and cleared when the cgroup is removed.  The
	 * pointer is RCU-protected; for safe use across a removal race,
	 * readers are expected to pin the css (see the __rcu annotation).
	 */
	struct cgroup_subsys_state __rcu *css;

	/* ID of this css */
	unsigned short id;

	/* depth in the hierarchy */
	unsigned short depth;

	/* freed via RCU */
	struct rcu_head rcu_head;

	/* IDs of ancestors from the root: stack[0] .. stack[depth] */
	unsigned short stack[0];
};
158
159
160
161
/*
 * cgroup_event represents an event which userspace wants to receive.
 */
struct cgroup_event {
	/*
	 * css which the event belongs to.
	 */
	struct cgroup_subsys_state *css;
	/*
	 * Control file which the event is associated with.
	 */
	struct cftype *cft;
	/*
	 * eventfd to signal userspace about the event.
	 */
	struct eventfd_ctx *eventfd;
	/*
	 * Each of these is stored in a list by the cgroup.
	 */
	struct list_head list;
	/*
	 * All fields below are needed to unregister the event when
	 * userspace closes the eventfd.
	 */
	poll_table pt;
	wait_queue_head_t *wqh;
	wait_queue_t wait;
	struct work_struct remove;
};
188
189
190
/* The list of hierarchy roots */
static LIST_HEAD(cgroup_roots);
static int cgroup_root_count;

/*
 * Hierarchy ID allocation and mapping.  It follows the same exclusion
 * rules as other root ops - both cgroup_mutex and cgroup_root_mutex for
 * writes, either for reads.
 */
static DEFINE_IDR(cgroup_hierarchy_idr);

/* the name every hierarchy's top cgroup starts out with */
static struct cgroup_name root_cgroup_name = { .name = "/" };

/*
 * Assign a monotonically increasing serial number to cgroups.  It
 * guarantees cgroups with bigger numbers are newer than those with
 * smaller numbers.  Also, as cgroups are always appended to the parent's
 * ->children list, it guarantees that sibling cgroups are always sorted
 * in ascending serial number order on the list.
 */
static u64 cgroup_serial_nr_next = 1;

/*
 * This flag indicates whether tasks in the fork and exit paths should
 * check for fork/exit handlers to call.
 */
static int need_forkexit_callback __read_mostly;

static struct cftype cgroup_base_files[];

static void cgroup_destroy_css_killed(struct cgroup *cgrp);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
			      bool is_add);
226
227
228
229
230
231
232
233
234
235
236
237
238static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
239 struct cgroup_subsys *ss)
240{
241 if (ss)
242 return rcu_dereference_check(cgrp->subsys[ss->subsys_id],
243 lockdep_is_held(&cgroup_mutex));
244 else
245 return &cgrp->dummy_css;
246}
247
248
/* %true iff @cgrp has been marked dead (CGRP_DEAD set in ->flags) */
static inline bool cgroup_is_dead(const struct cgroup *cgrp)
{
	return test_bit(CGRP_DEAD, &cgrp->flags);
}
253
254
255
256
257
258
259
260
261
262
263bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor)
264{
265 while (cgrp) {
266 if (cgrp == ancestor)
267 return true;
268 cgrp = cgrp->parent;
269 }
270 return false;
271}
272EXPORT_SYMBOL_GPL(cgroup_is_descendant);
273
274static int cgroup_is_releasable(const struct cgroup *cgrp)
275{
276 const int bits =
277 (1 << CGRP_RELEASABLE) |
278 (1 << CGRP_NOTIFY_ON_RELEASE);
279 return (cgrp->flags & bits) == bits;
280}
281
/* %true iff userspace asked to be notified when @cgrp becomes empty */
static int notify_on_release(const struct cgroup *cgrp)
{
	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}
286
287
288
289
290
291
292
293
/**
 * for_each_subsys - iterate all loaded cgroup subsystems
 * @ss: the iteration cursor
 * @i: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
 *
 * Should be called under cgroup_mutex (asserted via lockdep); skips
 * NULL slots in cgroup_subsys[].
 */
#define for_each_subsys(ss, i) \
	for ((i) = 0; (i) < CGROUP_SUBSYS_COUNT; (i)++) \
		if (({ lockdep_assert_held(&cgroup_mutex); \
		       !((ss) = cgroup_subsys[i]); })) { } \
		else

/**
 * for_each_builtin_subsys - iterate all built-in cgroup subsystems
 * @ss: the iteration cursor
 * @i: the index of @ss, CGROUP_BUILTIN_SUBSYS_COUNT after reaching the end
 *
 * Built-in subsystems are always present and iteration itself doesn't
 * require any synchronization.
 */
#define for_each_builtin_subsys(ss, i) \
	for ((i) = 0; (i) < CGROUP_BUILTIN_SUBSYS_COUNT && \
	     (((ss) = cgroup_subsys[i]) || true); (i)++)

/* iterate each subsystem attached to a hierarchy */
#define for_each_root_subsys(root, ss) \
	list_for_each_entry((ss), &(root)->subsys_list, sibling)

/* iterate across the active hierarchies */
#define for_each_active_root(root) \
	list_for_each_entry((root), &cgroup_roots, root_list)
319
/* directory dentry -> its cgroup (stored in d_fsdata at mkdir time) */
static inline struct cgroup *__d_cgrp(struct dentry *dentry)
{
	return dentry->d_fsdata;
}

/* file dentry -> its cfent (stored in d_fsdata at file creation) */
static inline struct cfent *__d_cfe(struct dentry *dentry)
{
	return dentry->d_fsdata;
}

/* file dentry -> the cftype describing it */
static inline struct cftype *__d_cft(struct dentry *dentry)
{
	return __d_cfe(dentry)->type;
}
334
335
336
337
338
339
340
341
342static bool cgroup_lock_live_group(struct cgroup *cgrp)
343{
344 mutex_lock(&cgroup_mutex);
345 if (cgroup_is_dead(cgrp)) {
346 mutex_unlock(&cgroup_mutex);
347 return false;
348 }
349 return true;
350}
351
352
353
/*
 * The list of cgroups eligible for automatic release agent invocation.
 * Protected by release_list_lock; processed by release_agent_work.
 */
static LIST_HEAD(release_list);
static DEFINE_RAW_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
359
360
361
362
363
364
365
366
367
/*
 * A cgroup can be associated with multiple css_sets as different tasks
 * may belong to different cgroups on different hierarchies.  In the other
 * direction, a css_set is naturally associated with multiple cgroups.
 * This M:N relationship is represented by the following link structure
 * which exists for each association and allows traversing the
 * associations from both sides.
 */
struct cgrp_cset_link {
	/* the cgroup and css_set this link associates */
	struct cgroup *cgrp;
	struct css_set *cset;

	/* list of cgrp_cset_links anchored at cgrp->cset_links */
	struct list_head cset_link;

	/* list of cgrp_cset_links anchored at css_set->cgrp_links */
	struct list_head cgrp_link;
};
379
380
381
382
383
384
385
386
/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted.  It contains a pointer to the root state
 * for each subsystem.  Also used to anchor the list of css_sets.  Not
 * reference-counted, to improve performance when child cgroups
 * haven't been created.
 */
static struct css_set init_css_set;
static struct cgrp_cset_link init_cgrp_cset_link;

static int cgroup_init_idr(struct cgroup_subsys *ss,
			   struct cgroup_subsys_state *css);

/*
 * css_set_lock protects the list of css_set objects and the chains of
 * cgrp_cset_links off each css_set.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;

/*
 * Hash table for cgroup groups.  This improves the performance to find
 * an existing css_set.  This hash doesn't (currently) take into
 * account cgroups in empty hierarchies.
 */
#define CSS_SET_HASH_BITS 7
static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
408
/* hash a css pointer array into a css_set_table key */
static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
{
	unsigned long key = 0UL;
	struct cgroup_subsys *ss;
	int i;

	for_each_subsys(ss, i)
		key += (unsigned long)css[i];
	key = (key >> 16) ^ key;	/* fold the upper bits in */

	return key;
}
421
422
423
424
425
426
427
/*
 * We don't maintain the lists running through each css_set to its task
 * until after the first call that needs them.  This reduces the
 * fork()/exit() overhead for people who have cgroups compiled into their
 * kernel but not actually in use.
 */
static int use_task_css_set_links __read_mostly;
429
/*
 * Drop a reference on @cset, freeing it and its links when the count
 * reaches zero.  @taskexit marks cgroups releasable before checking for
 * release-agent notification.
 */
static void __put_css_set(struct css_set *cset, int taskexit)
{
	struct cgrp_cset_link *link, *tmp_link;

	/*
	 * Ensure that the refcount doesn't hit zero while any readers
	 * can see it.  Similar to atomic_dec_and_lock(), but for an
	 * rwlock.
	 */
	if (atomic_add_unless(&cset->refcount, -1, 1))
		return;
	write_lock(&css_set_lock);
	if (!atomic_dec_and_test(&cset->refcount)) {
		write_unlock(&css_set_lock);
		return;
	}

	/* This css_set is dead.  Unlink it and release cgroup refcounts */
	hash_del(&cset->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
		struct cgroup *cgrp = link->cgrp;

		list_del(&link->cset_link);
		list_del(&link->cgrp_link);

		/* @cgrp can't go away while we're holding css_set_lock */
		if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) {
			if (taskexit)
				set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}

		kfree(link);
	}

	write_unlock(&css_set_lock);
	kfree_rcu(cset, rcu_head);
}
470
471
472
473
/*
 * refcounting helpers for css_set objects
 */
static inline void get_css_set(struct css_set *cset)
{
	atomic_inc(&cset->refcount);
}

/* drop a reference obtained via get_css_set() or find_css_set() */
static inline void put_css_set(struct css_set *cset)
{
	__put_css_set(cset, 0);
}

/* as put_css_set(), but called from the task-exit path */
static inline void put_css_set_taskexit(struct css_set *cset)
{
	__put_css_set(cset, 1);
}
488
489
490
491
492
493
494
495
496
497
498
/**
 * compare_css_sets - helper function for find_existing_css_set().
 * @cset: candidate css_set being tested
 * @old_cset: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if "cset" matches "old_cset" except for the hierarchy
 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
 */
static bool compare_css_sets(struct css_set *cset,
			     struct css_set *old_cset,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct list_head *l1, *l2;

	if (memcmp(template, cset->subsys, sizeof(cset->subsys))) {
		/* Not all subsystems matched */
		return false;
	}

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies with no subsystems.  We
	 * could get by with just this check alone (and skip the memcmp
	 * above) but on most setups the memcmp check will actually
	 * give a quicker answer.
	 */
	l1 = &cset->cgrp_links;
	l2 = &old_cset->cgrp_links;
	while (1) {
		struct cgrp_cset_link *link1, *link2;
		struct cgroup *cgrp1, *cgrp2;

		l1 = l1->next;
		l2 = l2->next;
		/* See if we reached the end - both lists are equal length */
		if (l1 == &cset->cgrp_links) {
			BUG_ON(l2 != &old_cset->cgrp_links);
			break;
		} else {
			BUG_ON(l2 == &old_cset->cgrp_links);
		}
		/* Locate the cgroups associated with these links */
		link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
		link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
		cgrp1 = link1->cgrp;
		cgrp2 = link2->cgrp;
		/* Hierarchies should be linked in the same order */
		BUG_ON(cgrp1->root != cgrp2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cgrp1->root == new_cgrp->root) {
			if (cgrp1 != new_cgrp)
				return false;
		} else {
			if (cgrp1 != cgrp2)
				return false;
		}
	}
	return true;
}
560
561
562
563
564
565
566
/**
 * find_existing_css_set - init css array and find the matching css_set
 * @old_cset: the css_set that we're using before the cgroup transition
 * @cgrp: the cgroup that we're moving into
 * @template: out param for the new set of csses, should be clear on entry
 *
 * Returns the matching css_set from css_set_table, or %NULL if none
 * exists.  Caller is responsible for css_set_lock.
 */
static struct css_set *find_existing_css_set(struct css_set *old_cset,
					struct cgroup *cgrp,
					struct cgroup_subsys_state *template[])
{
	struct cgroupfs_root *root = cgrp->root;
	struct cgroup_subsys *ss;
	struct css_set *cset;
	unsigned long key;
	int i;

	/*
	 * Build the set of subsystem state objects that we want to see in
	 * the new css_set.
	 */
	for_each_subsys(ss, i) {
		if (root->subsys_mask & (1UL << i)) {
			/*
			 * Subsystem is in this hierarchy.  So we want the
			 * subsystem state from the new cgroup.
			 */
			template[i] = cgroup_css(cgrp, ss);
		} else {
			/*
			 * Subsystem is not in this hierarchy, so we don't
			 * want to change the subsystem state.
			 */
			template[i] = old_cset->subsys[i];
		}
	}

	key = css_set_hash(template);
	hash_for_each_possible(css_set_table, cset, hlist, key) {
		if (!compare_css_sets(cset, old_cset, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cset;
	}

	/* No existing cgroup group matched */
	return NULL;
}
607
/* free a list of cgrp_cset_links chained through ->cset_link */
static void free_cgrp_cset_links(struct list_head *links_to_free)
{
	struct cgrp_cset_link *link, *tmp_link;

	list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
		list_del(&link->cset_link);
		kfree(link);
	}
}
617
618
619
620
621
622
623
624
625
626static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
627{
628 struct cgrp_cset_link *link;
629 int i;
630
631 INIT_LIST_HEAD(tmp_links);
632
633 for (i = 0; i < count; i++) {
634 link = kzalloc(sizeof(*link), GFP_KERNEL);
635 if (!link) {
636 free_cgrp_cset_links(tmp_links);
637 return -ENOMEM;
638 }
639 list_add(&link->cset_link, tmp_links);
640 }
641 return 0;
642}
643
644
645
646
647
648
649
/**
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
 * @cset: the css_set to be linked
 * @cgrp: the destination cgroup
 */
static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
			 struct cgroup *cgrp)
{
	struct cgrp_cset_link *link;

	BUG_ON(list_empty(tmp_links));
	link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
	link->cset = cset;
	link->cgrp = cgrp;
	list_move(&link->cset_link, &cgrp->cset_links);

	/*
	 * Always add links to the tail of the list so that the list is
	 * sorted by order of hierarchy creation.
	 */
	list_add_tail(&link->cgrp_link, &cset->cgrp_links);
}
666
667
668
669
670
671
672
673
674
/**
 * find_css_set - return a new css_set with one cgroup updated
 * @old_cset: the baseline css_set
 * @cgrp: the cgroup to be updated
 *
 * Return a new css_set that's equivalent to @old_cset, but with @cgrp
 * substituted into the appropriate hierarchy.  Returns a referenced
 * css_set on success, %NULL on allocation failure.
 */
static struct css_set *find_css_set(struct css_set *old_cset,
				    struct cgroup *cgrp)
{
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
	struct css_set *cset;
	struct list_head tmp_links;
	struct cgrp_cset_link *link;
	unsigned long key;

	lockdep_assert_held(&cgroup_mutex);

	/* First see if we already have a cgroup group that matches
	 * the desired set */
	read_lock(&css_set_lock);
	cset = find_existing_css_set(old_cset, cgrp, template);
	if (cset)
		get_css_set(cset);
	read_unlock(&css_set_lock);

	if (cset)
		return cset;

	cset = kzalloc(sizeof(*cset), GFP_KERNEL);
	if (!cset)
		return NULL;

	/* Allocate all the cgrp_cset_link objects that we'll need */
	if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
		kfree(cset);
		return NULL;
	}

	atomic_set(&cset->refcount, 1);
	INIT_LIST_HEAD(&cset->cgrp_links);
	INIT_LIST_HEAD(&cset->tasks);
	INIT_HLIST_NODE(&cset->hlist);

	/* Copy the set of subsystem state objects generated in
	 * find_existing_css_set() */
	memcpy(cset->subsys, template, sizeof(cset->subsys));

	write_lock(&css_set_lock);
	/* Add reference counts and links from the new css_set. */
	list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
		struct cgroup *c = link->cgrp;

		/* substitute @cgrp for the old cgroup on its hierarchy */
		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_links, cset, c);
	}

	BUG_ON(!list_empty(&tmp_links));

	css_set_count++;

	/* Add this cgroup group to the hash table */
	key = css_set_hash(cset->subsys);
	hash_add(css_set_table, &cset->hlist, key);

	write_unlock(&css_set_lock);

	return cset;
}
738
739
740
741
742
/*
 * Return the cgroup for "task" from the given hierarchy.  Must be
 * called with cgroup_mutex held.
 */
static struct cgroup *task_cgroup_from_root(struct task_struct *task,
					    struct cgroupfs_root *root)
{
	struct css_set *cset;
	struct cgroup *res = NULL;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	read_lock(&css_set_lock);
	/*
	 * No need to lock the task - since we hold cgroup_mutex the
	 * task can't change groups, so we can use its css_set directly.
	 */
	cset = task_css_set(task);
	if (cset == &init_css_set) {
		/* init_css_set isn't linked; it maps to the top cgroup */
		res = &root->top_cgroup;
	} else {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	read_unlock(&css_set_lock);
	/* every css_set has a cgroup on every mounted hierarchy */
	BUG_ON(!res);
	return res;
}
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
/* forward declarations for the cgroupfs inode/file machinery below */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;

/* cgroup files don't participate in writeback accounting */
static struct backing_dev_info cgroup_backing_dev_info = {
	.name = "cgroup",
	.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys_state *child_css);
845
/* allocate and initialize an inode for a cgroup file/directory on @sb */
static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
{
	struct inode *inode = new_inode(sb);

	if (inode) {
		inode->i_ino = get_next_ino();
		inode->i_mode = mode;
		/* owned by whoever is mounting/creating it */
		inode->i_uid = current_fsuid();
		inode->i_gid = current_fsgid();
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
	}
	return inode;
}
860
/*
 * Allocate a cgroup_name holding a copy of @dentry's name.  The
 * allocation is sized for the name plus the terminating NUL (the name
 * is stored in a trailing buffer).  Returns %NULL on OOM.
 */
static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
{
	struct cgroup_name *name;

	name = kmalloc(sizeof(*name) + dentry->d_name.len + 1, GFP_KERNEL);
	if (!name)
		return NULL;
	strcpy(name->name, dentry->d_name.name);
	return name;
}
871
/* final destruction of a cgroup; runs from workqueue context */
static void cgroup_free_fn(struct work_struct *work)
{
	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);

	mutex_lock(&cgroup_mutex);
	cgrp->root->number_of_cgroups--;
	mutex_unlock(&cgroup_mutex);

	/*
	 * We get a ref to the parent's dentry, and put the ref when
	 * this cgroup is being freed, so it's ensured that the parent
	 * won't be destroyed before its children.
	 */
	dput(cgrp->parent->dentry);

	/*
	 * Drop the active superblock reference that we took when we
	 * created the cgroup.  This will free cgrp->root if we're
	 * holding the last reference to @sb.
	 */
	deactivate_super(cgrp->root->sb);

	/*
	 * All pidlists must have been flushed before the cgroup can be
	 * destroyed.
	 */
	BUG_ON(!list_empty(&cgrp->pidlists));

	simple_xattrs_free(&cgrp->xattrs);

	kfree(rcu_dereference_raw(cgrp->name));
	kfree(cgrp);
}
905
static void cgroup_free_rcu(struct rcu_head *head)
{
	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);

	/*
	 * cgroup_free_fn() needs to grab cgroup_mutex which can't be
	 * done from RCU callback context; punt to a work item.
	 */
	INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
	schedule_work(&cgrp->destroy_work);
}
913
/* ->d_iput: release the cgroup or cfent hanging off a dying dentry */
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
	/* is dentry a directory? if so, free its associated cgroup */
	if (S_ISDIR(inode->i_mode)) {
		struct cgroup *cgrp = dentry->d_fsdata;

		BUG_ON(!(cgroup_is_dead(cgrp)));
		call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
	} else {
		struct cfent *cfe = __d_cfe(dentry);
		struct cgroup *cgrp = dentry->d_parent->d_fsdata;

		/* a still-linked cfe here indicates a missed removal */
		WARN_ONCE(!list_empty(&cfe->node) &&
			  cgrp != &cgrp->root->top_cgroup,
			  "cfe still linked for %s\n", cfe->type->name);
		simple_xattrs_free(&cfe->xattrs);
		kfree(cfe);
	}
	iput(inode);
}
934
/* ->d_delete: always delete dentries on last dput so stale cgroup
 * dentries don't linger in the dcache */
static int cgroup_delete(const struct dentry *d)
{
	return 1;
}
939
/* unhash @d and remove it from its parent directory */
static void remove_dir(struct dentry *d)
{
	/* pin the parent across simple_rmdir() */
	struct dentry *parent = dget(d->d_parent);

	d_delete(d);
	simple_rmdir(parent->d_inode, d);
	dput(parent);
}
948
/*
 * Unlink and release the file entry for @cft in @cgrp.  A %NULL @cft
 * matches the first file on the list.  Expects the directory i_mutex and
 * cgroup_mutex to be held.
 */
static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
{
	struct cfent *cfe;

	lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
	lockdep_assert_held(&cgroup_mutex);

	/*
	 * The entry may be missing, e.g. when cleaning up after a
	 * partially failed creation; the loop simply finds nothing.
	 */
	list_for_each_entry(cfe, &cgrp->files, node) {
		struct dentry *d = cfe->dentry;

		if (cft && cfe->type != cft)
			continue;

		/* hold the dentry while we unlink it from the dir */
		dget(d);
		d_delete(d);
		simple_unlink(cgrp->dentry->d_inode, d);
		list_del_init(&cfe->node);
		dput(d);

		break;
	}
}
975
976
977
978
979
980
/**
 * cgroup_clear_dir - remove subsys files in a cgroup directory
 * @cgrp: target cgroup
 * @subsys_mask: mask of the subsystem ids whose files should be removed
 */
static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask)
{
	struct cgroup_subsys *ss;
	int i;

	for_each_subsys(ss, i) {
		struct cftype_set *set;

		if (!test_bit(i, &subsys_mask))
			continue;
		list_for_each_entry(set, &ss->cftsets, node)
			cgroup_addrm_files(cgrp, set->cfts, false);
	}
}
995
996
997
998
/*
 * Unhook @dentry from its parent's child list and remove the directory.
 * NOTE: the dentry must have been dget()'ed by the caller.
 */
static void cgroup_d_remove_dir(struct dentry *dentry)
{
	struct dentry *parent;

	parent = dentry->d_parent;
	/* lock parent before child, child with nested class */
	spin_lock(&parent->d_lock);
	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	list_del_init(&dentry->d_u.d_child);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&parent->d_lock);
	remove_dir(dentry);
}
1011
1012
1013
1014
1015
1016
/*
 * rebind_subsystems - rebind the subsystems attached to @root
 * @root: hierarchy being changed
 * @added_mask: subsystems to bind to @root
 * @removed_mask: subsystems to unbind from @root
 *
 * Subsystems migrate between @root and the dummy hierarchy.  Must be
 * called with both cgroup_mutex and cgroup_root_mutex held.  Returns 0
 * on success or -errno; on failure nothing is rebound.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			     unsigned long added_mask, unsigned removed_mask)
{
	struct cgroup *cgrp = &root->top_cgroup;
	struct cgroup_subsys *ss;
	unsigned long pinned = 0;
	int i, ret;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	BUG_ON(!mutex_is_locked(&cgroup_root_mutex));

	/* Check that any added subsystems are currently free */
	for_each_subsys(ss, i) {
		if (!(added_mask & (1 << i)))
			continue;

		/* is the subsystem mounted elsewhere? */
		if (ss->root != &cgroup_dummy_root) {
			ret = -EBUSY;
			goto out_put;
		}

		/* pin the module so the subsystem can't be unloaded */
		if (!try_module_get(ss->module)) {
			ret = -ENOENT;
			goto out_put;
		}
		pinned |= 1 << i;
	}

	/* subsys could be missing if unloaded between parsing and here */
	if (added_mask != pinned) {
		ret = -ENOENT;
		goto out_put;
	}

	ret = cgroup_populate_dir(cgrp, added_mask);
	if (ret)
		goto out_put;

	/*
	 * Nothing can fail from this point on.  Remove files for the
	 * removed subsystems and rebind each subsystem.
	 */
	cgroup_clear_dir(cgrp, removed_mask);

	for_each_subsys(ss, i) {
		unsigned long bit = 1UL << i;

		if (bit & added_mask) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(cgroup_css(cgrp, ss));
			BUG_ON(!cgroup_css(cgroup_dummy_top, ss));
			BUG_ON(cgroup_css(cgroup_dummy_top, ss)->cgroup != cgroup_dummy_top);

			rcu_assign_pointer(cgrp->subsys[i],
					   cgroup_css(cgroup_dummy_top, ss));
			cgroup_css(cgrp, ss)->cgroup = cgrp;

			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			if (ss->bind)
				ss->bind(cgroup_css(cgrp, ss));

			/* module reference was taken above and is kept */
			root->subsys_mask |= bit;
		} else if (bit & removed_mask) {
			/* We're removing this subsystem */
			BUG_ON(cgroup_css(cgrp, ss) != cgroup_css(cgroup_dummy_top, ss));
			BUG_ON(cgroup_css(cgrp, ss)->cgroup != cgrp);

			if (ss->bind)
				ss->bind(cgroup_css(cgroup_dummy_top, ss));

			cgroup_css(cgroup_dummy_top, ss)->cgroup = cgroup_dummy_top;
			RCU_INIT_POINTER(cgrp->subsys[i], NULL);

			cgroup_subsys[i]->root = &cgroup_dummy_root;
			list_move(&ss->sibling, &cgroup_dummy_root.subsys_list);

			/* subsystem is now free - drop reference on module */
			module_put(ss->module);
			root->subsys_mask &= ~bit;
		}
	}

	/*
	 * Mark @root as having finished binding subsystems.
	 * @root->subsys_mask now matches the bound subsystems.
	 */
	root->flags |= CGRP_ROOT_SUBSYS_BOUND;

	return 0;

out_put:
	/* drop any module references taken before the failure */
	for_each_subsys(ss, i)
		if (pinned & (1 << i))
			module_put(ss->module);
	return ret;
}
1117
/* show the mount options of a cgroup hierarchy (e.g. for /proc/mounts) */
static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
{
	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
	struct cgroup_subsys *ss;

	/* root_mutex keeps the subsys list and paths stable while printing */
	mutex_lock(&cgroup_root_mutex);
	for_each_root_subsys(root, ss)
		seq_printf(seq, ",%s", ss->name);
	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
		seq_puts(seq, ",sane_behavior");
	if (root->flags & CGRP_ROOT_NOPREFIX)
		seq_puts(seq, ",noprefix");
	if (root->flags & CGRP_ROOT_XATTR)
		seq_puts(seq, ",xattr");
	if (strlen(root->release_agent_path))
		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
		seq_puts(seq, ",clone_children");
	if (strlen(root->name))
		seq_printf(seq, ",name=%s", root->name);
	mutex_unlock(&cgroup_root_mutex);
	return 0;
}
1141
/* parsed mount options, filled in by parse_cgroupfs_options() */
struct cgroup_sb_opts {
	unsigned long subsys_mask;
	unsigned long flags;
	char *release_agent;
	bool cpuset_clone_children;
	char *name;
	/* User explicitly requested an empty subsystem set ("none") */
	bool none;

	/* A root allocated for this mount, used by cgroup_set_super() */
	struct cgroupfs_root *new_root;

};
1154
1155
1156
1157
1158
1159
1160
/*
 * Convert a hierarchy specifier into a bitmask of subsystems and flags.
 * Call with cgroup_mutex held to protect the cgroup_subsys[] array.
 * Returns 0 on success or -errno; on success @opts owns any allocated
 * strings (release_agent, name) which the caller must kfree().
 */
static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
{
	char *token, *o = data;
	bool all_ss = false, one_ss = false;
	unsigned long mask = (unsigned long)-1;
	struct cgroup_subsys *ss;
	int i;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));

#ifdef CONFIG_CPUSETS
	/* mask of subsystems that conflict with "noprefix" (see below) */
	mask = ~(1UL << cpuset_subsys_id);
#endif

	memset(opts, 0, sizeof(*opts));

	while ((token = strsep(&o, ",")) != NULL) {
		if (!*token)
			return -EINVAL;
		if (!strcmp(token, "none")) {
			/* Explicitly have no subsystems */
			opts->none = true;
			continue;
		}
		if (!strcmp(token, "all")) {
			/* Mutually exclusive option 'all' + subsystem name */
			if (one_ss)
				return -EINVAL;
			all_ss = true;
			continue;
		}
		if (!strcmp(token, "__DEVEL__sane_behavior")) {
			opts->flags |= CGRP_ROOT_SANE_BEHAVIOR;
			continue;
		}
		if (!strcmp(token, "noprefix")) {
			opts->flags |= CGRP_ROOT_NOPREFIX;
			continue;
		}
		if (!strcmp(token, "clone_children")) {
			opts->cpuset_clone_children = true;
			continue;
		}
		if (!strcmp(token, "xattr")) {
			opts->flags |= CGRP_ROOT_XATTR;
			continue;
		}
		if (!strncmp(token, "release_agent=", 14)) {
			/* Specifying two release agents is forbidden */
			if (opts->release_agent)
				return -EINVAL;
			opts->release_agent =
				kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
			if (!opts->release_agent)
				return -ENOMEM;
			continue;
		}
		if (!strncmp(token, "name=", 5)) {
			const char *name = token + 5;
			/* Can't specify an empty name */
			if (!strlen(name))
				return -EINVAL;
			/* Must match [\w.-]+ */
			for (i = 0; i < strlen(name); i++) {
				char c = name[i];
				if (isalnum(c))
					continue;
				if ((c == '.') || (c == '-') || (c == '_'))
					continue;
				return -EINVAL;
			}
			/* Specifying two names is forbidden */
			if (opts->name)
				return -EINVAL;
			opts->name = kstrndup(name,
					      MAX_CGROUP_ROOT_NAMELEN - 1,
					      GFP_KERNEL);
			if (!opts->name)
				return -ENOMEM;

			continue;
		}

		/* anything else must name a subsystem */
		for_each_subsys(ss, i) {
			if (strcmp(token, ss->name))
				continue;
			if (ss->disabled)
				continue;

			/* Mutually exclusive option 'all' + subsystem name */
			if (all_ss)
				return -EINVAL;
			set_bit(i, &opts->subsys_mask);
			one_ss = true;

			break;
		}
		if (i == CGROUP_SUBSYS_COUNT)
			return -ENOENT;
	}

	/*
	 * If the 'all' option was specified select all the subsystems,
	 * otherwise if 'none', 'name=' and a subsystem name options
	 * were not specified, let's default the same behavior as the
	 * 'all' option.
	 */
	if (all_ss || (!one_ss && !opts->none && !opts->name))
		for_each_subsys(ss, i)
			if (!ss->disabled)
				set_bit(i, &opts->subsys_mask);

	/* Consistency checks */

	if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
		pr_warning("cgroup: sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");

		if (opts->flags & CGRP_ROOT_NOPREFIX) {
			pr_err("cgroup: sane_behavior: noprefix is not allowed\n");
			return -EINVAL;
		}

		if (opts->cpuset_clone_children) {
			pr_err("cgroup: sane_behavior: clone_children is not allowed\n");
			return -EINVAL;
		}
	}

	/*
	 * Option noprefix was introduced just for backward compatibility
	 * with the old cpuset, so we allow noprefix only if mounting just
	 * the cpuset subsystem.
	 */
	if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
		return -EINVAL;

	/* Can't specify "none" and some subsystems */
	if (opts->subsys_mask && opts->none)
		return -EINVAL;

	/*
	 * We either have to specify by name or by subsystems.  (So all
	 * empty hierarchies must have a name.)
	 */
	if (!opts->subsys_mask && !opts->name)
		return -EINVAL;

	return 0;
}
1310
/* ->remount_fs: re-parse options and rebind subsystems on an empty root */
static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{
	int ret = 0;
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	struct cgroup_sb_opts opts;
	unsigned long added_mask, removed_mask;

	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
		pr_err("cgroup: sane_behavior: remount is not allowed\n");
		return -EINVAL;
	}

	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
	mutex_lock(&cgroup_mutex);
	mutex_lock(&cgroup_root_mutex);

	/* See what subsystems are wanted */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

	if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
		pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
			   task_tgid_nr(current), current->comm);

	added_mask = opts.subsys_mask & ~root->subsys_mask;
	removed_mask = root->subsys_mask & ~opts.subsys_mask;

	/* Don't allow flags or name to change at remount */
	if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
	    (opts.name && strcmp(opts.name, root->name))) {
		pr_err("cgroup: option or name mismatch, new: 0x%lx \"%s\", old: 0x%lx \"%s\"\n",
		       opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "",
		       root->flags & CGRP_ROOT_OPTION_MASK, root->name);
		ret = -EINVAL;
		goto out_unlock;
	}

	/* remounting is not allowed for populated hierarchies */
	if (root->number_of_cgroups > 1) {
		ret = -EBUSY;
		goto out_unlock;
	}

	ret = rebind_subsystems(root, added_mask, removed_mask);
	if (ret)
		goto out_unlock;

	if (opts.release_agent)
		strcpy(root->release_agent_path, opts.release_agent);
 out_unlock:
	kfree(opts.release_agent);
	kfree(opts.name);
	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
	return ret;
}
1370
/* super_operations for cgroup hierarchy superblocks */
static const struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
1377
/* initialize the embedded lists and locks of a freshly allocated cgroup */
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	INIT_LIST_HEAD(&cgrp->sibling);
	INIT_LIST_HEAD(&cgrp->children);
	INIT_LIST_HEAD(&cgrp->files);
	INIT_LIST_HEAD(&cgrp->cset_links);
	INIT_LIST_HEAD(&cgrp->release_list);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	/* the dummy css always points back at its cgroup */
	cgrp->dummy_css.cgroup = cgrp;
	INIT_LIST_HEAD(&cgrp->event_list);
	spin_lock_init(&cgrp->event_list_lock);
	simple_xattrs_init(&cgrp->xattrs);
}
1392
/* initialize a hierarchy root and its embedded top cgroup */
static void init_cgroup_root(struct cgroupfs_root *root)
{
	struct cgroup *cgrp = &root->top_cgroup;

	INIT_LIST_HEAD(&root->subsys_list);
	INIT_LIST_HEAD(&root->root_list);
	/* the top cgroup always exists */
	root->number_of_cgroups = 1;
	cgrp->root = root;
	RCU_INIT_POINTER(cgrp->name, &root_cgroup_name);
	init_cgroup_housekeeping(cgrp);
	idr_init(&root->cgroup_idr);
}
1405
/**
 * cgroup_init_root_id - allocate hierarchy id
 * @root: root of the hierarchy
 * @start: start of the id range, inclusive
 * @end: end of the id range, exclusive (%0 presumably means "no upper
 *	 limit" per idr_alloc_cyclic() convention — confirm against idr docs)
 *
 * Cyclically allocates @root->hierarchy_id.  Returns 0 on success,
 * -errno on failure.
 */
static int cgroup_init_root_id(struct cgroupfs_root *root, int start, int end)
{
	int id;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&cgroup_root_mutex);

	id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, start, end,
			      GFP_KERNEL);
	if (id < 0)
		return id;

	root->hierarchy_id = id;
	return 0;
}
1421
/* release @root's hierarchy id, if one was allocated */
static void cgroup_exit_root_id(struct cgroupfs_root *root)
{
	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&cgroup_root_mutex);

	if (root->hierarchy_id) {
		idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
		root->hierarchy_id = 0;
	}
}
1432
/* sget() match callback: does an existing sb satisfy the mount options? */
static int cgroup_test_super(struct super_block *sb, void *data)
{
	struct cgroup_sb_opts *opts = data;
	struct cgroupfs_root *root = sb->s_fs_info;

	/* If we asked for a name then it must match */
	if (opts->name && strcmp(opts->name, root->name))
		return 0;

	/*
	 * If we asked for subsystems (or explicitly for no
	 * subsystems) then they must match.
	 */
	if ((opts->subsys_mask || opts->none)
	    && (opts->subsys_mask != root->subsys_mask))
		return 0;

	return 1;
}
1452
/*
 * Allocate and initialize a root from parsed mount options.  Returns
 * %NULL when the options only name an existing hierarchy (no subsystems
 * and no "none"), or ERR_PTR(-ENOMEM) on allocation failure.
 */
static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
{
	struct cgroupfs_root *root;

	if (!opts->subsys_mask && !opts->none)
		return NULL;

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return ERR_PTR(-ENOMEM);

	init_cgroup_root(root);

	/*
	 * We need to set @root->subsys_mask now so that @root can be
	 * matched by cgroup_test_super() before it finishes
	 * initialization; otherwise, competing mounts with the same
	 * options may try to bind the same subsystems instead of waiting
	 * for the first one leading to unexpected mount behavior.
	 */
	root->subsys_mask = opts->subsys_mask;
	root->flags = opts->flags;
	if (opts->release_agent)
		strcpy(root->release_agent_path, opts->release_agent);
	if (opts->name)
		strcpy(root->name, opts->name);
	if (opts->cpuset_clone_children)
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags);
	return root;
}
1484
/* free a hierarchy root; tolerates %NULL */
static void cgroup_free_root(struct cgroupfs_root *root)
{
	if (root) {
		/* hierarchy ID should already have been released */
		WARN_ON_ONCE(root->hierarchy_id);

		idr_destroy(&root->cgroup_idr);
		kfree(root);
	}
}
1495
/* sget() fill callback: attach @opts->new_root to a fresh superblock */
static int cgroup_set_super(struct super_block *sb, void *data)
{
	int ret;
	struct cgroup_sb_opts *opts = data;

	/* If we don't have a new root, we can't set up a new sb */
	if (!opts->new_root)
		return -EINVAL;

	BUG_ON(!opts->subsys_mask && !opts->none);

	ret = set_anon_super(sb, NULL);
	if (ret)
		return ret;

	sb->s_fs_info = opts->new_root;
	opts->new_root->sb = sb;

	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = CGROUP_SUPER_MAGIC;
	sb->s_op = &cgroup_ops;

	return 0;
}
1521
/* allocate the root inode/dentry for a new cgroup superblock */
static int cgroup_get_rootdir(struct super_block *sb)
{
	static const struct dentry_operations cgroup_dops = {
		.d_iput = cgroup_diput,
		.d_delete = cgroup_delete,
	};

	struct inode *inode =
		cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);

	if (!inode)
		return -ENOMEM;

	inode->i_fop = &simple_dir_operations;
	inode->i_op = &cgroup_dir_inode_operations;
	/* directories start off with i_nlink == 2 (for "." entry) */
	inc_nlink(inode);
	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		return -ENOMEM;
	/* for everything else we want ->d_op set */
	sb->s_d_op = &cgroup_dops;
	return 0;
}
1546
1547static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1548 int flags, const char *unused_dev_name,
1549 void *data)
1550{
1551 struct cgroup_sb_opts opts;
1552 struct cgroupfs_root *root;
1553 int ret = 0;
1554 struct super_block *sb;
1555 struct cgroupfs_root *new_root;
1556 struct list_head tmp_links;
1557 struct inode *inode;
1558 const struct cred *cred;
1559
1560
1561 mutex_lock(&cgroup_mutex);
1562 ret = parse_cgroupfs_options(data, &opts);
1563 mutex_unlock(&cgroup_mutex);
1564 if (ret)
1565 goto out_err;
1566
1567
1568
1569
1570
1571 new_root = cgroup_root_from_opts(&opts);
1572 if (IS_ERR(new_root)) {
1573 ret = PTR_ERR(new_root);
1574 goto out_err;
1575 }
1576 opts.new_root = new_root;
1577
1578
1579 sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
1580 if (IS_ERR(sb)) {
1581 ret = PTR_ERR(sb);
1582 cgroup_free_root(opts.new_root);
1583 goto out_err;
1584 }
1585
1586 root = sb->s_fs_info;
1587 BUG_ON(!root);
1588 if (root == opts.new_root) {
1589
1590 struct cgroup *root_cgrp = &root->top_cgroup;
1591 struct cgroupfs_root *existing_root;
1592 int i;
1593 struct css_set *cset;
1594
1595 BUG_ON(sb->s_root != NULL);
1596
1597 ret = cgroup_get_rootdir(sb);
1598 if (ret)
1599 goto drop_new_super;
1600 inode = sb->s_root->d_inode;
1601
1602 mutex_lock(&inode->i_mutex);
1603 mutex_lock(&cgroup_mutex);
1604 mutex_lock(&cgroup_root_mutex);
1605
1606 root_cgrp->id = idr_alloc(&root->cgroup_idr, root_cgrp,
1607 0, 1, GFP_KERNEL);
1608 if (root_cgrp->id < 0)
1609 goto unlock_drop;
1610
1611
1612 ret = -EBUSY;
1613 if (strlen(root->name))
1614 for_each_active_root(existing_root)
1615 if (!strcmp(existing_root->name, root->name))
1616 goto unlock_drop;
1617
1618
1619
1620
1621
1622
1623
1624
1625 ret = allocate_cgrp_cset_links(css_set_count, &tmp_links);
1626 if (ret)
1627 goto unlock_drop;
1628
1629
1630 ret = cgroup_init_root_id(root, 2, 0);
1631 if (ret)
1632 goto unlock_drop;
1633
1634 sb->s_root->d_fsdata = root_cgrp;
1635 root_cgrp->dentry = sb->s_root;
1636
1637
1638
1639
1640
1641
1642
1643
1644 cred = override_creds(&init_cred);
1645
1646 ret = cgroup_addrm_files(root_cgrp, cgroup_base_files, true);
1647 if (ret)
1648 goto rm_base_files;
1649
1650 ret = rebind_subsystems(root, root->subsys_mask, 0);
1651 if (ret)
1652 goto rm_base_files;
1653
1654 revert_creds(cred);
1655
1656
1657
1658
1659
1660
1661
1662 list_add(&root->root_list, &cgroup_roots);
1663 cgroup_root_count++;
1664
1665
1666
1667 write_lock(&css_set_lock);
1668 hash_for_each(css_set_table, i, cset, hlist)
1669 link_css_set(&tmp_links, cset, root_cgrp);
1670 write_unlock(&css_set_lock);
1671
1672 free_cgrp_cset_links(&tmp_links);
1673
1674 BUG_ON(!list_empty(&root_cgrp->children));
1675 BUG_ON(root->number_of_cgroups != 1);
1676
1677 mutex_unlock(&cgroup_root_mutex);
1678 mutex_unlock(&cgroup_mutex);
1679 mutex_unlock(&inode->i_mutex);
1680 } else {
1681
1682
1683
1684
1685 cgroup_free_root(opts.new_root);
1686
1687 if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) {
1688 if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
1689 pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
1690 ret = -EINVAL;
1691 goto drop_new_super;
1692 } else {
1693 pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
1694 }
1695 }
1696 }
1697
1698 kfree(opts.release_agent);
1699 kfree(opts.name);
1700 return dget(sb->s_root);
1701
1702 rm_base_files:
1703 free_cgrp_cset_links(&tmp_links);
1704 cgroup_addrm_files(&root->top_cgroup, cgroup_base_files, false);
1705 revert_creds(cred);
1706 unlock_drop:
1707 cgroup_exit_root_id(root);
1708 mutex_unlock(&cgroup_root_mutex);
1709 mutex_unlock(&cgroup_mutex);
1710 mutex_unlock(&inode->i_mutex);
1711 drop_new_super:
1712 deactivate_locked_super(sb);
1713 out_err:
1714 kfree(opts.release_agent);
1715 kfree(opts.name);
1716 return ERR_PTR(ret);
1717}
1718
/*
 * Tear down a hierarchy when the last mount goes away.  Unbinds all
 * subsystems back to the dummy hierarchy, unlinks every css_set from the
 * top cgroup, removes the root from the active list and frees it.
 */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	struct cgrp_cset_link *link, *tmp_link;
	int ret;

	BUG_ON(!root);

	/* only an empty hierarchy (just the top cgroup) may be killed */
	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));

	/* same lock order as cgroup_mount(): i_mutex, cgroup, root */
	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
	mutex_lock(&cgroup_mutex);
	mutex_lock(&cgroup_root_mutex);

	/* Rebind all subsystems back to the default hierarchy */
	if (root->flags & CGRP_ROOT_SUBSYS_BOUND) {
		ret = rebind_subsystems(root, 0, root->subsys_mask);
		/* Shouldn't be able to fail ... */
		BUG_ON(ret);
	}

	/*
	 * Release all the links from cset_links to this hierarchy's
	 * root cgroup
	 */
	write_lock(&css_set_lock);

	list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		/* still on the active-roots list; unhook and drop the count */
		list_del(&root->root_list);
		cgroup_root_count--;
	}

	cgroup_exit_root_id(root);

	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	simple_xattrs_free(&cgrp->xattrs);

	kill_litter_super(sb);
	cgroup_free_root(root);
}
1770
/* filesystem type for "cgroup" mounts; see cgroup_mount()/cgroup_kill_sb() */
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
};
1776
1777static struct kobject *cgroup_kobj;
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
/**
 * cgroup_path - build the path of a cgroup into a caller-supplied buffer
 * @cgrp: the cgroup whose path is wanted
 * @buf: output buffer
 * @buflen: size of @buf
 *
 * The path is assembled right-to-left starting at the end of @buf and
 * then memmove()d to the front.  Returns 0 on success, -ENAMETOOLONG
 * when @buf is too small.  Holds rcu_read_lock() internally so the
 * cgroup names stay valid while walking ->parent.
 */
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
	int ret = -ENAMETOOLONG;
	char *start;

	/* the root cgroup's path is simply "/" */
	if (!cgrp->parent) {
		if (strlcpy(buf, "/", buflen) >= buflen)
			return -ENAMETOOLONG;
		return 0;
	}

	/* start writing at the terminating NUL, working backwards */
	start = buf + buflen - 1;
	*start = '\0';

	rcu_read_lock();
	do {
		const char *name = cgroup_name(cgrp);
		int len;

		len = strlen(name);
		if ((start -= len) < buf)
			goto out;	/* ran out of room: -ENAMETOOLONG */
		memcpy(start, name, len);

		if (--start < buf)
			goto out;
		*start = '/';

		cgrp = cgrp->parent;
	} while (cgrp->parent);		/* the root itself contributes no name */
	ret = 0;
	/* shift the assembled path (incl. NUL) to the front of the buffer */
	memmove(buf, start, buf + buflen - start);
out:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(cgroup_path);
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
/**
 * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
 * @task: target task
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
 * Determines @task's cgroup on the first (hierarchy ID 1) cgroup
 * hierarchy and copies its path into @buf.  If no hierarchy with ID >= 1
 * exists, "/" is written instead.
 */
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
	struct cgroupfs_root *root;
	struct cgroup *cgrp;
	int hierarchy_id = 1, ret = 0;

	/* need room for at least "/" plus NUL */
	if (buflen < 2)
		return -ENAMETOOLONG;

	mutex_lock(&cgroup_mutex);

	/* first hierarchy with ID >= 1, if any */
	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);

	if (root) {
		cgrp = task_cgroup_from_root(task, root);
		ret = cgroup_path(cgrp, buf, buflen);
	} else {
		/* if no hierarchy exists, everyone is in "/" */
		memcpy(buf, "/", 2);
	}

	mutex_unlock(&cgroup_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);
1868
1869
1870
1871
/*
 * A single task together with the cgroup it is migrating from and the
 * css_set it will be attached to; used by cgroup_attach_task().
 */
struct task_and_cgroup {
	struct task_struct *task;
	struct cgroup *cgrp;	/* source cgroup on the relevant hierarchy */
	struct css_set *cset;	/* destination css_set; filled in step 2 */
};

/*
 * The set of tasks involved in one attach operation, iterated via the
 * cgroup_taskset_*() accessors below.  A single-task attach stores its
 * entry in @single; multi-task attaches use the @tc_array flex array.
 */
struct cgroup_taskset {
	struct task_and_cgroup single;
	struct flex_array *tc_array;
	int tc_array_len;
	int idx;			/* iteration cursor into @tc_array */
	struct cgroup *cur_cgrp;	/* source cgroup of the current task */
};
1885
1886
1887
1888
1889
1890
1891
1892struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
1893{
1894 if (tset->tc_array) {
1895 tset->idx = 0;
1896 return cgroup_taskset_next(tset);
1897 } else {
1898 tset->cur_cgrp = tset->single.cgrp;
1899 return tset->single.task;
1900 }
1901}
1902EXPORT_SYMBOL_GPL(cgroup_taskset_first);
1903
1904
1905
1906
1907
1908
1909
1910
1911struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
1912{
1913 struct task_and_cgroup *tc;
1914
1915 if (!tset->tc_array || tset->idx >= tset->tc_array_len)
1916 return NULL;
1917
1918 tc = flex_array_get(tset->tc_array, tset->idx++);
1919 tset->cur_cgrp = tc->cgrp;
1920 return tc->task;
1921}
1922EXPORT_SYMBOL_GPL(cgroup_taskset_next);
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
/**
 * cgroup_taskset_cur_css - return the css for the current task's source cgroup
 * @tset: taskset of interest
 * @subsys_id: the ID of the target subsystem
 *
 * Returns @subsys_id's css of the cgroup pointed to by the current
 * iteration position of @tset (set by the first/next accessors above).
 */
struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset,
						   int subsys_id)
{
	return cgroup_css(tset->cur_cgrp, cgroup_subsys[subsys_id]);
}
EXPORT_SYMBOL_GPL(cgroup_taskset_cur_css);
1939
1940
1941
1942
1943
1944int cgroup_taskset_size(struct cgroup_taskset *tset)
1945{
1946 return tset->tc_array ? tset->tc_array_len : 1;
1947}
1948EXPORT_SYMBOL_GPL(cgroup_taskset_size);
1949
1950
1951
1952
1953
1954
1955
/*
 * cgroup_task_migrate - move a task from one css_set to another.
 *
 * Must be called with cgroup_mutex and the task's threadgroup locked;
 * @new_cset's reference is consumed by this function.
 */
static void cgroup_task_migrate(struct cgroup *old_cgrp,
				struct task_struct *tsk,
				struct css_set *new_cset)
{
	struct css_set *old_cset;

	/*
	 * We are synchronized through threadgroup_lock() against PF_EXITING
	 * setting such that we can't race against cgroup_exit() changing
	 * the css_set to init_css_set and dropping the old one.
	 */
	WARN_ON_ONCE(tsk->flags & PF_EXITING);
	old_cset = task_css_set(tsk);

	task_lock(tsk);
	rcu_assign_pointer(tsk->cgroups, new_cset);
	task_unlock(tsk);

	/* Update the css_set task list if it's in use */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list))
		list_move(&tsk->cg_list, &new_cset->tasks);
	write_unlock(&css_set_lock);

	/*
	 * We just gained a reference on old_cset by taking it from the
	 * task; as trading it for new_cset is protected by cgroup_mutex,
	 * we're safe to drop it here.  Mark @old_cgrp releasable so the
	 * release-agent machinery can notice if it became empty.
	 */
	set_bit(CGRP_RELEASABLE, &old_cgrp->flags);
	put_css_set(old_cset);
}
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
/**
 * cgroup_attach_task - attach a task (or a whole threadgroup) to a cgroup
 * @cgrp: the cgroup to attach to
 * @tsk: the task or the leader of the threadgroup to be attached
 * @threadgroup: attach the whole threadgroup?
 *
 * Caller must hold cgroup_mutex and the threadgroup lock of @tsk.
 * Proceeds in steps: collect tasks, can_attach checks, css_set
 * allocation, migration (the commit point), attach callbacks.
 */
static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
			      bool threadgroup)
{
	int retval, i, group_size;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	struct cgroupfs_root *root = cgrp->root;
	/* threadgroup list cursor and array */
	struct task_struct *leader = tsk;
	struct task_and_cgroup *tc;
	struct flex_array *group;
	struct cgroup_taskset tset = { };

	/*
	 * Pre-size the array.  The threadgroup lock is held so the thread
	 * count cannot grow underneath us (new threads would block in
	 * fork); it can shrink, leaving unused slots.
	 */
	if (threadgroup)
		group_size = get_nr_threads(tsk);
	else
		group_size = 1;
	/* flex_array copes with very large thread groups */
	group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
	if (!group)
		return -ENOMEM;
	/* pre-allocate so that the flex_array_put() below cannot fail */
	retval = flex_array_prealloc(group, 0, group_size, GFP_KERNEL);
	if (retval)
		goto out_free_group_list;

	i = 0;
	/*
	 * Collect the tasks to move; RCU protects the thread-list walk,
	 * the threadgroup lock keeps the group membership stable.
	 */
	rcu_read_lock();
	do {
		struct task_and_cgroup ent;

		/* skip threads already on their way out */
		if (tsk->flags & PF_EXITING)
			goto next;

		/* as per above, nr_threads may decrease, but not increase */
		BUG_ON(i >= group_size);
		ent.task = tsk;
		ent.cgrp = task_cgroup_from_root(tsk, root);
		/* nothing to do if this task is already in the cgroup */
		if (ent.cgrp == cgrp)
			goto next;
		/*
		 * GFP_ATOMIC has no effect here thanks to the prealloc
		 * above, but documents that we mustn't sleep under RCU.
		 */
		retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
		BUG_ON(retval != 0);
		i++;
	next:
		if (!threadgroup)
			break;
	} while_each_thread(leader, tsk);
	rcu_read_unlock();
	/* remember the actual number of candidates collected */
	group_size = i;
	tset.tc_array = group;
	tset.tc_array_len = group_size;

	/* subsystem methods shouldn't be called if nothing is migrating */
	retval = 0;
	if (!group_size)
		goto out_free_group_list;

	/*
	 * step 1: ask each bound subsystem whether the attach is allowed.
	 */
	for_each_root_subsys(root, ss) {
		struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);

		if (ss->can_attach) {
			retval = ss->can_attach(css, &tset);
			if (retval) {
				failed_ss = ss;
				goto out_cancel_attach;
			}
		}
	}

	/*
	 * step 2: make sure css_sets exist for all tasks to be migrated.
	 * find_css_set() allocates a new one when necessary.
	 */
	for (i = 0; i < group_size; i++) {
		struct css_set *old_cset;

		tc = flex_array_get(group, i);
		old_cset = task_css_set(tc->task);
		tc->cset = find_css_set(old_cset, cgrp);
		if (!tc->cset) {
			retval = -ENOMEM;
			goto out_put_css_set_refs;
		}
	}

	/*
	 * step 3: with css_sets guaranteed, move all tasks over.  There
	 * are no failure cases after this point — it is the commit.
	 */
	for (i = 0; i < group_size; i++) {
		tc = flex_array_get(group, i);
		cgroup_task_migrate(tc->cgrp, tc->task, tc->cset);
	}

	/*
	 * step 4: let the subsystems know the migration happened.
	 */
	for_each_root_subsys(root, ss) {
		struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);

		if (ss->attach)
			ss->attach(css, &tset);
	}

	/*
	 * step 5: success!
	 */
	retval = 0;
out_put_css_set_refs:
	if (retval) {
		/* drop the css_set refs taken in step 2 */
		for (i = 0; i < group_size; i++) {
			tc = flex_array_get(group, i);
			if (!tc->cset)
				break;
			put_css_set(tc->cset);
		}
	}
out_cancel_attach:
	if (retval) {
		/* undo can_attach on subsystems that had already approved */
		for_each_root_subsys(root, ss) {
			struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);

			if (ss == failed_ss)
				break;
			if (ss->cancel_attach)
				ss->cancel_attach(css, &tset);
		}
	}
out_free_group_list:
	flex_array_free(group);
	return retval;
}
2153
2154
2155
2156
2157
2158
/*
 * Find the task matching @pid (or use current if @pid is 0), perform
 * permission checks and attach it — or its whole threadgroup — to @cgrp.
 * cgroup_lock_live_group() takes cgroup_mutex, which is dropped on exit.
 */
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
{
	struct task_struct *tsk;
	const struct cred *cred = current_cred(), *tcred;
	int ret;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

retry_find_task:
	rcu_read_lock();
	if (pid) {
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
			rcu_read_unlock();
			ret= -ESRCH;
			goto out_unlock_cgroup;
		}
		/*
		 * even if we're attaching all tasks in the thread group,
		 * we only need to check permissions on one of them.
		 */
		tcred = __task_cred(tsk);
		if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
		    !uid_eq(cred->euid, tcred->uid) &&
		    !uid_eq(cred->euid, tcred->suid)) {
			rcu_read_unlock();
			ret = -EACCES;
			goto out_unlock_cgroup;
		}
	} else
		tsk = current;

	if (threadgroup)
		tsk = tsk->group_leader;

	/*
	 * kthreadd and workqueue workers (PF_NO_SETAFFINITY) may not be
	 * moved: they could get trapped in a cpuset, or an RT worker may
	 * be born in a cgroup with no rt_runtime allocated.  Just say no.
	 */
	if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
		ret = -EINVAL;
		rcu_read_unlock();
		goto out_unlock_cgroup;
	}

	get_task_struct(tsk);
	rcu_read_unlock();

	threadgroup_lock(tsk);
	if (threadgroup) {
		if (!thread_group_leader(tsk)) {
			/*
			 * a race with de_thread from another thread's exec()
			 * may strip us of our leadership; if this happens,
			 * there is no choice but to throw this task away and
			 * try again; this is
			 * "double-double-toil-and-trouble-check locking".
			 */
			threadgroup_unlock(tsk);
			put_task_struct(tsk);
			goto retry_find_task;
		}
	}

	ret = cgroup_attach_task(cgrp, tsk, threadgroup);

	threadgroup_unlock(tsk);

	/* balance the get_task_struct() above */
	put_task_struct(tsk);
out_unlock_cgroup:
	mutex_unlock(&cgroup_mutex);
	return ret;
}
2234
2235
2236
2237
2238
2239
/**
 * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
 * @from: attach to all cgroups of a given task
 * @tsk: the task to be attached
 *
 * On every active hierarchy, moves @tsk into the cgroup that @from
 * currently occupies.  Stops at the first failure.
 */
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
	struct cgroupfs_root *root;
	int retval = 0;

	mutex_lock(&cgroup_mutex);
	for_each_active_root(root) {
		struct cgroup *from_cgrp = task_cgroup_from_root(from, root);

		retval = cgroup_attach_task(from_cgrp, tsk, false);
		if (retval)
			break;
	}
	mutex_unlock(&cgroup_mutex);

	return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
2258
/* "tasks" file write handler: attach a single task by pid */
static int cgroup_tasks_write(struct cgroup_subsys_state *css,
			      struct cftype *cft, u64 pid)
{
	return attach_task_by_pid(css->cgroup, pid, false);
}
2264
/* "cgroup.procs" file write handler: attach a whole threadgroup by tgid */
static int cgroup_procs_write(struct cgroup_subsys_state *css,
			      struct cftype *cft, u64 tgid)
{
	return attach_task_by_pid(css->cgroup, tgid, true);
}
2270
/*
 * Update the hierarchy's release-agent path.  cgroup_lock_live_group()
 * acquires cgroup_mutex (released at the end); cgroup_root_mutex
 * additionally serializes access to release_agent_path itself.
 */
static int cgroup_release_agent_write(struct cgroup_subsys_state *css,
				      struct cftype *cft, const char *buffer)
{
	/* the destination buffer must be able to hold any accepted path */
	BUILD_BUG_ON(sizeof(css->cgroup->root->release_agent_path) < PATH_MAX);
	if (strlen(buffer) >= PATH_MAX)
		return -EINVAL;
	if (!cgroup_lock_live_group(css->cgroup))
		return -ENODEV;
	mutex_lock(&cgroup_root_mutex);
	strcpy(css->cgroup->root->release_agent_path, buffer);
	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);
	return 0;
}
2285
/* Show the hierarchy's release-agent path; cgroup_lock_live_group()
 * takes cgroup_mutex, which is dropped before returning. */
static int cgroup_release_agent_show(struct cgroup_subsys_state *css,
				     struct cftype *cft, struct seq_file *seq)
{
	struct cgroup *cgrp = css->cgroup;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;
	seq_puts(seq, cgrp->root->release_agent_path);
	seq_putc(seq, '\n');
	mutex_unlock(&cgroup_mutex);
	return 0;
}
2298
/* "cgroup.sane_behavior" read handler: 0 or 1 depending on mount flags */
static int cgroup_sane_behavior_show(struct cgroup_subsys_state *css,
				     struct cftype *cft, struct seq_file *seq)
{
	seq_printf(seq, "%d\n", cgroup_sane_behavior(css->cgroup));
	return 0;
}
2305
2306
2307#define CGROUP_LOCAL_BUFFER_SIZE 64
2308
/*
 * Parse a user-supplied decimal/hex number and hand it to the cftype's
 * write_u64 or write_s64 callback.  Returns the number of bytes
 * consumed on success, a negative errno otherwise.
 */
static ssize_t cgroup_write_X64(struct cgroup_subsys_state *css,
				struct cftype *cft, struct file *file,
				const char __user *userbuf, size_t nbytes,
				loff_t *unused_ppos)
{
	char buffer[CGROUP_LOCAL_BUFFER_SIZE];
	int retval = 0;
	char *end;

	if (!nbytes)
		return -EINVAL;
	if (nbytes >= sizeof(buffer))
		return -E2BIG;
	if (copy_from_user(buffer, userbuf, nbytes))
		return -EFAULT;

	buffer[nbytes] = 0;	/* NUL-terminate before parsing */
	if (cft->write_u64) {
		u64 val = simple_strtoull(strstrip(buffer), &end, 0);
		if (*end)
			return -EINVAL;	/* trailing garbage */
		retval = cft->write_u64(css, cft, val);
	} else {
		s64 val = simple_strtoll(strstrip(buffer), &end, 0);
		if (*end)
			return -EINVAL;
		retval = cft->write_s64(css, cft, val);
	}
	if (!retval)
		retval = nbytes;
	return retval;
}
2341
/*
 * Copy a user-supplied string (bounded by cft->max_write_len, default
 * CGROUP_LOCAL_BUFFER_SIZE-1), strip surrounding whitespace and pass it
 * to the cftype's write_string callback.  Falls back to a kmalloc'd
 * buffer when the input doesn't fit on the stack.
 */
static ssize_t cgroup_write_string(struct cgroup_subsys_state *css,
				   struct cftype *cft, struct file *file,
				   const char __user *userbuf, size_t nbytes,
				   loff_t *unused_ppos)
{
	char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
	int retval = 0;
	size_t max_bytes = cft->max_write_len;
	char *buffer = local_buffer;

	if (!max_bytes)
		max_bytes = sizeof(local_buffer) - 1;
	if (nbytes >= max_bytes)
		return -E2BIG;
	/* Allocate a dynamic buffer if we need one */
	if (nbytes >= sizeof(local_buffer)) {
		buffer = kmalloc(nbytes + 1, GFP_KERNEL);
		if (buffer == NULL)
			return -ENOMEM;
	}
	if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
		retval = -EFAULT;
		goto out;
	}

	buffer[nbytes] = 0;	/* NUL-terminate */
	retval = cft->write_string(css, cft, strstrip(buffer));
	if (!retval)
		retval = nbytes;
out:
	if (buffer != local_buffer)
		kfree(buffer);
	return retval;
}
2376
/*
 * Top-level write dispatcher for cgroup control files.  Tries the
 * cftype callbacks in priority order: raw write, numeric write,
 * string write, then trigger.
 */
static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
				 size_t nbytes, loff_t *ppos)
{
	struct cfent *cfe = __d_cfe(file->f_dentry);
	struct cftype *cft = __d_cft(file->f_dentry);
	struct cgroup_subsys_state *css = cfe->css;

	if (cft->write)
		return cft->write(css, cft, file, buf, nbytes, ppos);
	if (cft->write_u64 || cft->write_s64)
		return cgroup_write_X64(css, cft, file, buf, nbytes, ppos);
	if (cft->write_string)
		return cgroup_write_string(css, cft, file, buf, nbytes, ppos);
	if (cft->trigger) {
		/* a trigger takes no payload; any write fires it */
		int ret = cft->trigger(css, (unsigned int)cft->private);
		return ret ? ret : nbytes;
	}
	return -EINVAL;
}
2396
2397static ssize_t cgroup_read_u64(struct cgroup_subsys_state *css,
2398 struct cftype *cft, struct file *file,
2399 char __user *buf, size_t nbytes, loff_t *ppos)
2400{
2401 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2402 u64 val = cft->read_u64(css, cft);
2403 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
2404
2405 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2406}
2407
2408static ssize_t cgroup_read_s64(struct cgroup_subsys_state *css,
2409 struct cftype *cft, struct file *file,
2410 char __user *buf, size_t nbytes, loff_t *ppos)
2411{
2412 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2413 s64 val = cft->read_s64(css, cft);
2414 int len = sprintf(tmp, "%lld\n", (long long) val);
2415
2416 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2417}
2418
/*
 * Top-level read dispatcher for cgroup control files (non-seq_file
 * variants); seq_file-backed files go through cgroup_seqfile_operations.
 */
static ssize_t cgroup_file_read(struct file *file, char __user *buf,
				size_t nbytes, loff_t *ppos)
{
	struct cfent *cfe = __d_cfe(file->f_dentry);
	struct cftype *cft = __d_cft(file->f_dentry);
	struct cgroup_subsys_state *css = cfe->css;

	if (cft->read)
		return cft->read(css, cft, file, buf, nbytes, ppos);
	if (cft->read_u64)
		return cgroup_read_u64(css, cft, file, buf, nbytes, ppos);
	if (cft->read_s64)
		return cgroup_read_s64(css, cft, file, buf, nbytes, ppos);
	return -EINVAL;
}
2434
2435
2436
2437
2438
2439
2440static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
2441{
2442 struct seq_file *sf = cb->state;
2443 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
2444}
2445
2446static int cgroup_seqfile_show(struct seq_file *m, void *arg)
2447{
2448 struct cfent *cfe = m->private;
2449 struct cftype *cft = cfe->type;
2450 struct cgroup_subsys_state *css = cfe->css;
2451
2452 if (cft->read_map) {
2453 struct cgroup_map_cb cb = {
2454 .fill = cgroup_map_add,
2455 .state = m,
2456 };
2457 return cft->read_map(css, cft, &cb);
2458 }
2459 return cft->read_seq_string(css, cft, m);
2460}
2461
/* fops for control files backed by seq_file (read_map / read_seq_string) */
static const struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.write = cgroup_file_write,
	.llseek = seq_lseek,
	.release = single_release,
};
2468
/*
 * Open handler for cgroup control files.  Pins the associated css for
 * the lifetime of the open file (released in cgroup_file_release()) and
 * switches seq_file-backed files over to cgroup_seqfile_operations.
 */
static int cgroup_file_open(struct inode *inode, struct file *file)
{
	struct cfent *cfe = __d_cfe(file->f_dentry);
	struct cftype *cft = __d_cft(file->f_dentry);
	struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent);
	struct cgroup_subsys_state *css;
	int err;

	err = generic_file_open(inode, file);
	if (err)
		return err;

	/*
	 * If the file belongs to a subsystem, pin the css.  Files without
	 * a subsystem (cft->ss == NULL) use the cgroup's dummy css which
	 * needs no refcounting.
	 */
	rcu_read_lock();
	css = cgroup_css(cgrp, cft->ss);
	if (cft->ss && !css_tryget(css))
		css = NULL;
	rcu_read_unlock();

	if (!css)
		return -ENODEV;

	/*
	 * @cfe->css is used by read/write/close to determine the
	 * associated css and should only change while the file is open;
	 * all opens of the same file must agree on the css.
	 */
	WARN_ON_ONCE(cfe->css && cfe->css != css);
	cfe->css = css;

	if (cft->read_map || cft->read_seq_string) {
		file->f_op = &cgroup_seqfile_operations;
		err = single_open(file, cgroup_seqfile_show, cfe);
	} else if (cft->open) {
		err = cft->open(inode, file);
	}

	/* on failure, drop the ref taken above (only held for real css) */
	if (css->ss && err)
		css_put(css);
	return err;
}
2515
/* Release handler: run the cftype's release hook and drop the css
 * reference taken in cgroup_file_open(). */
static int cgroup_file_release(struct inode *inode, struct file *file)
{
	struct cfent *cfe = __d_cfe(file->f_dentry);
	struct cftype *cft = __d_cft(file->f_dentry);
	struct cgroup_subsys_state *css = cfe->css;
	int ret = 0;

	if (cft->release)
		ret = cft->release(inode, file);
	/* dummy css (no subsystem) was never pinned — see cgroup_file_open() */
	if (css->ss)
		css_put(css);
	return ret;
}
2529
2530
2531
2532
/*
 * cgroup_rename - rename a cgroup directory.  Only same-directory
 * renames of cgroup directories are allowed; the cgroup's RCU-protected
 * name is swapped after the VFS rename succeeds.
 */
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
{
	int ret;
	struct cgroup_name *name, *old_name;
	struct cgroup *cgrp;

	/*
	 * i_mutex protects cgrp->name: rename is always called with the
	 * parent directory's i_mutex held by the VFS.
	 */
	lockdep_assert_held(&old_dir->i_mutex);

	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		return -ENOTDIR;
	if (new_dentry->d_inode)
		return -EEXIST;
	/* moving between directories would change the hierarchy shape */
	if (old_dir != new_dir)
		return -EIO;

	cgrp = __d_cgrp(old_dentry);

	/*
	 * This isn't a proper migration and its usefulness is very
	 * limited.  Disallow if sane_behavior.
	 */
	if (cgroup_sane_behavior(cgrp))
		return -EPERM;

	name = cgroup_alloc_name(new_dentry);
	if (!name)
		return -ENOMEM;

	ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry);
	if (ret) {
		kfree(name);
		return ret;
	}

	/* protected by the i_mutex assertion above */
	old_name = rcu_dereference_protected(cgrp->name, true);
	rcu_assign_pointer(cgrp->name, name);

	/* old name stays valid for concurrent RCU readers until they drop */
	kfree_rcu(old_name, rcu_head);
	return 0;
}
2578
2579static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
2580{
2581 if (S_ISDIR(dentry->d_inode->i_mode))
2582 return &__d_cgrp(dentry)->xattrs;
2583 else
2584 return &__d_cfe(dentry)->xattrs;
2585}
2586
/* non-zero iff the hierarchy was mounted with the "xattr" option */
static inline int xattr_enabled(struct dentry *dentry)
{
	struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
	return root->flags & CGRP_ROOT_XATTR;
}
2592
2593static bool is_valid_xattr(const char *name)
2594{
2595 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
2596 !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
2597 return true;
2598 return false;
2599}
2600
/* setxattr: gate on mount option and namespace, then delegate to simple_xattr */
static int cgroup_setxattr(struct dentry *dentry, const char *name,
			   const void *val, size_t size, int flags)
{
	if (!xattr_enabled(dentry))
		return -EOPNOTSUPP;
	if (!is_valid_xattr(name))
		return -EINVAL;
	return simple_xattr_set(__d_xattrs(dentry), name, val, size, flags);
}
2610
/* removexattr: same gating as cgroup_setxattr() */
static int cgroup_removexattr(struct dentry *dentry, const char *name)
{
	if (!xattr_enabled(dentry))
		return -EOPNOTSUPP;
	if (!is_valid_xattr(name))
		return -EINVAL;
	return simple_xattr_remove(__d_xattrs(dentry), name);
}
2619
/* getxattr: same gating as cgroup_setxattr() */
static ssize_t cgroup_getxattr(struct dentry *dentry, const char *name,
			       void *buf, size_t size)
{
	if (!xattr_enabled(dentry))
		return -EOPNOTSUPP;
	if (!is_valid_xattr(name))
		return -EINVAL;
	return simple_xattr_get(__d_xattrs(dentry), name, buf, size);
}
2629
/* listxattr: only gated on the mount option; lists whatever was stored */
static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size)
{
	if (!xattr_enabled(dentry))
		return -EOPNOTSUPP;
	return simple_xattr_list(__d_xattrs(dentry), buf, size);
}
2636
/* default fops for cgroup control files (non-seq_file path) */
static const struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};
2644
/* inode ops for control files: only the xattr interface */
static const struct inode_operations cgroup_file_inode_operations = {
	.setxattr = cgroup_setxattr,
	.getxattr = cgroup_getxattr,
	.listxattr = cgroup_listxattr,
	.removexattr = cgroup_removexattr,
};
2651
/* inode ops for cgroup directories: mkdir/rmdir create and destroy cgroups */
static const struct inode_operations cgroup_dir_inode_operations = {
	.lookup = simple_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
	.setxattr = cgroup_setxattr,
	.getxattr = cgroup_getxattr,
	.listxattr = cgroup_listxattr,
	.removexattr = cgroup_removexattr,
};
2662
2663
2664
2665
/*
 * Check that a given file is a cgroup control file (by its fops) and
 * return its cftype, or ERR_PTR(-EINVAL) otherwise.
 */
static inline struct cftype *__file_cft(struct file *file)
{
	if (file_inode(file)->i_fop != &cgroup_file_operations)
		return ERR_PTR(-EINVAL);
	return __d_cft(file->f_dentry);
}
2672
/*
 * Allocate and instantiate an inode for a cgroup directory or control
 * file on @dentry.  Directories are wired up for mkdir/rmdir and get
 * their i_mutex pre-locked; regular files use the cgroup file ops.
 */
static int cgroup_create_file(struct dentry *dentry, umode_t mode,
			      struct super_block *sb)
{
	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* start off with i_nlink == 2 (for "." entry) */
		inc_nlink(inode);
		inc_nlink(dentry->d_parent->d_inode);

		/*
		 * Control reaches here with cgroup_mutex held.
		 * @inode->i_mutex should nest outside cgroup_mutex but we
		 * want to populate it immediately without releasing
		 * cgroup_mutex.  As @inode isn't visible to anyone else
		 * yet, trylock will always succeed without affecting
		 * lockdep checks.
		 */
		WARN_ON_ONCE(!mutex_trylock(&inode->i_mutex));
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
		inode->i_op = &cgroup_file_inode_operations;
	}
	d_instantiate(dentry, inode);
	dget(dentry);	/* Extra count - pin the dentry in core */
	return 0;
}
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723static umode_t cgroup_file_mode(const struct cftype *cft)
2724{
2725 umode_t mode = 0;
2726
2727 if (cft->mode)
2728 return cft->mode;
2729
2730 if (cft->read || cft->read_u64 || cft->read_s64 ||
2731 cft->read_map || cft->read_seq_string)
2732 mode |= S_IRUGO;
2733
2734 if (cft->write || cft->write_u64 || cft->write_s64 ||
2735 cft->write_string || cft->trigger)
2736 mode |= S_IWUSR;
2737
2738 return mode;
2739}
2740
/*
 * Create one control file for @cft inside @cgrp's directory.  Subsystem
 * files are prefixed with "<ss-name>." unless suppressed by flags.
 * Caller must hold the directory's i_mutex.
 */
static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
{
	struct dentry *dir = cgrp->dentry;
	struct cgroup *parent = __d_cgrp(dir);
	struct dentry *dentry;
	struct cfent *cfe;
	int error;
	umode_t mode;
	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };

	/* build "<subsys>.<name>" unless prefixing is disabled */
	if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
	    !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
		strcpy(name, cft->ss->name);
		strcat(name, ".");
	}
	strcat(name, cft->name);

	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));

	cfe = kzalloc(sizeof(*cfe), GFP_KERNEL);
	if (!cfe)
		return -ENOMEM;

	dentry = lookup_one_len(name, dir, strlen(name));
	if (IS_ERR(dentry)) {
		error = PTR_ERR(dentry);
		goto out;
	}

	/* cast away const; cfe->type is non-const for legacy reasons */
	cfe->type = (void *)cft;
	cfe->dentry = dentry;
	dentry->d_fsdata = cfe;
	simple_xattrs_init(&cfe->xattrs);

	mode = cgroup_file_mode(cft);
	error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
	if (!error) {
		list_add_tail(&cfe->node, &parent->files);
		cfe = NULL;	/* ownership transferred to parent->files */
	}
	dput(dentry);
out:
	kfree(cfe);	/* no-op on success (cfe == NULL) */
	return error;
}
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
/**
 * cgroup_addrm_files - add or remove files for a cgroup
 * @cgrp: the target cgroup
 * @cfts: array of cftypes to be added, terminated by an empty name
 * @is_add: whether to add or remove
 *
 * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
 * On failure part-way through an add, already-added files are NOT
 * removed here — cleanup is the caller's responsibility.
 */
static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
			      bool is_add)
{
	struct cftype *cft;
	int ret;

	lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
	lockdep_assert_held(&cgroup_mutex);

	for (cft = cfts; cft->name[0] != '\0'; cft++) {
		/* does cft->flags tell us to skip this file on @cgrp? */
		if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
			continue;
		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
			continue;
		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
			continue;

		if (is_add) {
			ret = cgroup_add_file(cgrp, cft);
			if (ret) {
				pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n",
					cft->name, ret);
				return ret;
			}
		} else {
			cgroup_rm_file(cgrp, cft);
		}
	}
	return 0;
}
2829
/*
 * Prepare for a cftype addition/removal; pairs with
 * cgroup_cfts_commit() which drops the mutex.  Holding cgroup_mutex
 * here keeps the set of cgroups stable while the cftset lists are
 * being modified.
 */
static void cgroup_cfts_prepare(void)
	__acquires(&cgroup_mutex)
{
	/*
	 * Thanks to the entanglement with vfs inode i_mutex, we can't
	 * grab @cgrp->dentry here; the actual file additions happen in
	 * cgroup_cfts_commit() after re-locking in the right order.
	 */
	mutex_lock(&cgroup_mutex);
}
2841
2842static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
2843 __releases(&cgroup_mutex)
2844{
2845 LIST_HEAD(pending);
2846 struct cgroup_subsys *ss = cfts[0].ss;
2847 struct cgroup *root = &ss->root->top_cgroup;
2848 struct super_block *sb = ss->root->sb;
2849 struct dentry *prev = NULL;
2850 struct inode *inode;
2851 struct cgroup_subsys_state *css;
2852 u64 update_before;
2853 int ret = 0;
2854
2855
2856 if (!cfts || ss->root == &cgroup_dummy_root ||
2857 !atomic_inc_not_zero(&sb->s_active)) {
2858 mutex_unlock(&cgroup_mutex);
2859 return 0;
2860 }
2861
2862
2863
2864
2865
2866
2867 update_before = cgroup_serial_nr_next;
2868
2869 mutex_unlock(&cgroup_mutex);
2870
2871
2872 rcu_read_lock();
2873 css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
2874 struct cgroup *cgrp = css->cgroup;
2875
2876 if (cgroup_is_dead(cgrp))
2877 continue;
2878
2879 inode = cgrp->dentry->d_inode;
2880 dget(cgrp->dentry);
2881 rcu_read_unlock();
2882
2883 dput(prev);
2884 prev = cgrp->dentry;
2885
2886 mutex_lock(&inode->i_mutex);
2887 mutex_lock(&cgroup_mutex);
2888 if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp))
2889 ret = cgroup_addrm_files(cgrp, cfts, is_add);
2890 mutex_unlock(&cgroup_mutex);
2891 mutex_unlock(&inode->i_mutex);
2892
2893 rcu_read_lock();
2894 if (ret)
2895 break;
2896 }
2897 rcu_read_unlock();
2898 dput(prev);
2899 deactivate_super(sb);
2900 return ret;
2901}
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
/**
 * cgroup_add_cftypes - add an array of cftypes to a subsystem
 * @ss: target cgroup subsystem
 * @cfts: zero-length-name-terminated array of cftypes
 *
 * Register @cfts to @ss and create the files on all existing cgroups of
 * the subsystem's hierarchy; cgroups created afterwards pick them up
 * automatically.  On failure the cftypes are unregistered again.
 */
int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
	struct cftype_set *set;
	struct cftype *cft;
	int ret;

	set = kzalloc(sizeof(*set), GFP_KERNEL);
	if (!set)
		return -ENOMEM;

	/* back-link each cftype to its owning subsystem */
	for (cft = cfts; cft->name[0] != '\0'; cft++)
		cft->ss = ss;

	cgroup_cfts_prepare();
	set->cfts = cfts;
	list_add_tail(&set->node, &ss->cftsets);
	ret = cgroup_cfts_commit(cfts, true);
	if (ret)
		cgroup_rm_cftypes(cfts);
	return ret;
}
EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
/**
 * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
 * @cfts: the cftype array previously registered via cgroup_add_cftypes()
 *
 * Unregisters @cfts and removes the corresponding files from all
 * existing cgroups.  Returns -ENOENT if @cfts was never registered;
 * cgroup_cfts_commit() drops the mutex taken by cgroup_cfts_prepare()
 * on every path.
 */
int cgroup_rm_cftypes(struct cftype *cfts)
{
	struct cftype_set *set;

	if (!cfts || !cfts[0].ss)
		return -ENOENT;

	cgroup_cfts_prepare();

	list_for_each_entry(set, &cfts[0].ss->cftsets, node) {
		if (set->cfts == cfts) {
			list_del(&set->node);
			kfree(set);
			cgroup_cfts_commit(cfts, false);
			return 0;
		}
	}

	/* not found: commit with NULL just to release cgroup_mutex */
	cgroup_cfts_commit(NULL, false);
	return -ENOENT;
}
2972
2973
2974
2975
2976
2977
2978
/**
 * cgroup_task_count - count the number of tasks in a cgroup.
 * @cgrp: the cgroup in question
 *
 * Sums the refcounts of all css_sets linked to @cgrp.  NOTE(review):
 * this assumes each css_set's refcount equals the number of tasks
 * using it — anything else holding a css_set ref would inflate the
 * count; confirm against put_css_set()/get_css_set() usage.
 */
int cgroup_task_count(const struct cgroup *cgrp)
{
	int count = 0;
	struct cgrp_cset_link *link;

	read_lock(&css_set_lock);
	list_for_each_entry(link, &cgrp->cset_links, cset_link)
		count += atomic_read(&link->cset->refcount);
	read_unlock(&css_set_lock);
	return count;
}
2990
2991
2992
2993
2994
2995
2996
/*
 * Lazily populate the per-css_set task lists.  To reduce fork()/exit()
 * overhead, the cg_list linkage is only maintained once something has
 * actually needed to walk tasks by css_set; this flips the switch and
 * back-fills the lists for every existing task.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;
	write_lock(&css_set_lock);
	use_task_css_set_links = 1;
	/*
	 * We need tasklist_lock because RCU is not safe against
	 * while_each_thread(). Besides, a forking task that has passed
	 * cgroup_post_fork() without seeing use_task_css_set_links = 1
	 * is not guaranteed to have its child immediately visible in the
	 * tasklist if we walk through it with RCU.
	 */
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		task_lock(p);
		/*
		 * We should check if the process is exiting, otherwise
		 * it will race with cgroup_exit() in that the list
		 * entry won't be deleted though the process has exited.
		 */
		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
			list_add(&p->cg_list, &task_css_set(p)->tasks);
		task_unlock(p);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
	write_unlock(&css_set_lock);
}
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
/**
 * css_next_child - find the next child of a given css
 * @pos_css: the current position (%NULL to initiate traversal)
 * @parent_css: css whose children to walk
 *
 * Returns the next child of @parent_css.  Must be called under
 * rcu_read_lock(); works even if @pos_css's cgroup has been removed
 * since it was returned.
 */
struct cgroup_subsys_state *
css_next_child(struct cgroup_subsys_state *pos_css,
	       struct cgroup_subsys_state *parent_css)
{
	struct cgroup *pos = pos_css ? pos_css->cgroup : NULL;
	struct cgroup *cgrp = parent_css->cgroup;
	struct cgroup *next;

	WARN_ON_ONCE(!rcu_read_lock_held());

	/*
	 * @pos could already have been removed.  Once a cgroup is dead,
	 * its ->sibling.next is no longer updated when its next sibling
	 * changes, so we can't simply follow it.  If @pos is alive
	 * (!cgroup_is_dead()), its ->sibling.next is still valid.  If it
	 * is dead, fall back to scanning the children list for the first
	 * entry with a larger serial number: serial numbers increase
	 * monotonically in list order, so that entry is exactly the
	 * sibling that followed @pos.
	 */
	if (!pos) {
		next = list_entry_rcu(cgrp->children.next, struct cgroup, sibling);
	} else if (likely(!cgroup_is_dead(pos))) {
		next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
	} else {
		list_for_each_entry_rcu(next, &cgrp->children, sibling)
			if (next->serial_nr > pos->serial_nr)
				break;
	}

	/* walked off the end of the children list */
	if (&next->sibling == &cgrp->children)
		return NULL;

	return cgroup_css(next, parent_css->ss);
}
EXPORT_SYMBOL_GPL(css_next_child);
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
/**
 * css_next_descendant_pre - find the next descendant for pre-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * Returns the next descendant of @root in pre-order; @root itself is
 * visited first.  Must be called under rcu_read_lock().
 */
struct cgroup_subsys_state *
css_next_descendant_pre(struct cgroup_subsys_state *pos,
			struct cgroup_subsys_state *root)
{
	struct cgroup_subsys_state *next;

	WARN_ON_ONCE(!rcu_read_lock_held());

	/* if first iteration, visit @root */
	if (!pos)
		return root;

	/* visit the first child if it exists */
	next = css_next_child(NULL, pos);
	if (next)
		return next;

	/* no child - walk up and visit the closest ancestor's next sibling */
	while (pos != root) {
		next = css_next_child(pos, css_parent(pos));
		if (next)
			return next;
		pos = css_parent(pos);
	}

	/* back at @root with nothing left - traversal complete */
	return NULL;
}
EXPORT_SYMBOL_GPL(css_next_descendant_pre);
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
/**
 * css_rightmost_descendant - return the rightmost descendant of a css
 * @pos: css of interest
 *
 * Return the rightmost (last to be visited in pre-order) descendant of
 * @pos; @pos itself if it has no descendant.  Must be called under
 * rcu_read_lock().
 */
struct cgroup_subsys_state *
css_rightmost_descendant(struct cgroup_subsys_state *pos)
{
	struct cgroup_subsys_state *last, *tmp;

	WARN_ON_ONCE(!rcu_read_lock_held());

	do {
		last = pos;
		/* ->prev isn't RCU safe, scan forward for the last child */
		pos = NULL;
		css_for_each_child(tmp, last)
			pos = tmp;
	} while (pos);

	return last;
}
EXPORT_SYMBOL_GPL(css_rightmost_descendant);
3155
/*
 * Return the leftmost (first visited in pre-order) descendant of @pos by
 * repeatedly descending into the first child; @pos itself if childless.
 */
static struct cgroup_subsys_state *
css_leftmost_descendant(struct cgroup_subsys_state *pos)
{
	struct cgroup_subsys_state *last;

	do {
		last = pos;
		pos = css_next_child(NULL, pos);
	} while (pos);

	return last;
}
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
/**
 * css_next_descendant_post - find the next descendant for post-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * Returns the next descendant of @root in post-order; @root is visited
 * last.  Must be called under rcu_read_lock().
 */
struct cgroup_subsys_state *
css_next_descendant_post(struct cgroup_subsys_state *pos,
			 struct cgroup_subsys_state *root)
{
	struct cgroup_subsys_state *next;

	WARN_ON_ONCE(!rcu_read_lock_held());

	/* if first iteration, visit the leftmost descendant */
	if (!pos)
		return css_leftmost_descendant(root);

	/* if we visited @root, we're done */
	if (pos == root)
		return NULL;

	/* if there's an unvisited sibling, visit its leftmost descendant */
	next = css_next_child(pos, css_parent(pos));
	if (next)
		return css_leftmost_descendant(next);

	/* no sibling left, visit parent */
	return css_parent(pos);
}
EXPORT_SYMBOL_GPL(css_next_descendant_post);
3208
3209
3210
3211
3212
3213
3214
/*
 * css_advance_task_iter - advance @it to the next css_set with tasks
 *
 * Skips empty css_sets linked to the origin css's cgroup; points
 * ->task at the first task of the next non-empty one, or sets
 * ->cset_link to %NULL when all are exhausted.  Called with
 * css_set_lock held (see css_task_iter_start()).
 */
static void css_advance_task_iter(struct css_task_iter *it)
{
	struct list_head *l = it->cset_link;
	struct cgrp_cset_link *link;
	struct css_set *cset;

	/* Advance to the next non-empty css_set */
	do {
		l = l->next;
		if (l == &it->origin_css->cgroup->cset_links) {
			/* wrapped around to the list head - iteration done */
			it->cset_link = NULL;
			return;
		}
		link = list_entry(l, struct cgrp_cset_link, cset_link);
		cset = link->cset;
	} while (list_empty(&cset->tasks));
	it->cset_link = l;
	it->task = cset->tasks.next;
}
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
/**
 * css_task_iter_start - initiate task iteration
 * @css: the css to walk tasks of
 * @it: the task iterator to use
 *
 * Initiate iteration through the tasks of @css.  The caller can call
 * css_task_iter_next() to walk through the tasks until the function
 * returns NULL.  On completion of iteration, css_task_iter_end() must
 * be called.  css_set_lock is held from start to end, so the caller
 * must not block between these calls.
 */
void css_task_iter_start(struct cgroup_subsys_state *css,
			 struct css_task_iter *it)
	__acquires(css_set_lock)
{
	/*
	 * The first time anyone tries to iterate across a css, we need to
	 * enable the list linking each css_set to its tasks, and fix up
	 * all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	read_lock(&css_set_lock);

	it->origin_css = css;
	it->cset_link = &css->cgroup->cset_links;

	/* position on the first non-empty css_set */
	css_advance_task_iter(it);
}
3268
3269
3270
3271
3272
3273
3274
3275
3276
/**
 * css_task_iter_next - return the next task for the iterator
 * @it: the task iterator being iterated
 *
 * The "next" function for task iteration.  @it should have been
 * initialized via css_task_iter_start().  Returns NULL when the
 * iteration reaches the end.
 */
struct task_struct *css_task_iter_next(struct css_task_iter *it)
{
	struct task_struct *res;
	struct list_head *l = it->task;
	struct cgrp_cset_link *link;

	/* If the iterator's cset_link is NULL, there are no tasks left */
	if (!it->cset_link)
		return NULL;
	res = list_entry(l, struct task_struct, cg_list);
	/* Advance iterator to find the next entry */
	l = l->next;
	link = list_entry(it->cset_link, struct cgrp_cset_link, cset_link);
	if (l == &link->cset->tasks) {
		/*
		 * The end of this css_set's task list was reached - move
		 * on to the next non-empty cgrp_cset_link.
		 */
		css_advance_task_iter(it);
	} else {
		it->task = l;
	}
	return res;
}
3301
3302
3303
3304
3305
3306
3307
/**
 * css_task_iter_end - finish task iteration
 * @it: the task iterator to finish
 *
 * Finish task iteration started by css_task_iter_start(), dropping
 * css_set_lock taken there.
 */
void css_task_iter_end(struct css_task_iter *it)
	__releases(css_set_lock)
{
	read_unlock(&css_set_lock);
}
3313
/*
 * started_after_time - did @t1 start strictly after @time?
 *
 * @time is the start time recorded from task @t2 (which may be NULL
 * before any task has been processed).  Ties on start time are broken
 * by raw task-pointer comparison so the resulting ordering is total.
 */
static inline int started_after_time(struct task_struct *t1,
				     struct timespec *time,
				     struct task_struct *t2)
{
	int start_diff = timespec_compare(&t1->start_time, time);
	if (start_diff > 0) {
		return 1;
	} else if (start_diff < 0) {
		return 0;
	} else {
		/*
		 * Arbitrarily, if two processes started at the same
		 * time, we'll say that the lower pointer value
		 * started first.  Note that @t2 may have exited by now
		 * so this may not be a valid pointer any longer, but
		 * that's fine - it still serves to distinguish between
		 * two tasks started (effectively) simultaneously.
		 */
		return t1 > t2;
	}
}
3335
3336
3337
3338
3339
3340
/*
 * started_after - heap comparator used by css_scan_tasks()
 *
 * Returns non-zero iff task @p1 started after task @p2, with pointer
 * comparison as the tie-breaker (see started_after_time()).
 */
static inline int started_after(void *p1, void *p2)
{
	struct task_struct *t1 = p1;
	struct task_struct *t2 = p2;
	return started_after_time(t1, &t2->start_time, t2);
}
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
/**
 * css_scan_tasks - iterate tasks of a css and invoke @process on them
 * @css: the css to iterate tasks of
 * @test: optional test to filter tasks (NULL selects all)
 * @process: the function to process each selected task
 * @data: data passed to @test and @process
 * @heap: optional pre-allocated heap (NULL to allocate one internally)
 *
 * Tasks are gathered into a heap sorted by start time so that @process
 * can be invoked without css_set_lock held; tasks that fork mid-scan are
 * picked up by re-scanning until a pass processes nothing.  Each task is
 * processed at most once per call.
 */
int css_scan_tasks(struct cgroup_subsys_state *css,
		   bool (*test)(struct task_struct *, void *),
		   void (*process)(struct task_struct *, void *),
		   void *data, struct ptr_heap *heap)
{
	int retval, i;
	struct css_task_iter it;
	struct task_struct *p, *dropped;
	/* Never dereference latest_task, since it's not refcounted */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct timespec latest_time = { 0, 0 };

	if (heap) {
		/* The caller supplied our heap and pre-allocated its memory */
		heap->gt = &started_after;
	} else {
		/* We need to allocate our own heap memory */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* cannot allocate the heap */
			return retval;
	}

 again:
	/*
	 * Scan tasks in the css, using the @test callback to determine
	 * which are of interest.  Since we don't want to hold any locks
	 * during the task updates, gather tasks to be processed in a
	 * heap structure sorted by descending start time.  If a task
	 * started after the last task processed in the previous pass,
	 * process it now; otherwise it has already been handled.  Tasks
	 * pushed out of a full heap are dealt with in subsequent passes.
	 */
	heap->size = 0;
	css_task_iter_start(css, &it);
	while ((p = css_task_iter_next(&it))) {
		/*
		 * Only affect tasks that qualify per the caller's
		 * callback, if one was provided.
		 */
		if (test && !test(p, data))
			continue;
		/*
		 * Only process tasks that started after the last task
		 * we processed.
		 */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/*
			 * The new task was inserted; the heap wasn't
			 * previously full.
			 */
			get_task_struct(p);
		} else if (dropped != p) {
			/*
			 * The new task was inserted and pushed out a
			 * different task - swap the references.
			 */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/*
		 * Else the new task was newer than anything already in
		 * the heap and wasn't inserted.
		 */
	}
	css_task_iter_end(&it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				/* remember where this pass got to */
				latest_time = q->start_time;
				latest_task = q;
			}
			/* Process the task per the caller's callback */
			process(q, data);
			put_task_struct(q);
		}
		/*
		 * If we had to process any tasks at all, scan again in
		 * case some of them were in the middle of forking
		 * children that didn't get processed.  Not the most
		 * efficient strategy, but it avoids extra locking.
		 */
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}
3475
/*
 * css_scan_tasks() @process callback: move @task into the cgroup passed
 * via @data, taking cgroup_mutex around the attach.
 */
static void cgroup_transfer_one_task(struct task_struct *task, void *data)
{
	struct cgroup *new_cgroup = data;

	mutex_lock(&cgroup_mutex);
	cgroup_attach_task(new_cgroup, task, false);
	mutex_unlock(&cgroup_mutex);
}
3484
3485
3486
3487
3488
3489
/**
 * cgroup_transfer_tasks - move tasks from one cgroup to another
 * @to: cgroup to which the tasks will be moved
 * @from: cgroup in which the tasks currently reside
 *
 * Returns 0 on success or the error from css_scan_tasks().
 */
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
{
	return css_scan_tasks(&from->dummy_css, NULL, cgroup_transfer_one_task,
			      to, NULL);
}
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
/*
 * Which pidlist a cgroup_pidlist backs: "cgroup.procs" (unique tgids)
 * or "tasks" (all pids).  Used as part of the pidlist cache key.
 */
enum cgroup_filetype {
	CGROUP_FILE_PROCS,
	CGROUP_FILE_TASKS,
};
3511
3512
3513
3514
3515
3516
3517
/*
 * A cached pid array for one tasks/procs file.  Lists are stored per
 * cgroup and shared between open files, keyed by file type and the
 * opener's pid namespace.
 */
struct cgroup_pidlist {
	/*
	 * used to find which pidlist is wanted. doesn't change as long as
	 * this particular list stays in the list.
	 */
	struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
	/* array of pids (tgids for CGROUP_FILE_PROCS) */
	pid_t *list;
	/* how many elements the above list has */
	int length;
	/* how many files are using the current array */
	int use_count;
	/* each of these is stored in a list keyed by its owning cgroup */
	struct list_head links;
	/* pointer to the cgroup we belong to, for list removal purposes */
	struct cgroup *owner;
	/* protects the pidlist contents and use_count */
	struct rw_semaphore rwsem;
};
3537
3538
3539
3540
3541
3542
/*
 * Allocation helpers for pidlists.  When the array is bigger than
 * kmalloc comfortably serves (more than two pages), fall back to
 * vmalloc; pidlist_free() picks the matching release path.
 */
#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
static void *pidlist_allocate(int count)
{
	if (PIDLIST_TOO_LARGE(count))
		return vmalloc(count * sizeof(pid_t));
	else
		return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
}
static void pidlist_free(void *p)
{
	/* is_vmalloc_addr() tells us which allocator handed out @p */
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		kfree(p);
}
3558
3559
3560
3561
3562
/*
 * pidlist_uniq - strip duplicate entries from a pid array in place
 * @list: array of pids with duplicates stored adjacently (i.e. sorted)
 * @length: number of entries in @list
 *
 * Compacts @list so each run of equal values keeps a single entry and
 * returns the new length.  Entries are only ever moved toward the front,
 * so no extra memory is needed.
 */
static int pidlist_uniq(pid_t *list, int length)
{
	int src, dest = 1;

	/* lists of size 0 or 1 are already unique */
	if (length == 0 || length == 1)
		return length;

	for (src = 1; src < length; src++) {
		/* keep only values that differ from the last one kept */
		if (list[src] != list[dest - 1])
			list[dest++] = list[src];
	}

	return dest;
}
3588
/* sort() comparator: ascending numeric order of pids */
static int cmppid(const void *a, const void *b)
{
	pid_t lhs = *(const pid_t *)a;
	pid_t rhs = *(const pid_t *)b;

	return lhs - rhs;
}
3593
3594
3595
3596
3597
3598
3599
/*
 * Find the cached pidlist on @cgrp matching @type and the caller's pid
 * namespace, or allocate a fresh one.  Returns with the pidlist's rwsem
 * held for writing (caller must up_write() it), or NULL on allocation
 * failure.
 */
static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
						  enum cgroup_filetype type)
{
	struct cgroup_pidlist *l;
	/* don't need task_nsproxy() if we're looking at ourself */
	struct pid_namespace *ns = task_active_pid_ns(current);

	/*
	 * pidlist_mutex serializes lookup/insertion on ->pidlists; the
	 * per-list rwsem is taken before dropping it so the returned list
	 * can't be freed or refilled under the caller.
	 */
	mutex_lock(&cgrp->pidlist_mutex);
	list_for_each_entry(l, &cgrp->pidlists, links) {
		if (l->key.type == type && l->key.ns == ns) {
			/* found a matching list - drop the lock and return */
			down_write(&l->rwsem);
			mutex_unlock(&cgrp->pidlist_mutex);
			return l;
		}
	}
	/* entry not found; create a new one */
	l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
	if (!l) {
		mutex_unlock(&cgrp->pidlist_mutex);
		return l;
	}
	init_rwsem(&l->rwsem);
	down_write(&l->rwsem);
	l->key.type = type;
	/* hold a reference on the namespace for the list's lifetime */
	l->key.ns = get_pid_ns(ns);
	l->owner = cgrp;
	list_add(&l->links, &cgrp->pidlists);
	mutex_unlock(&cgrp->pidlist_mutex);
	return l;
}
3637
3638
3639
3640
/*
 * Load a cgroup's pidlist into *@lp: snapshot the member tasks' pids
 * (tgids for CGROUP_FILE_PROCS), sort, de-duplicate for procs, and
 * install the array in the cached pidlist, bumping its use count.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
			      struct cgroup_pidlist **lp)
{
	pid_t *array;
	int length;
	int pid, n = 0; /* used for populating the array */
	struct css_task_iter it;
	struct task_struct *tsk;
	struct cgroup_pidlist *l;

	/*
	 * If cgroup gets more users after we read count, we won't have
	 * enough space - tough.  This race is indistinguishable to the
	 * caller from the case that the additional cgroup users didn't
	 * show up until sometime later on.
	 */
	length = cgroup_task_count(cgrp);
	array = pidlist_allocate(length);
	if (!array)
		return -ENOMEM;
	/* now, populate the array */
	css_task_iter_start(&cgrp->dummy_css, &it);
	while ((tsk = css_task_iter_next(&it))) {
		if (unlikely(n == length))
			break;
		/* get tgid or pid for procs or tasks file respectively */
		if (type == CGROUP_FILE_PROCS)
			pid = task_tgid_vnr(tsk);
		else
			pid = task_pid_vnr(tsk);
		if (pid > 0) /* make sure to only use valid results */
			array[n++] = pid;
	}
	css_task_iter_end(&it);
	length = n;
	/* now sort & (if procs) strip out duplicate tgids */
	sort(array, length, sizeof(pid_t), cmppid, NULL);
	if (type == CGROUP_FILE_PROCS)
		length = pidlist_uniq(array, length);
	l = cgroup_pidlist_find(cgrp, type);
	if (!l) {
		pidlist_free(array);
		return -ENOMEM;
	}
	/* store array, freeing old if necessary - lock already held */
	pidlist_free(l->list);
	l->list = array;
	l->length = length;
	l->use_count++;
	up_write(&l->rwsem);
	*lp = l;
	return 0;
}
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
/**
 * cgroupstats_build - build and fill cgroupstats
 * @stats: cgroupstats to fill information into
 * @dentry: A dentry entry belonging to the cgroup for which stats have
 * been requested.
 *
 * Counts the cgroup's member tasks by scheduler state for taskstats.
 * Returns -EINVAL if @dentry isn't a cgroupfs directory.
 */
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
	int ret = -EINVAL;
	struct cgroup *cgrp;
	struct css_task_iter it;
	struct task_struct *tsk;

	/*
	 * Validate dentry by checking the superblock operations,
	 * and make sure it's a directory.
	 */
	if (dentry->d_sb->s_op != &cgroup_ops ||
	    !S_ISDIR(dentry->d_inode->i_mode))
		goto err;

	ret = 0;
	cgrp = dentry->d_fsdata;

	css_task_iter_start(&cgrp->dummy_css, &it);
	while ((tsk = css_task_iter_next(&it))) {
		switch (tsk->state) {
		case TASK_RUNNING:
			stats->nr_running++;
			break;
		case TASK_INTERRUPTIBLE:
			stats->nr_sleeping++;
			break;
		case TASK_UNINTERRUPTIBLE:
			stats->nr_uninterruptible++;
			break;
		case TASK_STOPPED:
			stats->nr_stopped++;
			break;
		default:
			if (delayacct_is_task_waiting_on_io(tsk))
				stats->nr_io_wait++;
			break;
		}
	}
	css_task_iter_end(&it);

err:
	return ret;
}
3748
3749
3750
3751
3752
3753
3754
3755
/*
 * seq_file ->start() for the tasks/procs files.  Takes the pidlist's
 * rwsem for reading (released in ->stop()) and positions on the entry
 * for *@pos.
 */
static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
{
	/*
	 * Initially we receive a position value that corresponds to one
	 * more than the last pid shown (or 0 on the first call or after
	 * a seek to the start).  *pos holds a pid, not an index, because
	 * the underlying array may have been refreshed since the last
	 * read; use a binary search to find the next pid to display.
	 */
	struct cgroup_pidlist *l = s->private;
	int index = 0, pid = *pos;
	int *iter;

	down_read(&l->rwsem);
	if (pid) {
		int end = l->length;
		/* find @pid, or the first entry greater than it */
		while (index < end) {
			int mid = (index + end) / 2;
			if (l->list[mid] == pid) {
				index = mid;
				break;
			} else if (l->list[mid] <= pid)
				index = mid + 1;
			else
				end = mid;
		}
	}
	/* If we're off the end of the array, we're done */
	if (index >= l->length)
		return NULL;
	/* Update the abstract position to be the actual pid we found */
	iter = l->list + index;
	*pos = *iter;
	return iter;
}
3791
/* seq_file ->stop(): drop the read lock taken in cgroup_pidlist_start() */
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
	struct cgroup_pidlist *l = s->private;
	up_read(&l->rwsem);
}
3797
/* seq_file ->next(): step to the following pid, keeping *pos = pid value */
static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
{
	struct cgroup_pidlist *l = s->private;
	pid_t *p = v;
	pid_t *end = l->list + l->length;
	/*
	 * Advance to the next pid in the array.  If this goes off the
	 * end, we're done.
	 */
	p++;
	if (p >= end) {
		return NULL;
	} else {
		*pos = *p;
		return p;
	}
}
3815
/* seq_file ->show(): print one pid per line */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	return seq_printf(s, "%d\n", *(int *)v);
}
3820
3821
3822
3823
3824
/*
 * seq_file operations for the tasks/procs files, iterating the cached
 * pid array under the pidlist's read lock.
 */
static const struct seq_operations cgroup_pidlist_seq_operations = {
	.start = cgroup_pidlist_start,
	.stop = cgroup_pidlist_stop,
	.next = cgroup_pidlist_next,
	.show = cgroup_pidlist_show,
};
3831
/*
 * Drop one use of @l; when the count hits zero, unlink it from its
 * cgroup and free the array, the namespace reference, and the list
 * itself.
 */
static void cgroup_release_pid_array(struct cgroup_pidlist *l)
{
	/*
	 * The owner's pidlist_mutex is taken first so a concurrent
	 * cgroup_pidlist_find() can't revive @l between the final
	 * use_count drop and the list_del().
	 */
	mutex_lock(&l->owner->pidlist_mutex);
	down_write(&l->rwsem);
	BUG_ON(!l->use_count);
	if (!--l->use_count) {
		/* we're the last user if refcount is 0; remove and free */
		list_del(&l->links);
		mutex_unlock(&l->owner->pidlist_mutex);
		pidlist_free(l->list);
		put_pid_ns(l->key.ns);
		up_write(&l->rwsem);
		kfree(l);
		return;
	}
	mutex_unlock(&l->owner->pidlist_mutex);
	up_write(&l->rwsem);
}
3856
/* ->release() for tasks/procs files: drop the pidlist use and seq state */
static int cgroup_pidlist_release(struct inode *inode, struct file *file)
{
	struct cgroup_pidlist *l;
	if (!(file->f_mode & FMODE_READ))
		return 0;
	/*
	 * the seq_file will only be initialized if the file was opened
	 * for reading; hence we check if it's not null only in that
	 * case.
	 */
	l = ((struct seq_file *)file->private_data)->private;
	cgroup_release_pid_array(l);
	return seq_release(inode, file);
}
3870
/*
 * File operations installed by cgroup_pidlist_open(); reads go through
 * seq_file, writes through the generic cgroup file write path.
 */
static const struct file_operations cgroup_pidlist_operations = {
	.read = seq_read,
	.llseek = seq_lseek,
	.write = cgroup_file_write,
	.release = cgroup_pidlist_release,
};
3877
3878
3879
3880
3881
3882
3883
/*
 * The following functions handle opens on a file that displays a pidlist
 * (tasks or procs).  Prepare an array of the process/thread IDs of
 * whoever's in the cgroup and hook it up to the seq_file machinery.
 */
static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
{
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
	struct cgroup_pidlist *l;
	int retval;

	/* Nothing to do for write-only files */
	if (!(file->f_mode & FMODE_READ))
		return 0;

	/* have the array populated; takes a use on the pidlist */
	retval = pidlist_array_load(cgrp, type, &l);
	if (retval)
		return retval;
	/* configure file information */
	file->f_op = &cgroup_pidlist_operations;

	retval = seq_open(file, &cgroup_pidlist_seq_operations);
	if (retval) {
		cgroup_release_pid_array(l);
		return retval;
	}
	((struct seq_file *)file->private_data)->private = l;
	return 0;
}
/* ->open() for the "tasks" file: pidlist of all member threads */
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
}
/* ->open() for "cgroup.procs": pidlist of unique member tgids */
static int cgroup_procs_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
}
3917
/* "notify_on_release" read handler: report the flag as 0/1 */
static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
					 struct cftype *cft)
{
	return notify_on_release(css->cgroup);
}

/*
 * "notify_on_release" write handler.  Also clears CGRP_RELEASABLE so a
 * stale releasable state doesn't trigger an immediate notification.
 */
static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
					  struct cftype *cft, u64 val)
{
	clear_bit(CGRP_RELEASABLE, &css->cgroup->flags);
	if (val)
		set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
	else
		clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
	return 0;
}
3934
3935
3936
3937
3938
3939
3940
3941
3942
/*
 * Drop the dentry reference pinning @cgrp.  The superblock is pinned
 * across the dput() because releasing the last dentry ref must not race
 * with the superblock going away.
 */
static void cgroup_dput(struct cgroup *cgrp)
{
	struct super_block *sb = cgrp->root->sb;

	atomic_inc(&sb->s_active);
	dput(cgrp->dentry);
	deactivate_super(sb);
}
3951
3952
3953
3954
3955
3956
/*
 * Unregister event and free resources.
 *
 * Gets called from workqueue.
 */
static void cgroup_event_remove(struct work_struct *work)
{
	struct cgroup_event *event = container_of(work, struct cgroup_event,
			remove);
	struct cgroup_subsys_state *css = event->css;

	remove_wait_queue(event->wqh, &event->wait);

	event->cft->unregister_event(css, event->cft, event->eventfd);

	/* Notify userspace the event is going away */
	eventfd_signal(event->eventfd, 1);

	eventfd_ctx_put(event->eventfd);
	kfree(event);
	/* drop the ref taken when the event was registered */
	css_put(css);
}
3974
3975
3976
3977
3978
3979
/*
 * Gets called on POLLHUP on eventfd when user closes it.
 *
 * Called with wqh->lock held and interrupts disabled.
 */
static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
		int sync, void *key)
{
	struct cgroup_event *event = container_of(wait,
			struct cgroup_event, wait);
	struct cgroup *cgrp = event->css->cgroup;
	unsigned long flags = (unsigned long)key;

	if (flags & POLLHUP) {
		/*
		 * If the event has been detached at cgroup removal, we
		 * can simply return knowing the other side will cleanup
		 * for us.  We can't race against event removal here
		 * because event_list_lock is taken in both paths, and
		 * list_empty() tells us whether removal already claimed
		 * this event.
		 */
		spin_lock(&cgrp->event_list_lock);
		if (!list_empty(&event->list)) {
			list_del_init(&event->list);
			/*
			 * We are in atomic context, but cgroup_event_remove()
			 * may sleep, so we have to defer it to workqueue.
			 */
			schedule_work(&event->remove);
		}
		spin_unlock(&cgrp->event_list_lock);
	}

	return 0;
}
4012
/*
 * poll_table callback: remember the eventfd's wait queue head and add
 * our wait entry to it so cgroup_event_wake() sees POLLHUP.
 */
static void cgroup_event_ptable_queue_proc(struct file *file,
		wait_queue_head_t *wqh, poll_table *pt)
{
	struct cgroup_event *event = container_of(pt,
			struct cgroup_event, pt);

	event->wqh = wqh;
	add_wait_queue(wqh, &event->wait);
}
4022
4023
4024
4025
4026
4027
4028
/*
 * Parse input and register new cgroup event handler.
 *
 * Input must be in format '<event_fd> <control_fd> <args>'.
 * Interpretation of args is defined by control file implementation.
 */
static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
				      struct cftype *cft, const char *buffer)
{
	struct cgroup *cgrp = dummy_css->cgroup;
	struct cgroup_event *event;
	struct cgroup_subsys_state *cfile_css;
	unsigned int efd, cfd;
	struct fd efile;
	struct fd cfile;
	char *endp;
	int ret;

	efd = simple_strtoul(buffer, &endp, 10);
	if (*endp != ' ')
		return -EINVAL;
	buffer = endp + 1;

	cfd = simple_strtoul(buffer, &endp, 10);
	if ((*endp != ' ') && (*endp != '\0'))
		return -EINVAL;
	buffer = endp + 1;

	event = kzalloc(sizeof(*event), GFP_KERNEL);
	if (!event)
		return -ENOMEM;

	INIT_LIST_HEAD(&event->list);
	init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
	init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
	INIT_WORK(&event->remove, cgroup_event_remove);

	efile = fdget(efd);
	if (!efile.file) {
		ret = -EBADF;
		goto out_kfree;
	}

	event->eventfd = eventfd_ctx_fileget(efile.file);
	if (IS_ERR(event->eventfd)) {
		ret = PTR_ERR(event->eventfd);
		goto out_put_efile;
	}

	cfile = fdget(cfd);
	if (!cfile.file) {
		ret = -EBADF;
		goto out_put_eventfd;
	}

	/* the process need read permission on control file */
	/* AV: shouldn't we check that it's been opened for read instead? */
	ret = inode_permission(file_inode(cfile.file), MAY_READ);
	if (ret < 0)
		goto out_put_cfile;

	event->cft = __file_cft(cfile.file);
	if (IS_ERR(event->cft)) {
		ret = PTR_ERR(event->cft);
		goto out_put_cfile;
	}

	/* only subsystem files may be monitored */
	if (!event->cft->ss) {
		ret = -EBADF;
		goto out_put_cfile;
	}

	/*
	 * Determine the css of @cfile, verify it belongs to the same
	 * cgroup as cgroup.event_control, and associate @event with it.
	 * The css is pinned with css_tryget() so it can't go away while
	 * the event is armed.
	 */
	rcu_read_lock();

	ret = -EINVAL;
	event->css = cgroup_css(cgrp, event->cft->ss);
	cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
	if (event->css && event->css == cfile_css && css_tryget(event->css))
		ret = 0;

	rcu_read_unlock();
	if (ret)
		goto out_put_cfile;

	if (!event->cft->register_event || !event->cft->unregister_event) {
		ret = -EINVAL;
		goto out_put_css;
	}

	ret = event->cft->register_event(event->css, event->cft,
			event->eventfd, buffer);
	if (ret)
		goto out_put_css;

	/* hook the wait entry into the eventfd's wait queue */
	efile.file->f_op->poll(efile.file, &event->pt);

	spin_lock(&cgrp->event_list_lock);
	list_add(&event->list, &cgrp->event_list);
	spin_unlock(&cgrp->event_list_lock);

	fdput(cfile);
	fdput(efile);

	return 0;

out_put_css:
	css_put(event->css);
out_put_cfile:
	fdput(cfile);
out_put_eventfd:
	eventfd_ctx_put(event->eventfd);
out_put_efile:
	fdput(efile);
out_kfree:
	kfree(event);

	return ret;
}
4147
/* "cgroup.clone_children" read handler: report the flag as 0/1 */
static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
				      struct cftype *cft)
{
	return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
}

/* "cgroup.clone_children" write handler: set/clear the flag */
static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
				       struct cftype *cft, u64 val)
{
	if (val)
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
	else
		clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
	return 0;
}
4163
/*
 * Control files created in every cgroup directory, independent of which
 * subsystems are attached.  Entries flagged CFTYPE_INSANE are hidden
 * under sane_behavior; CFTYPE_ONLY_ON_ROOT entries appear only in the
 * hierarchy root.
 */
static struct cftype cgroup_base_files[] = {
	{
		.name = "cgroup.procs",
		.open = cgroup_procs_open,
		.write_u64 = cgroup_procs_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		.name = "cgroup.event_control",
		.write_string = cgroup_write_event_control,
		.mode = S_IWUGO,
	},
	{
		.name = "cgroup.clone_children",
		.flags = CFTYPE_INSANE,
		.read_u64 = cgroup_clone_children_read,
		.write_u64 = cgroup_clone_children_write,
	},
	{
		.name = "cgroup.sane_behavior",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.read_seq_string = cgroup_sane_behavior_show,
	},

	/*
	 * Historical crazy stuff.  These don't have "cgroup."  prefix and
	 * don't exist if sane_behavior.
	 */
	{
		.name = "tasks",
		.flags = CFTYPE_INSANE,		/* use "procs" instead */
		.open = cgroup_tasks_open,
		.write_u64 = cgroup_tasks_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		.name = "notify_on_release",
		.flags = CFTYPE_INSANE,
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
	},
	{
		.name = "release_agent",
		.flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT,
		.read_seq_string = cgroup_release_agent_show,
		.write_string = cgroup_release_agent_write,
		.max_write_len = PATH_MAX,
	},
	{ }	/* terminate */
};
4217
4218
4219
4220
4221
4222
4223
4224
/**
 * cgroup_populate_dir - create subsys files in a cgroup directory
 * @cgrp: target cgroup
 * @subsys_mask: mask of the subsystem ids whose files should be added
 *
 * On failure, no file is added and the partially-created files are
 * removed via cgroup_clear_dir().
 */
static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
{
	struct cgroup_subsys *ss;
	int i, ret = 0;

	/* process cftsets of each subsystem */
	for_each_subsys(ss, i) {
		struct cftype_set *set;

		if (!test_bit(i, &subsys_mask))
			continue;

		list_for_each_entry(set, &ss->cftsets, node) {
			ret = cgroup_addrm_files(cgrp, set->cfts, true);
			if (ret < 0)
				goto err;
		}
	}

	/* This cgroup is ready now */
	for_each_root_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
		struct css_id *id = rcu_dereference_protected(css->id, true);

		/*
		 * Update id->css pointer and make this css visible from
		 * CSS ID functions.  This pointer will be dereferenced
		 * from RCU-read-side without locks.
		 */
		if (id)
			rcu_assign_pointer(id->css, css);
	}

	return 0;
err:
	cgroup_clear_dir(cgrp, subsys_mask);
	return ret;
}
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
/*
 * Final css teardown, run from the workqueue (see css_free_rcu_fn()):
 * drop the parent css ref, call the subsystem's free hook, and release
 * the cgroup dentry pinning this css.
 */
static void css_free_work_fn(struct work_struct *work)
{
	struct cgroup_subsys_state *css =
		container_of(work, struct cgroup_subsys_state, destroy_work);
	struct cgroup *cgrp = css->cgroup;

	if (css->parent)
		css_put(css->parent);

	css->ss->css_free(css);
	cgroup_dput(cgrp);
}
4298
/* RCU callback after the last css ref is gone: punt freeing to process
 * context. */
static void css_free_rcu_fn(struct rcu_head *rcu_head)
{
	struct cgroup_subsys_state *css =
		container_of(rcu_head, struct cgroup_subsys_state, rcu_head);

	/*
	 * css holds an extra ref to @cgrp->dentry which is put on the
	 * last css_put().  dput() requires process context which we
	 * don't have here, so bounce the work item to a workqueue.
	 */
	INIT_WORK(&css->destroy_work, css_free_work_fn);
	schedule_work(&css->destroy_work);
}

/* percpu_ref release callback: start the RCU-delayed free of the css */
static void css_release(struct percpu_ref *ref)
{
	struct cgroup_subsys_state *css =
		container_of(ref, struct cgroup_subsys_state, refcnt);

	call_rcu(&css->rcu_head, css_free_rcu_fn);
}
4319
/*
 * Initialize a freshly-allocated css for @ss on @cgrp.  The css must not
 * already be installed on the cgroup (checked by the BUG_ON); the
 * caller makes it visible later via online_css().
 */
static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
		     struct cgroup *cgrp)
{
	css->cgroup = cgrp;
	css->ss = ss;
	css->flags = 0;
	css->id = NULL;

	if (cgrp->parent)
		css->parent = cgroup_css(cgrp->parent, ss);
	else
		/* the root css has no parent and is marked CSS_ROOT */
		css->flags |= CSS_ROOT;

	BUG_ON(cgroup_css(cgrp, ss));
}
4335
4336
/* invoke ->css_online() on a new @css and mark it online if successful */
static int online_css(struct cgroup_subsys_state *css)
{
	struct cgroup_subsys *ss = css->ss;
	int ret = 0;

	lockdep_assert_held(&cgroup_mutex);

	if (ss->css_online)
		ret = ss->css_online(css);
	if (!ret) {
		css->flags |= CSS_ONLINE;
		css->cgroup->nr_css++;
		/* publish the css; readers may now find it via RCU */
		rcu_assign_pointer(css->cgroup->subsys[ss->subsys_id], css);
	}
	return ret;
}
4353
4354
/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
static void offline_css(struct cgroup_subsys_state *css)
{
	struct cgroup_subsys *ss = css->ss;

	lockdep_assert_held(&cgroup_mutex);

	if (!(css->flags & CSS_ONLINE))
		return;

	if (ss->css_offline)
		ss->css_offline(css);

	css->flags &= ~CSS_ONLINE;
	css->cgroup->nr_css--;
	/*
	 * NOTE(review): the subsys pointer is re-initialized to @css here
	 * rather than cleared to NULL - presumably so the (now offline)
	 * css stays reachable until it is freed; confirm against the css
	 * lifecycle rules before changing this.
	 */
	RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css);
}
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4381 umode_t mode)
4382{
4383 struct cgroup_subsys_state *css_ar[CGROUP_SUBSYS_COUNT] = { };
4384 struct cgroup *cgrp;
4385 struct cgroup_name *name;
4386 struct cgroupfs_root *root = parent->root;
4387 int err = 0;
4388 struct cgroup_subsys *ss;
4389 struct super_block *sb = root->sb;
4390
4391
4392 cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
4393 if (!cgrp)
4394 return -ENOMEM;
4395
4396 name = cgroup_alloc_name(dentry);
4397 if (!name)
4398 goto err_free_cgrp;
4399 rcu_assign_pointer(cgrp->name, name);
4400
4401
4402
4403
4404
4405 cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
4406 if (cgrp->id < 0)
4407 goto err_free_name;
4408
4409
4410
4411
4412
4413
4414
4415
4416 if (!cgroup_lock_live_group(parent)) {
4417 err = -ENODEV;
4418 goto err_free_id;
4419 }
4420
4421
4422
4423
4424
4425
4426 atomic_inc(&sb->s_active);
4427
4428 init_cgroup_housekeeping(cgrp);
4429
4430 dentry->d_fsdata = cgrp;
4431 cgrp->dentry = dentry;
4432
4433 cgrp->parent = parent;
4434 cgrp->dummy_css.parent = &parent->dummy_css;
4435 cgrp->root = parent->root;
4436
4437 if (notify_on_release(parent))
4438 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
4439
4440 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
4441 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
4442
4443 for_each_root_subsys(root, ss) {
4444 struct cgroup_subsys_state *css;
4445
4446 css = ss->css_alloc(cgroup_css(parent, ss));
4447 if (IS_ERR(css)) {
4448 err = PTR_ERR(css);
4449 goto err_free_all;
4450 }
4451 css_ar[ss->subsys_id] = css;
4452
4453 err = percpu_ref_init(&css->refcnt, css_release);
4454 if (err)
4455 goto err_free_all;
4456
4457 init_css(css, ss, cgrp);
4458
4459 if (ss->use_id) {
4460 err = alloc_css_id(css);
4461 if (err)
4462 goto err_free_all;
4463 }
4464 }
4465
4466
4467
4468
4469
4470
4471 err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
4472 if (err < 0)
4473 goto err_free_all;
4474 lockdep_assert_held(&dentry->d_inode->i_mutex);
4475
4476 cgrp->serial_nr = cgroup_serial_nr_next++;
4477
4478
4479 list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
4480 root->number_of_cgroups++;
4481
4482
4483 for_each_root_subsys(root, ss) {
4484 struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
4485
4486 dget(dentry);
4487 css_get(css->parent);
4488 }
4489
4490
4491 dget(parent->dentry);
4492
4493
4494 for_each_root_subsys(root, ss) {
4495 struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
4496
4497 err = online_css(css);
4498 if (err)
4499 goto err_destroy;
4500
4501 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
4502 parent->parent) {
4503 pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
4504 current->comm, current->pid, ss->name);
4505 if (!strcmp(ss->name, "memory"))
4506 pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
4507 ss->warned_broken_hierarchy = true;
4508 }
4509 }
4510
4511 idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
4512
4513 err = cgroup_addrm_files(cgrp, cgroup_base_files, true);
4514 if (err)
4515 goto err_destroy;
4516
4517 err = cgroup_populate_dir(cgrp, root->subsys_mask);
4518 if (err)
4519 goto err_destroy;
4520
4521 mutex_unlock(&cgroup_mutex);
4522 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
4523
4524 return 0;
4525
4526err_free_all:
4527 for_each_root_subsys(root, ss) {
4528 struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
4529
4530 if (css) {
4531 percpu_ref_cancel_init(&css->refcnt);
4532 ss->css_free(css);
4533 }
4534 }
4535 mutex_unlock(&cgroup_mutex);
4536
4537 deactivate_super(sb);
4538err_free_id:
4539 idr_remove(&root->cgroup_idr, cgrp->id);
4540err_free_name:
4541 kfree(rcu_dereference_raw(cgrp->name));
4542err_free_cgrp:
4543 kfree(cgrp);
4544 return err;
4545
4546err_destroy:
4547 cgroup_destroy_locked(cgrp);
4548 mutex_unlock(&cgroup_mutex);
4549 mutex_unlock(&dentry->d_inode->i_mutex);
4550 return err;
4551}
4552
/* VFS ->mkdir() entry point: create a child cgroup under @dentry's parent */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct cgroup *c_parent = dentry->d_parent->d_fsdata;

	/* the vfs holds inode->i_mutex already */
	return cgroup_create(c_parent, dentry, mode | S_IFDIR);
}
4560
4561
4562
4563
4564
/*
 * This is called when the refcnt of a css is confirmed to be killed.
 * css_tryget() is now guaranteed to fail.  Runs from the workqueue
 * (bounced there by css_killed_ref_fn()).
 */
static void css_killed_work_fn(struct work_struct *work)
{
	struct cgroup_subsys_state *css =
		container_of(work, struct cgroup_subsys_state, destroy_work);
	struct cgroup *cgrp = css->cgroup;

	mutex_lock(&cgroup_mutex);

	/*
	 * css_tryget() is guaranteed to fail now.  Tell subsystems to
	 * initiate destruction.
	 */
	offline_css(css);

	/*
	 * If @cgrp is marked dead, it's waiting for refs of all css's to
	 * be disabled before proceeding to the second phase of cgroup
	 * destruction.  If we are the last one, kick it off.
	 */
	if (!cgrp->nr_css && cgroup_is_dead(cgrp))
		cgroup_destroy_css_killed(cgrp);

	mutex_unlock(&cgroup_mutex);

	/*
	 * Put the css refs from kill_css().  Each css holds an extra
	 * reference to the cgroup's dentry and cgroup removal proceeds
	 * regardless of css refs.  On the last put of each css, whenever
	 * that may be, the extra dentry ref is put so that dentry
	 * destruction happens only after all css's are released.
	 */
	css_put(css);
}
4598
4599
/* css kill confirmation processing requires process context, bounce */
static void css_killed_ref_fn(struct percpu_ref *ref)
{
	struct cgroup_subsys_state *css =
		container_of(ref, struct cgroup_subsys_state, refcnt);

	INIT_WORK(&css->destroy_work, css_killed_work_fn);
	schedule_work(&css->destroy_work);
}
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
/**
 * kill_css - destroy a css
 * @css: css to destroy
 *
 * This function initiates destruction of @css by removing cgroup
 * interface files and putting its base reference.  ->css_offline() will
 * be invoked asynchronously once css_tryget() is guaranteed to fail and
 * when the reference count reaches zero, @css will be released.
 */
static void kill_css(struct cgroup_subsys_state *css)
{
	cgroup_clear_dir(css->cgroup, 1 << css->ss->subsys_id);

	/*
	 * Killing would put the base ref, but we need to keep it alive
	 * until after ->css_offline().
	 */
	css_get(css);

	/*
	 * cgroup core guarantees that, by the time ->css_offline() is
	 * invoked, no new css reference will be given out via
	 * css_tryget().  We can't simply call percpu_ref_kill() and
	 * proceed to offlining css's because percpu_ref_kill() doesn't
	 * guarantee that the ref is seen as killed on all CPUs on return.
	 *
	 * Use percpu_ref_kill_and_confirm() to get notifications as each
	 * css is confirmed to be seen as killed on all CPUs.
	 */
	percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
}
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
/**
 * cgroup_destroy_locked - the first stage of cgroup destruction
 * @cgrp: cgroup to be destroyed
 *
 * css's make use of percpu refcnts whose killing latency shouldn't be
 * exposed to userland and are RCU protected, so destruction is split
 * into two steps: this function kills all css refs and marks @cgrp
 * dead; the second stage (cgroup_destroy_css_killed()) runs once all
 * css's are confirmed offline.  Returns -EBUSY if @cgrp still has
 * tasks or live children.  Called with cgroup_mutex and the cgroup
 * directory's i_mutex held.
 */
static int cgroup_destroy_locked(struct cgroup *cgrp)
	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
{
	struct dentry *d = cgrp->dentry;
	struct cgroup_event *event, *tmp;
	struct cgroup_subsys *ss;
	struct cgroup *child;
	bool empty;

	lockdep_assert_held(&d->d_inode->i_mutex);
	lockdep_assert_held(&cgroup_mutex);

	/*
	 * css_set_lock synchronizes access to ->cset_links and prevents
	 * @cgrp from being removed while the links are being iterated.
	 */
	read_lock(&css_set_lock);
	empty = list_empty(&cgrp->cset_links);
	read_unlock(&css_set_lock);
	if (!empty)
		return -EBUSY;

	/*
	 * Make sure there's no live children.  We can't test ->children
	 * emptiness as dead children linger on it while being destroyed;
	 * otherwise, "rmdir parent/child parent" may fail with -EBUSY.
	 */
	empty = true;
	rcu_read_lock();
	list_for_each_entry_rcu(child, &cgrp->children, sibling) {
		empty = cgroup_is_dead(child);
		if (!empty)
			break;
	}
	rcu_read_unlock();
	if (!empty)
		return -EBUSY;

	/*
	 * Initiate massacre of all css's.  cgroup_destroy_css_killed()
	 * will be invoked to perform the rest of destruction once the
	 * percpu refs of all css's are confirmed to be killed.
	 */
	for_each_root_subsys(cgrp->root, ss)
		kill_css(cgroup_css(cgrp, ss));

	/*
	 * Mark @cgrp dead.  This prevents further task migration and
	 * child creation by disabling cgroup_lock_live_group().  Note
	 * that CGRP_DEAD assertion is depended upon by css_next_child()
	 * to resume iteration after dropping RCU read lock.  See
	 * css_next_child() for details.
	 */
	set_bit(CGRP_DEAD, &cgrp->flags);

	/* CGRP_DEAD is set, remove from ->release_list for the last time */
	raw_spin_lock(&release_list_lock);
	if (!list_empty(&cgrp->release_list))
		list_del_init(&cgrp->release_list);
	raw_spin_unlock(&release_list_lock);

	/*
	 * If @cgrp has css's attached, the second stage of cgroup
	 * destruction is kicked off from css_killed_work_fn() after the
	 * refs of all attached css's are killed.  If @cgrp doesn't have
	 * any css, we kick it off here.
	 */
	if (!cgrp->nr_css)
		cgroup_destroy_css_killed(cgrp);

	/*
	 * Clear the base files and remove @cgrp directory.  The removal
	 * puts the base ref but we aren't quite done with @cgrp yet, so
	 * hold onto it.
	 */
	cgroup_addrm_files(cgrp, cgroup_base_files, false);
	dget(d);
	cgroup_d_remove_dir(d);

	/*
	 * Unregister events and notify userspace.  Removing a cgroup
	 * with live events will fail cleanly because the events detach
	 * themselves asynchronously via the workqueue.
	 */
	spin_lock(&cgrp->event_list_lock);
	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
		list_del_init(&event->list);
		schedule_work(&event->remove);
	}
	spin_unlock(&cgrp->event_list_lock);

	return 0;
};
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
/**
 * cgroup_destroy_css_killed - the second step of cgroup destruction
 * @cgrp: the cgroup whose csses have just finished offlining
 *
 * This function is invoked from a work item for a cgroup which is being
 * destroyed after all css's are offlined and performs the rest of
 * destruction.  This is the second step of destruction described in the
 * comment above cgroup_destroy_locked().
 */
static void cgroup_destroy_css_killed(struct cgroup *cgrp)
{
	struct cgroup *parent = cgrp->parent;
	struct dentry *d = cgrp->dentry;

	lockdep_assert_held(&cgroup_mutex);

	/* delete this cgroup from parent->children */
	list_del_rcu(&cgrp->sibling);

	/*
	 * We should remove the cgroup object from idr before its grace
	 * period starts, so we won't be looking up a cgroup while the
	 * cgroup is being freed.
	 */
	idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
	cgrp->id = -1;

	dput(d);

	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);
}
4791
4792static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
4793{
4794 int ret;
4795
4796 mutex_lock(&cgroup_mutex);
4797 ret = cgroup_destroy_locked(dentry->d_fsdata);
4798 mutex_unlock(&cgroup_mutex);
4799
4800 return ret;
4801}
4802
4803static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
4804{
4805 INIT_LIST_HEAD(&ss->cftsets);
4806
4807
4808
4809
4810
4811 if (ss->base_cftypes) {
4812 struct cftype *cft;
4813
4814 for (cft = ss->base_cftypes; cft->name[0] != '\0'; cft++)
4815 cft->ss = ss;
4816
4817 ss->base_cftset.cfts = ss->base_cftypes;
4818 list_add_tail(&ss->base_cftset.node, &ss->cftsets);
4819 }
4820}
4821
/*
 * cgroup_init_subsys - boot-time initialization of a builtin subsystem
 *
 * Attaches @ss to the dummy root, allocates and onlines its root css,
 * and hooks the css into init_css_set.  Only usable for builtin
 * subsystems (asserted at the end) and only at boot, before any task
 * other than init exists.
 */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	mutex_lock(&cgroup_mutex);

	/* init base cftset */
	cgroup_init_cftsets(ss);

	/* create the root css for this subsystem on the dummy hierarchy */
	list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
	ss->root = &cgroup_dummy_root;
	css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
	/* we don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_css(css, ss, cgroup_dummy_top);

	/*
	 * init_css_set is used before the full cgroup machinery is up;
	 * make the new css visible to early tasks through it.
	 */
	init_css_set.subsys[ss->subsys_id] = css;

	need_forkexit_callback |= ss->fork || ss->exit;

	/*
	 * At boot only init should exist, so no css_set<->task linkage
	 * needs fixing up here; assert that assumption.
	 */
	BUG_ON(!list_empty(&init_task.tasks));

	BUG_ON(online_css(css));

	mutex_unlock(&cgroup_mutex);

	/* modular subsystems must go through cgroup_load_subsys() instead */
	BUG_ON(ss->module);
}
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
/**
 * cgroup_load_subsys - load and register a modular subsystem at runtime
 * @ss: the subsystem to load
 *
 * Hooks a modular subsystem into the dummy hierarchy: allocates its
 * root css, attaches it to every existing css_set (rehashing each,
 * since subsys[] feeds the hash key), and brings the css online.
 * Returns 0 on success or a negative errno; failures after css
 * allocation are rolled back via cgroup_unload_subsys().
 */
int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;
	int i, ret;
	struct hlist_node *tmp;
	struct css_set *cset;
	unsigned long key;

	/* check name and function validity */
	if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
	    ss->css_alloc == NULL || ss->css_free == NULL)
		return -EINVAL;

	/*
	 * Fork/exit callbacks are only honoured for builtin subsystems
	 * (see cgroup_post_fork() / cgroup_exit()), so refuse modular
	 * subsystems that declare them.
	 */
	if (ss->fork || ss->exit)
		return -EINVAL;

	/*
	 * An optionally-modular subsystem that was built directly into
	 * the kernel was already registered at boot; nothing to do.
	 */
	if (ss->module == NULL) {
		/* a sanity check */
		BUG_ON(cgroup_subsys[ss->subsys_id] != ss);
		return 0;
	}

	/* init base cftset */
	cgroup_init_cftsets(ss);

	mutex_lock(&cgroup_mutex);
	cgroup_subsys[ss->subsys_id] = ss;

	/*
	 * Allocate the root css for the dummy top cgroup.  Note that
	 * css_alloc() runs before ss->root is assigned below.
	 */
	css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
	if (IS_ERR(css)) {
		/* failure case - deassign the cgroup_subsys[] slot */
		cgroup_subsys[ss->subsys_id] = NULL;
		mutex_unlock(&cgroup_mutex);
		return PTR_ERR(css);
	}

	list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
	ss->root = &cgroup_dummy_root;

	/* our new subsystem will be attached to the dummy hierarchy */
	init_css(css, ss, cgroup_dummy_top);

	if (ss->use_id) {
		ret = cgroup_init_idr(ss, css);
		if (ret)
			goto err_unload;
	}

	/*
	 * Entangle the new css into every existing css_set.  Each
	 * css_set must be unhashed, given the new subsys pointer, and
	 * rehashed because css_set_hash() is computed over subsys[].
	 */
	write_lock(&css_set_lock);
	hash_for_each_safe(css_set_table, i, tmp, cset, hlist) {
		/* skip entries that already carry the new css */
		if (cset->subsys[ss->subsys_id])
			continue;
		/* remove existing entry */
		hash_del(&cset->hlist);
		/* set new value */
		cset->subsys[ss->subsys_id] = css;
		/* recompute hash and restore entry */
		key = css_set_hash(cset->subsys);
		hash_add(css_set_table, &cset->hlist, key);
	}
	write_unlock(&css_set_lock);

	ret = online_css(css);
	if (ret)
		goto err_unload;

	/* success! */
	mutex_unlock(&cgroup_mutex);
	return 0;

err_unload:
	mutex_unlock(&cgroup_mutex);
	/* roll back everything done so far */
	cgroup_unload_subsys(ss);
	return ret;
}
4972EXPORT_SYMBOL_GPL(cgroup_load_subsys);
4973
4974
4975
4976
4977
4978
4979
4980
4981
/**
 * cgroup_unload_subsys - unload a modular subsystem
 * @ss: the subsystem to unload
 *
 * Reverses cgroup_load_subsys(): offlines and frees the css attached
 * to the dummy top cgroup and unhooks @ss from every css_set.  Must
 * only be called on modular subsystems still bound to the dummy root
 * (both asserted below).
 */
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
	struct cgrp_cset_link *link;

	BUG_ON(ss->module == NULL);

	/*
	 * A subsystem that is mounted on a real hierarchy is in use;
	 * presumably module refcounting elsewhere prevents that state
	 * from being reached here — TODO confirm.  We only require that
	 * @ss is still on the dummy root.
	 */
	BUG_ON(ss->root != &cgroup_dummy_root);

	mutex_lock(&cgroup_mutex);

	offline_css(cgroup_css(cgroup_dummy_top, ss));

	if (ss->use_id)
		idr_destroy(&ss->idr);

	/* deassign the subsys_id */
	cgroup_subsys[ss->subsys_id] = NULL;

	/* remove subsystem from the dummy root's list of subsystems */
	list_del_init(&ss->sibling);

	/*
	 * Disentangle the css from every css_set attached to the dummy
	 * top.  Each css_set is rehashed because clearing the subsys
	 * pointer changes its css_set_hash() key.
	 */
	write_lock(&css_set_lock);
	list_for_each_entry(link, &cgroup_dummy_top->cset_links, cset_link) {
		struct css_set *cset = link->cset;
		unsigned long key;

		hash_del(&cset->hlist);
		cset->subsys[ss->subsys_id] = NULL;
		key = css_set_hash(cset->subsys);
		hash_add(css_set_table, &cset->hlist, key);
	}
	write_unlock(&css_set_lock);

	/*
	 * Free the css before clearing the pointer: css_free() is
	 * handed the css looked up through the dummy top, so the
	 * pointer must still be valid at that point.
	 */
	ss->css_free(cgroup_css(cgroup_dummy_top, ss));
	RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL);

	mutex_unlock(&cgroup_mutex);
}
5036EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
5037
5038
5039
5040
5041
5042
5043
/**
 * cgroup_init_early - cgroup initialization at system boot
 *
 * Sets up init_css_set, the dummy root, and the link between them,
 * then initializes every builtin subsystem that requested early init.
 * Always returns 0.
 */
int __init cgroup_init_early(void)
{
	struct cgroup_subsys *ss;
	int i;

	atomic_set(&init_css_set.refcount, 1);
	INIT_LIST_HEAD(&init_css_set.cgrp_links);
	INIT_LIST_HEAD(&init_css_set.tasks);
	INIT_HLIST_NODE(&init_css_set.hlist);
	css_set_count = 1;
	init_cgroup_root(&cgroup_dummy_root);
	cgroup_root_count = 1;
	RCU_INIT_POINTER(init_task.cgroups, &init_css_set);

	/* link init_css_set to the dummy top cgroup */
	init_cgrp_cset_link.cset = &init_css_set;
	init_cgrp_cset_link.cgrp = cgroup_dummy_top;
	list_add(&init_cgrp_cset_link.cset_link, &cgroup_dummy_top->cset_links);
	list_add(&init_cgrp_cset_link.cgrp_link, &init_css_set.cgrp_links);

	/* at bootup time modular subsystems can't exist yet */
	for_each_builtin_subsys(ss, i) {
		/* sanity-check each subsystem's static declaration */
		BUG_ON(!ss->name);
		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
		BUG_ON(!ss->css_alloc);
		BUG_ON(!ss->css_free);
		if (ss->subsys_id != i) {
			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
			       ss->name, ss->subsys_id);
			BUG();
		}

		if (ss->early_init)
			cgroup_init_subsys(ss);
	}
	return 0;
}
5080
5081
5082
5083
5084
5085
5086
/**
 * cgroup_init - cgroup initialization
 *
 * Initializes the subsystems that didn't request early init, hashes
 * init_css_set, assigns ids to the dummy hierarchy, and registers the
 * cgroup filesystem and the /proc/cgroups file.  Returns 0 on success
 * or a negative errno.
 */
int __init cgroup_init(void)
{
	struct cgroup_subsys *ss;
	unsigned long key;
	int i, err;

	err = bdi_init(&cgroup_backing_dev_info);
	if (err)
		return err;

	for_each_builtin_subsys(ss, i) {
		if (!ss->early_init)
			cgroup_init_subsys(ss);
		if (ss->use_id)
			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
	}

	/* allocate ids for the dummy hierarchy */
	mutex_lock(&cgroup_mutex);
	mutex_lock(&cgroup_root_mutex);

	/* add init_css_set to the hash table */
	key = css_set_hash(init_css_set.subsys);
	hash_add(css_set_table, &init_css_set.hlist, key);

	BUG_ON(cgroup_init_root_id(&cgroup_dummy_root, 0, 1));

	/* dummy top must get cgroup id 0 (range [0, 1)) */
	err = idr_alloc(&cgroup_dummy_root.cgroup_idr, cgroup_dummy_top,
			0, 1, GFP_KERNEL);
	BUG_ON(err < 0);

	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);

	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
	if (!cgroup_kobj) {
		err = -ENOMEM;
		goto out;
	}

	err = register_filesystem(&cgroup_fs_type);
	if (err < 0) {
		kobject_put(cgroup_kobj);
		goto out;
	}

	proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

out:
	/* err is 0 here on the success path (idr_alloc returned id 0) */
	if (err)
		bdi_destroy(&cgroup_backing_dev_info);

	return err;
}
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
/*
 * proc_cgroup_show - show a task's cgroup membership (/proc/<pid>/cgroup)
 *
 * Prints one line per active hierarchy in the form
 * "<hierarchy-id>:<subsys list>[,name=<name>]:<cgroup path>".
 * m->private carries the struct pid of the task being shown.
 */
int proc_cgroup_show(struct seq_file *m, void *v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	int retval;
	struct cgroupfs_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		goto out;

	retval = -ESRCH;
	pid = m->private;
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk)
		goto out_free;

	retval = 0;

	mutex_lock(&cgroup_mutex);

	for_each_active_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int count = 0;

		seq_printf(m, "%d:", root->hierarchy_id);
		for_each_root_subsys(root, ss)
			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
		/* a named hierarchy also shows "name=<name>" */
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		seq_putc(m, ':');
		cgrp = task_cgroup_from_root(tsk, root);
		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
		if (retval < 0)
			goto out_unlock;
		seq_puts(m, buf);
		seq_putc(m, '\n');
	}

out_unlock:
	mutex_unlock(&cgroup_mutex);
	put_task_struct(tsk);
out_free:
	kfree(buf);
out:
	return retval;
}
5206
5207
5208static int proc_cgroupstats_show(struct seq_file *m, void *v)
5209{
5210 struct cgroup_subsys *ss;
5211 int i;
5212
5213 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
5214
5215
5216
5217
5218
5219 mutex_lock(&cgroup_mutex);
5220
5221 for_each_subsys(ss, i)
5222 seq_printf(m, "%s\t%d\t%d\t%d\n",
5223 ss->name, ss->root->hierarchy_id,
5224 ss->root->number_of_cgroups, !ss->disabled);
5225
5226 mutex_unlock(&cgroup_mutex);
5227 return 0;
5228}
5229
/* seq_file open callback for /proc/cgroups */
static int cgroupstats_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_cgroupstats_show, NULL);
}
5234
/* file_operations for /proc/cgroups, backed by single_open seq_file */
static const struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
5241
5242
5243
5244
5245
5246
5247
5248
5249
5250
5251
5252
5253
5254
5255
5256
5257
/**
 * cgroup_fork - attach a newly forked task to its parent's cgroups
 * @child: task_struct of the child being forked
 *
 * The child inherits the parent's css_set.  The reference is taken and
 * the pointer copied under the parent's task_lock so the two stay in
 * sync.  cg_list is initialized empty; linkage onto the css_set's task
 * list happens later in cgroup_post_fork().
 */
void cgroup_fork(struct task_struct *child)
{
	task_lock(current);
	get_css_set(task_css_set(current));
	child->cgroups = current->cgroups;
	task_unlock(current);
	INIT_LIST_HEAD(&child->cg_list);
}
5266
5267
5268
5269
5270
5271
5272
5273
5274
5275
5276
/**
 * cgroup_post_fork - called on a new task after adding it to the task list
 * @child: the task in question
 *
 * Links @child onto its css_set's task list (when css_set links are in
 * use) and invokes the fork callback of each builtin subsystem that
 * has one.
 */
void cgroup_post_fork(struct task_struct *child)
{
	struct cgroup_subsys *ss;
	int i;

	/*
	 * Only link @child when task<->css_set linkage is enabled.  The
	 * list_empty() check under task_lock guards against @child
	 * having been linked already — NOTE(review): presumably by a
	 * concurrent migration path not visible here; confirm.
	 */
	if (use_task_css_set_links) {
		write_lock(&css_set_lock);
		task_lock(child);
		if (list_empty(&child->cg_list))
			list_add(&child->cg_list, &task_css_set(child)->tasks);
		task_unlock(child);
		write_unlock(&css_set_lock);
	}

	/*
	 * Run ->fork() callbacks.  Only builtin subsystems may have
	 * them — cgroup_load_subsys() rejects modular subsystems with
	 * ->fork — so iterating the builtins is sufficient.
	 */
	if (need_forkexit_callback) {
		for_each_builtin_subsys(ss, i)
			if (ss->fork)
				ss->fork(child);
	}
}
5321
5322
5323
5324
5325
5326
5327
5328
5329
5330
5331
5332
5333
5334
5335
5336
5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355
5356
/**
 * cgroup_exit - detach cgroup state from an exiting task
 * @tsk: the exiting task
 * @run_callbacks: whether to run subsystem ->exit() callbacks
 *
 * Unlinks @tsk from its css_set's task list, repoints tsk->cgroups at
 * init_css_set, optionally runs each builtin subsystem's ->exit()
 * callback, and drops the task's css_set reference.
 */
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
	struct cgroup_subsys *ss;
	struct css_set *cset;
	int i;

	/*
	 * Unlink from the css_set task list if linked.  The unlocked
	 * list_empty() is a fast path; it is re-checked under
	 * css_set_lock before the actual removal.
	 */
	if (!list_empty(&tsk->cg_list)) {
		write_lock(&css_set_lock);
		if (!list_empty(&tsk->cg_list))
			list_del_init(&tsk->cg_list);
		write_unlock(&css_set_lock);
	}

	/* Reassign the task to init_css_set. */
	task_lock(tsk);
	cset = task_css_set(tsk);
	RCU_INIT_POINTER(tsk->cgroups, &init_css_set);

	if (run_callbacks && need_forkexit_callback) {
		/*
		 * Modular subsystems can't have ->exit() callbacks
		 * (cgroup_load_subsys() rejects them), so iterating the
		 * builtins is sufficient.
		 */
		for_each_builtin_subsys(ss, i) {
			if (ss->exit) {
				/* old_css: css before the reassignment above */
				struct cgroup_subsys_state *old_css = cset->subsys[i];
				struct cgroup_subsys_state *css = task_css(tsk, i);

				ss->exit(css, old_css, tsk);
			}
		}
	}
	task_unlock(tsk);

	put_css_set_taskexit(cset);
}
5398
/*
 * Queue @cgrp for release-agent notification if it has become
 * "releasable": cgroup_is_releasable() holds and it has no css_sets or
 * children left.  Dead cgroups and cgroups already queued are skipped.
 */
static void check_for_release(struct cgroup *cgrp)
{
	if (cgroup_is_releasable(cgrp) &&
	    list_empty(&cgrp->cset_links) && list_empty(&cgrp->children)) {
		/*
		 * The cgroup is currently removable.  If it's not
		 * already queued for a userspace notification, queue it
		 * now and kick the release agent work.
		 */
		int need_schedule_work = 0;

		raw_spin_lock(&release_list_lock);
		if (!cgroup_is_dead(cgrp) &&
		    list_empty(&cgrp->release_list)) {
			list_add(&cgrp->release_list, &release_list);
			need_schedule_work = 1;
		}
		raw_spin_unlock(&release_list_lock);
		if (need_schedule_work)
			schedule_work(&release_agent_work);
	}
}
5421
5422
5423
5424
5425
5426
5427
5428
5429
5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
/*
 * Notify userspace when a cgroup is released by running the hierarchy's
 * configured release agent with the cgroup's path (relative to the
 * cgroup filesystem root) as the sole argument.
 *
 * Entries are popped off release_list one at a time; release_list_lock
 * is dropped while buffers are built and while the usermode helper
 * runs, and cgroup_mutex is dropped around the (potentially slow) exec.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	raw_spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf = NULL, *agentbuf = NULL;
		struct cgroup *cgrp = list_entry(release_list.next,
						 struct cgroup,
						 release_list);
		list_del_init(&cgrp->release_list);
		raw_spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf)
			goto continue_free;
		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
			goto continue_free;
		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
		if (!agentbuf)
			goto continue_free;

		i = 0;
		argv[i++] = agentbuf;
		argv[i++] = pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal command environment */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/*
		 * Drop the lock while we invoke the usermode helper,
		 * since the exec could involve hitting disk and hence
		 * be a slow process.
		 */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		mutex_lock(&cgroup_mutex);
 continue_free:
		kfree(pathbuf);
		kfree(agentbuf);
		raw_spin_lock(&release_list_lock);
	}
	raw_spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
5493
5494static int __init cgroup_disable(char *str)
5495{
5496 struct cgroup_subsys *ss;
5497 char *token;
5498 int i;
5499
5500 while ((token = strsep(&str, ",")) != NULL) {
5501 if (!*token)
5502 continue;
5503
5504
5505
5506
5507
5508 for_each_builtin_subsys(ss, i) {
5509 if (!strcmp(token, ss->name)) {
5510 ss->disabled = 1;
5511 printk(KERN_INFO "Disabling %s control group"
5512 " subsystem\n", ss->name);
5513 break;
5514 }
5515 }
5516 }
5517 return 1;
5518}
5519__setup("cgroup_disable=", cgroup_disable);
5520
5521
5522
5523
5524
5525
5526unsigned short css_id(struct cgroup_subsys_state *css)
5527{
5528 struct css_id *cssid;
5529
5530
5531
5532
5533
5534
5535 cssid = rcu_dereference_raw(css->id);
5536
5537 if (cssid)
5538 return cssid->id;
5539 return 0;
5540}
5541EXPORT_SYMBOL_GPL(css_id);
5542
5543
5544
5545
5546
5547
5548
5549
5550
5551
5552
5553
5554
5555
5556bool css_is_ancestor(struct cgroup_subsys_state *child,
5557 const struct cgroup_subsys_state *root)
5558{
5559 struct css_id *child_id;
5560 struct css_id *root_id;
5561
5562 child_id = rcu_dereference(child->id);
5563 if (!child_id)
5564 return false;
5565 root_id = rcu_dereference(root->id);
5566 if (!root_id)
5567 return false;
5568 if (child_id->depth < root_id->depth)
5569 return false;
5570 if (child_id->stack[root_id->depth] != root_id->id)
5571 return false;
5572 return true;
5573}
5574
/*
 * free_css_id - release the css_id attached to @css
 *
 * Clears both directions of the id<->css link first so concurrent RCU
 * readers see NULL, removes the id from the subsystem idr under
 * id_lock, and frees the css_id after an RCU grace period (kfree_rcu).
 */
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
	struct css_id *id = rcu_dereference_protected(css->id, true);

	/* when called before css_id initialization, id can be NULL */
	if (!id)
		return;

	BUG_ON(!ss->use_id);

	rcu_assign_pointer(id->css, NULL);
	rcu_assign_pointer(css->id, NULL);
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, id->id);
	spin_unlock(&ss->id_lock);
	kfree_rcu(id, rcu_head);
}
5592EXPORT_SYMBOL_GPL(free_css_id);
5593
5594
5595
5596
5597
5598
/*
 * get_new_cssid - allocate a new css_id at @depth
 *
 * Allocates a zeroed css_id with room for depth + 1 ancestor ids in
 * ->stack, then assigns an id in [1, CSS_ID_MAX] from the subsystem's
 * idr.  idr_preload() lets the GFP_NOWAIT allocation under ss->id_lock
 * draw from preloaded nodes.  Returns the new css_id or an ERR_PTR.
 */
static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
{
	struct css_id *newid;
	int ret, size;

	BUG_ON(!ss->use_id);

	/* stack[] holds one entry per level, including this one */
	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
	newid = kzalloc(size, GFP_KERNEL);
	if (!newid)
		return ERR_PTR(-ENOMEM);

	idr_preload(GFP_KERNEL);
	spin_lock(&ss->id_lock);
	/* don't use 0; allocate an id in 1..CSS_ID_MAX */
	ret = idr_alloc(&ss->idr, newid, 1, CSS_ID_MAX + 1, GFP_NOWAIT);
	spin_unlock(&ss->id_lock);
	idr_preload_end();

	/* negative ret means no free id was available */
	if (ret < 0)
		goto err_out;

	newid->id = ret;
	newid->depth = depth;
	return newid;
err_out:
	kfree(newid);
	return ERR_PTR(ret);

}
5630
/*
 * cgroup_init_idr - set up css_id machinery for @ss
 *
 * Initializes the subsystem's id lock and idr, then allocates the
 * depth-0 id for @rootcss and links the two together.  Returns 0 on
 * success or a negative errno from get_new_cssid().
 */
static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
					    struct cgroup_subsys_state *rootcss)
{
	struct css_id *newid;

	spin_lock_init(&ss->id_lock);
	idr_init(&ss->idr);

	newid = get_new_cssid(ss, 0);
	if (IS_ERR(newid))
		return PTR_ERR(newid);

	/* the root id is its own (only) ancestor */
	newid->stack[0] = newid->id;
	RCU_INIT_POINTER(newid->css, rootcss);
	RCU_INIT_POINTER(rootcss->id, newid);
	return 0;
}
5648
/*
 * alloc_css_id - allocate and attach a css_id for a freshly created css
 *
 * Builds the ancestor id stack by copying the parent's stack and
 * appending the new id, then publishes it with rcu_assign_pointer() so
 * css_id()/css_is_ancestor() readers see a fully initialized id.
 */
static int alloc_css_id(struct cgroup_subsys_state *child_css)
{
	struct cgroup_subsys_state *parent_css = css_parent(child_css);
	struct css_id *child_id, *parent_id;
	int i, depth;

	parent_id = rcu_dereference_protected(parent_css->id, true);
	depth = parent_id->depth + 1;

	child_id = get_new_cssid(child_css->ss, depth);
	if (IS_ERR(child_id))
		return PTR_ERR(child_id);

	for (i = 0; i < depth; i++)
		child_id->stack[i] = parent_id->stack[i];
	child_id->stack[depth] = child_id->id;
	/*
	 * child_id->css is left NULL (kzalloc) here; presumably it is
	 * set once the css is fully set up elsewhere — TODO confirm.
	 */
	rcu_assign_pointer(child_css->id, child_id);

	return 0;
}
5673
5674
5675
5676
5677
5678
5679
5680
5681
5682struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
5683{
5684 struct css_id *cssid = NULL;
5685
5686 BUG_ON(!ss->use_id);
5687 cssid = idr_find(&ss->idr, id);
5688
5689 if (unlikely(!cssid))
5690 return NULL;
5691
5692 return rcu_dereference(cssid->css);
5693}
5694EXPORT_SYMBOL_GPL(css_lookup);
5695
5696
5697
5698
5699
5700
5701
5702
5703
5704
5705struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
5706 struct cgroup_subsys *ss)
5707{
5708 struct cgroup *cgrp;
5709
5710 WARN_ON_ONCE(!rcu_read_lock_held());
5711
5712
5713 if (!dentry->d_inode ||
5714 dentry->d_inode->i_op != &cgroup_dir_inode_operations)
5715 return ERR_PTR(-EBADF);
5716
5717 cgrp = __d_cgrp(dentry);
5718 return cgroup_css(cgrp, ss) ?: ERR_PTR(-ENOENT);
5719}
5720
5721
5722
5723
5724
5725
5726
5727
5728
/**
 * css_from_id - look up a css by cgroup id
 * @id: the cgroup id
 * @ss: the subsystem whose css is wanted
 *
 * Returns the css of the cgroup with @id on @ss's hierarchy, or NULL
 * when no such cgroup exists.  Must be called under rcu_read_lock() or
 * with cgroup_mutex held (enforced by the lockdep assert).
 */
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
{
	struct cgroup *cgrp;

	rcu_lockdep_assert(rcu_read_lock_held() ||
			   lockdep_is_held(&cgroup_mutex),
			   "css_from_id() needs proper protection");

	cgrp = idr_find(&ss->root->cgroup_idr, id);
	if (cgrp)
		return cgroup_css(cgrp, ss);
	return NULL;
}
5742
5743#ifdef CONFIG_CGROUP_DEBUG
5744static struct cgroup_subsys_state *
5745debug_css_alloc(struct cgroup_subsys_state *parent_css)
5746{
5747 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
5748
5749 if (!css)
5750 return ERR_PTR(-ENOMEM);
5751
5752 return css;
5753}
5754
/* Free the css allocated by debug_css_alloc(). */
static void debug_css_free(struct cgroup_subsys_state *css)
{
	kfree(css);
}
5759
/* "taskcount" file: number of tasks in this css's cgroup. */
static u64 debug_taskcount_read(struct cgroup_subsys_state *css,
				struct cftype *cft)
{
	return cgroup_task_count(css->cgroup);
}
5765
/* "current_css_set" file: the current task's css_set pointer value. */
static u64 current_css_set_read(struct cgroup_subsys_state *css,
				struct cftype *cft)
{
	return (u64)(unsigned long)current->cgroups;
}
5771
/* "current_css_set_refcount" file: refcount of current's css_set. */
static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
					 struct cftype *cft)
{
	u64 count;

	/* RCU keeps the css_set alive while we read its refcount */
	rcu_read_lock();
	count = atomic_read(&task_css_set(current)->refcount);
	rcu_read_unlock();
	return count;
}
5782
/*
 * "current_css_set_cg_links" file: for each cgrp_cset_link of the
 * current task's css_set, print the hierarchy id and the cgroup's
 * directory name ("?" when the cgroup has no dentry yet).
 */
static int current_css_set_cg_links_read(struct cgroup_subsys_state *css,
					 struct cftype *cft,
					 struct seq_file *seq)
{
	struct cgrp_cset_link *link;
	struct css_set *cset;

	/* css_set_lock stabilizes ->cgrp_links; RCU protects the cset */
	read_lock(&css_set_lock);
	rcu_read_lock();
	cset = rcu_dereference(current->cgroups);
	list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
		struct cgroup *c = link->cgrp;
		const char *name;

		if (c->dentry)
			name = c->dentry->d_name.name;
		else
			name = "?";
		seq_printf(seq, "Root %d group %s\n",
			   c->root->hierarchy_id, name);
	}
	rcu_read_unlock();
	read_unlock(&css_set_lock);
	return 0;
}
5808
5809#define MAX_TASKS_SHOWN_PER_CSS 25
/*
 * "cgroup_css_links" file: for each css_set linked to this cgroup,
 * print the css_set pointer and up to MAX_TASKS_SHOWN_PER_CSS of its
 * member tasks' pids (then " ...").
 */
static int cgroup_css_links_read(struct cgroup_subsys_state *css,
				 struct cftype *cft, struct seq_file *seq)
{
	struct cgrp_cset_link *link;

	/* css_set_lock stabilizes ->cset_links and each cset's task list */
	read_lock(&css_set_lock);
	list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
		struct css_set *cset = link->cset;
		struct task_struct *task;
		int count = 0;
		seq_printf(seq, "css_set %p\n", cset);
		list_for_each_entry(task, &cset->tasks, cg_list) {
			if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
				seq_puts(seq, "  ...\n");
				break;
			} else {
				seq_printf(seq, "  task %d\n",
					   task_pid_vnr(task));
			}
		}
	}
	read_unlock(&css_set_lock);
	return 0;
}
5834
/* "releasable" file: whether CGRP_RELEASABLE is set on this cgroup. */
static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return test_bit(CGRP_RELEASABLE, &css->cgroup->flags);
}
5839
/*
 * Files exported by the debug subsystem in each cgroup directory.
 * All are read-only views of internal state (task count, css_set
 * pointer/refcount/links, releasability).
 */
static struct cftype debug_files[] =  {
	{
		.name = "taskcount",
		.read_u64 = debug_taskcount_read,
	},

	{
		.name = "current_css_set",
		.read_u64 = current_css_set_read,
	},

	{
		.name = "current_css_set_refcount",
		.read_u64 = current_css_set_refcount_read,
	},

	{
		.name = "current_css_set_cg_links",
		.read_seq_string = current_css_set_cg_links_read,
	},

	{
		.name = "cgroup_css_links",
		.read_seq_string = cgroup_css_links_read,
	},

	{
		.name = "releasable",
		.read_u64 = releasable_read,
	},

	{ }	/* terminate */
};
5873
/* The "debug" subsystem: exposes internal cgroup state for debugging. */
struct cgroup_subsys debug_subsys = {
	.name = "debug",
	.css_alloc = debug_css_alloc,
	.css_free = debug_css_free,
	.subsys_id = debug_subsys_id,
	.base_cftypes = debug_files,
};
5881#endif
5882