1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/ctype.h>
31#include <linux/errno.h>
32#include <linux/fs.h>
33#include <linux/kernel.h>
34#include <linux/list.h>
35#include <linux/mm.h>
36#include <linux/mutex.h>
37#include <linux/mount.h>
38#include <linux/pagemap.h>
39#include <linux/proc_fs.h>
40#include <linux/rcupdate.h>
41#include <linux/sched.h>
42#include <linux/backing-dev.h>
43#include <linux/seq_file.h>
44#include <linux/slab.h>
45#include <linux/magic.h>
46#include <linux/spinlock.h>
47#include <linux/string.h>
48#include <linux/sort.h>
49#include <linux/kmod.h>
50#include <linux/module.h>
51#include <linux/delayacct.h>
52#include <linux/cgroupstats.h>
53#include <linux/hash.h>
54#include <linux/namei.h>
55#include <linux/pid_namespace.h>
56#include <linux/idr.h>
57#include <linux/vmalloc.h>
58#include <linux/eventfd.h>
59#include <linux/poll.h>
60
61#include <asm/atomic.h>
62
/*
 * cgroup_mutex is the master lock.  Any modification to a cgroup
 * hierarchy, root list or subsystem binding below must hold it.
 */
static DEFINE_MUTEX(cgroup_mutex);

/*
 * Generate an array of cgroup subsystem pointers from the SUBSYS()
 * entries in cgroup_subsys.h.  Entries for modular subsystems may be
 * NULL until the module registers itself — callers check for that.
 */
#define SUBSYS(_x) &_x ## _subsys,
static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};

/* Maximum length (including the trailing NUL) of a hierarchy name= option */
#define MAX_CGROUP_ROOT_NAMELEN 64
77
78
79
80
81
82
/*
 * A cgroupfs_root represents the root of a cgroup hierarchy, and may
 * be associated with a superblock to form an active hierarchy.
 */
struct cgroupfs_root {
	struct super_block *sb;

	/*
	 * The bitmask of subsystems intended to be attached to this
	 * hierarchy.
	 */
	unsigned long subsys_bits;

	/* Unique id for this hierarchy. */
	int hierarchy_id;

	/* The bitmask of subsystems currently attached to this hierarchy */
	unsigned long actual_subsys_bits;

	/* A list running through the attached subsystems */
	struct list_head subsys_list;

	/* The root cgroup for this hierarchy */
	struct cgroup top_cgroup;

	/* Tracks how many cgroups are currently defined in hierarchy. */
	int number_of_cgroups;

	/* A list running through the active hierarchies */
	struct list_head root_list;

	/* Hierarchy-specific flags (ROOT_* bits) */
	unsigned long flags;

	/* The path to use for release notifications. */
	char release_agent_path[PATH_MAX];

	/* The name for this hierarchy - may be empty */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};
119
120
121
122
123
124
/*
 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
 * subsystems that are otherwise unattached - it never has more than a
 * single cgroup, and all tasks are part of that cgroup.
 */
static struct cgroupfs_root rootnode;
126
127
128
129
130
131#define CSS_ID_MAX (65535)
132struct css_id {
133
134
135
136
137
138
139
140 struct cgroup_subsys_state __rcu *css;
141
142
143
144 unsigned short id;
145
146
147
148 unsigned short depth;
149
150
151
152 struct rcu_head rcu_head;
153
154
155
156 unsigned short stack[0];
157};
158
159
160
161
/*
 * cgroup_event represents an event which userspace wants to receive
 * via an eventfd.
 */
struct cgroup_event {
	/* Cgroup which the event belongs to. */
	struct cgroup *cgrp;
	/* Control file which the event is associated with. */
	struct cftype *cft;
	/* eventfd to signal userspace about the event. */
	struct eventfd_ctx *eventfd;
	/* Each event is linked on the owning cgroup's event_list. */
	struct list_head list;
	/*
	 * All fields below are needed to unregister the event when
	 * userspace closes the eventfd.
	 */
	poll_table pt;
	wait_queue_head_t *wqh;
	wait_queue_t wait;
	struct work_struct remove;
};
188
189
190
/* The list of hierarchy roots */

static LIST_HEAD(roots);
static int root_count;

/* Hierarchy-id allocation; hierarchy_id_lock protects the ida and
 * next_hierarchy_id. */
static DEFINE_IDA(hierarchy_ida);
static int next_hierarchy_id;
static DEFINE_SPINLOCK(hierarchy_id_lock);

/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
#define dummytop (&rootnode.top_cgroup)

/*
 * This flag indicates whether tasks in the fork and exit paths should
 * check for fork/exit handlers to call.  This avoids extra work in
 * those paths if no subsystem needs to be called.
 */
static int need_forkexit_callback __read_mostly;
207
#ifdef CONFIG_PROVE_LOCKING
/* With lockdep, report whether the current task holds cgroup_mutex. */
int cgroup_lock_is_held(void)
{
	return lockdep_is_held(&cgroup_mutex);
}
#else
/*
 * Without lockdep we can only tell whether the mutex is locked at all,
 * not by whom; best-effort check used by RCU dereference annotations.
 */
int cgroup_lock_is_held(void)
{
	return mutex_is_locked(&cgroup_mutex);
}
#endif

EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
221
222
/* Convenient test: has this cgroup been marked removed (rmdir'd)? */
inline int cgroup_is_removed(const struct cgroup *cgrp)
{
	return test_bit(CGRP_REMOVED, &cgrp->flags);
}
227
228
/* Bits in struct cgroupfs_root's flags field */
enum {
	ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
};
232
233static int cgroup_is_releasable(const struct cgroup *cgrp)
234{
235 const int bits =
236 (1 << CGRP_RELEASABLE) |
237 (1 << CGRP_NOTIFY_ON_RELEASE);
238 return (cgrp->flags & bits) == bits;
239}
240
/* Does this cgroup want a release-agent notification when it empties? */
static int notify_on_release(const struct cgroup *cgrp)
{
	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}
245
/* Should new child cgroups clone this cgroup's configuration? */
static int clone_children(const struct cgroup *cgrp)
{
	return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
}
250
251
252
253
254
/*
 * for_each_subsys() allows you to iterate on each subsystem attached to
 * an active hierarchy.
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &_root->subsys_list, sibling)

/* for_each_active_root() allows you to iterate across the active hierarchies */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)

/* The list of cgroups eligible for automatic release.  Protected by
 * release_list_lock. */
static LIST_HEAD(release_list);
static DEFINE_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
269
270
/* Link structure for associating css_set objects with cgroups */
struct cg_cgroup_link {
	/*
	 * List running through cg_cgroup_links associated with a
	 * cgroup, anchored on cgroup->css_sets.
	 */
	struct list_head cgrp_link_list;
	struct cgroup *cgrp;
	/*
	 * List running through cg_cgroup_links pointing at a
	 * single css_set object, anchored on css_set->cg_links.
	 */
	struct list_head cg_link_list;
	struct css_set *cg;
};
285
286
287
288
289
290
291
292
/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted.  It contains a pointer to the root state
 * for each subsystem.  Not reference-counted, to improve performance
 * when child cgroups haven't been created.
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

static int cgroup_init_idr(struct cgroup_subsys *ss,
			   struct cgroup_subsys_state *css);

/*
 * css_set_lock protects the list of css_set objects, and the chain of
 * tasks off each css_set.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;

/*
 * Hash table for css_set objects.  This improves the performance of
 * finding an existing css_set.  The hash doesn't (currently) take into
 * account cgroups in empty hierarchies.
 */
#define CSS_SET_HASH_BITS	7
#define CSS_SET_TABLE_SIZE	(1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
313
314static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
315{
316 int i;
317 int index;
318 unsigned long tmp = 0UL;
319
320 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
321 tmp += (unsigned long)css[i];
322 tmp = (tmp >> 16) ^ tmp;
323
324 index = hash_long(tmp, CSS_SET_HASH_BITS);
325
326 return &css_set_table[index];
327}
328
/* RCU callback: actually free a css_set after the grace period. */
static void free_css_set_rcu(struct rcu_head *obj)
{
	struct css_set *cg = container_of(obj, struct css_set, rcu_head);
	kfree(cg);
}
334
335
336
337
338
/*
 * We don't maintain the lists running through each css_set to its task
 * until after the first call to cgroup_iter_start().  This reduces the
 * fork()/exit() overhead for people who have cgroups compiled into
 * their kernel but not actually in use.
 */
static int use_task_css_set_links __read_mostly;
340
/*
 * Drop a reference on @cg; when the last reference goes, unlink the
 * css_set and release cgroup refcounts.  @taskexit marks cgroups
 * releasable so the release agent may run.
 */
static void __put_css_set(struct css_set *cg, int taskexit)
{
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;
	/*
	 * Ensure that the refcount doesn't hit zero while any readers
	 * can see it.  Similar to atomic_dec_and_lock(), but for an
	 * rwlock.
	 */
	if (atomic_add_unless(&cg->refcount, -1, 1))
		return;
	write_lock(&css_set_lock);
	if (!atomic_dec_and_test(&cg->refcount)) {
		/* Someone re-got a reference between the two decrements. */
		write_unlock(&css_set_lock);
		return;
	}

	/* This css_set is dead.  Unlink it and release cgroup refcounts. */
	hlist_del(&cg->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
				 cg_link_list) {
		struct cgroup *cgrp = link->cgrp;
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		if (atomic_dec_and_test(&cgrp->count) &&
		    notify_on_release(cgrp)) {
			if (taskexit)
				set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}

		kfree(link);
	}

	write_unlock(&css_set_lock);
	/* Actual freeing is deferred past an RCU grace period. */
	call_rcu(&cg->rcu_head, free_css_set_rcu);
}
380
381
382
383
/*
 * Refcounted get/put for css_set objects.
 */
static inline void get_css_set(struct css_set *cg)
{
	atomic_inc(&cg->refcount);
}

/* Drop a reference taken outside of task-exit context. */
static inline void put_css_set(struct css_set *cg)
{
	__put_css_set(cg, 0);
}

/* Drop a reference from an exiting task; may mark cgroups releasable. */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	__put_css_set(cg, 1);
}
398
399
400
401
402
403
404
405
406
407
408
/*
 * compare_css_sets - helper function for find_existing_css_set().
 * @cg: candidate css_set being tested
 * @old_cg: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if "cg" matches "old_cg" except for the hierarchy
 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
 */
static bool compare_css_sets(struct css_set *cg,
			     struct css_set *old_cg,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct list_head *l1, *l2;

	if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
		/* Not all subsystems matched */
		return false;
	}

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies with no subsystems.  We
	 * could get by with just this check alone (and skip the
	 * memcmp above) but on most setups the memcmp check will
	 * avoid the need for this more expensive check on almost all
	 * candidates.
	 */
	l1 = &cg->cg_links;
	l2 = &old_cg->cg_links;
	while (1) {
		struct cg_cgroup_link *cgl1, *cgl2;
		struct cgroup *cg1, *cg2;

		l1 = l1->next;
		l2 = l2->next;
		/* See if we reached the end - both lists are equal length */
		if (l1 == &cg->cg_links) {
			BUG_ON(l2 != &old_cg->cg_links);
			break;
		} else {
			BUG_ON(l2 == &old_cg->cg_links);
		}
		/* Locate the cgroups associated with these links. */
		cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
		cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
		cg1 = cgl1->cgrp;
		cg2 = cgl2->cgrp;
		/* Hierarchies should be linked in the same order. */
		BUG_ON(cg1->root != cg2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cg1->root == new_cgrp->root) {
			if (cg1 != new_cgrp)
				return false;
		} else {
			if (cg1 != cg2)
				return false;
		}
	}
	return true;
}
470
471
472
473
474
475
476
477
478
479
480
481
482
483
/*
 * find_existing_css_set() is a helper for find_css_set(), and checks
 * to see whether an existing css_set is suitable.
 *
 * oldcg: the cgroup group that we're using before the cgroup
 * transition
 *
 * cgrp: the cgroup that we're moving into
 *
 * template: location in which to build the desired set of subsystem
 * state objects for the new cgroup group
 */
static struct css_set *find_existing_css_set(
	struct css_set *oldcg,
	struct cgroup *cgrp,
	struct cgroup_subsys_state *template[])
{
	int i;
	struct cgroupfs_root *root = cgrp->root;
	struct hlist_head *hhead;
	struct hlist_node *node;
	struct css_set *cg;

	/*
	 * Build the set of subsystem state objects that we want to see
	 * in the new css_set.  While subsystems can change globally,
	 * the entries here won't change or require callbacks.
	 */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		if (root->subsys_bits & (1UL << i)) {
			/*
			 * Subsystem is in this hierarchy.  So we want
			 * the subsystem state from the new cgroup.
			 */
			template[i] = cgrp->subsys[i];
		} else {
			/*
			 * Subsystem is not in this hierarchy, so we
			 * don't want to change the subsystem state.
			 */
			template[i] = oldcg->subsys[i];
		}
	}

	hhead = css_set_hash(template);
	hlist_for_each_entry(cg, node, hhead, hlist) {
		if (!compare_css_sets(cg, oldcg, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cg;
	}

	/* No existing cgroup group matched */
	return NULL;
}
525
526static void free_cg_links(struct list_head *tmp)
527{
528 struct cg_cgroup_link *link;
529 struct cg_cgroup_link *saved_link;
530
531 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
532 list_del(&link->cgrp_link_list);
533 kfree(link);
534 }
535}
536
537
538
539
540
541
542static int allocate_cg_links(int count, struct list_head *tmp)
543{
544 struct cg_cgroup_link *link;
545 int i;
546 INIT_LIST_HEAD(tmp);
547 for (i = 0; i < count; i++) {
548 link = kmalloc(sizeof(*link), GFP_KERNEL);
549 if (!link) {
550 free_cg_links(tmp);
551 return -ENOMEM;
552 }
553 list_add(&link->cgrp_link_list, tmp);
554 }
555 return 0;
556}
557
558
559
560
561
562
563
/**
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links()
 * @cg: the css_set to be linked
 * @cgrp: the destination cgroup
 */
static void link_css_set(struct list_head *tmp_cg_links,
			 struct css_set *cg, struct cgroup *cgrp)
{
	struct cg_cgroup_link *link;

	BUG_ON(list_empty(tmp_cg_links));
	link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
				cgrp_link_list);
	link->cg = cg;
	link->cgrp = cgrp;
	atomic_inc(&cgrp->count);
	list_move(&link->cgrp_link_list, &cgrp->css_sets);
	/*
	 * Always add links to the tail of the list so that the list
	 * is sorted by order of hierarchy creation.
	 */
	list_add_tail(&link->cg_link_list, &cg->cg_links);
}
582
583
584
585
586
587
588
589
/*
 * find_css_set() takes an existing cgroup group and a cgroup object,
 * and returns a css_set object that's equivalent to the old group,
 * but with the given cgroup substituted into the appropriate hierarchy.
 * Must be called with cgroup_mutex held.
 */
static struct css_set *find_css_set(
	struct css_set *oldcg, struct cgroup *cgrp)
{
	struct css_set *res;
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];

	struct list_head tmp_cg_links;

	struct hlist_head *hhead;
	struct cg_cgroup_link *link;

	/*
	 * First see if we already have a cgroup group that matches
	 * the desired set.
	 */
	read_lock(&css_set_lock);
	res = find_existing_css_set(oldcg, cgrp, template);
	if (res)
		get_css_set(res);
	read_unlock(&css_set_lock);

	if (res)
		return res;

	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return NULL;

	/* Allocate all the cg_cgroup_link objects that we'll need */
	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
		kfree(res);
		return NULL;
	}

	atomic_set(&res->refcount, 1);
	INIT_LIST_HEAD(&res->cg_links);
	INIT_LIST_HEAD(&res->tasks);
	INIT_HLIST_NODE(&res->hlist);

	/*
	 * Copy the set of subsystem state objects generated in
	 * find_existing_css_set().
	 */
	memcpy(res->subsys, template, sizeof(res->subsys));

	write_lock(&css_set_lock);
	/* Add reference counts and links from the new css_set. */
	list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
		struct cgroup *c = link->cgrp;
		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_cg_links, res, c);
	}

	BUG_ON(!list_empty(&tmp_cg_links));

	css_set_count++;

	/* Add this cgroup group to the hash table */
	hhead = css_set_hash(res->subsys);
	hlist_add_head(&res->hlist, hhead);

	write_unlock(&css_set_lock);

	return res;
}
652
653
654
655
656
/*
 * Return the cgroup for "task" from the given hierarchy.  Must be
 * called with cgroup_mutex held.
 */
static struct cgroup *task_cgroup_from_root(struct task_struct *task,
					    struct cgroupfs_root *root)
{
	struct css_set *css;
	struct cgroup *res = NULL;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	read_lock(&css_set_lock);
	/*
	 * No need to lock the task - since we hold cgroup_mutex the
	 * task can't change groups, so the only thing that can happen
	 * is that it exits and its css is set back to init_css_set.
	 */
	css = task->cgroups;
	if (css == &init_css_set) {
		res = &root->top_cgroup;
	} else {
		struct cg_cgroup_link *link;
		list_for_each_entry(link, &css->cg_links, cg_link_list) {
			struct cgroup *c = link->cgrp;
			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	read_unlock(&css_set_lock);
	/* Every css_set has a link to each active hierarchy. */
	BUG_ON(!res);
	return res;
}
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
/**
 * cgroup_lock - lock out any changes to cgroup structures
 */
void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_lock);

/**
 * cgroup_unlock - release lock on cgroup changes
 *
 * Undo the lock taken in a previous cgroup_lock() call.
 */
void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unlock);
758
759
760
761
762
763
764
765
/*
 * A couple of forward declarations required, due to cyclic reference
 * loop: cgroup_mkdir -> cgroup_create -> cgroup_populate_dir ->
 * cgroup_add_file -> cgroup_create_file -> cgroup_dir_inode_operations
 * -> cgroup_mkdir.
 */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;

/* cgroupfs pages need no writeback or dirty accounting of their own */
static struct backing_dev_info cgroup_backing_dev_info = {
	.name		= "cgroup",
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
780
/*
 * Allocate and initialise a new inode for cgroupfs with the given mode.
 * Returns NULL on allocation failure.
 */
static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
{
	struct inode *inode = new_inode(sb);

	if (inode) {
		inode->i_ino = get_next_ino();
		inode->i_mode = mode;
		inode->i_uid = current_fsuid();
		inode->i_gid = current_fsgid();
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
	}
	return inode;
}
795
796
797
798
799
800static int cgroup_call_pre_destroy(struct cgroup *cgrp)
801{
802 struct cgroup_subsys *ss;
803 int ret = 0;
804
805 for_each_subsys(cgrp->root, ss)
806 if (ss->pre_destroy) {
807 ret = ss->pre_destroy(ss, cgrp);
808 if (ret)
809 break;
810 }
811
812 return ret;
813}
814
/* RCU callback: actually free a cgroup after the grace period. */
static void free_cgroup_rcu(struct rcu_head *obj)
{
	struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);

	kfree(cgrp);
}
821
/* d_iput: final teardown of a cgroup when its dentry goes away. */
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
	/* is dentry a directory? if so, kfree() associated cgroup */
	if (S_ISDIR(inode->i_mode)) {
		struct cgroup *cgrp = dentry->d_fsdata;
		struct cgroup_subsys *ss;
		BUG_ON(!(cgroup_is_removed(cgrp)));
		/*
		 * It's possible for external users to be holding css
		 * reference counts on a cgroup; css_put() needs to
		 * be able to access the cgroup after decrementing
		 * the reference count in order to know if it needs to
		 * queue the cgroup to be handled by the release
		 * agent.
		 */
		synchronize_rcu();

		mutex_lock(&cgroup_mutex);
		/*
		 * Release the subsystem state objects.
		 */
		for_each_subsys(cgrp->root, ss)
			ss->destroy(ss, cgrp);

		cgrp->root->number_of_cgroups--;
		mutex_unlock(&cgroup_mutex);

		/*
		 * Drop the active superblock reference that we took when
		 * we created the cgroup.
		 */
		deactivate_super(cgrp->root->sb);

		/*
		 * If we're getting rid of the cgroup, refcounting should
		 * ensure that there are no pidlists left.
		 */
		BUG_ON(!list_empty(&cgrp->pidlists));

		call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
	}
	iput(inode);
}
863
/* d_delete: always allow unused cgroup dentries to be removed from the dcache */
static int cgroup_delete(const struct dentry *d)
{
	return 1;
}
868
/* Unhash @d and remove it from its parent directory. */
static void remove_dir(struct dentry *d)
{
	/* Pin the parent across the rmdir of its child. */
	struct dentry *parent = dget(d->d_parent);

	d_delete(d);
	simple_rmdir(parent->d_inode, d);
	dput(parent);
}
877
/*
 * Remove all (non-directory) entries from a cgroup directory.  The
 * caller must hold the directory inode's i_mutex; d_lock is dropped and
 * retaken around each unlink, so the child list is re-read each pass.
 */
static void cgroup_clear_directory(struct dentry *dentry)
{
	struct list_head *node;

	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
	spin_lock(&dentry->d_lock);
	node = dentry->d_subdirs.next;
	while (node != &dentry->d_subdirs) {
		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);

		spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
		list_del_init(node);
		if (d->d_inode) {
			/*
			 * This should never be called on a cgroup
			 * directory with child cgroups.
			 */
			BUG_ON(d->d_inode->i_mode & S_IFDIR);
			dget_dlock(d);
			spin_unlock(&d->d_lock);
			spin_unlock(&dentry->d_lock);
			d_delete(d);
			simple_unlink(dentry->d_inode, d);
			dput(d);
			spin_lock(&dentry->d_lock);
		} else
			spin_unlock(&d->d_lock);
		node = dentry->d_subdirs.next;
	}
	spin_unlock(&dentry->d_lock);
}
907
908
909
910
/*
 * NOTE : the dentry must have been dget()'ed
 */
static void cgroup_d_remove_dir(struct dentry *dentry)
{
	struct dentry *parent;

	cgroup_clear_directory(dentry);

	parent = dentry->d_parent;
	spin_lock(&parent->d_lock);
	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	list_del_init(&dentry->d_u.d_child);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&parent->d_lock);
	remove_dir(dentry);
}
925
926
927
928
929
930
931
932
933
/*
 * A queue for waiters to do rmdir() cgroup.  A task will sleep when
 * cgroup->count == 0 && list_empty(&cgroup->children) && a subsys still
 * holds some reference to css->refcnt.  In general, this refcnt is
 * expected to go down to zero soon.
 *
 * CGRP_WAIT_ON_RMDIR flag is set under cgroup_mutex.
 */
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);

/* Wake any rmdir() waiter if the flag was set. */
static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
{
	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
		wake_up_all(&cgroup_rmdir_waitq);
}

/* Pin a css so that a concurrent rmdir() will wait rather than proceed. */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
	css_get(css);
}

/* Release the pin taken by cgroup_exclude_rmdir() and wake rmdir waiters. */
void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
{
	cgroup_wakeup_rmdir_waiter(css->cgroup);
	css_put(css);
}
952
953
954
955
956
957
/*
 * Call with cgroup_mutex held.  Rebind the set of attached subsystems
 * on @root to exactly @final_bits, binding new subsystems from the
 * dummy hierarchy and returning removed ones to it.  Drops the module
 * references that parse_cgroupfs_options() pinned for subsystems that
 * end up not newly bound.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			      unsigned long final_bits)
{
	unsigned long added_bits, removed_bits;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));

	removed_bits = root->actual_subsys_bits & ~final_bits;
	added_bits = final_bits & ~root->actual_subsys_bits;
	/* Check that any added subsystems are currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_bits))
			continue;
		/*
		 * Nobody should tell us to do a subsys that doesn't exist:
		 * parse_cgroupfs_options should catch that case and
		 * refcounts ensure that subsystems won't disappear once
		 * selected.
		 */
		BUG_ON(ss == NULL);
		if (ss->root != &rootnode) {
			/* Subsystem isn't free */
			return -EBUSY;
		}
	}

	/*
	 * Currently we don't handle adding/removing subsystems when
	 * any child cgroups exist.  This is theoretically supportable
	 * but involves complex error handling, so it's being left until
	 * later.
	 */
	if (root->number_of_cgroups > 1)
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_bits) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			mutex_lock(&ss->hierarchy_mutex);
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			if (ss->bind)
				ss->bind(ss, cgrp);
			mutex_unlock(&ss->hierarchy_mutex);
			/* refcount was already taken, and we're keeping it */
		} else if (bit & removed_bits) {
			/* We're removing this subsystem */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			mutex_lock(&ss->hierarchy_mutex);
			if (ss->bind)
				ss->bind(ss, dummytop);
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			subsys[i]->root = &rootnode;
			list_move(&ss->sibling, &rootnode.subsys_list);
			mutex_unlock(&ss->hierarchy_mutex);
			/* subsystem is now free - drop reference on module */
			module_put(ss->module);
		} else if (bit & final_bits) {
			/* Subsystem state should already exist */
			BUG_ON(ss == NULL);
			BUG_ON(!cgrp->subsys[i]);
			/*
			 * A refcount was taken, but we already had one, so
			 * drop the extra reference.
			 */
			module_put(ss->module);
#ifdef CONFIG_MODULE_UNLOAD
			BUG_ON(ss->module && !module_refcount(ss->module));
#endif
		} else {
			/* Subsystem state shouldn't exist */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_bits = root->actual_subsys_bits = final_bits;
	synchronize_rcu();

	return 0;
}
1050
/* show_options: emit this hierarchy's mount options into @seq. */
static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
	struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
	struct cgroup_subsys *ss;

	/* cgroup_mutex stabilises subsys_list and the option fields. */
	mutex_lock(&cgroup_mutex);
	for_each_subsys(root, ss)
		seq_printf(seq, ",%s", ss->name);
	if (test_bit(ROOT_NOPREFIX, &root->flags))
		seq_puts(seq, ",noprefix");
	if (strlen(root->release_agent_path))
		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
	if (clone_children(&root->top_cgroup))
		seq_puts(seq, ",clone_children");
	if (strlen(root->name))
		seq_printf(seq, ",name=%s", root->name);
	mutex_unlock(&cgroup_mutex);
	return 0;
}
1070
/* Parsed mount options, produced by parse_cgroupfs_options(). */
struct cgroup_sb_opts {
	unsigned long subsys_bits;	/* requested subsystems */
	unsigned long flags;		/* ROOT_* flags */
	char *release_agent;		/* kstrndup'd; caller frees */
	bool clone_children;
	char *name;			/* kstrndup'd; caller frees */
	/* User explicitly requested an empty subsystem ("none") */
	bool none;

	/*
	 * A candidate new root, allocated before sget() and consumed
	 * by cgroup_set_super() if no matching superblock exists.
	 */
	struct cgroupfs_root *new_root;

};
1083
1084
1085
1086
1087
1088
1089
/*
 * Convert a hierarchy specifier into a bitmask of subsystems and
 * flags.  Call with cgroup_mutex held to protect the subsys[] array.
 * This function may be called from either the mount or the remount
 * path.  On success, module references are pinned for every
 * non-builtin subsystem selected; the caller owns opts->release_agent
 * and opts->name.
 */
static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
{
	char *token, *o = data;
	bool all_ss = false, one_ss = false;
	unsigned long mask = (unsigned long)-1;
	int i;
	bool module_pin_failed = false;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));

#ifdef CONFIG_CPUSETS
	/* cpuset is the only subsystem compatible with "noprefix" */
	mask = ~(1UL << cpuset_subsys_id);
#endif

	memset(opts, 0, sizeof(*opts));

	while ((token = strsep(&o, ",")) != NULL) {
		if (!*token)
			return -EINVAL;
		if (!strcmp(token, "none")) {
			/* Explicitly have no subsystems */
			opts->none = true;
			continue;
		}
		if (!strcmp(token, "all")) {
			/* Mutually exclusive option 'all' + subsystem name */
			if (one_ss)
				return -EINVAL;
			all_ss = true;
			continue;
		}
		if (!strcmp(token, "noprefix")) {
			set_bit(ROOT_NOPREFIX, &opts->flags);
			continue;
		}
		if (!strcmp(token, "clone_children")) {
			opts->clone_children = true;
			continue;
		}
		if (!strncmp(token, "release_agent=", 14)) {
			/* Specifying two release agents is forbidden */
			if (opts->release_agent)
				return -EINVAL;
			opts->release_agent =
				kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
			if (!opts->release_agent)
				return -ENOMEM;
			continue;
		}
		if (!strncmp(token, "name=", 5)) {
			const char *name = token + 5;
			/* Can't specify an empty name */
			if (!strlen(name))
				return -EINVAL;
			/* Must match [\w.-]+ */
			for (i = 0; i < strlen(name); i++) {
				char c = name[i];
				if (isalnum(c))
					continue;
				if ((c == '.') || (c == '-') || (c == '_'))
					continue;
				return -EINVAL;
			}
			/* Specifying two names is forbidden */
			if (opts->name)
				return -EINVAL;
			opts->name = kstrndup(name,
					      MAX_CGROUP_ROOT_NAMELEN - 1,
					      GFP_KERNEL);
			if (!opts->name)
				return -ENOMEM;

			continue;
		}

		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss == NULL)
				continue;
			if (strcmp(token, ss->name))
				continue;
			if (ss->disabled)
				continue;

			/* Mutually exclusive option 'all' + subsystem name */
			if (all_ss)
				return -EINVAL;
			set_bit(i, &opts->subsys_bits);
			one_ss = true;

			break;
		}
		if (i == CGROUP_SUBSYS_COUNT)
			return -ENOENT;
	}

	/*
	 * If the 'all' option was specified select all the subsystems,
	 * otherwise if 'none', 'name=' and a subsystem name options
	 * were not specified, let's default to 'all'.
	 */
	if (all_ss || (!all_ss && !one_ss && !opts->none)) {
		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss == NULL)
				continue;
			if (ss->disabled)
				continue;
			set_bit(i, &opts->subsys_bits);
		}
	}

	/* Consistency checks */

	/*
	 * Option noprefix was introduced just for backward compatibility
	 * with the old cpuset, so we allow noprefix only if mounting just
	 * the cpuset subsystem.
	 */
	if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
	    (opts->subsys_bits & mask))
		return -EINVAL;


	/* Can't specify "none" and some subsystems */
	if (opts->subsys_bits && opts->none)
		return -EINVAL;

	/*
	 * We either have to specify by name or by subsystems. (So all
	 * empty hierarchies must have a name).
	 */
	if (!opts->subsys_bits && !opts->name)
		return -EINVAL;

	/*
	 * Grab references on all the modules we'll need, so the
	 * subsystems can't be removed while we're mounting.  Note that
	 * this does rebind_subsystems' job of pinning; the refs are
	 * dropped there or by drop_parsed_module_refcounts() on error.
	 */
	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;

		if (!(bit & opts->subsys_bits))
			continue;
		if (!try_module_get(subsys[i]->module)) {
			module_pin_failed = true;
			break;
		}
	}
	if (module_pin_failed) {
		/*
		 * Oops, one of the modules was being unloaded.  Also
		 * deal with the case where i == CGROUP_BUILTIN_SUBSYS_COUNT
		 * and the loop above didn't pin anything.
		 */
		for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
			/* Drop refcounts only on the ones we took */
			unsigned long bit = 1UL << i;

			if (!(bit & opts->subsys_bits))
				continue;
			module_put(subsys[i]->module);
		}
		return -ENOENT;
	}

	return 0;
}
1260
1261static void drop_parsed_module_refcounts(unsigned long subsys_bits)
1262{
1263 int i;
1264 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1265 unsigned long bit = 1UL << i;
1266
1267 if (!(bit & subsys_bits))
1268 continue;
1269 module_put(subsys[i]->module);
1270 }
1271}
1272
/*
 * remount_fs: re-parse the mount options and rebind subsystems on an
 * existing hierarchy.  Changing flags or the name is refused; only the
 * subsystem set and release_agent may change.
 */
static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{
	int ret = 0;
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	struct cgroup_sb_opts opts;

	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
	mutex_lock(&cgroup_mutex);

	/* See what subsystems are wanted */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

	/* Don't allow flags or name to change at remount */
	if (opts.flags != root->flags ||
	    (opts.name && strcmp(opts.name, root->name))) {
		ret = -EINVAL;
		drop_parsed_module_refcounts(opts.subsys_bits);
		goto out_unlock;
	}

	ret = rebind_subsystems(root, opts.subsys_bits);
	if (ret) {
		drop_parsed_module_refcounts(opts.subsys_bits);
		goto out_unlock;
	}

	/* (re)populate subsystem files */
	cgroup_populate_dir(cgrp);

	if (opts.release_agent)
		strcpy(root->release_agent_path, opts.release_agent);
 out_unlock:
	kfree(opts.release_agent);
	kfree(opts.name);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
	return ret;
}
1314
/* Superblock operations for cgroupfs. */
static const struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
1321
/* Initialise the list heads and locks embedded in a cgroup. */
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	INIT_LIST_HEAD(&cgrp->sibling);
	INIT_LIST_HEAD(&cgrp->children);
	INIT_LIST_HEAD(&cgrp->css_sets);
	INIT_LIST_HEAD(&cgrp->release_list);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	INIT_LIST_HEAD(&cgrp->event_list);
	spin_lock_init(&cgrp->event_list_lock);
}
1333
/* Initialise a freshly-allocated cgroupfs_root and its top cgroup. */
static void init_cgroup_root(struct cgroupfs_root *root)
{
	struct cgroup *cgrp = &root->top_cgroup;
	INIT_LIST_HEAD(&root->subsys_list);
	INIT_LIST_HEAD(&root->root_list);
	/* The top cgroup always exists */
	root->number_of_cgroups = 1;
	cgrp->root = root;
	cgrp->top_cgroup = cgrp;
	init_cgroup_housekeeping(cgrp);
}
1344
/*
 * Allocate a hierarchy id for @root from hierarchy_ida, trying ids at
 * or above next_hierarchy_id first and wrapping to the lowest free id
 * on -ENOSPC.  Returns false only on allocation failure.
 */
static bool init_root_id(struct cgroupfs_root *root)
{
	int ret = 0;

	do {
		if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
			return false;
		spin_lock(&hierarchy_id_lock);
		/* Try to allocate the next unused ID */
		ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
					&root->hierarchy_id);
		if (ret == -ENOSPC)
			/* Try again starting from 0 */
			ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
		if (!ret) {
			next_hierarchy_id = root->hierarchy_id + 1;
		} else if (ret != -EAGAIN) {
			/* Can only get here if the 31-bit IDR is full ... */
			BUG_ON(ret);
		}
		spin_unlock(&hierarchy_id_lock);
	} while (ret);
	return true;
}
1369
1370static int cgroup_test_super(struct super_block *sb, void *data)
1371{
1372 struct cgroup_sb_opts *opts = data;
1373 struct cgroupfs_root *root = sb->s_fs_info;
1374
1375
1376 if (opts->name && strcmp(opts->name, root->name))
1377 return 0;
1378
1379
1380
1381
1382
1383 if ((opts->subsys_bits || opts->none)
1384 && (opts->subsys_bits != root->subsys_bits))
1385 return 0;
1386
1387 return 1;
1388}
1389
/*
 * Build a candidate cgroupfs_root from parsed mount options.  Returns
 * NULL when the options name an existing hierarchy only (no subsystems
 * and no "none"), or an ERR_PTR on allocation failure.
 */
static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
{
	struct cgroupfs_root *root;

	if (!opts->subsys_bits && !opts->none)
		return NULL;

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return ERR_PTR(-ENOMEM);

	if (!init_root_id(root)) {
		kfree(root);
		return ERR_PTR(-ENOMEM);
	}
	init_cgroup_root(root);

	root->subsys_bits = opts->subsys_bits;
	root->flags = opts->flags;
	if (opts->release_agent)
		strcpy(root->release_agent_path, opts->release_agent);
	if (opts->name)
		strcpy(root->name, opts->name);
	if (opts->clone_children)
		set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
	return root;
}
1417
/* Free @root and return its hierarchy id to the ida.  NULL is a no-op. */
static void cgroup_drop_root(struct cgroupfs_root *root)
{
	if (!root)
		return;

	BUG_ON(!root->hierarchy_id);
	spin_lock(&hierarchy_id_lock);
	ida_remove(&hierarchy_ida, root->hierarchy_id);
	spin_unlock(&hierarchy_id_lock);
	kfree(root);
}
1429
/*
 * sget() callback: initialise a newly allocated superblock, adopting
 * the candidate root carried in opts->new_root.
 */
static int cgroup_set_super(struct super_block *sb, void *data)
{
	int ret;
	struct cgroup_sb_opts *opts = data;

	/* If we don't have a new root, we can't set up a new sb */
	if (!opts->new_root)
		return -EINVAL;

	BUG_ON(!opts->subsys_bits && !opts->none);

	ret = set_anon_super(sb, NULL);
	if (ret)
		return ret;

	sb->s_fs_info = opts->new_root;
	opts->new_root->sb = sb;

	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = CGROUP_SUPER_MAGIC;
	sb->s_op = &cgroup_ops;

	return 0;
}
1455
/* Create and install the root directory inode/dentry for a new sb. */
static int cgroup_get_rootdir(struct super_block *sb)
{
	static const struct dentry_operations cgroup_dops = {
		.d_iput = cgroup_diput,
		.d_delete = cgroup_delete,
	};

	struct inode *inode =
		cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
	struct dentry *dentry;

	if (!inode)
		return -ENOMEM;

	inode->i_fop = &simple_dir_operations;
	inode->i_op = &cgroup_dir_inode_operations;
	/* Directories start off with i_nlink == 2 (for "." entry) */
	inc_nlink(inode);
	dentry = d_alloc_root(inode);
	if (!dentry) {
		iput(inode);
		return -ENOMEM;
	}
	sb->s_root = dentry;
	/* for everything else we want ->d_op set */
	sb->s_d_op = &cgroup_dops;
	return 0;
}
1484
/*
 * mount callback: find or create the superblock/hierarchy matching the
 * given options.  A brand-new hierarchy gets its root directory created
 * and the top cgroup linked into every existing css_set.
 */
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data)
{
	struct cgroup_sb_opts opts;
	struct cgroupfs_root *root;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *new_root;

	/* First find the desired set of subsystems */
	mutex_lock(&cgroup_mutex);
	ret = parse_cgroupfs_options(data, &opts);
	mutex_unlock(&cgroup_mutex);
	if (ret)
		goto out_err;

	/*
	 * Allocate a new cgroup root.  We may not need it if we're
	 * reusing an existing hierarchy.
	 */
	new_root = cgroup_root_from_opts(&opts);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto drop_modules;
	}
	opts.new_root = new_root;

	/* Locate an existing or create a new sb for this hierarchy */
	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		cgroup_drop_root(opts.new_root);
		goto drop_modules;
	}

	root = sb->s_fs_info;
	BUG_ON(!root);
	if (root == opts.new_root) {
		/* We used the new root structure, so this is a new hierarchy */
		struct list_head tmp_cg_links;
		struct cgroup *root_cgrp = &root->top_cgroup;
		struct inode *inode;
		struct cgroupfs_root *existing_root;
		int i;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);

		if (strlen(root->name)) {
			/* Check for name clashes with existing mounts */
			for_each_active_root(existing_root) {
				if (!strcmp(existing_root->name, root->name)) {
					ret = -EBUSY;
					mutex_unlock(&cgroup_mutex);
					mutex_unlock(&inode->i_mutex);
					goto drop_new_super;
				}
			}
		}

		/*
		 * We're accessing css_set_count without locking
		 * css_set_lock here, but that's OK - it can only be
		 * increased by someone holding cgroup_lock, and
		 * that's us.  The worst that can happen is that we
		 * have some link structures left over.
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			goto drop_new_super;
		}

		ret = rebind_subsystems(root, root->subsys_bits);
		if (ret == -EBUSY) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			free_cg_links(&tmp_cg_links);
			goto drop_new_super;
		}
		/*
		 * There must be no failure case after here, since
		 * rebinding takes care of subsystems' refcounts, which
		 * are explicitly dropped in the failure exit path.
		 */

		/* EBUSY should be the only error here */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = root_cgrp;
		root->top_cgroup.dentry = sb->s_root;

		/*
		 * Link the top cgroup in this hierarchy into all
		 * the css_set objects.
		 */
		write_lock(&css_set_lock);
		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
			struct hlist_head *hhead = &css_set_table[i];
			struct hlist_node *node;
			struct css_set *cg;

			hlist_for_each_entry(cg, node, hhead, hlist)
				link_css_set(&tmp_cg_links, cg, root_cgrp);
		}
		write_unlock(&css_set_lock);

		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&root_cgrp->sibling));
		BUG_ON(!list_empty(&root_cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		cgroup_populate_dir(root_cgrp);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);
	} else {
		/*
		 * We re-used an existing hierarchy - the new root (if
		 * any) is not needed.
		 */
		cgroup_drop_root(opts.new_root);
		/* no subsys rebinding, so refcounts don't change */
		drop_parsed_module_refcounts(opts.subsys_bits);
	}

	kfree(opts.release_agent);
	kfree(opts.name);
	return dget(sb->s_root);

 drop_new_super:
	deactivate_locked_super(sb);
 drop_modules:
	drop_parsed_module_refcounts(opts.subsys_bits);
 out_err:
	kfree(opts.release_agent);
	kfree(opts.name);
	return ERR_PTR(ret);
}
1634
/*
 * kill_sb: tear down a hierarchy on last unmount.  Rebinds all
 * subsystems back to the dummy hierarchy, unlinks the root cgroup from
 * all css_sets, then frees the root.
 */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	int ret;
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	BUG_ON(!root);

	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));
	BUG_ON(!list_empty(&cgrp->sibling));

	mutex_lock(&cgroup_mutex);

	/* Rebind all subsystems back to the default hierarchy */
	ret = rebind_subsystems(root, 0);
	/* Shouldn't be able to fail ... */
	BUG_ON(ret);

	/*
	 * Release all the links from css_sets to this hierarchy's
	 * root cgroup.
	 */
	write_lock(&css_set_lock);

	list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
				 cgrp_link_list) {
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		root_count--;
	}

	mutex_unlock(&cgroup_mutex);

	kill_litter_super(sb);
	cgroup_drop_root(root);
}
1679
/*
 * Filesystem type for "cgroup" mounts; ->kill_sb tears a hierarchy
 * down when the last mount reference goes away.
 */
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
};

/* kobject for the cgroup filesystem; registration happens elsewhere. */
static struct kobject *cgroup_kobj;
1687
1688static inline struct cgroup *__d_cgrp(struct dentry *dentry)
1689{
1690 return dentry->d_fsdata;
1691}
1692
1693static inline struct cftype *__d_cft(struct dentry *dentry)
1694{
1695 return dentry->d_fsdata;
1696}
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
/**
 * cgroup_path - build the path of a cgroup into a caller-supplied buffer
 * @cgrp: the cgroup whose path is wanted
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
 * Builds the path right-to-left from the end of @buf, then slides it
 * to the front.  Returns 0 on success or -ENAMETOOLONG if @buflen is
 * too small.  The rcu_dereference_check() calls require the caller to
 * hold either rcu_read_lock() or cgroup_mutex.
 */
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
	char *start;
	struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
						      rcu_read_lock_held() ||
						      cgroup_lock_is_held());

	if (!dentry || cgrp == dummytop) {
		/*
		 * No dentry (e.g. an inactive hierarchy's root) or the
		 * dummy top cgroup: report "/".
		 */
		strcpy(buf, "/");
		return 0;
	}

	start = buf + buflen;

	*--start = '\0';
	for (;;) {
		int len = dentry->d_name.len;

		/* Prepend this name component; fail if it no longer fits. */
		if ((start -= len) < buf)
			return -ENAMETOOLONG;
		memcpy(start, dentry->d_name.name, len);
		cgrp = cgrp->parent;
		if (!cgrp)
			break;

		dentry = rcu_dereference_check(cgrp->dentry,
					       rcu_read_lock_held() ||
					       cgroup_lock_is_held());
		/* No '/' separator in front of the root component. */
		if (!cgrp->parent)
			continue;
		if (--start < buf)
			return -ENAMETOOLONG;
		*start = '/';
	}
	/* Move the assembled path to the start of the buffer. */
	memmove(buf, start, buf + buflen - start);
	return 0;
}
EXPORT_SYMBOL_GPL(cgroup_path);
1750
1751
1752
1753
1754
1755
1756
1757
1758
/**
 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
 * @cgrp: the cgroup the task is attaching to
 * @tsk: the task to be attached
 *
 * Asks each subsystem for permission (->can_attach), switches the
 * task's css_set, moves the task on the css_set task lists, and
 * notifies the subsystems (->attach).  On a can_attach failure,
 * already-polled subsystems get ->cancel_attach.  Call with
 * cgroup_mutex held; takes task_lock of @tsk during the call.
 */
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval = 0;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	struct cgroup *oldcgrp;
	struct css_set *cg;
	struct css_set *newcg;
	struct cgroupfs_root *root = cgrp->root;

	/* Nothing to do if the task is already in that cgroup. */
	oldcgrp = task_cgroup_from_root(tsk, root);
	if (cgrp == oldcgrp)
		return 0;

	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, tsk, false);
			if (retval) {
				/*
				 * Remember which subsystem vetoed so
				 * that cleanup stops *before* it:
				 * later subsystems were never asked
				 * and must not see ->cancel_attach.
				 */
				failed_ss = ss;
				goto out;
			}
		}
	}

	/* Pin the task's current css_set across find_css_set(). */
	task_lock(tsk);
	cg = tsk->cgroups;
	get_css_set(cg);
	task_unlock(tsk);

	/*
	 * Locate or allocate a css_set that matches the old one but
	 * has @cgrp substituted for this hierarchy.
	 */
	newcg = find_css_set(cg, cgrp);
	put_css_set(cg);	/* balances the get above */
	if (!newcg) {
		retval = -ENOMEM;
		goto out;
	}

	task_lock(tsk);
	if (tsk->flags & PF_EXITING) {
		/* Task is dying; do not migrate it. */
		task_unlock(tsk);
		put_css_set(newcg);
		retval = -ESRCH;
		goto out;
	}
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* Update the css_set task lists if they are in use. */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list)) {
		list_del(&tsk->cg_list);
		list_add(&tsk->cg_list, &newcg->tasks);
	}
	write_unlock(&css_set_lock);

	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(ss, cgrp, oldcgrp, tsk, false);
	}
	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	synchronize_rcu();
	/* Drop the reference the task itself held on its old css_set. */
	put_css_set(cg);

	/*
	 * Wake up any waiter in cgroup rmdir; the attach may have
	 * affected whether the cgroup can be removed.
	 */
	cgroup_wakeup_rmdir_waiter(cgrp);
out:
	if (retval) {
		for_each_subsys(root, ss) {
			if (ss == failed_ss)
				/*
				 * This subsystem vetoed the attach; it
				 * and everything after it were never
				 * committed, so stop cancelling here.
				 */
				break;
			if (ss->cancel_attach)
				ss->cancel_attach(ss, cgrp, tsk, false);
		}
	}
	return retval;
}
1852
1853
1854
1855
1856
1857
1858int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
1859{
1860 struct cgroupfs_root *root;
1861 int retval = 0;
1862
1863 cgroup_lock();
1864 for_each_active_root(root) {
1865 struct cgroup *from_cg = task_cgroup_from_root(from, root);
1866
1867 retval = cgroup_attach_task(from_cg, tsk);
1868 if (retval)
1869 break;
1870 }
1871 cgroup_unlock();
1872
1873 return retval;
1874}
1875EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
1876
1877
1878
1879
1880
/*
 * Find the task identified by @pid (or use current when @pid is 0),
 * check that the caller's credentials permit moving it, and attach it
 * to @cgrp.  Called with cgroup_mutex held via cgroup_tasks_write().
 */
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
{
	struct task_struct *tsk;
	const struct cred *cred = current_cred(), *tcred;
	int ret;

	if (pid) {
		rcu_read_lock();
		tsk = find_task_by_vpid(pid);
		if (!tsk || tsk->flags & PF_EXITING) {
			rcu_read_unlock();
			return -ESRCH;
		}

		/*
		 * A non-root caller (euid != 0) may only move tasks
		 * whose uid or suid matches the caller's euid.
		 */
		tcred = __task_cred(tsk);
		if (cred->euid &&
		    cred->euid != tcred->uid &&
		    cred->euid != tcred->suid) {
			rcu_read_unlock();
			return -EACCES;
		}
		/* Pin the task before leaving the RCU read section. */
		get_task_struct(tsk);
		rcu_read_unlock();
	} else {
		tsk = current;
		get_task_struct(tsk);
	}

	ret = cgroup_attach_task(cgrp, tsk);
	put_task_struct(tsk);
	return ret;
}
1913
1914static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
1915{
1916 int ret;
1917 if (!cgroup_lock_live_group(cgrp))
1918 return -ENODEV;
1919 ret = attach_task_by_pid(cgrp, pid);
1920 cgroup_unlock();
1921 return ret;
1922}
1923
1924
1925
1926
1927
1928
1929
1930
1931bool cgroup_lock_live_group(struct cgroup *cgrp)
1932{
1933 mutex_lock(&cgroup_mutex);
1934 if (cgroup_is_removed(cgrp)) {
1935 mutex_unlock(&cgroup_mutex);
1936 return false;
1937 }
1938 return true;
1939}
1940EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
1941
1942static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
1943 const char *buffer)
1944{
1945 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
1946 if (strlen(buffer) >= PATH_MAX)
1947 return -EINVAL;
1948 if (!cgroup_lock_live_group(cgrp))
1949 return -ENODEV;
1950 strcpy(cgrp->root->release_agent_path, buffer);
1951 cgroup_unlock();
1952 return 0;
1953}
1954
1955static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
1956 struct seq_file *seq)
1957{
1958 if (!cgroup_lock_live_group(cgrp))
1959 return -ENODEV;
1960 seq_puts(seq, cgrp->root->release_agent_path);
1961 seq_putc(seq, '\n');
1962 cgroup_unlock();
1963 return 0;
1964}
1965
1966
1967#define CGROUP_LOCAL_BUFFER_SIZE 64
1968
1969static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
1970 struct file *file,
1971 const char __user *userbuf,
1972 size_t nbytes, loff_t *unused_ppos)
1973{
1974 char buffer[CGROUP_LOCAL_BUFFER_SIZE];
1975 int retval = 0;
1976 char *end;
1977
1978 if (!nbytes)
1979 return -EINVAL;
1980 if (nbytes >= sizeof(buffer))
1981 return -E2BIG;
1982 if (copy_from_user(buffer, userbuf, nbytes))
1983 return -EFAULT;
1984
1985 buffer[nbytes] = 0;
1986 if (cft->write_u64) {
1987 u64 val = simple_strtoull(strstrip(buffer), &end, 0);
1988 if (*end)
1989 return -EINVAL;
1990 retval = cft->write_u64(cgrp, cft, val);
1991 } else {
1992 s64 val = simple_strtoll(strstrip(buffer), &end, 0);
1993 if (*end)
1994 return -EINVAL;
1995 retval = cft->write_s64(cgrp, cft, val);
1996 }
1997 if (!retval)
1998 retval = nbytes;
1999 return retval;
2000}
2001
2002static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
2003 struct file *file,
2004 const char __user *userbuf,
2005 size_t nbytes, loff_t *unused_ppos)
2006{
2007 char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
2008 int retval = 0;
2009 size_t max_bytes = cft->max_write_len;
2010 char *buffer = local_buffer;
2011
2012 if (!max_bytes)
2013 max_bytes = sizeof(local_buffer) - 1;
2014 if (nbytes >= max_bytes)
2015 return -E2BIG;
2016
2017 if (nbytes >= sizeof(local_buffer)) {
2018 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
2019 if (buffer == NULL)
2020 return -ENOMEM;
2021 }
2022 if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
2023 retval = -EFAULT;
2024 goto out;
2025 }
2026
2027 buffer[nbytes] = 0;
2028 retval = cft->write_string(cgrp, cft, strstrip(buffer));
2029 if (!retval)
2030 retval = nbytes;
2031out:
2032 if (buffer != local_buffer)
2033 kfree(buffer);
2034 return retval;
2035}
2036
2037static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
2038 size_t nbytes, loff_t *ppos)
2039{
2040 struct cftype *cft = __d_cft(file->f_dentry);
2041 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2042
2043 if (cgroup_is_removed(cgrp))
2044 return -ENODEV;
2045 if (cft->write)
2046 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
2047 if (cft->write_u64 || cft->write_s64)
2048 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
2049 if (cft->write_string)
2050 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
2051 if (cft->trigger) {
2052 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
2053 return ret ? ret : nbytes;
2054 }
2055 return -EINVAL;
2056}
2057
2058static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
2059 struct file *file,
2060 char __user *buf, size_t nbytes,
2061 loff_t *ppos)
2062{
2063 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2064 u64 val = cft->read_u64(cgrp, cft);
2065 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
2066
2067 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2068}
2069
2070static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
2071 struct file *file,
2072 char __user *buf, size_t nbytes,
2073 loff_t *ppos)
2074{
2075 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2076 s64 val = cft->read_s64(cgrp, cft);
2077 int len = sprintf(tmp, "%lld\n", (long long) val);
2078
2079 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2080}
2081
2082static ssize_t cgroup_file_read(struct file *file, char __user *buf,
2083 size_t nbytes, loff_t *ppos)
2084{
2085 struct cftype *cft = __d_cft(file->f_dentry);
2086 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2087
2088 if (cgroup_is_removed(cgrp))
2089 return -ENODEV;
2090
2091 if (cft->read)
2092 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
2093 if (cft->read_u64)
2094 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
2095 if (cft->read_s64)
2096 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
2097 return -EINVAL;
2098}
2099
2100
2101
2102
2103
2104
/*
 * Private state for seq_file-backed control files: remembers which
 * cftype and cgroup a given seq_file belongs to.  Allocated in
 * cgroup_file_open() and freed in cgroup_seqfile_release().
 */
struct cgroup_seqfile_state {
	struct cftype *cft;
	struct cgroup *cgroup;
};
2109
2110static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
2111{
2112 struct seq_file *sf = cb->state;
2113 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
2114}
2115
2116static int cgroup_seqfile_show(struct seq_file *m, void *arg)
2117{
2118 struct cgroup_seqfile_state *state = m->private;
2119 struct cftype *cft = state->cft;
2120 if (cft->read_map) {
2121 struct cgroup_map_cb cb = {
2122 .fill = cgroup_map_add,
2123 .state = m,
2124 };
2125 return cft->read_map(state->cgroup, cft, &cb);
2126 }
2127 return cft->read_seq_string(state->cgroup, cft, m);
2128}
2129
2130static int cgroup_seqfile_release(struct inode *inode, struct file *file)
2131{
2132 struct seq_file *seq = file->private_data;
2133 kfree(seq->private);
2134 return single_release(inode, file);
2135}
2136
/* File operations installed for seq_file-backed control files. */
static const struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.write = cgroup_file_write,
	.llseek = seq_lseek,
	.release = cgroup_seqfile_release,
};
2143
2144static int cgroup_file_open(struct inode *inode, struct file *file)
2145{
2146 int err;
2147 struct cftype *cft;
2148
2149 err = generic_file_open(inode, file);
2150 if (err)
2151 return err;
2152 cft = __d_cft(file->f_dentry);
2153
2154 if (cft->read_map || cft->read_seq_string) {
2155 struct cgroup_seqfile_state *state =
2156 kzalloc(sizeof(*state), GFP_USER);
2157 if (!state)
2158 return -ENOMEM;
2159 state->cft = cft;
2160 state->cgroup = __d_cgrp(file->f_dentry->d_parent);
2161 file->f_op = &cgroup_seqfile_operations;
2162 err = single_open(file, cgroup_seqfile_show, state);
2163 if (err < 0)
2164 kfree(state);
2165 } else if (cft->open)
2166 err = cft->open(inode, file);
2167 else
2168 err = 0;
2169
2170 return err;
2171}
2172
2173static int cgroup_file_release(struct inode *inode, struct file *file)
2174{
2175 struct cftype *cft = __d_cft(file->f_dentry);
2176 if (cft->release)
2177 return cft->release(inode, file);
2178 return 0;
2179}
2180
2181
2182
2183
/*
 * cgroup_rename - only allow simple rename of directories in place.
 * A cgroup directory may be renamed but never moved to a different
 * parent, and never onto an existing entry.
 */
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
{
	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		return -ENOTDIR;
	if (new_dentry->d_inode)
		return -EEXIST;
	if (old_dir != new_dir)
		return -EIO;
	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
}
2195
/* Default file operations for (non-seq_file) cgroup control files. */
static const struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};
2203
/* Inode operations for cgroup directories (mkdir/rmdir create cgroups). */
static const struct inode_operations cgroup_dir_inode_operations = {
	.lookup = cgroup_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
};
2210
/*
 * ->lookup for cgroup directories: every entry is created explicitly,
 * so any name not already in the dcache is a negative dentry.
 */
static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
	if (dentry->d_name.len > NAME_MAX)
		return ERR_PTR(-ENAMETOOLONG);
	d_add(dentry, NULL);
	return NULL;
}
2218
2219
2220
2221
/*
 * Return the cftype of an open file, or -EINVAL if the file is not a
 * cgroup control file (checked via its f_op pointer).
 */
static inline struct cftype *__file_cft(struct file *file)
{
	if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
		return ERR_PTR(-EINVAL);
	return __d_cft(file->f_dentry);
}
2228
/*
 * Create and instantiate an inode for @dentry on @sb.  For a
 * directory, the new inode's i_mutex is taken (nested, I_MUTEX_CHILD,
 * below the parent's) and is left HELD on return -- the caller is
 * expected to populate the directory and then unlock it.
 * Returns 0 or a negative errno.
 */
static int cgroup_create_file(struct dentry *dentry, mode_t mode,
			      struct super_block *sb)
{
	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* Directories start off with an extra nlink ("." entry). */
		inc_nlink(inode);

		/*
		 * Lock the new directory below the parent's i_mutex;
		 * deliberately left locked for the caller.
		 */
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
	}
	d_instantiate(dentry, inode);
	dget(dentry);	/* extra count - pin the dentry in core */
	return 0;
}
2261
2262
2263
2264
2265
2266
2267
2268
/*
 * cgroup_create_dir - create a directory for a cgroup.
 * @cgrp: the cgroup to create the directory for; must have a valid
 *        ->parent, and its ->dentry field is filled in on success
 * @dentry: dentry of the new cgroup directory
 * @mode: mode to set on the new directory
 *
 * On success the dentry carries an extra reference held on behalf of
 * the cgroup itself, and the parent directory gains a link count for
 * the new subdirectory's ".." entry.
 */
static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
			     mode_t mode)
{
	struct dentry *parent;
	int error = 0;

	parent = cgrp->parent->dentry;
	error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
	if (!error) {
		dentry->d_fsdata = cgrp;
		inc_nlink(parent->d_inode);
		rcu_assign_pointer(cgrp->dentry, dentry);
		dget(dentry);	/* reference held by the cgroup */
	}
	dput(dentry);

	return error;
}
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297static mode_t cgroup_file_mode(const struct cftype *cft)
2298{
2299 mode_t mode = 0;
2300
2301 if (cft->mode)
2302 return cft->mode;
2303
2304 if (cft->read || cft->read_u64 || cft->read_s64 ||
2305 cft->read_map || cft->read_seq_string)
2306 mode |= S_IRUGO;
2307
2308 if (cft->write || cft->write_u64 || cft->write_s64 ||
2309 cft->write_string || cft->trigger)
2310 mode |= S_IWUSR;
2311
2312 return mode;
2313}
2314
2315int cgroup_add_file(struct cgroup *cgrp,
2316 struct cgroup_subsys *subsys,
2317 const struct cftype *cft)
2318{
2319 struct dentry *dir = cgrp->dentry;
2320 struct dentry *dentry;
2321 int error;
2322 mode_t mode;
2323
2324 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
2325 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
2326 strcpy(name, subsys->name);
2327 strcat(name, ".");
2328 }
2329 strcat(name, cft->name);
2330 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
2331 dentry = lookup_one_len(name, dir, strlen(name));
2332 if (!IS_ERR(dentry)) {
2333 mode = cgroup_file_mode(cft);
2334 error = cgroup_create_file(dentry, mode | S_IFREG,
2335 cgrp->root->sb);
2336 if (!error)
2337 dentry->d_fsdata = (void *)cft;
2338 dput(dentry);
2339 } else
2340 error = PTR_ERR(dentry);
2341 return error;
2342}
2343EXPORT_SYMBOL_GPL(cgroup_add_file);
2344
2345int cgroup_add_files(struct cgroup *cgrp,
2346 struct cgroup_subsys *subsys,
2347 const struct cftype cft[],
2348 int count)
2349{
2350 int i, err;
2351 for (i = 0; i < count; i++) {
2352 err = cgroup_add_file(cgrp, subsys, &cft[i]);
2353 if (err)
2354 return err;
2355 }
2356 return 0;
2357}
2358EXPORT_SYMBOL_GPL(cgroup_add_files);
2359
2360
2361
2362
2363
2364
2365
2366int cgroup_task_count(const struct cgroup *cgrp)
2367{
2368 int count = 0;
2369 struct cg_cgroup_link *link;
2370
2371 read_lock(&css_set_lock);
2372 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
2373 count += atomic_read(&link->cg->refcount);
2374 }
2375 read_unlock(&css_set_lock);
2376 return count;
2377}
2378
2379
2380
2381
2382
/*
 * Advance a cgroup iterator: move it->cg_link forward to the next
 * css_set with a non-empty task list, and point it->task at that
 * list's first task.  Sets it->cg_link to NULL when the cgroup's
 * css_set list is exhausted.  Called with css_set_lock held (taken in
 * cgroup_iter_start()).
 */
static void cgroup_advance_iter(struct cgroup *cgrp,
				struct cgroup_iter *it)
{
	struct list_head *l = it->cg_link;
	struct cg_cgroup_link *link;
	struct css_set *cg;

	/* Advance to the next non-empty css_set */
	do {
		l = l->next;
		if (l == &cgrp->css_sets) {
			it->cg_link = NULL;
			return;
		}
		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
		cg = link->cg;
	} while (list_empty(&cg->tasks));
	it->cg_link = l;
	it->task = cg->tasks.next;
}
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
/*
 * One-time setup run on the first cgroup task iteration: enable the
 * css_set->tasks linkage (use_task_css_set_links) and retroactively
 * add every existing thread to its css_set's task list.  The linking
 * is deferred until first use to keep fork/exit cheap when nobody
 * iterates cgroups.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;
	write_lock(&css_set_lock);
	use_task_css_set_links = 1;
	do_each_thread(g, p) {
		task_lock(p);
		/*
		 * task_lock() excludes a concurrent exit; skip tasks
		 * that are exiting or already on a css_set list.
		 */
		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
			list_add(&p->cg_list, &p->cgroups->tasks);
		task_unlock(p);
	} while_each_thread(g, p);
	write_unlock(&css_set_lock);
}
2431
/*
 * Begin iterating the tasks of @cgrp.  Takes css_set_lock for read and
 * HOLDS it until the matching cgroup_iter_end() -- keep the iteration
 * body short and non-blocking.
 */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
{
	/*
	 * The first time anyone tries to iterate across a cgroup,
	 * we need to enable the list linking each css_set to its
	 * tasks, and fix up all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	read_lock(&css_set_lock);
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}
2446
/*
 * Return the next task in an iteration started by cgroup_iter_start(),
 * or NULL when all of the cgroup's css_sets have been exhausted.
 * Moves on to the next css_set when the current task list runs out.
 */
struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
				     struct cgroup_iter *it)
{
	struct task_struct *res;
	struct list_head *l = it->task;
	struct cg_cgroup_link *link;

	/* If the iterator cg link is NULL, we have no tasks left. */
	if (!it->cg_link)
		return NULL;
	res = list_entry(l, struct task_struct, cg_list);
	/* Advance the iterator to find the next entry. */
	l = l->next;
	link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
	if (l == &link->cg->tasks) {
		/* End of this css_set's task list - move on to the
		 * next cg_cgroup_link. */
		cgroup_advance_iter(cgrp, it);
	} else {
		it->task = l;
	}
	return res;
}
2470
/* Finish an iteration; drops css_set_lock taken by cgroup_iter_start(). */
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
{
	read_unlock(&css_set_lock);
}
2475
2476static inline int started_after_time(struct task_struct *t1,
2477 struct timespec *time,
2478 struct task_struct *t2)
2479{
2480 int start_diff = timespec_compare(&t1->start_time, time);
2481 if (start_diff > 0) {
2482 return 1;
2483 } else if (start_diff < 0) {
2484 return 0;
2485 } else {
2486
2487
2488
2489
2490
2491
2492
2493
2494 return t1 > t2;
2495 }
2496}
2497
2498
2499
2500
2501
2502
2503static inline int started_after(void *p1, void *p2)
2504{
2505 struct task_struct *t1 = p1;
2506 struct task_struct *t2 = p2;
2507 return started_after_time(t1, &t2->start_time, t2);
2508}
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
/**
 * cgroup_scan_tasks - iterate through all the tasks in a cgroup
 * @scan: struct cgroup_scanner containing arguments for the scan
 *
 * For each task passing @scan->test_task(), queue it in a priority
 * heap ordered by start time and call @scan->process_task() on it
 * outside the iterator lock.  Since only a bounded heapful of tasks
 * is handled per pass, the scan repeats until a pass queues nothing;
 * latest_time/latest_task skip tasks already processed in earlier
 * passes, so each task is processed at most once even if forks race
 * with the scan.
 *
 * If @scan->heap is non-NULL it is used (its gt callback is
 * overwritten); otherwise a temporary PAGE_SIZE heap is allocated.
 * Returns 0, or the heap_init() error on allocation failure.
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;
	/* Never dereference latest_task, since it's not refcounted */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* The caller supplied our heap and pre-allocated its memory */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		/* We need to allocate our own heap memory */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* cannot allocate the heap */
			return retval;
	}

 again:
	/*
	 * One pass: under the iterator lock, collect the tasks (started
	 * after the newest task processed so far) into the heap; then,
	 * with the lock dropped, process them oldest-first.
	 */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		/*
		 * Only affect tasks that qualify per the caller's
		 * callback, if one was provided.
		 */
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/*
		 * Only process tasks that started after the last task
		 * we processed.
		 */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/* Inserted; the heap wasn't previously full. */
			get_task_struct(p);
		} else if (dropped != p) {
			/* Inserted, pushing out a different task. */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/*
		 * Else @p was newer than everything already in the
		 * full heap and wasn't inserted.
		 */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				/* ptrs[0] is the oldest queued task. */
				latest_time = q->start_time;
				latest_task = q;
			}
			/* Process the task per the caller's callback */
			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/*
		 * If we had to process anything at all, scan again in
		 * case some queued task was mid-fork and its children
		 * were missed.  Not the most efficient approach, but
		 * it avoids extra locking in the fork path.
		 */
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
2652static void *pidlist_allocate(int count)
2653{
2654 if (PIDLIST_TOO_LARGE(count))
2655 return vmalloc(count * sizeof(pid_t));
2656 else
2657 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
2658}
/* Free a pidlist buffer using whichever allocator produced it. */
static void pidlist_free(void *p)
{
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		kfree(p);
}
/*
 * Reallocate a pidlist to @newcount entries, preserving the original
 * vmalloc/kmalloc strategy.  NOTE(review): the vmalloc branch copies
 * @newcount entries out of the old buffer, so this is only safe for
 * SHRINKING (as pidlist_uniq() does); growing would read past the old
 * allocation.  Returns the new buffer, or NULL on failure (the old
 * vmalloc buffer is freed on success only; krealloc semantics apply
 * on the kmalloc path).
 */
static void *pidlist_resize(void *p, int newcount)
{
	void *newlist;

	if (is_vmalloc_addr(p)) {
		newlist = vmalloc(newcount * sizeof(pid_t));
		if (!newlist)
			return NULL;
		memcpy(newlist, p, newcount * sizeof(pid_t));
		vfree(p);
	} else {
		newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
	}
	return newlist;
}
2681
2682
2683
2684
2685
2686
2687
2688
2689#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
2690static int pidlist_uniq(pid_t **p, int length)
2691{
2692 int src, dest = 1;
2693 pid_t *list = *p;
2694 pid_t *newlist;
2695
2696
2697
2698
2699
2700 if (length == 0 || length == 1)
2701 return length;
2702
2703 for (src = 1; src < length; src++) {
2704
2705 while (list[src] == list[src-1]) {
2706 src++;
2707 if (src == length)
2708 goto after;
2709 }
2710
2711 list[dest] = list[src];
2712 dest++;
2713 }
2714after:
2715
2716
2717
2718
2719
2720 if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
2721 newlist = pidlist_resize(list, dest);
2722 if (newlist)
2723 *p = newlist;
2724 }
2725 return dest;
2726}
2727
/* sort() comparator for pid_t values: ascending numeric order. */
static int cmppid(const void *a, const void *b)
{
	const pid_t *x = a;
	const pid_t *y = b;

	return *x - *y;
}
2732
2733
2734
2735
2736
2737
2738
/*
 * Find the pidlist for (@type, current pid namespace) on @cgrp,
 * creating a fresh one if needed.  Returns with the pidlist's rwsem
 * held for WRITE (caller must up_write() it), or NULL on allocation
 * failure.  cgrp->pidlist_mutex serializes lookup and insertion.
 */
static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
						  enum cgroup_filetype type)
{
	struct cgroup_pidlist *l;
	/* don't need task_nsproxy() if we're looking at ourself */
	struct pid_namespace *ns = current->nsproxy->pid_ns;

	/*
	 * Lock ordering: pidlist_mutex first, then the pidlist's own
	 * rwsem; the mutex is dropped before returning so the caller
	 * only holds the per-list lock.
	 */
	mutex_lock(&cgrp->pidlist_mutex);
	list_for_each_entry(l, &cgrp->pidlists, links) {
		if (l->key.type == type && l->key.ns == ns) {
			/* Found a matching list - take and return it. */
			down_write(&l->mutex);
			mutex_unlock(&cgrp->pidlist_mutex);
			return l;
		}
	}
	/* Entry not found; create a new one. */
	l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
	if (!l) {
		mutex_unlock(&cgrp->pidlist_mutex);
		return l;
	}
	init_rwsem(&l->mutex);
	down_write(&l->mutex);
	l->key.type = type;
	l->key.ns = get_pid_ns(ns);	/* dropped in the release path */
	l->use_count = 0;	/* incremented by the caller */
	l->list = NULL;
	l->owner = cgrp;
	list_add(&l->links, &cgrp->pidlists);
	mutex_unlock(&cgrp->pidlist_mutex);
	return l;
}
2778
2779
2780
2781
/*
 * Load the pids (CGROUP_FILE_TASKS) or thread-group ids
 * (CGROUP_FILE_PROCS) of @cgrp's tasks into a sorted - and, for
 * procs, de-duplicated - pidlist, returned via @lp with its use count
 * bumped.  Returns 0 or -ENOMEM.
 */
static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
			      struct cgroup_pidlist **lp)
{
	pid_t *array;
	int length;
	int pid, n = 0; /* used for populating the array */
	struct cgroup_iter it;
	struct task_struct *tsk;
	struct cgroup_pidlist *l;

	/*
	 * If the cgroup gains users after we read the count we won't
	 * have enough space - tough.  Indistinguishable to the caller
	 * from the new users simply showing up a moment later.
	 */
	length = cgroup_task_count(cgrp);
	array = pidlist_allocate(length);
	if (!array)
		return -ENOMEM;
	/* Populate the array. */
	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		if (unlikely(n == length))
			break;
		/* tgid for the procs file, pid for the tasks file */
		if (type == CGROUP_FILE_PROCS)
			pid = task_tgid_vnr(tsk);
		else
			pid = task_pid_vnr(tsk);
		if (pid > 0) /* only keep valid results */
			array[n++] = pid;
	}
	cgroup_iter_end(cgrp, &it);
	length = n;
	/* Sort, and for the procs file strip out duplicate tgids. */
	sort(array, length, sizeof(pid_t), cmppid, NULL);
	if (type == CGROUP_FILE_PROCS)
		length = pidlist_uniq(&array, length);
	l = cgroup_pidlist_find(cgrp, type);
	if (!l) {
		pidlist_free(array);
		return -ENOMEM;
	}
	/* Store the array, freeing any old one - l->mutex already held. */
	pidlist_free(l->list);
	l->list = array;
	l->length = length;
	l->use_count++;
	up_write(&l->mutex);
	*lp = l;
	return 0;
}
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
/**
 * cgroupstats_build - build and fill cgroupstats
 * @stats: cgroupstats to fill information into
 * @dentry: a dentry belonging to the cgroup for which stats have been
 *          requested
 *
 * Counts the cgroup's tasks bucketed by scheduler state.  Returns 0,
 * or -EINVAL if @dentry is not a cgroup directory.
 */
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
	int ret = -EINVAL;
	struct cgroup *cgrp;
	struct cgroup_iter it;
	struct task_struct *tsk;

	/*
	 * Validate the dentry: it must live on a cgroup superblock and
	 * be a directory (i.e. represent a cgroup).
	 */
	if (dentry->d_sb->s_op != &cgroup_ops ||
	    !S_ISDIR(dentry->d_inode->i_mode))
		goto err;

	ret = 0;
	cgrp = dentry->d_fsdata;

	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		switch (tsk->state) {
		case TASK_RUNNING:
			stats->nr_running++;
			break;
		case TASK_INTERRUPTIBLE:
			stats->nr_sleeping++;
			break;
		case TASK_UNINTERRUPTIBLE:
			stats->nr_uninterruptible++;
			break;
		case TASK_STOPPED:
			stats->nr_stopped++;
			break;
		default:
			if (delayacct_is_task_waiting_on_io(tsk))
				stats->nr_io_wait++;
			break;
		}
	}
	cgroup_iter_end(cgrp, &it);

err:
	return ret;
}
2889
2890
2891
2892
2893
2894
2895
2896
/*
 * seq_file ->start for pidlists.  *pos holds the last pid shown (or
 * 0 to start from the beginning); because pids can vanish between
 * reads, binary-search the sorted list for that pid - or the first
 * one after it - instead of trusting a saved index.  Takes l->mutex
 * for read; released in cgroup_pidlist_stop().
 */
static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
{
	struct cgroup_pidlist *l = s->private;
	int index = 0, pid = *pos;
	int *iter;

	down_read(&l->mutex);
	if (pid) {
		int end = l->length;
		/* Find pid, or the first pid greater than it. */
		while (index < end) {
			int mid = (index + end) / 2;
			if (l->list[mid] == pid) {
				index = mid;
				break;
			} else if (l->list[mid] <= pid)
				index = mid + 1;
			else
				end = mid;
		}
	}
	/* If we're off the end of the array, we're done. */
	if (index >= l->length)
		return NULL;
	/* Update the abstract position to the pid actually found. */
	iter = l->list + index;
	*pos = *iter;
	return iter;
}
2932
2933static void cgroup_pidlist_stop(struct seq_file *s, void *v)
2934{
2935 struct cgroup_pidlist *l = s->private;
2936 up_read(&l->mutex);
2937}
2938
2939static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
2940{
2941 struct cgroup_pidlist *l = s->private;
2942 pid_t *p = v;
2943 pid_t *end = l->list + l->length;
2944
2945
2946
2947
2948 p++;
2949 if (p >= end) {
2950 return NULL;
2951 } else {
2952 *pos = *p;
2953 return p;
2954 }
2955}
2956
/* seq_file ->show: print one pid per line. */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	int *pidp = v;

	return seq_printf(s, "%d\n", *pidp);
}
2961
2962
2963
2964
2965
/*
 * seq_file iteration over a cgroup pidlist; the list lock is held
 * from ->start through ->stop.
 */
static const struct seq_operations cgroup_pidlist_seq_operations = {
	.start = cgroup_pidlist_start,
	.stop = cgroup_pidlist_stop,
	.next = cgroup_pidlist_next,
	.show = cgroup_pidlist_show,
};
2972
/*
 * Drop a reference on a pidlist, unhooking and freeing it when the
 * last user goes away.  Lock order matters: the owner cgroup's
 * pidlist_mutex must be taken before the list's own rwsem so a
 * concurrent cgroup_pidlist_find() cannot pick up a dying list.
 */
static void cgroup_release_pid_array(struct cgroup_pidlist *l)
{
	mutex_lock(&l->owner->pidlist_mutex);
	down_write(&l->mutex);
	BUG_ON(!l->use_count);
	if (!--l->use_count) {
		/* We're the last user; remove the list and free it. */
		list_del(&l->links);
		mutex_unlock(&l->owner->pidlist_mutex);
		pidlist_free(l->list);
		put_pid_ns(l->key.ns);	/* ref taken in pidlist_find */
		up_write(&l->mutex);
		kfree(l);
		return;
	}
	mutex_unlock(&l->owner->pidlist_mutex);
	up_write(&l->mutex);
}
2997
2998static int cgroup_pidlist_release(struct inode *inode, struct file *file)
2999{
3000 struct cgroup_pidlist *l;
3001 if (!(file->f_mode & FMODE_READ))
3002 return 0;
3003
3004
3005
3006
3007 l = ((struct seq_file *)file->private_data)->private;
3008 cgroup_release_pid_array(l);
3009 return seq_release(inode, file);
3010}
3011
/* File operations installed for the "tasks" and "cgroup.procs" files. */
static const struct file_operations cgroup_pidlist_operations = {
	.read = seq_read,
	.llseek = seq_lseek,
	.write = cgroup_file_write,
	.release = cgroup_pidlist_release,
};
3018
3019
3020
3021
3022
3023
3024
/*
 * Shared open handler for the tasks/procs files: load the owning
 * cgroup's pidlist, switch the file to the pidlist seq operations and
 * stash the list in the seq_file's private pointer.  Write-only opens
 * skip all of this.
 */
static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
{
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
	struct cgroup_pidlist *l;
	int retval;

	/* Nothing to do for write-only files. */
	if (!(file->f_mode & FMODE_READ))
		return 0;

	/* Have the pidlist populated (takes a use-count reference). */
	retval = pidlist_array_load(cgrp, type, &l);
	if (retval)
		return retval;
	/* Configure the file for seq_file-based iteration. */
	file->f_op = &cgroup_pidlist_operations;

	retval = seq_open(file, &cgroup_pidlist_seq_operations);
	if (retval) {
		/* Drop the reference taken by pidlist_array_load(). */
		cgroup_release_pid_array(l);
		return retval;
	}
	((struct seq_file *)file->private_data)->private = l;
	return 0;
}
/* open() for the per-cgroup "tasks" file. */
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
}
/* open() for the per-cgroup "cgroup.procs" file. */
static int cgroup_procs_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
}
3058
/* Report the notify_on_release state of @cgrp for its control file. */
static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
					 struct cftype *cft)
{
	return notify_on_release(cgrp);
}
3064
3065static int cgroup_write_notify_on_release(struct cgroup *cgrp,
3066 struct cftype *cft,
3067 u64 val)
3068{
3069 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
3070 if (val)
3071 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3072 else
3073 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3074 return 0;
3075}
3076
3077
3078
3079
3080
3081
/*
 * Deferred teardown of a cgroup userspace event: unregister it from
 * its cftype, drop the eventfd context and the cgroup dentry
 * reference, and free the event.  Runs from the workqueue (scheduled
 * by cgroup_event_wake()), so it is allowed to sleep.
 */
static void cgroup_event_remove(struct work_struct *work)
{
	struct cgroup_event *event = container_of(work, struct cgroup_event,
						  remove);
	struct cgroup *cgrp = event->cgrp;

	event->cft->unregister_event(cgrp, event->cft, event->eventfd);

	eventfd_ctx_put(event->eventfd);
	kfree(event);
	dput(cgrp->dentry);	/* taken when the event was registered */
}
3094
3095
3096
3097
3098
3099
/*
 * Wait-queue callback for a cgroup event's eventfd.  On POLLHUP (the
 * eventfd was closed) the event unhooks itself from the wait queue
 * and the cgroup's event list; the final teardown is deferred to
 * process context via schedule_work() rather than done here under the
 * waitqueue callback.
 */
static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
			     int sync, void *key)
{
	struct cgroup_event *event = container_of(wait,
			struct cgroup_event, wait);
	struct cgroup *cgrp = event->cgrp;
	unsigned long flags = (unsigned long)key;

	if (flags & POLLHUP) {
		__remove_wait_queue(event->wqh, &event->wait);
		spin_lock(&cgrp->event_list_lock);
		list_del(&event->list);
		spin_unlock(&cgrp->event_list_lock);
		/*
		 * The event can't be removed directly from here;
		 * schedule the work item that does the teardown.
		 */
		schedule_work(&event->remove);
	}

	return 0;
}
3122
3123static void cgroup_event_ptable_queue_proc(struct file *file,
3124 wait_queue_head_t *wqh, poll_table *pt)
3125{
3126 struct cgroup_event *event = container_of(pt,
3127 struct cgroup_event, pt);
3128
3129 event->wqh = wqh;
3130 add_wait_queue(wqh, &event->wait);
3131}
3132
3133
3134
3135
3136
3137
3138
/*
 * Parse input and register a new cgroup_event.  The expected input is
 * "<event_fd> <control_fd> [args]": @efd is an eventfd to be signalled,
 * @cfd refers to an open control file in this cgroup, and the remainder
 * of the buffer is handed verbatim to the cftype's register_event()
 * callback.  Returns 0 on success or a negative errno.
 */
static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
				      const char *buffer)
{
	struct cgroup_event *event = NULL;
	unsigned int efd, cfd;
	struct file *efile = NULL;
	struct file *cfile = NULL;
	char *endp;
	int ret;

	efd = simple_strtoul(buffer, &endp, 10);
	if (*endp != ' ')
		return -EINVAL;
	buffer = endp + 1;

	cfd = simple_strtoul(buffer, &endp, 10);
	if ((*endp != ' ') && (*endp != '\0'))
		return -EINVAL;
	/*
	 * NOTE(review): when *endp == '\0' this advances one past the
	 * terminator before passing the buffer to register_event() --
	 * presumably callbacks that take no arguments never read it;
	 * verify against the individual register_event implementations.
	 */
	buffer = endp + 1;

	event = kzalloc(sizeof(*event), GFP_KERNEL);
	if (!event)
		return -ENOMEM;
	event->cgrp = cgrp;
	INIT_LIST_HEAD(&event->list);
	init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
	init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
	INIT_WORK(&event->remove, cgroup_event_remove);

	efile = eventfd_fget(efd);
	if (IS_ERR(efile)) {
		ret = PTR_ERR(efile);
		goto fail;
	}

	event->eventfd = eventfd_ctx_fileget(efile);
	if (IS_ERR(event->eventfd)) {
		ret = PTR_ERR(event->eventfd);
		goto fail;
	}

	cfile = fget(cfd);
	if (!cfile) {
		ret = -EBADF;
		goto fail;
	}

	/* the process need read permission on the control file */
	ret = file_permission(cfile, MAY_READ);
	if (ret < 0)
		goto fail;

	event->cft = __file_cft(cfile);
	if (IS_ERR(event->cft)) {
		ret = PTR_ERR(event->cft);
		goto fail;
	}

	/* only cftypes providing both callbacks support eventfd events */
	if (!event->cft->register_event || !event->cft->unregister_event) {
		ret = -EINVAL;
		goto fail;
	}

	ret = event->cft->register_event(cgrp, event->cft,
			event->eventfd, buffer);
	if (ret)
		goto fail;

	/*
	 * If the eventfd was already closed (POLLHUP), drop the event
	 * again; ret stays 0 because this is not an error for the caller.
	 */
	if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
		event->cft->unregister_event(cgrp, event->cft, event->eventfd);
		ret = 0;
		goto fail;
	}

	/*
	 * Events should be removed after rmdir of cgroup directory, but
	 * before destroying subsystem state objects.  Pin the cgroup's
	 * dentry so the directory outlives the armed event.
	 */
	dget(cgrp->dentry);

	spin_lock(&cgrp->event_list_lock);
	list_add(&event->list, &cgrp->event_list);
	spin_unlock(&cgrp->event_list_lock);

	fput(cfile);
	fput(efile);

	return 0;

fail:
	/* unwind whichever references were taken before the failure */
	if (cfile)
		fput(cfile);

	if (event && event->eventfd && !IS_ERR(event->eventfd))
		eventfd_ctx_put(event->eventfd);

	if (!IS_ERR_OR_NULL(efile))
		fput(efile);

	kfree(event);

	return ret;
}
3243
3244static u64 cgroup_clone_children_read(struct cgroup *cgrp,
3245 struct cftype *cft)
3246{
3247 return clone_children(cgrp);
3248}
3249
3250static int cgroup_clone_children_write(struct cgroup *cgrp,
3251 struct cftype *cft,
3252 u64 val)
3253{
3254 if (val)
3255 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3256 else
3257 clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3258 return 0;
3259}
3260
3261
3262
3263
3264
3265#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
3266static struct cftype files[] = {
3267 {
3268 .name = "tasks",
3269 .open = cgroup_tasks_open,
3270 .write_u64 = cgroup_tasks_write,
3271 .release = cgroup_pidlist_release,
3272 .mode = S_IRUGO | S_IWUSR,
3273 },
3274 {
3275 .name = CGROUP_FILE_GENERIC_PREFIX "procs",
3276 .open = cgroup_procs_open,
3277
3278 .release = cgroup_pidlist_release,
3279 .mode = S_IRUGO,
3280 },
3281 {
3282 .name = "notify_on_release",
3283 .read_u64 = cgroup_read_notify_on_release,
3284 .write_u64 = cgroup_write_notify_on_release,
3285 },
3286 {
3287 .name = CGROUP_FILE_GENERIC_PREFIX "event_control",
3288 .write_string = cgroup_write_event_control,
3289 .mode = S_IWUGO,
3290 },
3291 {
3292 .name = "cgroup.clone_children",
3293 .read_u64 = cgroup_clone_children_read,
3294 .write_u64 = cgroup_clone_children_write,
3295 },
3296};
3297
/*
 * "release_agent" file; added only to the root cgroup of a hierarchy
 * (see cgroup_populate_dir()).
 */
static struct cftype cft_release_agent = {
	.name = "release_agent",
	.read_seq_string = cgroup_release_agent_show,
	.write_string = cgroup_release_agent_write,
	.max_write_len = PATH_MAX,
};
3304
/*
 * (Re)create all control files in @cgrp's directory: the base files,
 * the root-only release_agent file, and every bound subsystem's own
 * files via its ->populate() callback.  Finally publish each css's id
 * so id-based lookups start finding this cgroup.
 */
static int cgroup_populate_dir(struct cgroup *cgrp)
{
	int err;
	struct cgroup_subsys *ss;

	/* First clear out any existing files */
	cgroup_clear_directory(cgrp->dentry);

	err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
	if (err < 0)
		return err;

	/* release_agent exists only at the top of a hierarchy */
	if (cgrp == cgrp->top_cgroup) {
		if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
			return err;
	}

	for_each_subsys(cgrp->root, ss) {
		if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
			return err;
	}

	/* This cgroup is ready now */
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		/*
		 * Update id->css pointer and make this css visible from
		 * CSS ID functions.  This pointer will be dereferenced
		 * from RCU-read-side without locks.
		 */
		if (css->id)
			rcu_assign_pointer(css->id->css, css);
	}

	return 0;
}
3340
/*
 * Initialise a freshly created subsystem state object and attach it to
 * @cgrp's subsys[] slot.  The refcount starts at 1 (the cgroup's own
 * reference); the root css of the dummy hierarchy is flagged CSS_ROOT.
 */
static void init_cgroup_css(struct cgroup_subsys_state *css,
			    struct cgroup_subsys *ss,
			    struct cgroup *cgrp)
{
	css->cgroup = cgrp;
	atomic_set(&css->refcnt, 1);
	css->flags = 0;
	css->id = NULL;
	if (cgrp == dummytop)
		set_bit(CSS_ROOT, &css->flags);
	/* the slot must not already be occupied */
	BUG_ON(cgrp->subsys[ss->subsys_id]);
	cgrp->subsys[ss->subsys_id] = css;
}
3354
3355static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
3356{
3357
3358 int i;
3359
3360
3361
3362
3363
3364 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3365 struct cgroup_subsys *ss = subsys[i];
3366 if (ss == NULL)
3367 continue;
3368 if (ss->root == root)
3369 mutex_lock(&ss->hierarchy_mutex);
3370 }
3371}
3372
3373static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
3374{
3375 int i;
3376
3377 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3378 struct cgroup_subsys *ss = subsys[i];
3379 if (ss == NULL)
3380 continue;
3381 if (ss->root == root)
3382 mutex_unlock(&ss->hierarchy_mutex);
3383 }
3384}
3385
3386
3387
3388
3389
3390
3391
3392
3393
/*
 * cgroup_create - create a cgroup
 * @parent: cgroup that will be parent of the new cgroup
 * @dentry: dentry of the new cgroup
 * @mode: mode to set on new inode
 *
 * Must be called with the mutex on the parent inode held.  Creates the
 * subsystem state objects, links the new cgroup into the hierarchy and
 * populates its directory.  Returns 0 on success, -errno on failure.
 */
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			     mode_t mode)
{
	struct cgroup *cgrp;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
	struct super_block *sb = root->sb;

	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
	if (!cgrp)
		return -ENOMEM;

	/*
	 * Grab a reference on the superblock so the superblock doesn't
	 * vanish while this cgroup exists; dropped via deactivate_super()
	 * on the error paths (and eventually when the cgroup dies).
	 */
	atomic_inc(&sb->s_active);

	mutex_lock(&cgroup_mutex);

	init_cgroup_housekeeping(cgrp);

	cgrp->parent = parent;
	cgrp->root = parent->root;
	cgrp->top_cgroup = parent->top_cgroup;

	/* new cgroups inherit these two flags from their parent */
	if (notify_on_release(parent))
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);

	if (clone_children(parent))
		set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);

	for_each_subsys(root, ss) {
		struct cgroup_subsys_state *css = ss->create(ss, cgrp);

		if (IS_ERR(css)) {
			err = PTR_ERR(css);
			goto err_destroy;
		}
		init_cgroup_css(css, ss, cgrp);
		if (ss->use_id) {
			err = alloc_css_id(ss, parent, cgrp);
			if (err)
				goto err_destroy;
		}
		/* At error, ->destroy() callback has to free assigned ID. */
		if (clone_children(parent) && ss->post_clone)
			ss->post_clone(ss, cgrp);
	}

	cgroup_lock_hierarchy(root);
	list_add(&cgrp->sibling, &cgrp->parent->children);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups++;

	err = cgroup_create_dir(cgrp, dentry, mode);
	if (err < 0)
		goto err_remove;

	/* The cgroup directory was pre-locked for us */
	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));

	err = cgroup_populate_dir(cgrp);
	/* If err < 0, we have a half-filled directory - oh well ;) */

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	return 0;

 err_remove:
	/* unlink from the hierarchy again before tearing down the css's */
	cgroup_lock_hierarchy(root);
	list_del(&cgrp->sibling);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups--;

 err_destroy:
	/* destroy only the css's that were successfully created */
	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	/* Release the reference count that we took on the superblock */
	deactivate_super(sb);

	kfree(cgrp);
	return err;
}
3488
3489static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
3490{
3491 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
3492
3493
3494 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
3495}
3496
/*
 * Check whether any subsystem state object attached to @cgrp still has
 * external references (refcnt > 1; the cgroup itself always holds one
 * reference).  Used to decide whether the cgroup can be released.
 */
static int cgroup_has_css_refs(struct cgroup *cgrp)
{
	/*
	 * We shouldn't need locks here: this is only called either during
	 * rmdir (when tasks can no longer be moved into the cgroup) or by
	 * check_for_release(), where a best-effort answer suffices --
	 * NOTE(review): presumably racy reads are tolerated here; the
	 * original locking rationale was in a now-missing comment.
	 */
	int i;

	/*
	 * Walk all possible subsystem slots rather than using
	 * for_each_subsys(): modular slots may be NULL and slots bound to
	 * other hierarchies must be skipped explicitly.
	 */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		struct cgroup_subsys_state *css;

		/* skip unloaded subsystems and other hierarchies */
		if (ss == NULL || ss->root != cgrp->root)
			continue;
		css = cgrp->subsys[ss->subsys_id];
		/*
		 * When called from check_for_release() it's possible
		 * that by this point the cgroup has been removed and the
		 * css deleted.  But a false-positive doesn't matter,
		 * since it can only happen if the cgroup has been
		 * removed, so the release agent has no reason to run.
		 */
		if (css && (atomic_read(&css->refcnt) > 1))
			return 1;
	}
	return 0;
}
3532
3533
3534
3535
3536
3537
3538
/*
 * Atomically drop the cgroup's base reference (1 -> 0) on every css in
 * @cgrp, or roll back if any css still has external users.  Returns
 * true when all css's were successfully cleared (and marked
 * CSS_REMOVED), false if removal must be retried later.  Runs with
 * interrupts disabled so the cmpxchg loop can't be preempted locally.
 */
static int cgroup_clear_css_refs(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	unsigned long flags;
	bool failed = false;
	local_irq_save(flags);
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		int refcnt;
		while (1) {
			/* We can only remove a CSS with a refcnt==1 */
			refcnt = atomic_read(&css->refcnt);
			if (refcnt > 1) {
				failed = true;
				goto done;
			}
			BUG_ON(!refcnt);
			/*
			 * Drop the refcnt to 0 while we check other
			 * css's.  This is a point of no return for this
			 * css -- between the cmpxchg and the rollback
			 * below, css_tryget() will fail on it.  Use
			 * cmpxchg (not atomic_dec) so a concurrent
			 * css_get() forces a retry instead of being
			 * lost.
			 */
			if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
				break;
			cpu_relax();
		}
	}
 done:
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		if (failed) {
			/*
			 * Restore base refcnt on any css whose count we
			 * had already dropped to 0; css's we never
			 * reached keep their nonzero count.
			 */
			if (!atomic_read(&css->refcnt))
				atomic_set(&css->refcnt, 1);
		} else {
			/* Commit to removal */
			set_bit(CSS_REMOVED, &css->flags);
		}
	}
	local_irq_restore(flags);
	return !failed;
}
3585
/*
 * VFS rmdir hook for cgroup directories.  A cgroup can only be removed
 * when it has no tasks (count == 0), no children, and no external css
 * references.  Subsystem ->pre_destroy() callbacks may drop references
 * asynchronously, so on contention we sleep on cgroup_rmdir_waitq and
 * retry the whole sequence.
 */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	DEFINE_WAIT(wait);
	struct cgroup_event *event, *tmp;
	int ret;

	/* the vfs holds both inode->i_mutex already */
again:
	mutex_lock(&cgroup_mutex);
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	mutex_unlock(&cgroup_mutex);

	/*
	 * In general, subsystems have no dedicated interface for a
	 * "rmdir in progress" notification.  Some subsystems (such as
	 * the memory controller) move charges in pre_destroy() and those
	 * operations can wake rmdir waiters; set CGRP_WAIT_ON_RMDIR
	 * before calling pre_destroy() so such wakeups are not lost.
	 */
	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	/*
	 * Call pre_destroy handlers of subsystems.  Notify subsystems
	 * that rmdir() request comes; this must be done outside
	 * cgroup_mutex as the callbacks may sleep / take other locks.
	 */
	ret = cgroup_call_pre_destroy(cgrp);
	if (ret) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		return ret;
	}

	mutex_lock(&cgroup_mutex);
	parent = cgrp->parent;
	/* re-check emptiness: pre_destroy() dropped the mutex */
	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
	if (!cgroup_clear_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		/*
		 * Because someone may call cgroup_wakeup_rmdir_waiter()
		 * before checking CGRP_WAIT_ON_RMDIR, only sleep if the
		 * flag is still set.
		 */
		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
			schedule();
		finish_wait(&cgroup_rmdir_waitq, &wait);
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		if (signal_pending(current))
			return -EINTR;
		goto again;
	}
	/* NO css_tryget() can success after here. */
	finish_wait(&cgroup_rmdir_waitq, &wait);
	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del(&cgrp->release_list);
	spin_unlock(&release_list_lock);

	cgroup_lock_hierarchy(cgrp->root);
	/* delete this cgroup from parent->children */
	list_del(&cgrp->sibling);
	cgroup_unlock_hierarchy(cgrp->root);

	d = dget(cgrp->dentry);

	cgroup_d_remove_dir(d);
	dput(d);

	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	/*
	 * Unregister events and notify userspace.  Notify userspace
	 * about cgroup removing only after rmdir of cgroup directory
	 * to avoid races between userspace and kernelspace.
	 */
	spin_lock(&cgrp->event_list_lock);
	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
		list_del(&event->list);
		remove_wait_queue(event->wqh, &event->wait);
		eventfd_signal(event->eventfd, 1);
		schedule_work(&event->remove);
	}
	spin_unlock(&cgrp->event_list_lock);

	mutex_unlock(&cgroup_mutex);
	return 0;
}
3691
/*
 * Initialise one built-in subsystem at boot: bind it to the dummy
 * hierarchy (rootnode), create its root css on dummytop and wire that
 * css into init_css_set.  Only for statically compiled-in subsystems;
 * modular ones go through cgroup_load_subsys().
 */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	/* Create the top cgroup state for this subsystem */
	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;
	css = ss->create(ss, dummytop);
	/* We don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_cgroup_css(css, ss, dummytop);

	/*
	 * Update the init_css_set to contain a subsys pointer to this
	 * state -- since the subsystem is newly registered, all tasks
	 * are in the subsystem's top cgroup.
	 */
	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];

	need_forkexit_callback |= ss->fork || ss->exit;

	/*
	 * At system boot, before sched_init() has been called, it is
	 * safe to reference init_task -- check that no other task has
	 * been forked yet.
	 */
	BUG_ON(!list_empty(&init_task.tasks));

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	ss->active = 1;

	/*
	 * This function shouldn't be used with modular subsystems, since
	 * they need to register a subsys_id, among other things.
	 */
	BUG_ON(ss->module);
}
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
/*
 * cgroup_load_subsys - load and register a modular subsystem at runtime
 * @ss: the subsystem to load
 *
 * Allocates a dynamic subsys_id, creates the subsystem's root css on
 * the dummy hierarchy and rehashes every existing css_set to include
 * the new css pointer.  Returns 0 on success or a negative errno.
 * Built-in subsystems pass through as a no-op.
 */
int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
{
	int i;
	struct cgroup_subsys_state *css;

	/* check name and function validity */
	if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
	    ss->create == NULL || ss->destroy == NULL)
		return -EINVAL;

	/*
	 * We don't support callbacks in modular subsystems.  A module
	 * needing a fork/exit callback would have to be loaded before
	 * any task could ever fork or exit for the callbacks to be
	 * complete -- in practice, before init -- so such a module
	 * should be built-in instead.
	 */
	if (ss->fork || ss->exit)
		return -EINVAL;

	/*
	 * An optionally modular subsystem is built-in: we want to do
	 * nothing, since it's already initialized and its slot in
	 * subsys[] is statically assigned.
	 */
	if (ss->module == NULL) {
		/* a few sanity checks */
		BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
		BUG_ON(subsys[ss->subsys_id] != ss);
		return 0;
	}

	/*
	 * Need to register a subsys_id, among other things.  All dynamic
	 * slots live above the built-in range.
	 */
	mutex_lock(&cgroup_mutex);
	/* find the first empty slot in the array */
	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
		if (subsys[i] == NULL)
			break;
	}
	if (i == CGROUP_SUBSYS_COUNT) {
		/* maximum number of subsystems already registered! */
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	/* assign ourselves the subsys_id */
	ss->subsys_id = i;
	subsys[i] = ss;

	/*
	 * No ss->create seems to need anything important in the ss
	 * struct, so this can happen first (i.e. before the rootnode
	 * attachment).
	 */
	css = ss->create(ss, dummytop);
	if (IS_ERR(css)) {
		/* failure case - need to deassign the subsys[] slot. */
		subsys[i] = NULL;
		mutex_unlock(&cgroup_mutex);
		return PTR_ERR(css);
	}

	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;

	/* our new subsystem will be attached to the dummy hierarchy. */
	init_cgroup_css(css, ss, dummytop);
	/* init_idr must be after init_cgroup_css because it sets css->id. */
	if (ss->use_id) {
		int ret = cgroup_init_idr(ss, css);
		if (ret) {
			dummytop->subsys[ss->subsys_id] = NULL;
			ss->destroy(ss, dummytop);
			subsys[i] = NULL;
			mutex_unlock(&cgroup_mutex);
			return ret;
		}
	}

	/*
	 * Now we need to entangle the css into the existing css_sets.
	 * Unlike in cgroup_init_subsys, there are now multiple css_sets,
	 * so each one gets the new pointer added to its set of
	 * subsystem states -- which changes its hash, so it needs to be
	 * moved to the correct bucket of css_set_table.
	 */
	write_lock(&css_set_lock);
	for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
		struct css_set *cg;
		struct hlist_node *node, *tmp;
		struct hlist_head *bucket = &css_set_table[i], *new_bucket;

		hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
			/* skip entries that we already rehashed */
			if (cg->subsys[ss->subsys_id])
				continue;
			/* remove existing entry */
			hlist_del(&cg->hlist);
			/* set new value */
			cg->subsys[ss->subsys_id] = css;
			/* recompute hash and restore entry */
			new_bucket = css_set_hash(cg->subsys);
			hlist_add_head(&cg->hlist, new_bucket);
		}
	}
	write_unlock(&css_set_lock);

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	ss->active = 1;

	/* success! */
	mutex_unlock(&cgroup_mutex);
	return 0;
}
3852EXPORT_SYMBOL_GPL(cgroup_load_subsys);
3853
3854
3855
3856
3857
3858
3859
3860
3861
/*
 * cgroup_unload_subsys - unload a modular subsystem
 * @ss: the subsystem to unload
 *
 * Reverses cgroup_load_subsys(): unhooks the css from every css_set
 * (rehashing each one) and destroys the root css.  The subsystem must
 * be unused, i.e. still attached only to the dummy hierarchy.
 */
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
	struct cg_cgroup_link *link;
	struct hlist_head *hhead;

	BUG_ON(ss->module == NULL);

	/*
	 * We shouldn't be called if the subsystem is in use, and the use
	 * of try_module_get in rebind_subsystems should ensure that it
	 * doesn't start being used while we're killing it off.
	 */
	BUG_ON(ss->root != &rootnode);

	mutex_lock(&cgroup_mutex);
	/* deassign the subsys_id (must be a dynamically assigned slot) */
	BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
	subsys[ss->subsys_id] = NULL;

	/* remove subsystem from rootnode's list of subsystems */
	list_del(&ss->sibling);

	/*
	 * Disentangle the css from all css_sets attached to the dummy
	 * top.  As in loading, we need to pay our respects to the
	 * hashtable gods: each css_set's hash changes when its subsys
	 * pointer is cleared, so it must be moved to the right bucket.
	 */
	write_lock(&css_set_lock);
	list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
		struct css_set *cg = link->cg;

		hlist_del(&cg->hlist);
		BUG_ON(!cg->subsys[ss->subsys_id]);
		cg->subsys[ss->subsys_id] = NULL;
		hhead = css_set_hash(cg->subsys);
		hlist_add_head(&cg->hlist, hhead);
	}
	write_unlock(&css_set_lock);

	/*
	 * Remove the subsystem's root css from the dummy hierarchy.  In
	 * the subsystem's own destroy callback it can use
	 * css_get/css_tryget on the css, which is safe since we're
	 * holding cgroup_mutex.
	 */
	ss->destroy(ss, dummytop);
	dummytop->subsys[ss->subsys_id] = NULL;

	mutex_unlock(&cgroup_mutex);
}
3911EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
3912
3913
3914
3915
3916
3917
3918
/*
 * cgroup_init_early - cgroup initialization at system boot
 *
 * Initialize cgroups at system boot, before any processes have been
 * forked: set up the init css_set, the dummy hierarchy's root, the
 * css_set hash table, and any built-in subsystems that requested
 * early initialization.
 */
int __init cgroup_init_early(void)
{
	int i;
	atomic_set(&init_css_set.refcount, 1);
	INIT_LIST_HEAD(&init_css_set.cg_links);
	INIT_LIST_HEAD(&init_css_set.tasks);
	INIT_HLIST_NODE(&init_css_set.hlist);
	css_set_count = 1;
	init_cgroup_root(&rootnode);
	root_count = 1;
	init_task.cgroups = &init_css_set;

	/* link init_css_set to the dummy hierarchy's top cgroup */
	init_css_set_link.cg = &init_css_set;
	init_css_set_link.cgrp = dummytop;
	list_add(&init_css_set_link.cgrp_link_list,
		 &rootnode.top_cgroup.css_sets);
	list_add(&init_css_set_link.cg_link_list,
		 &init_css_set.cg_links);

	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&css_set_table[i]);

	/* at bootup time, we don't worry about modular subsystems */
	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		BUG_ON(!ss->name);
		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
		BUG_ON(!ss->create);
		BUG_ON(!ss->destroy);
		if (ss->subsys_id != i) {
			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
			       ss->name, ss->subsys_id);
			BUG();
		}

		if (ss->early_init)
			cgroup_init_subsys(ss);
	}
	return 0;
}
3960
3961
3962
3963
3964
3965
3966
/*
 * cgroup_init - cgroup initialization
 *
 * Register cgroup filesystem and /proc files, and initialize any
 * built-in subsystems that didn't request early init.
 */
int __init cgroup_init(void)
{
	int err;
	int i;
	struct hlist_head *hhead;

	err = bdi_init(&cgroup_backing_dev_info);
	if (err)
		return err;

	/* at bootup time, we don't worry about modular subsystems */
	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		if (!ss->early_init)
			cgroup_init_subsys(ss);
		if (ss->use_id)
			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
	}

	/* Add init_css_set to the hash table */
	hhead = css_set_hash(init_css_set.subsys);
	hlist_add_head(&init_css_set.hlist, hhead);
	BUG_ON(!init_root_id(&rootnode));

	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
	if (!cgroup_kobj) {
		err = -ENOMEM;
		goto out;
	}

	err = register_filesystem(&cgroup_fs_type);
	if (err < 0) {
		kobject_put(cgroup_kobj);
		goto out;
	}

	proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

out:
	if (err)
		bdi_destroy(&cgroup_backing_dev_info);

	return err;
}
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
/*
 * seq_file show function for /proc/<pid>/cgroup: for each active
 * hierarchy print "<id>:<subsys list>[,name=<name>]:<cgroup path>".
 * A pointer to the task's struct pid was stashed in m->private by
 * cgroup_open().
 */
static int proc_cgroup_show(struct seq_file *m, void *v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	int retval;
	struct cgroupfs_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		goto out;

	retval = -ESRCH;
	pid = m->private;
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk)
		goto out_free;

	retval = 0;

	mutex_lock(&cgroup_mutex);

	for_each_active_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int count = 0;

		seq_printf(m, "%d:", root->hierarchy_id);
		for_each_subsys(root, ss)
			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
		/* a named hierarchy may have no subsystems bound */
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		seq_putc(m, ':');
		cgrp = task_cgroup_from_root(tsk, root);
		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
		if (retval < 0)
			goto out_unlock;
		seq_puts(m, buf);
		seq_putc(m, '\n');
	}

out_unlock:
	mutex_unlock(&cgroup_mutex);
	put_task_struct(tsk);
out_free:
	kfree(buf);
out:
	return retval;
}
4076
4077static int cgroup_open(struct inode *inode, struct file *file)
4078{
4079 struct pid *pid = PROC_I(inode)->pid;
4080 return single_open(file, proc_cgroup_show, pid);
4081}
4082
/* file_operations for /proc/<pid>/cgroup */
const struct file_operations proc_cgroup_operations = {
	.open		= cgroup_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
4089
4090
/* Display information about each registered subsystem in /proc/cgroups */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
	int i;

	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
	/*
	 * ideally we don't want subsystems moving around while we do
	 * this.  cgroup_mutex is also necessary to guarantee an atomic
	 * snapshot of the modular subsystem slots, which may be NULL.
	 */
	mutex_lock(&cgroup_mutex);
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		if (ss == NULL)
			continue;
		seq_printf(m, "%s\t%d\t%d\t%d\n",
			   ss->name, ss->root->hierarchy_id,
			   ss->root->number_of_cgroups, !ss->disabled);
	}
	mutex_unlock(&cgroup_mutex);
	return 0;
}
4113
4114static int cgroupstats_open(struct inode *inode, struct file *file)
4115{
4116 return single_open(file, proc_cgroupstats_show, NULL);
4117}
4118
/* file_operations for /proc/cgroups */
static const struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
/*
 * cgroup_fork - attach newly forked task to its parent's cgroups
 * @child: pointer to task_struct of forking parent process.
 *
 * The child inherits the parent's css_set; a reference is taken on it
 * while holding the parent's task_lock so the pointer read and the
 * refcount bump are atomic with respect to cgroup migration.
 */
void cgroup_fork(struct task_struct *child)
{
	task_lock(current);
	child->cgroups = current->cgroups;
	get_css_set(child->cgroups);
	task_unlock(current);
	INIT_LIST_HEAD(&child->cg_list);
}
4150
4151
4152
4153
4154
4155
4156
4157
4158
/*
 * cgroup_fork_callbacks - run fork callbacks
 * @child: the new task
 *
 * Called on a new task very soon before adding it to the tasklist.
 * No need to take any locks since no-one can be operating on a task
 * that hasn't appeared yet.
 */
void cgroup_fork_callbacks(struct task_struct *child)
{
	if (need_forkexit_callback) {
		int i;
		/*
		 * forkexit callbacks are only supported for builtin
		 * subsystems; the fork/exit callbacks are already
		 * disallowed for modular subsystems in
		 * cgroup_load_subsys(), so we needn't worry about them.
		 */
		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->fork)
				ss->fork(ss, child);
		}
	}
}
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
/*
 * cgroup_post_fork - called on a new task after adding it to the
 * task list
 * @child: the task in question
 *
 * Adds the task to the css_set's task list, but only if the
 * use_task_css_set_links optimisation has been enabled (i.e. someone
 * has asked for per-css_set task lists).
 */
void cgroup_post_fork(struct task_struct *child)
{
	if (use_task_css_set_links) {
		write_lock(&css_set_lock);
		task_lock(child);
		/* guard against a racing attach having linked us already */
		if (list_empty(&child->cg_list))
			list_add(&child->cg_list, &child->cgroups->tasks);
		task_unlock(child);
		write_unlock(&css_set_lock);
	}
}
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
/*
 * cgroup_exit - detach exiting task from cgroups
 * @tsk: pointer to task_struct of exiting process
 * @run_callbacks: run exit callbacks?
 *
 * Reparents @tsk's css_set reference to init_css_set and removes the
 * task from any per-css_set task list, optionally invoking each
 * built-in subsystem's exit callback first.
 */
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
	int i;
	struct css_set *cg;

	if (run_callbacks && need_forkexit_callback) {
		/*
		 * modular subsystems can't use callbacks, so no need to
		 * lock the subsys array -- only built-in slots matter.
		 */
		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->exit)
				ss->exit(ss, tsk);
		}
	}

	/*
	 * Unlink from the css_set task list if necessary.  Optimistically
	 * check cg_list before taking css_set_lock; the re-check under
	 * the lock handles a concurrent unlink.
	 */
	if (!list_empty(&tsk->cg_list)) {
		write_lock(&css_set_lock);
		if (!list_empty(&tsk->cg_list))
			list_del(&tsk->cg_list);
		write_unlock(&css_set_lock);
	}

	/* Reassign the task to init_css_set. */
	task_lock(tsk);
	cg = tsk->cgroups;
	tsk->cgroups = &init_css_set;
	task_unlock(tsk);
	if (cg)
		put_css_set_taskexit(cg);
}
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
/*
 * cgroup_clone - clone the cgroup the given subsystem is attached to
 * @tsk: the task to be moved
 * @subsys: the given subsystem
 * @nodename: the name for the new cgroup
 *
 * Duplicate the current cgroup in the hierarchy that the given
 * subsystem is attached to, and move this task into the new child.
 * Returns 0 on success or a negative errno.
 */
int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
							char *nodename)
{
	struct dentry *dentry;
	int ret = 0;
	struct cgroup *parent, *child;
	struct inode *inode;
	struct css_set *cg;
	struct cgroupfs_root *root;
	struct cgroup_subsys *ss;

	/* We shouldn't be called by an unregistered subsystem */
	BUG_ON(!subsys->active);

	/*
	 * First figure out what hierarchy and cgroup we're dealing with.
	 * If pid_cgroup_lock ends up being commonly accessed (via
	 * /proc/<pid>/cgroup), we should optimise this search.
	 */
	mutex_lock(&cgroup_mutex);
 again:
	root = subsys->root;
	if (root == &rootnode) {
		/* subsystem not mounted anywhere; nothing to clone */
		mutex_unlock(&cgroup_mutex);
		return 0;
	}

	/* Pin the hierarchy */
	if (!atomic_inc_not_zero(&root->sb->s_active)) {
		/* We race with the final deactivate_super() */
		mutex_unlock(&cgroup_mutex);
		return 0;
	}

	/* Keep the cgroup alive */
	task_lock(tsk);
	parent = task_cgroup(tsk, subsys->subsys_id);
	cg = tsk->cgroups;
	get_css_set(cg);
	task_unlock(tsk);

	mutex_unlock(&cgroup_mutex);

	/* Now do the VFS work to create a cgroup */
	inode = parent->dentry->d_inode;

	/* Hold the parent directory mutex across this operation to
	 * stop anyone else deleting the new cgroup */
	mutex_lock(&inode->i_mutex);
	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
	if (IS_ERR(dentry)) {
		printk(KERN_INFO
		       "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
		       PTR_ERR(dentry));
		ret = PTR_ERR(dentry);
		goto out_release;
	}

	/* Create the cgroup directory, which also creates the cgroup */
	ret = vfs_mkdir(inode, dentry, 0755);
	child = __d_cgrp(dentry);
	dput(dentry);
	if (ret) {
		printk(KERN_INFO
		       "Failed to create cgroup %s: %d\n", nodename,
		       ret);
		goto out_release;
	}

	/*
	 * The cgroup now exists.  Retake cgroup_mutex and check that if
	 * the cgroup hierarchy hasn't changed in the meantime -- i.e.
	 * that the task is still in the same hierarchy and cgroup.
	 */
	mutex_lock(&cgroup_mutex);
	if ((root != subsys->root) ||
	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
		/* Aargh, we raced ... */
		mutex_unlock(&inode->i_mutex);
		put_css_set(cg);

		deactivate_super(root->sb);
		/*
		 * The cgroup is still accessible in the VFS, but
		 * we're not going to try to rmdir() it at this point.
		 */
		printk(KERN_INFO
		       "Race in cgroup_clone() - leaking cgroup %s\n",
		       nodename);
		goto again;
	}

	/* do any required auto-setup */
	for_each_subsys(root, ss) {
		if (ss->post_clone)
			ss->post_clone(ss, child);
	}

	/* All seems fine. Finish by moving the task into the new cgroup */
	ret = cgroup_attach_task(child, tsk);
	mutex_unlock(&cgroup_mutex);

 out_release:
	mutex_unlock(&inode->i_mutex);

	mutex_lock(&cgroup_mutex);
	put_css_set(cg);
	mutex_unlock(&cgroup_mutex);
	deactivate_super(root->sb);
	return ret;
}
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
4399{
4400 int ret;
4401 struct cgroup *target;
4402
4403 if (cgrp == dummytop)
4404 return 1;
4405
4406 target = task_cgroup_from_root(task, cgrp->root);
4407 while (cgrp != target && cgrp!= cgrp->top_cgroup)
4408 cgrp = cgrp->parent;
4409 ret = (cgrp == target);
4410 return ret;
4411}
4412
/*
 * Check whether @cgrp has become releasable (no tasks, no children, no
 * external css refs) and, if so, queue it on release_list and kick the
 * release-agent work item.
 */
static void check_for_release(struct cgroup *cgrp)
{
	/* All of these checks rely on RCU to keep the cgroup
	 * structure alive */
	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
	    && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
		/*
		 * Control Group is currently removeable.  If it's not
		 * already queued for a userspace notification, queue
		 * it now.
		 */
		int need_schedule_work = 0;
		spin_lock(&release_list_lock);
		if (!cgroup_is_removed(cgrp) &&
		    list_empty(&cgrp->release_list)) {
			list_add(&cgrp->release_list, &release_list);
			need_schedule_work = 1;
		}
		spin_unlock(&release_list_lock);
		if (need_schedule_work)
			schedule_work(&release_agent_work);
	}
}
4434
4435
/* Caller must verify that the css is not for root cgroup */
void __css_put(struct cgroup_subsys_state *css, int count)
{
	struct cgroup *cgrp = css->cgroup;
	int val;
	rcu_read_lock();
	val = atomic_sub_return(count, &css->refcnt);
	/* dropping to the base refcount (1) means no external users left */
	if (val == 1) {
		if (notify_on_release(cgrp)) {
			set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}
		/* a pending rmdir may now be able to proceed */
		cgroup_wakeup_rmdir_waiter(cgrp);
	}
	rcu_read_unlock();
	WARN_ON_ONCE(val < 1);
}
4453
4454
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
/*
 * Workqueue handler that notifies userspace of releasable cgroups:
 * for each cgroup on release_list, spawn the hierarchy's
 * release_agent binary with the cgroup's path as its argument.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf = NULL, *agentbuf = NULL;
		struct cgroup *cgrp = list_entry(release_list.next,
						    struct cgroup,
						    release_list);
		list_del_init(&cgrp->release_list);
		spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf)
			goto continue_free;
		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
			goto continue_free;
		/* snapshot the agent path; it may be rewritten concurrently */
		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
		if (!agentbuf)
			goto continue_free;

		i = 0;
		argv[i++] = agentbuf;
		argv[i++] = pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal command environment */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/*
		 * Drop the lock while we invoke the usermode helper,
		 * since the exec could involve hitting disk and hence
		 * be a slow process.
		 */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		mutex_lock(&cgroup_mutex);
 continue_free:
		kfree(pathbuf);
		kfree(agentbuf);
		spin_lock(&release_list_lock);
	}
	spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
4525
/*
 * Kernel command line handler for "cgroup_disable=<name>[,<name>...]":
 * mark the named built-in subsystems as disabled before they are used.
 */
static int __init cgroup_disable(char *str)
{
	int i;
	char *token;

	while ((token = strsep(&str, ",")) != NULL) {
		if (!*token)
			continue;
		/*
		 * cgroup_disable, being at boot time, can't know about
		 * module subsystems, so we don't worry about them --
		 * only built-in slots are scanned.
		 */
		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];

			if (!strcmp(token, ss->name)) {
				ss->disabled = 1;
				printk(KERN_INFO "Disabling %s control group"
					" subsystem\n", ss->name);
				break;
			}
		}
	}
	return 1;
}
__setup("cgroup_disable=", cgroup_disable);
4552
4553
4554
4555
4556
4557
4558
4559
/*
 * css_id - return the id of the given subsystem state, or 0 if the css
 * has no id assigned.  Safe to call from an RCU read-side section or
 * while holding a reference on the css.
 */
unsigned short css_id(struct cgroup_subsys_state *css)
{
	struct css_id *cssid;

	/*
	 * This css_id() can return correct value when somone has
	 * refcnt on this or this is under rcu_read_lock().  Once
	 * css->id is allocated, it is freed only when the cgroup is
	 * being destroyed, at which time refcnt is zero.
	 */
	cssid = rcu_dereference_check(css->id,
			rcu_read_lock_held() || atomic_read(&css->refcnt));

	if (cssid)
		return cssid->id;
	return 0;
}
4577
/*
 * css_depth - return the hierarchy depth recorded in the css's id, or
 * 0 when no id is assigned.  Same safety requirements as css_id().
 */
unsigned short css_depth(struct cgroup_subsys_state *css)
{
	struct css_id *cssid;

	cssid = rcu_dereference_check(css->id,
			rcu_read_lock_held() || atomic_read(&css->refcnt));

	if (cssid)
		return cssid->depth;
	return 0;
}
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
/*
 * css_is_ancestor - test "root" css is an ancestor of "child"
 * @child: the css to be tested.
 * @root: the css supporsed to be an ancestor of the child.
 *
 * Uses the id stack embedded in each css_id: @root is an ancestor of
 * @child iff @root's id appears at @root's depth in @child's stack.
 * Returns true if "root" is an ancestor of "child" in its hierarchy.
 * (A css is considered its own ancestor.)
 */
bool css_is_ancestor(struct cgroup_subsys_state *child,
		    const struct cgroup_subsys_state *root)
{
	struct css_id *child_id;
	struct css_id *root_id;
	bool ret = true;

	rcu_read_lock();
	child_id  = rcu_dereference(child->id);
	root_id = rcu_dereference(root->id);
	if (!child_id
	    || !root_id
	    || (child_id->depth < root_id->depth)
	    || (child_id->stack[root_id->depth] != root_id->id))
		ret = false;
	rcu_read_unlock();
	return ret;
}
4622
4623static void __free_css_id_cb(struct rcu_head *head)
4624{
4625 struct css_id *id;
4626
4627 id = container_of(head, struct css_id, rcu_head);
4628 kfree(id);
4629}
4630
/*
 * Detach and free the css_id attached to @css: unpublish both RCU
 * pointers, remove the id from the subsystem's idr, and free the
 * structure after a grace period.
 */
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
	struct css_id *id = css->id;
	/* When this is called before css_id initialization, id can be NULL */
	if (!id)
		return;

	BUG_ON(!ss->use_id);

	rcu_assign_pointer(id->css, NULL);
	rcu_assign_pointer(css->id, NULL);
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, id->id);
	spin_unlock(&ss->id_lock);
	/* defer the kfree until readers are done with the pointer */
	call_rcu(&id->rcu_head, __free_css_id_cb);
}
4648
4649
4650
4651
4652
4653
/*
 * Allocate and register a new css_id at hierarchy depth @depth.  The
 * structure is sized to hold an ancestor-id stack of depth+1 entries.
 * Returns the new entry or an ERR_PTR.  Called with cgroup_mutex held.
 */
static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
{
	struct css_id *newid;
	int myid, error, size;

	BUG_ON(!ss->use_id);

	/* trailing flexible stack: one slot per ancestor plus self */
	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
	newid = kzalloc(size, GFP_KERNEL);
	if (!newid)
		return ERR_PTR(-ENOMEM);
	/* get id */
	if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
		error = -ENOMEM;
		goto err_out;
	}
	spin_lock(&ss->id_lock);
	/* Don't use 0. allocates an ID of 1-65535 */
	error = idr_get_new_above(&ss->idr, newid, 1, &myid);
	spin_unlock(&ss->id_lock);

	/* Returns error when there are no free spaces for new ID.*/
	if (error) {
		error = -ENOSPC;
		goto err_out;
	}
	if (myid > CSS_ID_MAX)
		goto remove_idr;

	newid->id = myid;
	newid->depth = depth;
	return newid;
remove_idr:
	error = -ENOSPC;
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, myid);
	spin_unlock(&ss->id_lock);
err_out:
	kfree(newid);
	return ERR_PTR(error);

}
4696
/*
 * Set up the css-id machinery for a subsystem and assign id 1 (depth 0)
 * to its root css.  Returns 0 on success or a negative errno.
 */
static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
					    struct cgroup_subsys_state *rootcss)
{
	struct css_id *newid;

	spin_lock_init(&ss->id_lock);
	idr_init(&ss->idr);

	newid = get_new_cssid(ss, 0);
	if (IS_ERR(newid))
		return PTR_ERR(newid);

	/* root css is its own single ancestor */
	newid->stack[0] = newid->id;
	newid->css = rootcss;
	rootcss->id = newid;
	return 0;
}
4714
/*
 * Allocate a css_id for @child's css in subsystem @ss, one level below
 * @parent: the child inherits the parent's ancestor-id stack and
 * appends its own id.  Returns 0 on success or a negative errno.
 */
static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
			struct cgroup *child)
{
	int subsys_id, i, depth = 0;
	struct cgroup_subsys_state *parent_css, *child_css;
	struct css_id *child_id, *parent_id;

	subsys_id = ss->subsys_id;
	parent_css = parent->subsys[subsys_id];
	child_css = child->subsys[subsys_id];
	parent_id = parent_css->id;
	depth = parent_id->depth + 1;

	child_id = get_new_cssid(ss, depth);
	if (IS_ERR(child_id))
		return PTR_ERR(child_id);

	/* copy the parent's ancestor stack and push our own id */
	for (i = 0; i < depth; i++)
		child_id->stack[i] = parent_id->stack[i];
	child_id->stack[depth] = child_id->id;
	/*
	 * child_id->css pointer will be set after this cgroup is
	 * available -- see cgroup_populate_dir().
	 */
	rcu_assign_pointer(child_css->id, child_id);

	return 0;
}
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
4753{
4754 struct css_id *cssid = NULL;
4755
4756 BUG_ON(!ss->use_id);
4757 cssid = idr_find(&ss->idr, id);
4758
4759 if (unlikely(!cssid))
4760 return NULL;
4761
4762 return rcu_dereference(cssid->css);
4763}
4764EXPORT_SYMBOL_GPL(css_lookup);
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
/**
 * css_get_next - find the next live css under @root, scanning ids upward
 * @ss: the subsystem to scan
 * @id: the id to start the scan from (inclusive)
 * @root: css whose subtree bounds the search
 * @foundid: out parameter; set to the id of the returned css
 *
 * Returns the first live css whose id is >= @id and which lies in
 * @root's subtree, or NULL if none remains.
 */
struct cgroup_subsys_state *
css_get_next(struct cgroup_subsys *ss, int id,
	     struct cgroup_subsys_state *root, int *foundid)
{
	struct cgroup_subsys_state *ret = NULL;
	struct css_id *tmp;
	int tmpid;
	int rootid = css_id(root);
	int depth = css_depth(root);

	/* A zero root id means there is nothing to scan under. */
	if (!rootid)
		return NULL;

	BUG_ON(!ss->use_id);

	/* fill start point for scan */
	tmpid = id;
	while (1) {
		/*
		 * idr_get_next() returns the first registered entry whose id
		 * is >= tmpid and updates tmpid to that id.
		 */
		spin_lock(&ss->id_lock);
		tmp = idr_get_next(&ss->idr, &tmpid);
		spin_unlock(&ss->id_lock);

		if (!tmp)
			break;
		/*
		 * @root is an ancestor of the candidate iff @root's id
		 * appears at @root's depth in the candidate's ancestor
		 * stack (see alloc_css_id()).
		 */
		if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
			ret = rcu_dereference(tmp->css);
			if (ret) {
				*foundid = tmpid;
				break;
			}
		}
		/* Not in the subtree (or css already gone): try the next id. */
		tmpid = tmpid + 1;
	}
	return ret;
}
4815
4816#ifdef CONFIG_CGROUP_DEBUG
4817static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
4818 struct cgroup *cont)
4819{
4820 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
4821
4822 if (!css)
4823 return ERR_PTR(-ENOMEM);
4824
4825 return css;
4826}
4827
4828static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
4829{
4830 kfree(cont->subsys[debug_subsys_id]);
4831}
4832
4833static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
4834{
4835 return atomic_read(&cont->count);
4836}
4837
4838static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
4839{
4840 return cgroup_task_count(cont);
4841}
4842
4843static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
4844{
4845 return (u64)(unsigned long)current->cgroups;
4846}
4847
4848static u64 current_css_set_refcount_read(struct cgroup *cont,
4849 struct cftype *cft)
4850{
4851 u64 count;
4852
4853 rcu_read_lock();
4854 count = atomic_read(¤t->cgroups->refcount);
4855 rcu_read_unlock();
4856 return count;
4857}
4858
/*
 * Debug file: for each hierarchy, print the cgroup that the calling
 * task's css_set links it to.
 */
static int current_css_set_cg_links_read(struct cgroup *cont,
					 struct cftype *cft,
					 struct seq_file *seq)
{
	struct cg_cgroup_link *link;
	struct css_set *cg;

	/* css_set_lock keeps the link list stable; RCU guards *cg itself. */
	read_lock(&css_set_lock);
	rcu_read_lock();
	cg = rcu_dereference(current->cgroups);
	list_for_each_entry(link, &cg->cg_links, cg_link_list) {
		struct cgroup *c = link->cgrp;
		const char *name;

		/* Fall back to "?" when the cgroup has no dentry. */
		if (c->dentry)
			name = c->dentry->d_name.name;
		else
			name = "?";
		seq_printf(seq, "Root %d group %s\n",
			   c->root->hierarchy_id, name);
	}
	rcu_read_unlock();
	read_unlock(&css_set_lock);
	return 0;
}
4884
4885#define MAX_TASKS_SHOWN_PER_CSS 25
/*
 * Debug file: for every css_set attached to this cgroup, list its member
 * tasks, truncating each set's listing with "..." once it gets long.
 */
static int cgroup_css_links_read(struct cgroup *cont,
				 struct cftype *cft,
				 struct seq_file *seq)
{
	struct cg_cgroup_link *link;

	read_lock(&css_set_lock);
	list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
		struct css_set *cg = link->cg;
		struct task_struct *task;
		int count = 0;
		seq_printf(seq, "css_set %p\n", cg);
		list_for_each_entry(task, &cg->tasks, cg_list) {
			/*
			 * NOTE(review): post-increment with '>' lets one task
			 * beyond MAX_TASKS_SHOWN_PER_CSS print before the
			 * "..." marker; '>=' would cap at exactly the macro
			 * value.  Left as-is to preserve the existing output.
			 */
			if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
				seq_puts(seq, "  ...\n");
				break;
			} else {
				seq_printf(seq, "  task %d\n",
					   task_pid_vnr(task));
			}
		}
	}
	read_unlock(&css_set_lock);
	return 0;
}
4911
4912static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
4913{
4914 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
4915}
4916
/* Control files exported by the debug subsystem into every cgroup. */
static struct cftype debug_files[] =  {
	/* Raw reference count of the cgroup object. */
	{
		.name = "cgroup_refcount",
		.read_u64 = cgroup_refcount_read,
	},
	/* Number of tasks attached to the cgroup. */
	{
		.name = "taskcount",
		.read_u64 = debug_taskcount_read,
	},

	/* Kernel address of the reading task's css_set. */
	{
		.name = "current_css_set",
		.read_u64 = current_css_set_read,
	},

	/* Reference count of the reading task's css_set. */
	{
		.name = "current_css_set_refcount",
		.read_u64 = current_css_set_refcount_read,
	},

	/* Per-hierarchy cgroup membership of the reading task's css_set. */
	{
		.name = "current_css_set_cg_links",
		.read_seq_string = current_css_set_cg_links_read,
	},

	/* css_sets attached to this cgroup and their member tasks. */
	{
		.name = "cgroup_css_links",
		.read_seq_string = cgroup_css_links_read,
	},

	/* Whether the cgroup carries the CGRP_RELEASABLE flag. */
	{
		.name = "releasable",
		.read_u64 = releasable_read,
	},
};
4952
4953static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
4954{
4955 return cgroup_add_files(cont, ss, debug_files,
4956 ARRAY_SIZE(debug_files));
4957}
4958
/*
 * The "debug" cgroup subsystem: exposes cgroup framework internals
 * (refcounts, css_set links, task lists) through read-only control files.
 */
struct cgroup_subsys debug_subsys = {
	.name = "debug",
	.create = debug_create,
	.destroy = debug_destroy,
	.populate = debug_populate,
	.subsys_id = debug_subsys_id,
};
4966#endif
4967