1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#include <linux/cgroup.h>
30#include <linux/ctype.h>
31#include <linux/errno.h>
32#include <linux/fs.h>
33#include <linux/kernel.h>
34#include <linux/list.h>
35#include <linux/mm.h>
36#include <linux/mutex.h>
37#include <linux/mount.h>
38#include <linux/pagemap.h>
39#include <linux/proc_fs.h>
40#include <linux/rcupdate.h>
41#include <linux/sched.h>
42#include <linux/backing-dev.h>
43#include <linux/seq_file.h>
44#include <linux/slab.h>
45#include <linux/magic.h>
46#include <linux/spinlock.h>
47#include <linux/string.h>
48#include <linux/sort.h>
49#include <linux/kmod.h>
50#include <linux/module.h>
51#include <linux/delayacct.h>
52#include <linux/cgroupstats.h>
53#include <linux/hash.h>
54#include <linux/namei.h>
55#include <linux/pid_namespace.h>
56#include <linux/idr.h>
57#include <linux/vmalloc.h>
58#include <linux/eventfd.h>
59#include <linux/poll.h>
60#include <linux/flex_array.h>
61
62#include <asm/atomic.h>
63
/*
 * cgroup_mutex is the master lock: almost every mutation of cgroup
 * state in this file happens under it (see cgroup_lock_is_held()).
 */
static DEFINE_MUTEX(cgroup_mutex);
65
66
67
68
69
70
71
/*
 * Generate an array of cgroup subsystem pointers from cgroup_subsys.h.
 * Slots past CGROUP_BUILTIN_SUBSYS_COUNT may be NULL until a modular
 * subsystem registers (this array is checked for NULL below).
 */
#define SUBSYS(_x) &_x ## _subsys,
static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};
76
#define MAX_CGROUP_ROOT_NAMELEN 64

/*
 * A cgroupfs_root represents the root of a cgroup hierarchy, and may be
 * associated with a superblock to form an active hierarchy.
 */
struct cgroupfs_root {
	struct super_block *sb;

	/*
	 * The bitmask of subsystems intended to be attached to this
	 * hierarchy.
	 */
	unsigned long subsys_bits;

	/* Unique id for this hierarchy. */
	int hierarchy_id;

	/* The bitmask of subsystems currently attached to this hierarchy */
	unsigned long actual_subsys_bits;

	/* A list running through the attached subsystems */
	struct list_head subsys_list;

	/* The root cgroup for this hierarchy */
	struct cgroup top_cgroup;

	/* Tracks how many cgroups are currently defined in hierarchy. */
	int number_of_cgroups;

	/* A list running through the active hierarchies */
	struct list_head root_list;

	/* Hierarchy-specific flags (e.g. ROOT_NOPREFIX) */
	unsigned long flags;

	/* The path to use for release notifications. */
	char release_agent_path[PATH_MAX];

	/* The name for this hierarchy - may be empty */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};
120
121
122
123
124
125
/*
 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
 * subsystems that are otherwise unattached - it never has more than a
 * single cgroup, and all tasks are part of that cgroup.
 */
static struct cgroupfs_root rootnode;
127
128
129
130
131
/* CSS ID -- ID per subsystem's cgroup_subsys_state (CSS). */
#define CSS_ID_MAX	(65535)
struct css_id {
	/*
	 * The css to which this ID points. If cgroup is removed, this will
	 * be NULL. This pointer is expected to be RCU-safe because it is
	 * freed via rcu_head (see below).
	 */
	struct cgroup_subsys_state __rcu *css;
	/*
	 * ID of this css.
	 */
	unsigned short id;
	/*
	 * Depth in hierarchy which this ID belongs to.
	 */
	unsigned short depth;
	/*
	 * ID is freed by RCU. (lookup routines are presumably RCU safe --
	 * they are defined outside this chunk.)
	 */
	struct rcu_head rcu_head;
	/*
	 * Hierarchy of CSS IDs this ID belongs to; flexible array,
	 * length depends on depth.
	 */
	unsigned short stack[0];
};
159
160
161
162
/* cgroup_event represents events which userspace wants to receive. */
struct cgroup_event {
	/*
	 * Cgroup which the event belongs to.
	 */
	struct cgroup *cgrp;
	/*
	 * Control file which the event is associated with.
	 */
	struct cftype *cft;
	/*
	 * eventfd to signal userspace about the event.
	 */
	struct eventfd_ctx *eventfd;
	/*
	 * Each of these is stored in a list by the cgroup
	 * (cgrp->event_list, see init_cgroup_housekeeping()).
	 */
	struct list_head list;
	/*
	 * All fields below are needed to unregister the event when
	 * userspace closes the eventfd.
	 */
	poll_table pt;
	wait_queue_head_t *wqh;
	wait_queue_t wait;
	struct work_struct remove;
};
189
190
191
/* The list of hierarchy roots */
static LIST_HEAD(roots);
static int root_count;

/* Hierarchy id allocation; hierarchy_id_lock protects the ida state. */
static DEFINE_IDA(hierarchy_ida);
static int next_hierarchy_id;
static DEFINE_SPINLOCK(hierarchy_id_lock);

/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
#define dummytop (&rootnode.top_cgroup)

/*
 * Set when some subsystem needs fork/exit callbacks.  NOTE(review):
 * only declared here -- the fork/exit paths that consult it are not
 * visible in this chunk; confirm against the rest of the file.
 */
static int need_forkexit_callback __read_mostly;
208
/*
 * Report whether cgroup_mutex is held, for use in assertions and RCU
 * dereference checks.  With CONFIG_PROVE_LOCKING we can ask lockdep
 * whether the current context holds it; otherwise fall back to the
 * weaker "somebody holds it" mutex_is_locked() check.
 */
#ifdef CONFIG_PROVE_LOCKING
int cgroup_lock_is_held(void)
{
	return lockdep_is_held(&cgroup_mutex);
}
#else
int cgroup_lock_is_held(void)
{
	return mutex_is_locked(&cgroup_mutex);
}
#endif

EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
222
223
/* Nonzero iff @cgrp has been rmdir()'ed (CGRP_REMOVED set). */
inline int cgroup_is_removed(const struct cgroup *cgrp)
{
	return test_bit(CGRP_REMOVED, &cgrp->flags);
}
228
229
/* bits in struct cgroupfs_root flags field */
enum {
	ROOT_NOPREFIX,	/* mounted subsystems have no named prefix */
};
233
234static int cgroup_is_releasable(const struct cgroup *cgrp)
235{
236 const int bits =
237 (1 << CGRP_RELEASABLE) |
238 (1 << CGRP_NOTIFY_ON_RELEASE);
239 return (cgrp->flags & bits) == bits;
240}
241
/* Nonzero iff the CGRP_NOTIFY_ON_RELEASE flag is set for @cgrp. */
static int notify_on_release(const struct cgroup *cgrp)
{
	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}

/* Nonzero iff the CGRP_CLONE_CHILDREN flag is set for @cgrp. */
static int clone_children(const struct cgroup *cgrp)
{
	return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
}
251
252
253
254
255
/*
 * for_each_subsys() allows you to iterate on each subsystem attached to
 * an active hierarchy
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &_root->subsys_list, sibling)

/* for_each_active_root() allows you to iterate across the active hierarchies */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)

/* the list of cgroups eligible for automatic release. Protected by
 * release_list_lock */
static LIST_HEAD(release_list);
static DEFINE_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
270
271
/* Link structure for associating css_set objects with cgroups */
struct cg_cgroup_link {
	/*
	 * List running through cg_cgroup_links associated with a
	 * cgroup, anchored on cgroup->css_sets
	 */
	struct list_head cgrp_link_list;
	struct cgroup *cgrp;
	/*
	 * List running through cg_cgroup_links pointing at a
	 * single css_set object, anchored on css_set->cg_links
	 */
	struct list_head cg_link_list;
	struct css_set *cg;
};
286
287
288
289
290
291
292
293
/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted.  Not reference-counted (see the special
 * case in task_cgroup_from_root()).
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

static int cgroup_init_idr(struct cgroup_subsys *ss,
			   struct cgroup_subsys_state *css);

/*
 * css_set_lock protects the list of css_set objects, their link lists,
 * the css_set hash table and css_set_count.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;

/*
 * Hash table for css_set objects, to speed up finding an existing
 * css_set in find_existing_css_set().  Keyed on the set of css
 * pointers (see css_set_hash()).
 */
#define CSS_SET_HASH_BITS 7
#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
314
315static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
316{
317 int i;
318 int index;
319 unsigned long tmp = 0UL;
320
321 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
322 tmp += (unsigned long)css[i];
323 tmp = (tmp >> 16) ^ tmp;
324
325 index = hash_long(tmp, CSS_SET_HASH_BITS);
326
327 return &css_set_table[index];
328}
329
330
331
332
333
/*
 * Flag indicating that the per-task css_set link lists are in use.
 * NOTE(review): the code that sets and consults it is outside this
 * chunk -- presumably list maintenance is skipped until the first
 * consumer needs it; confirm against the rest of the file.
 */
static int use_task_css_set_links __read_mostly;
335
/*
 * Drop a reference on @cg.  When the last reference goes away, unhash
 * it, unlink it from every cgroup it belongs to (dropping each
 * cgroup's count, possibly triggering a release notification) and free
 * it after an RCU grace period.  @taskexit marks emptied cgroups
 * releasable.
 */
static void __put_css_set(struct css_set *cg, int taskexit)
{
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	/*
	 * Ensure that the refcount doesn't hit zero while any readers
	 * can see it.  Similar to atomic_dec_and_lock(), but for an
	 * rwlock: fast path drops the ref without the lock unless we
	 * might be the last holder.
	 */
	if (atomic_add_unless(&cg->refcount, -1, 1))
		return;
	write_lock(&css_set_lock);
	if (!atomic_dec_and_test(&cg->refcount)) {
		write_unlock(&css_set_lock);
		return;
	}

	/* This css_set is dead: unlink it and release cgroup refcounts */
	hlist_del(&cg->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
				 cg_link_list) {
		struct cgroup *cgrp = link->cgrp;
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		if (atomic_dec_and_test(&cgrp->count) &&
		    notify_on_release(cgrp)) {
			if (taskexit)
				set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}

		kfree(link);
	}

	write_unlock(&css_set_lock);
	kfree_rcu(cg, rcu_head);
}
375
376
377
378
/*
 * refcounted get/put for css_set objects
 */
static inline void get_css_set(struct css_set *cg)
{
	atomic_inc(&cg->refcount);
}

/* Drop a css_set reference (non-task-exit path). */
static inline void put_css_set(struct css_set *cg)
{
	__put_css_set(cg, 0);
}

/* As put_css_set(), but marks emptied cgroups releasable (task exit). */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	__put_css_set(cg, 1);
}
393
394
395
396
397
398
399
400
401
402
403
/*
 * compare_css_sets - helper for find_existing_css_set().
 * @cg: candidate css_set being tested
 * @old_cg: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if @cg matches @old_cg except for the hierarchy
 * which @new_cgrp belongs to, for which it should match @new_cgrp.
 */
static bool compare_css_sets(struct css_set *cg,
			     struct css_set *old_cg,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct list_head *l1, *l2;

	if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
		/* Not all subsystems matched */
		return false;
	}

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies with no subsystems.  We
	 * could get by with just this check alone (and skip the
	 * memcmp above) but on most setups the memcmp check will
	 * avoid the need for this more expensive check on almost all
	 * candidates.
	 *
	 * Walk both link lists in lockstep; link_css_set() keeps them
	 * sorted by order of hierarchy creation, so positions match.
	 */
	l1 = &cg->cg_links;
	l2 = &old_cg->cg_links;
	while (1) {
		struct cg_cgroup_link *cgl1, *cgl2;
		struct cgroup *cg1, *cg2;

		l1 = l1->next;
		l2 = l2->next;
		/* See if both lists ended at the same point */
		if (l1 == &cg->cg_links) {
			BUG_ON(l2 != &old_cg->cg_links);
			break;
		} else {
			BUG_ON(l2 == &old_cg->cg_links);
		}
		/* Locate the cgroups associated with these links. */
		cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
		cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
		cg1 = cgl1->cgrp;
		cg2 = cgl2->cgrp;
		/* Hierarchies should be linked in the same order. */
		BUG_ON(cg1->root != cg2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cg1->root == new_cgrp->root) {
			if (cg1 != new_cgrp)
				return false;
		} else {
			if (cg1 != cg2)
				return false;
		}
	}
	return true;
}
465
466
467
468
469
470
471
472
473
474
475
476
477
478
/*
 * find_existing_css_set() is a helper for find_css_set(), and checks
 * to see whether an existing css_set is suitable.
 *
 * @oldcg: the css_set that we're using before the cgroup transition
 * @cgrp: the cgroup that we're moving into
 * @template: location in which to build the desired set of subsystem
 *            state objects for the new css_set (output)
 *
 * Returns the matching css_set (without taking a reference) or NULL.
 * Caller must hold css_set_lock for reading.
 */
static struct css_set *find_existing_css_set(
	struct css_set *oldcg,
	struct cgroup *cgrp,
	struct cgroup_subsys_state *template[])
{
	int i;
	struct cgroupfs_root *root = cgrp->root;
	struct hlist_head *hhead;
	struct hlist_node *node;
	struct css_set *cg;

	/*
	 * Build the set of subsystem state objects that we want to see in
	 * the new css_set.
	 */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		if (root->subsys_bits & (1UL << i)) {
			/*
			 * Subsystem is in this hierarchy, so we want the
			 * subsystem state from the new cgroup.
			 */
			template[i] = cgrp->subsys[i];
		} else {
			/*
			 * Subsystem is not in this hierarchy, so we don't
			 * want to change the subsystem state.
			 */
			template[i] = oldcg->subsys[i];
		}
	}

	hhead = css_set_hash(template);
	hlist_for_each_entry(cg, node, hhead, hlist) {
		if (!compare_css_sets(cg, oldcg, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cg;
	}

	/* No existing cgroup group matched */
	return NULL;
}
520
521static void free_cg_links(struct list_head *tmp)
522{
523 struct cg_cgroup_link *link;
524 struct cg_cgroup_link *saved_link;
525
526 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
527 list_del(&link->cgrp_link_list);
528 kfree(link);
529 }
530}
531
532
533
534
535
536
537static int allocate_cg_links(int count, struct list_head *tmp)
538{
539 struct cg_cgroup_link *link;
540 int i;
541 INIT_LIST_HEAD(tmp);
542 for (i = 0; i < count; i++) {
543 link = kmalloc(sizeof(*link), GFP_KERNEL);
544 if (!link) {
545 free_cg_links(tmp);
546 return -ENOMEM;
547 }
548 list_add(&link->cgrp_link_list, tmp);
549 }
550 return 0;
551}
552
553
554
555
556
557
558
/**
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links()
 * @cg: the css_set to be linked
 * @cgrp: the destination cgroup
 *
 * Consumes one pre-allocated link from @tmp_cg_links; caller holds
 * css_set_lock for writing.
 */
static void link_css_set(struct list_head *tmp_cg_links,
			 struct css_set *cg, struct cgroup *cgrp)
{
	struct cg_cgroup_link *link;

	BUG_ON(list_empty(tmp_cg_links));
	link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
				cgrp_link_list);
	link->cg = cg;
	link->cgrp = cgrp;
	atomic_inc(&cgrp->count);
	list_move(&link->cgrp_link_list, &cgrp->css_sets);
	/*
	 * Always add links to the tail of the list so that the list
	 * is sorted by order of hierarchy creation (relied upon by
	 * compare_css_sets()).
	 */
	list_add_tail(&link->cg_link_list, &cg->cg_links);
}
577
578
579
580
581
582
583
584
/*
 * find_css_set() takes an existing css_set and a cgroup object, and
 * returns a css_set object that's equivalent to the old one, but with
 * the given cgroup substituted into the appropriate hierarchy.
 * Returns a referenced css_set, or NULL on allocation failure.
 */
static struct css_set *find_css_set(
	struct css_set *oldcg, struct cgroup *cgrp)
{
	struct css_set *res;
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];

	struct list_head tmp_cg_links;

	struct hlist_head *hhead;
	struct cg_cgroup_link *link;

	/*
	 * First see if we already have a css_set that matches the
	 * desired set.
	 */
	read_lock(&css_set_lock);
	res = find_existing_css_set(oldcg, cgrp, template);
	if (res)
		get_css_set(res);
	read_unlock(&css_set_lock);

	if (res)
		return res;

	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return NULL;

	/* Allocate all the cg_cgroup_link objects that we'll need */
	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
		kfree(res);
		return NULL;
	}

	atomic_set(&res->refcount, 1);
	INIT_LIST_HEAD(&res->cg_links);
	INIT_LIST_HEAD(&res->tasks);
	INIT_HLIST_NODE(&res->hlist);

	/* Copy the set of subsystem state objects generated in
	 * find_existing_css_set() */
	memcpy(res->subsys, template, sizeof(res->subsys));

	write_lock(&css_set_lock);
	/* Add reference counts and links from the new css_set. */
	list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
		struct cgroup *c = link->cgrp;
		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_cg_links, res, c);
	}

	BUG_ON(!list_empty(&tmp_cg_links));

	css_set_count++;

	/* Add this css_set to the hash table */
	hhead = css_set_hash(res->subsys);
	hlist_add_head(&res->hlist, hhead);

	write_unlock(&css_set_lock);

	return res;
}
647
648
649
650
651
/*
 * Return the cgroup for @task from the given hierarchy.  Must be
 * called with cgroup_mutex held (asserted below).
 */
static struct cgroup *task_cgroup_from_root(struct task_struct *task,
					    struct cgroupfs_root *root)
{
	struct css_set *css;
	struct cgroup *res = NULL;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	read_lock(&css_set_lock);
	/*
	 * The unrefcounted init_css_set has no link list; every task
	 * using it maps to the hierarchy's top cgroup.  Otherwise walk
	 * the css_set's links for the one belonging to @root.
	 */
	css = task->cgroups;
	if (css == &init_css_set) {
		res = &root->top_cgroup;
	} else {
		struct cg_cgroup_link *link;
		list_for_each_entry(link, &css->cg_links, cg_link_list) {
			struct cgroup *c = link->cgrp;
			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	read_unlock(&css_set_lock);
	/* Every css_set has a link for every active hierarchy. */
	BUG_ON(!res);
	return res;
}
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
/**
 * cgroup_lock - lock out any changes to cgroup structures
 */
void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_lock);

/**
 * cgroup_unlock - release lock on cgroup changes
 *
 * Undo the lock taken in a previous cgroup_lock() call.
 */
void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unlock);
753
754
755
756
757
758
759
760
/*
 * Forward declarations for directory/file operations defined later in
 * the file (not visible in this chunk).
 */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;

/* cgroup inodes never carry dirty pages: no accounting, no writeback */
static struct backing_dev_info cgroup_backing_dev_info = {
	.name = "cgroup",
	.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
775
776static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
777{
778 struct inode *inode = new_inode(sb);
779
780 if (inode) {
781 inode->i_ino = get_next_ino();
782 inode->i_mode = mode;
783 inode->i_uid = current_fsuid();
784 inode->i_gid = current_fsgid();
785 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
786 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
787 }
788 return inode;
789}
790
791
792
793
794
795static int cgroup_call_pre_destroy(struct cgroup *cgrp)
796{
797 struct cgroup_subsys *ss;
798 int ret = 0;
799
800 for_each_subsys(cgrp->root, ss)
801 if (ss->pre_destroy) {
802 ret = ss->pre_destroy(ss, cgrp);
803 if (ret)
804 break;
805 }
806
807 return ret;
808}
809
/* d_iput op: final teardown of a cgroup when its dentry is released. */
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
	/* is dentry a directory ? if so, kfree() associated cgroup */
	if (S_ISDIR(inode->i_mode)) {
		struct cgroup *cgrp = dentry->d_fsdata;
		struct cgroup_subsys *ss;
		BUG_ON(!(cgroup_is_removed(cgrp)));
		/*
		 * It's possible for external users to be holding css
		 * reference counts on a cgroup; wait out any RCU readers
		 * before tearing down the subsystem state.
		 */
		synchronize_rcu();

		mutex_lock(&cgroup_mutex);
		/*
		 * Release the subsystem state objects.
		 */
		for_each_subsys(cgrp->root, ss)
			ss->destroy(ss, cgrp);

		cgrp->root->number_of_cgroups--;
		mutex_unlock(&cgroup_mutex);

		/*
		 * Drop the active superblock reference that was taken
		 * when the cgroup was created.
		 */
		deactivate_super(cgrp->root->sb);

		/*
		 * By the time we're getting rid of the cgroup there must
		 * be no pidlists left.
		 */
		BUG_ON(!list_empty(&cgrp->pidlists));

		kfree_rcu(cgrp, rcu_head);
	}
	iput(inode);
}
851
/*
 * d_delete op: always remove the dentry from the dcache on last dput -
 * cgroup dentries are never kept around unhashed.
 */
static int cgroup_delete(const struct dentry *d)
{
	return 1;
}
856
857static void remove_dir(struct dentry *d)
858{
859 struct dentry *parent = dget(d->d_parent);
860
861 d_delete(d);
862 simple_rmdir(parent->d_inode, d);
863 dput(parent);
864}
865
/*
 * Unlink every control file in the cgroup directory @dentry.  Called
 * with the directory's i_mutex held (asserted).  The d_lock of the
 * parent and of each child must be taken in the documented nesting
 * order, and both are dropped around d_delete()/simple_unlink().
 */
static void cgroup_clear_directory(struct dentry *dentry)
{
	struct list_head *node;

	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
	spin_lock(&dentry->d_lock);
	node = dentry->d_subdirs.next;
	while (node != &dentry->d_subdirs) {
		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);

		spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
		list_del_init(node);
		if (d->d_inode) {
			/* This should never be called on a cgroup
			 * directory with child cgroups */
			BUG_ON(d->d_inode->i_mode & S_IFDIR);
			dget_dlock(d);
			spin_unlock(&d->d_lock);
			spin_unlock(&dentry->d_lock);
			d_delete(d);
			simple_unlink(dentry->d_inode, d);
			dput(d);
			spin_lock(&dentry->d_lock);
		} else
			spin_unlock(&d->d_lock);
		/* restart from the head: the lock was dropped above */
		node = dentry->d_subdirs.next;
	}
	spin_unlock(&dentry->d_lock);
}
895
896
897
898
/*
 * NOTE : the dentry must have been dget()'ed by the caller.
 * Clears the directory's contents, unlinks it from its parent's
 * d_subdirs and removes the directory itself.
 */
static void cgroup_d_remove_dir(struct dentry *dentry)
{
	struct dentry *parent;

	cgroup_clear_directory(dentry);

	parent = dentry->d_parent;
	spin_lock(&parent->d_lock);
	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
	list_del_init(&dentry->d_u.d_child);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&parent->d_lock);
	remove_dir(dentry);
}
913
914
915
916
917
918
919
920
921
/*
 * Wait queue for tasks sleeping in rmdir() on a cgroup until pending
 * css references are dropped.  NOTE(review): the sleeper side lives
 * outside this chunk; CGRP_WAIT_ON_RMDIR is presumably set there.
 */
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);

static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
{
	/* wake up rmdir() waiter iff CGRP_WAIT_ON_RMDIR was set */
	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
		wake_up_all(&cgroup_rmdir_waitq);
}

/* Take a css reference so a concurrent rmdir() will wait for us. */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
	css_get(css);
}

/* Drop the reference taken by cgroup_exclude_rmdir() and wake waiters. */
void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
{
	cgroup_wakeup_rmdir_waiter(css->cgroup);
	css_put(css);
}
940
941
942
943
944
945
/*
 * Call with cgroup_mutex held.  Drops reference counts on modules,
 * including any duplicate ones that parse_cgroupfs_options() took.  If
 * this function returns an error, no reference counts are touched.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			     unsigned long final_bits)
{
	unsigned long added_bits, removed_bits;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));

	removed_bits = root->actual_subsys_bits & ~final_bits;
	added_bits = final_bits & ~root->actual_subsys_bits;
	/* Check that any added subsystems are currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_bits))
			continue;
		/*
		 * Nobody should tell us to do a subsys that doesn't exist:
		 * parse_cgroupfs_options should catch that case and the
		 * module refcounts taken there keep it from disappearing.
		 */
		BUG_ON(ss == NULL);
		if (ss->root != &rootnode) {
			/* Subsystem isn't free */
			return -EBUSY;
		}
	}

	/*
	 * Currently we don't handle adding/removing subsystems when
	 * any child cgroups exist. This is theoretically supportable
	 * but involves complex error handling, so it's being left until
	 * later.
	 */
	if (root->number_of_cgroups > 1)
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_bits) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			mutex_lock(&ss->hierarchy_mutex);
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			if (ss->bind)
				ss->bind(ss, cgrp);
			mutex_unlock(&ss->hierarchy_mutex);
			/* refcount was already taken; we're keeping it */
		} else if (bit & removed_bits) {
			/* We're removing this subsystem */
			BUG_ON(ss == NULL);
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			mutex_lock(&ss->hierarchy_mutex);
			if (ss->bind)
				ss->bind(ss, dummytop);
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			subsys[i]->root = &rootnode;
			list_move(&ss->sibling, &rootnode.subsys_list);
			mutex_unlock(&ss->hierarchy_mutex);
			/* subsystem is now free - drop reference on module */
			module_put(ss->module);
		} else if (bit & final_bits) {
			/* Subsystem state should already exist */
			BUG_ON(ss == NULL);
			BUG_ON(!cgrp->subsys[i]);
			/*
			 * A refcount was taken by parse_cgroupfs_options,
			 * but we already held one - drop the duplicate.
			 */
			module_put(ss->module);
#ifdef CONFIG_MODULE_UNLOAD
			BUG_ON(ss->module && !module_refcount(ss->module));
#endif
		} else {
			/* Subsystem state shouldn't exist */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_bits = root->actual_subsys_bits = final_bits;
	synchronize_rcu();

	return 0;
}
1038
1039static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
1040{
1041 struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
1042 struct cgroup_subsys *ss;
1043
1044 mutex_lock(&cgroup_mutex);
1045 for_each_subsys(root, ss)
1046 seq_printf(seq, ",%s", ss->name);
1047 if (test_bit(ROOT_NOPREFIX, &root->flags))
1048 seq_puts(seq, ",noprefix");
1049 if (strlen(root->release_agent_path))
1050 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1051 if (clone_children(&root->top_cgroup))
1052 seq_puts(seq, ",clone_children");
1053 if (strlen(root->name))
1054 seq_printf(seq, ",name=%s", root->name);
1055 mutex_unlock(&cgroup_mutex);
1056 return 0;
1057}
1058
/* Mount options parsed by parse_cgroupfs_options(). */
struct cgroup_sb_opts {
	unsigned long subsys_bits;	/* bitmask of requested subsystems */
	unsigned long flags;		/* ROOT_* flags */
	char *release_agent;		/* kstrndup()'d; caller kfree()s */
	bool clone_children;		/* set CGRP_CLONE_CHILDREN on root */
	char *name;			/* kstrndup()'d; caller kfree()s */
	/* User explicitly requested an empty subsystem set ("none") */
	bool none;

	/* Candidate root, filled in by cgroup_mount() before sget() */
	struct cgroupfs_root *new_root;

};
1071
1072
1073
1074
1075
1076
1077
/*
 * Convert a hierarchy specifier into a bitmask of subsystems and
 * flags.  Call with cgroup_mutex held to protect the subsys[] array.
 * This function takes refcounts on the modules of the subsystems to be
 * used, unless it returns an error, in which case no refcounts are
 * taken.
 */
static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
{
	char *token, *o = data;
	bool all_ss = false, one_ss = false;
	unsigned long mask = (unsigned long)-1;
	int i;
	bool module_pin_failed = false;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));

#ifdef CONFIG_CPUSETS
	mask = ~(1UL << cpuset_subsys_id);
#endif

	memset(opts, 0, sizeof(*opts));

	while ((token = strsep(&o, ",")) != NULL) {
		if (!*token)
			return -EINVAL;
		if (!strcmp(token, "none")) {
			/* Explicitly have no subsystems */
			opts->none = true;
			continue;
		}
		if (!strcmp(token, "all")) {
			/* Mutually exclusive option 'all' + subsystem name */
			if (one_ss)
				return -EINVAL;
			all_ss = true;
			continue;
		}
		if (!strcmp(token, "noprefix")) {
			set_bit(ROOT_NOPREFIX, &opts->flags);
			continue;
		}
		if (!strcmp(token, "clone_children")) {
			opts->clone_children = true;
			continue;
		}
		if (!strncmp(token, "release_agent=", 14)) {
			/* Specifying two release agents is forbidden */
			if (opts->release_agent)
				return -EINVAL;
			opts->release_agent =
				kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
			if (!opts->release_agent)
				return -ENOMEM;
			continue;
		}
		if (!strncmp(token, "name=", 5)) {
			const char *name = token + 5;
			/* Can't specify an empty name */
			if (!strlen(name))
				return -EINVAL;
			/* Must match [\w.-]+ */
			for (i = 0; i < strlen(name); i++) {
				char c = name[i];
				if (isalnum(c))
					continue;
				if ((c == '.') || (c == '-') || (c == '_'))
					continue;
				return -EINVAL;
			}
			/* Specifying two names is forbidden */
			if (opts->name)
				return -EINVAL;
			opts->name = kstrndup(name,
					      MAX_CGROUP_ROOT_NAMELEN - 1,
					      GFP_KERNEL);
			if (!opts->name)
				return -ENOMEM;

			continue;
		}

		/* Otherwise the token must name a subsystem */
		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss == NULL)
				continue;
			if (strcmp(token, ss->name))
				continue;
			if (ss->disabled)
				continue;

			/* Mutually exclusive option 'all' + subsystem name */
			if (all_ss)
				return -EINVAL;
			set_bit(i, &opts->subsys_bits);
			one_ss = true;

			break;
		}
		if (i == CGROUP_SUBSYS_COUNT)
			return -ENOENT;
	}

	/*
	 * If the 'all' option was specified, select all the subsystems;
	 * otherwise, if no subsystem and no 'none' was given, default to
	 * the same behaviour as 'all'.
	 */
	if (all_ss || (!all_ss && !one_ss && !opts->none)) {
		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss == NULL)
				continue;
			if (ss->disabled)
				continue;
			set_bit(i, &opts->subsys_bits);
		}
	}

	/* Consistency checks */

	/*
	 * Option noprefix was introduced just for backward compatibility
	 * with the old cpuset, so we allow noprefix only if mounting just
	 * the cpuset subsystem (mask excludes only cpuset).
	 */
	if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
	    (opts->subsys_bits & mask))
		return -EINVAL;

	/* Can't specify "none" together with some subsystems */
	if (opts->subsys_bits && opts->none)
		return -EINVAL;

	/*
	 * We either have to specify by name or by subsystems.  (So all
	 * empty hierarchies must have a name.)
	 */
	if (!opts->subsys_bits && !opts->name)
		return -EINVAL;

	/*
	 * Grab references on all the modules we'll need, so the subsystems
	 * don't dance around before rebind_subsystems attaches them.  This
	 * may take duplicate reference counts on a subsystem that's already
	 * used, but rebind_subsystems handles this case.
	 */
	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;

		if (!(bit & opts->subsys_bits))
			continue;
		if (!try_module_get(subsys[i]->module)) {
			module_pin_failed = true;
			break;
		}
	}
	if (module_pin_failed) {
		/*
		 * One of the modules was going away; to the user this is
		 * essentially a "subsystem doesn't exist" case.
		 */
		for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
			/* drop refcounts only on the ones we took */
			unsigned long bit = 1UL << i;

			if (!(bit & opts->subsys_bits))
				continue;
			module_put(subsys[i]->module);
		}
		return -ENOENT;
	}

	return 0;
}
1248
1249static void drop_parsed_module_refcounts(unsigned long subsys_bits)
1250{
1251 int i;
1252 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1253 unsigned long bit = 1UL << i;
1254
1255 if (!(bit & subsys_bits))
1256 continue;
1257 module_put(subsys[i]->module);
1258 }
1259}
1260
/*
 * remount_fs op: re-parse the options and rebind subsystems on an
 * existing hierarchy.  Flags and the hierarchy name may not change.
 */
static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{
	int ret = 0;
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	struct cgroup_sb_opts opts;

	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
	mutex_lock(&cgroup_mutex);

	/* See what subsystems are wanted */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

	/* Don't allow flags or name to change at remount */
	if (opts.flags != root->flags ||
	    (opts.name && strcmp(opts.name, root->name))) {
		ret = -EINVAL;
		drop_parsed_module_refcounts(opts.subsys_bits);
		goto out_unlock;
	}

	ret = rebind_subsystems(root, opts.subsys_bits);
	if (ret) {
		drop_parsed_module_refcounts(opts.subsys_bits);
		goto out_unlock;
	}

	/* (re)populate subsystem files */
	cgroup_populate_dir(cgrp);

	if (opts.release_agent)
		strcpy(root->release_agent_path, opts.release_agent);
 out_unlock:
	kfree(opts.release_agent);
	kfree(opts.name);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
	return ret;
}
1302
/* Superblock operations for cgroupfs. */
static const struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};

/* Initialise the per-cgroup lists, mutexes and locks. */
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	INIT_LIST_HEAD(&cgrp->sibling);
	INIT_LIST_HEAD(&cgrp->children);
	INIT_LIST_HEAD(&cgrp->css_sets);
	INIT_LIST_HEAD(&cgrp->release_list);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	INIT_LIST_HEAD(&cgrp->event_list);
	spin_lock_init(&cgrp->event_list_lock);
}
1321
1322static void init_cgroup_root(struct cgroupfs_root *root)
1323{
1324 struct cgroup *cgrp = &root->top_cgroup;
1325 INIT_LIST_HEAD(&root->subsys_list);
1326 INIT_LIST_HEAD(&root->root_list);
1327 root->number_of_cgroups = 1;
1328 cgrp->root = root;
1329 cgrp->top_cgroup = cgrp;
1330 init_cgroup_housekeeping(cgrp);
1331}
1332
/*
 * Allocate a unique hierarchy id for @root from hierarchy_ida.
 * Returns true on success, false if the ida could not be pre-loaded
 * (allocation failure).  Retries on -EAGAIN per the ida protocol.
 */
static bool init_root_id(struct cgroupfs_root *root)
{
	int ret = 0;

	do {
		if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
			return false;
		spin_lock(&hierarchy_id_lock);
		/* Try to allocate the next unused ID */
		ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
					&root->hierarchy_id);
		if (ret == -ENOSPC)
			/* Try again starting from 0 */
			ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
		if (!ret) {
			next_hierarchy_id = root->hierarchy_id + 1;
		} else if (ret != -EAGAIN) {
			/* Any error other than -EAGAIN is fatal here */
			BUG_ON(ret);
		}
		spin_unlock(&hierarchy_id_lock);
	} while (ret);
	return true;
}
1357
1358static int cgroup_test_super(struct super_block *sb, void *data)
1359{
1360 struct cgroup_sb_opts *opts = data;
1361 struct cgroupfs_root *root = sb->s_fs_info;
1362
1363
1364 if (opts->name && strcmp(opts->name, root->name))
1365 return 0;
1366
1367
1368
1369
1370
1371 if ((opts->subsys_bits || opts->none)
1372 && (opts->subsys_bits != root->subsys_bits))
1373 return 0;
1374
1375 return 1;
1376}
1377
1378static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1379{
1380 struct cgroupfs_root *root;
1381
1382 if (!opts->subsys_bits && !opts->none)
1383 return NULL;
1384
1385 root = kzalloc(sizeof(*root), GFP_KERNEL);
1386 if (!root)
1387 return ERR_PTR(-ENOMEM);
1388
1389 if (!init_root_id(root)) {
1390 kfree(root);
1391 return ERR_PTR(-ENOMEM);
1392 }
1393 init_cgroup_root(root);
1394
1395 root->subsys_bits = opts->subsys_bits;
1396 root->flags = opts->flags;
1397 if (opts->release_agent)
1398 strcpy(root->release_agent_path, opts->release_agent);
1399 if (opts->name)
1400 strcpy(root->name, opts->name);
1401 if (opts->clone_children)
1402 set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
1403 return root;
1404}
1405
1406static void cgroup_drop_root(struct cgroupfs_root *root)
1407{
1408 if (!root)
1409 return;
1410
1411 BUG_ON(!root->hierarchy_id);
1412 spin_lock(&hierarchy_id_lock);
1413 ida_remove(&hierarchy_ida, root->hierarchy_id);
1414 spin_unlock(&hierarchy_id_lock);
1415 kfree(root);
1416}
1417
1418static int cgroup_set_super(struct super_block *sb, void *data)
1419{
1420 int ret;
1421 struct cgroup_sb_opts *opts = data;
1422
1423
1424 if (!opts->new_root)
1425 return -EINVAL;
1426
1427 BUG_ON(!opts->subsys_bits && !opts->none);
1428
1429 ret = set_anon_super(sb, NULL);
1430 if (ret)
1431 return ret;
1432
1433 sb->s_fs_info = opts->new_root;
1434 opts->new_root->sb = sb;
1435
1436 sb->s_blocksize = PAGE_CACHE_SIZE;
1437 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1438 sb->s_magic = CGROUP_SUPER_MAGIC;
1439 sb->s_op = &cgroup_ops;
1440
1441 return 0;
1442}
1443
/* Create the root directory inode and dentry for a new cgroup sb. */
static int cgroup_get_rootdir(struct super_block *sb)
{
	static const struct dentry_operations cgroup_dops = {
		.d_iput = cgroup_diput,
		.d_delete = cgroup_delete,
	};

	struct inode *inode =
		cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
	struct dentry *dentry;

	if (!inode)
		return -ENOMEM;

	inode->i_fop = &simple_dir_operations;
	inode->i_op = &cgroup_dir_inode_operations;
	/* directories start off with i_nlink == 2 (for "." entry) */
	inc_nlink(inode);
	dentry = d_alloc_root(inode);
	if (!dentry) {
		iput(inode);
		return -ENOMEM;
	}
	sb->s_root = dentry;
	/* all other dentries on this sb get cgroup_dops by default */
	sb->s_d_op = &cgroup_dops;
	return 0;
}
1472
/*
 * Mount entry point: parse the options, then either create a new
 * hierarchy or reuse an existing one whose options match.
 */
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
				   int flags, const char *unused_dev_name,
				   void *data)
{
	struct cgroup_sb_opts opts;
	struct cgroupfs_root *root;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *new_root;

	/* First find the desired set of subsystems */
	mutex_lock(&cgroup_mutex);
	ret = parse_cgroupfs_options(data, &opts);
	mutex_unlock(&cgroup_mutex);
	if (ret)
		goto out_err;

	/*
	 * Allocate a new cgroup root.  We may not need it if we're
	 * reusing an existing hierarchy.
	 */
	new_root = cgroup_root_from_opts(&opts);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto drop_modules;
	}
	opts.new_root = new_root;

	/* Locate an existing or new sb for this hierarchy */
	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		cgroup_drop_root(opts.new_root);
		goto drop_modules;
	}

	root = sb->s_fs_info;
	BUG_ON(!root);
	if (root == opts.new_root) {
		/* We used the new root structure, so this is a new hierarchy */
		struct list_head tmp_cg_links;
		struct cgroup *root_cgrp = &root->top_cgroup;
		struct inode *inode;
		struct cgroupfs_root *existing_root;
		int i;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);

		if (strlen(root->name)) {
			/* Check for name clashes with existing mounts */
			for_each_active_root(existing_root) {
				if (!strcmp(existing_root->name, root->name)) {
					ret = -EBUSY;
					mutex_unlock(&cgroup_mutex);
					mutex_unlock(&inode->i_mutex);
					goto drop_new_super;
				}
			}
		}

		/*
		 * We're accessing css_set_count without locking
		 * css_set_lock here, but that's OK - it can only be
		 * increased by someone holding cgroup_lock, and
		 * that's us.  The worst that can happen is that we
		 * have some link structures left over.
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			goto drop_new_super;
		}

		ret = rebind_subsystems(root, root->subsys_bits);
		if (ret == -EBUSY) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			free_cg_links(&tmp_cg_links);
			goto drop_new_super;
		}

		/* EBUSY should be the only error here */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = root_cgrp;
		root->top_cgroup.dentry = sb->s_root;

		/* Link the top cgroup in this hierarchy into all
		 * the css_set objects */
		write_lock(&css_set_lock);
		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
			struct hlist_head *hhead = &css_set_table[i];
			struct hlist_node *node;
			struct css_set *cg;

			hlist_for_each_entry(cg, node, hhead, hlist)
				link_css_set(&tmp_cg_links, cg, root_cgrp);
		}
		write_unlock(&css_set_lock);

		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&root_cgrp->sibling));
		BUG_ON(!list_empty(&root_cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		cgroup_populate_dir(root_cgrp);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);
	} else {
		/*
		 * We re-used an existing hierarchy - the new root (if
		 * any) is not needed
		 */
		cgroup_drop_root(opts.new_root);
		/* no subsys rebinding, so refcounts don't need to change */
		drop_parsed_module_refcounts(opts.subsys_bits);
	}

	kfree(opts.release_agent);
	kfree(opts.name);
	return dget(sb->s_root);

 drop_new_super:
	deactivate_locked_super(sb);
 drop_modules:
	drop_parsed_module_refcounts(opts.subsys_bits);
 out_err:
	kfree(opts.release_agent);
	kfree(opts.name);
	return ERR_PTR(ret);
}
1622
/* kill_sb op: tear down a hierarchy when its last mount goes away. */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	int ret;
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	BUG_ON(!root);

	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));
	BUG_ON(!list_empty(&cgrp->sibling));

	mutex_lock(&cgroup_mutex);

	/* Rebind all subsystems back to the default hierarchy */
	ret = rebind_subsystems(root, 0);
	/* Shouldn't be able to fail ... */
	BUG_ON(ret);

	/*
	 * Release all the links from css_sets to this hierarchy's
	 * root cgroup
	 */
	write_lock(&css_set_lock);

	list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
				 cgrp_link_list) {
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		root_count--;
	}

	mutex_unlock(&cgroup_mutex);

	kill_litter_super(sb);
	cgroup_drop_root(root);
}
1667
/* The "cgroup" filesystem type; each mount instantiates one hierarchy. */
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
};
1673
/* sysfs kobject under which the cgroup filesystem is registered */
static struct kobject *cgroup_kobj;
1675
/* dentry -> cgroup: a cgroup directory dentry stores its cgroup in d_fsdata */
static inline struct cgroup *__d_cgrp(struct dentry *dentry)
{
	return dentry->d_fsdata;
}
1680
/* dentry -> cftype: a control-file dentry stores its cftype in d_fsdata */
static inline struct cftype *__d_cft(struct dentry *dentry)
{
	return dentry->d_fsdata;
}
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
/**
 * cgroup_path - generate the path of a cgroup
 * @cgrp: the cgroup in question
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
 * Called with cgroup_mutex held or else with an RCU-protected cgroup
 * reference.  Builds the path backwards by walking the dentry chain up
 * to the hierarchy root.  Returns 0 on success or -ENAMETOOLONG if the
 * path does not fit in @buflen bytes.
 */
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
	char *start;
	/* cgrp->dentry is protected by either RCU or cgroup_mutex */
	struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
						      rcu_read_lock_held() ||
						      cgroup_lock_is_held());

	if (!dentry || cgrp == dummytop) {
		/*
		 * Inactive subsystems have no dentry for their root
		 * cgroup; the root is simply "/".
		 */
		strcpy(buf, "/");
		return 0;
	}

	/* build the path from the tail of the buffer towards the front */
	start = buf + buflen;

	*--start = '\0';
	for (;;) {
		int len = dentry->d_name.len;

		if ((start -= len) < buf)
			return -ENAMETOOLONG;
		memcpy(start, dentry->d_name.name, len);
		cgrp = cgrp->parent;
		if (!cgrp)
			break;

		dentry = rcu_dereference_check(cgrp->dentry,
					       rcu_read_lock_held() ||
					       cgroup_lock_is_held());
		/* don't emit a '/' separator before the (empty) root name */
		if (!cgrp->parent)
			continue;
		if (--start < buf)
			return -ENAMETOOLONG;
		*start = '/';
	}
	/* shift the finished path to the front of the caller's buffer */
	memmove(buf, start, buf + buflen - start);
	return 0;
}
EXPORT_SYMBOL_GPL(cgroup_path);
1738
1739
1740
1741
1742
1743
1744
1745
/*
 * cgroup_task_migrate - move a task from @oldcgrp to @cgrp.
 *
 * @guarantee is set when the caller promises the needed css_set already
 * exists (so the lookup cannot fail).  If unset, this function may sleep
 * and can fail with -ENOMEM.  Either way it can fail with -ESRCH if the
 * task is exiting.
 */
static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
			       struct task_struct *tsk, bool guarantee)
{
	struct css_set *oldcg;
	struct css_set *newcg;

	/*
	 * Take a reference on the task's current css_set under task_lock
	 * so it can't be swapped out from under us (e.g. by cgroup_exit()
	 * switching the task to init_css_set).
	 */
	task_lock(tsk);
	oldcg = tsk->cgroups;
	get_css_set(oldcg);
	task_unlock(tsk);

	/* locate or allocate the destination css_set */
	if (guarantee) {
		/* caller guaranteed it already exists; just look it up */
		struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
		read_lock(&css_set_lock);
		newcg = find_existing_css_set(oldcg, cgrp, template);
		BUG_ON(!newcg);
		get_css_set(newcg);
		read_unlock(&css_set_lock);
	} else {
		might_sleep();
		/* find_css_set() returns newcg already referenced */
		newcg = find_css_set(oldcg, cgrp);
		if (!newcg) {
			put_css_set(oldcg);
			return -ENOMEM;
		}
	}
	put_css_set(oldcg);

	/* once PF_EXITING is set, tsk->cgroups is no longer ours to change */
	task_lock(tsk);
	if (tsk->flags & PF_EXITING) {
		task_unlock(tsk);
		put_css_set(newcg);
		return -ESRCH;
	}
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* Update the css_set task lists if they are in use */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list))
		list_move(&tsk->cg_list, &newcg->tasks);
	write_unlock(&css_set_lock);

	/*
	 * Drop the reference the task itself used to hold on oldcg (we
	 * traded it for newcg above).  The css_set is freed under RCU if
	 * this was the last reference.
	 */
	put_css_set(oldcg);

	/* the old cgroup may now be empty and eligible for release */
	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	return 0;
}
1808
1809
1810
1811
1812
1813
1814
1815
1816
/**
 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
 * @cgrp: the cgroup the task is attaching to
 * @tsk: the task to be attached
 *
 * Call holding cgroup_mutex.  May take task_lock of @tsk during the call.
 * Returns 0 on success or a negative errno; on failure, cancel_attach()
 * is invoked for every subsystem whose can_attach* succeeded.
 */
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	struct cgroup *oldcgrp;
	struct cgroupfs_root *root = cgrp->root;

	/* Nothing to do if the task is already in that cgroup */
	oldcgrp = task_cgroup_from_root(tsk, root);
	if (cgrp == oldcgrp)
		return 0;

	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, tsk);
			if (retval) {
				/*
				 * Remember which subsystem's can_attach()
				 * failed, so that on rollback we only call
				 * cancel_attach() against the subsystems
				 * that came before it.
				 */
				failed_ss = ss;
				goto out;
			}
		}
		if (ss->can_attach_task) {
			retval = ss->can_attach_task(cgrp, tsk);
			if (retval) {
				failed_ss = ss;
				goto out;
			}
		}
	}

	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
	if (retval)
		goto out;

	/* commit: notify every bound subsystem of the move */
	for_each_subsys(root, ss) {
		if (ss->pre_attach)
			ss->pre_attach(cgrp);
		if (ss->attach_task)
			ss->attach_task(cgrp, tsk);
		if (ss->attach)
			ss->attach(ss, cgrp, oldcgrp, tsk);
	}

	synchronize_rcu();

	/*
	 * wake up rmdir() waiters; their rmdir should fail since the
	 * cgroup is no longer empty.
	 */
	cgroup_wakeup_rmdir_waiter(cgrp);
out:
	if (retval) {
		for_each_subsys(root, ss) {
			if (ss == failed_ss)
				/*
				 * This subsystem is the one that failed the
				 * can_attach() check; it and any remaining
				 * subsystems never saw a successful
				 * can_attach(), so skip cancel_attach().
				 */
				break;
			if (ss->cancel_attach)
				ss->cancel_attach(ss, cgrp, tsk);
		}
	}
	return retval;
}
1889
1890
1891
1892
1893
1894
1895int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
1896{
1897 struct cgroupfs_root *root;
1898 int retval = 0;
1899
1900 cgroup_lock();
1901 for_each_active_root(root) {
1902 struct cgroup *from_cg = task_cgroup_from_root(from, root);
1903
1904 retval = cgroup_attach_task(from_cg, tsk);
1905 if (retval)
1906 break;
1907 }
1908 cgroup_unlock();
1909
1910 return retval;
1911}
1912EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
1913
1914
1915
1916
1917
1918
1919
/*
 * cgroup_attach_proc() prefetches all css_sets it will need before
 * committing to the migration; each prefetched css_set is kept on a
 * list of entries of this type.
 */
struct cg_list_entry {
	struct css_set *cg;	/* prefetched css_set (holds a reference) */
	struct list_head links;	/* membership in the prefetch list */
};
1924
1925static bool css_set_check_fetched(struct cgroup *cgrp,
1926 struct task_struct *tsk, struct css_set *cg,
1927 struct list_head *newcg_list)
1928{
1929 struct css_set *newcg;
1930 struct cg_list_entry *cg_entry;
1931 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
1932
1933 read_lock(&css_set_lock);
1934 newcg = find_existing_css_set(cg, cgrp, template);
1935 if (newcg)
1936 get_css_set(newcg);
1937 read_unlock(&css_set_lock);
1938
1939
1940 if (!newcg)
1941 return false;
1942
1943 list_for_each_entry(cg_entry, newcg_list, links) {
1944 if (cg_entry->cg == newcg) {
1945 put_css_set(newcg);
1946 return true;
1947 }
1948 }
1949
1950
1951 put_css_set(newcg);
1952 return false;
1953}
1954
1955
1956
1957
1958
1959static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
1960 struct list_head *newcg_list)
1961{
1962 struct css_set *newcg;
1963 struct cg_list_entry *cg_entry;
1964
1965
1966 newcg = find_css_set(cg, cgrp);
1967 if (!newcg)
1968 return -ENOMEM;
1969
1970 cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
1971 if (!cg_entry) {
1972 put_css_set(newcg);
1973 return -ENOMEM;
1974 }
1975 cg_entry->cg = newcg;
1976 list_add(&cg_entry->links, newcg_list);
1977 return 0;
1978}
1979
1980
1981
1982
1983
1984
1985
1986
1987
/**
 * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
 * @cgrp: the cgroup to attach to
 * @leader: the threadgroup leader task_struct of the group to be attached
 *
 * Call holding cgroup_mutex and the threadgroup_fork_lock of the leader.
 * Will take task_lock of each thread in leader's threadgroup individually
 * in turn.
 */
int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
{
	int retval, i, group_size;
	struct cgroup_subsys *ss, *failed_ss = NULL;
	bool cancel_failed_ss = false;
	/* guaranteed to be initialized later, but the compiler needs this */
	struct cgroup *oldcgrp = NULL;
	struct css_set *oldcg;
	struct cgroupfs_root *root = cgrp->root;
	/* threadgroup snapshot */
	struct task_struct *tsk;
	struct flex_array *group;
	/*
	 * we need to make sure we have css_sets for all the tasks we're
	 * going to move -before- we actually start moving them, so that in
	 * case we get an ENOMEM we can bail out before making any changes.
	 */
	struct list_head newcg_list;
	struct cg_list_entry *cg_entry, *temp_nobe;

	/*
	 * step 0: we cannot iterate the thread-group list while doing
	 * blocking allocations, so snapshot all threads into an array
	 * first.  threadgroup_fork_lock prevents new threads appearing,
	 * so get_nr_threads() can only over-estimate (threads may exit).
	 */
	group_size = get_nr_threads(leader);
	/* flex_array supports very large thread groups better than kmalloc */
	group = flex_array_alloc(sizeof(struct task_struct *), group_size,
				 GFP_KERNEL);
	if (!group)
		return -ENOMEM;
	/* pre-allocate so the rcu read-side fills below cannot fail */
	retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
	if (retval)
		goto out_free_group_list;

	/* RCU protects the thread list while we snapshot it */
	rcu_read_lock();
	if (!thread_group_leader(leader)) {
		/*
		 * a race with de_thread from another thread's exec() may
		 * strip us of our leadership; if this happens, there is no
		 * choice but to throw this task away and try again (from
		 * cgroup_procs_write); this is "double-double-toil-and-
		 * trouble-check locking".
		 */
		rcu_read_unlock();
		retval = -EAGAIN;
		goto out_free_group_list;
	}
	/* take a reference on each task in the group to go in the array */
	tsk = leader;
	i = 0;
	do {
		/* as per above, nr_threads may decrease, but not increase */
		BUG_ON(i >= group_size);
		get_task_struct(tsk);
		/*
		 * GFP_ATOMIC has no real effect here because of the prealloc
		 * above, but it documents that we must not sleep under RCU.
		 */
		retval = flex_array_put_ptr(group, i, tsk, GFP_ATOMIC);
		BUG_ON(retval != 0);
		i++;
	} while_each_thread(leader, tsk);
	/* remember the actual number of threads captured */
	group_size = i;
	rcu_read_unlock();

	/*
	 * step 1: check that we can legitimately attach to the cgroup.
	 */
	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, leader);
			if (retval) {
				failed_ss = ss;
				goto out_cancel_attach;
			}
		}
		/* per-thread check, run on every captured task */
		if (ss->can_attach_task) {
			/* run on each task in the threadgroup. */
			for (i = 0; i < group_size; i++) {
				tsk = flex_array_get_ptr(group, i);
				retval = ss->can_attach_task(cgrp, tsk);
				if (retval) {
					failed_ss = ss;
					cancel_failed_ss = true;
					goto out_cancel_attach;
				}
			}
		}
	}

	/*
	 * step 2: make sure css_sets exist for all threads to be migrated,
	 * so the commit phase below cannot fail on allocation.
	 */
	INIT_LIST_HEAD(&newcg_list);
	for (i = 0; i < group_size; i++) {
		tsk = flex_array_get_ptr(group, i);
		/* nothing to do if this task is already in the cgroup */
		oldcgrp = task_cgroup_from_root(tsk, root);
		if (cgrp == oldcgrp)
			continue;
		/* pin the task's current css_set while we inspect it */
		task_lock(tsk);
		if (tsk->flags & PF_EXITING) {
			/* task is exiting: just skip it */
			task_unlock(tsk);
			continue;
		}
		oldcg = tsk->cgroups;
		get_css_set(oldcg);
		task_unlock(tsk);

		if (css_set_check_fetched(cgrp, tsk, oldcg, &newcg_list)) {
			/* already prefetched - nothing to do */
			put_css_set(oldcg);
		} else {
			/* not yet fetched; allocate/look up and record it */
			retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
			put_css_set(oldcg);
			if (retval)
				goto out_list_teardown;
		}
	}

	/*
	 * step 3: css_sets are guaranteed, so move every task.  There are
	 * no failure cases after here (cgroup_task_migrate can only return
	 * -ESRCH for exiting tasks, which is tolerated) - this is the
	 * commit point.
	 */
	for_each_subsys(root, ss) {
		if (ss->pre_attach)
			ss->pre_attach(cgrp);
	}
	for (i = 0; i < group_size; i++) {
		tsk = flex_array_get_ptr(group, i);
		/* leave the task alone if it's already in place */
		oldcgrp = task_cgroup_from_root(tsk, root);
		if (cgrp == oldcgrp)
			continue;
		/* per-thread subsystem callbacks */
		for_each_subsys(root, ss) {
			if (ss->attach_task)
				ss->attach_task(cgrp, tsk);
		}
		/* PF_EXITING tasks simply get skipped (-ESRCH) */
		retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, true);
		BUG_ON(retval != 0 && retval != -ESRCH);
	}

	/*
	 * step 4: whole-group subsystem callbacks.  NOTE: oldcgrp here is
	 * whatever the last loop iteration left it as - subsystems that
	 * need a per-task oldcgrp cannot rely on it.
	 */
	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(ss, cgrp, oldcgrp, leader);
	}

	/*
	 * step 5: success! and cleanup
	 */
	synchronize_rcu();
	cgroup_wakeup_rmdir_waiter(cgrp);
	retval = 0;
out_list_teardown:
	/* drop the prefetched css_sets and their list entries */
	list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) {
		list_del(&cg_entry->links);
		put_css_set(cg_entry->cg);
		kfree(cg_entry);
	}
out_cancel_attach:
	/* same rollback rule as cgroup_attach_task() */
	if (retval) {
		for_each_subsys(root, ss) {
			if (ss == failed_ss) {
				if (cancel_failed_ss && ss->cancel_attach)
					ss->cancel_attach(ss, cgrp, leader);
				break;
			}
			if (ss->cancel_attach)
				ss->cancel_attach(ss, cgrp, leader);
		}
	}
	/* drop the task references taken while snapshotting the group */
	for (i = 0; i < group_size; i++) {
		tsk = flex_array_get_ptr(group, i);
		put_task_struct(tsk);
	}
out_free_group_list:
	flex_array_free(group);
	return retval;
}
2191
2192
2193
2194
2195
2196
/*
 * Find the task to attach by vpid and hand it to cgroup_attach_task()
 * (single task) or cgroup_attach_proc() (whole threadgroup).  Takes
 * cgroup_mutex; may take task_lock of the task.
 */
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
{
	struct task_struct *tsk;
	const struct cred *cred = current_cred(), *tcred;
	int ret;

	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;

	if (pid) {
		rcu_read_lock();
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
			rcu_read_unlock();
			cgroup_unlock();
			return -ESRCH;
		}
		if (threadgroup) {
			/*
			 * RCU protects this access; a race with de_thread
			 * may cause group_leader to stop being the leader,
			 * but cgroup_attach_proc detects that and retries.
			 */
			tsk = tsk->group_leader;
		} else if (tsk->flags & PF_EXITING) {
			/* optimization for the single-task-only case */
			rcu_read_unlock();
			cgroup_unlock();
			return -ESRCH;
		}

		/*
		 * even if we're attaching all tasks in the thread group,
		 * we only need to check permissions on one of them.
		 */
		tcred = __task_cred(tsk);
		if (cred->euid &&
		    cred->euid != tcred->uid &&
		    cred->euid != tcred->suid) {
			rcu_read_unlock();
			cgroup_unlock();
			return -EACCES;
		}
		get_task_struct(tsk);
		rcu_read_unlock();
	} else {
		/* pid 0 means the caller itself */
		if (threadgroup)
			tsk = current->group_leader;
		else
			tsk = current;
		get_task_struct(tsk);
	}

	if (threadgroup) {
		/* block fork() in the group while we migrate everyone */
		threadgroup_fork_write_lock(tsk);
		ret = cgroup_attach_proc(cgrp, tsk);
		threadgroup_fork_write_unlock(tsk);
	} else {
		ret = cgroup_attach_task(cgrp, tsk);
	}
	put_task_struct(tsk);
	cgroup_unlock();
	return ret;
}
2262
/* "tasks" file write handler: attach a single thread by pid */
static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
{
	return attach_task_by_pid(cgrp, pid, false);
}
2267
2268static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
2269{
2270 int ret;
2271 do {
2272
2273
2274
2275
2276
2277 ret = attach_task_by_pid(cgrp, tgid, true);
2278 } while (ret == -EAGAIN);
2279 return ret;
2280}
2281
2282
2283
2284
2285
2286
2287
2288
2289bool cgroup_lock_live_group(struct cgroup *cgrp)
2290{
2291 mutex_lock(&cgroup_mutex);
2292 if (cgroup_is_removed(cgrp)) {
2293 mutex_unlock(&cgroup_mutex);
2294 return false;
2295 }
2296 return true;
2297}
2298EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
2299
/*
 * Write handler for "release_agent": store the new release-agent path
 * on the hierarchy root.  The path length is validated before taking
 * the group lock.
 */
static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
				      const char *buffer)
{
	/* the root's buffer must be able to hold any valid path */
	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
	if (strlen(buffer) >= PATH_MAX)
		return -EINVAL;
	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;
	strcpy(cgrp->root->release_agent_path, buffer);
	cgroup_unlock();
	return 0;
}
2312
2313static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
2314 struct seq_file *seq)
2315{
2316 if (!cgroup_lock_live_group(cgrp))
2317 return -ENODEV;
2318 seq_puts(seq, cgrp->root->release_agent_path);
2319 seq_putc(seq, '\n');
2320 cgroup_unlock();
2321 return 0;
2322}
2323
2324
2325#define CGROUP_LOCAL_BUFFER_SIZE 64
2326
/*
 * Parse an integer written from userspace and pass it to the file's
 * write_u64 or write_s64 handler.  The stripped buffer must be fully
 * consumed by the conversion, otherwise -EINVAL is returned.
 */
static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
				struct file *file,
				const char __user *userbuf,
				size_t nbytes, loff_t *unused_ppos)
{
	char buffer[CGROUP_LOCAL_BUFFER_SIZE];
	int retval = 0;
	char *end;

	if (!nbytes)
		return -EINVAL;
	if (nbytes >= sizeof(buffer))
		return -E2BIG;
	if (copy_from_user(buffer, userbuf, nbytes))
		return -EFAULT;

	buffer[nbytes] = 0;	/* nul-terminate before parsing */
	if (cft->write_u64) {
		u64 val = simple_strtoull(strstrip(buffer), &end, 0);
		if (*end)	/* trailing garbage after the number */
			return -EINVAL;
		retval = cft->write_u64(cgrp, cft, val);
	} else {
		s64 val = simple_strtoll(strstrip(buffer), &end, 0);
		if (*end)
			return -EINVAL;
		retval = cft->write_s64(cgrp, cft, val);
	}
	if (!retval)
		retval = nbytes;	/* success: report everything consumed */
	return retval;
}
2359
/*
 * Copy a string from userspace (into a stack buffer or, for larger
 * writes, a temporary heap buffer), strip surrounding whitespace, and
 * pass it to the file's write_string handler.
 */
static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
				   struct file *file,
				   const char __user *userbuf,
				   size_t nbytes, loff_t *unused_ppos)
{
	char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
	int retval = 0;
	size_t max_bytes = cft->max_write_len;
	char *buffer = local_buffer;

	if (!max_bytes)
		max_bytes = sizeof(local_buffer) - 1;
	if (nbytes >= max_bytes)
		return -E2BIG;
	/* allocate a dynamic buffer if the write won't fit on the stack */
	if (nbytes >= sizeof(local_buffer)) {
		buffer = kmalloc(nbytes + 1, GFP_KERNEL);
		if (buffer == NULL)
			return -ENOMEM;
	}
	if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
		retval = -EFAULT;
		goto out;
	}

	buffer[nbytes] = 0;	/* nul-terminate */
	retval = cft->write_string(cgrp, cft, strstrip(buffer));
	if (!retval)
		retval = nbytes;
out:
	if (buffer != local_buffer)
		kfree(buffer);
	return retval;
}
2394
2395static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
2396 size_t nbytes, loff_t *ppos)
2397{
2398 struct cftype *cft = __d_cft(file->f_dentry);
2399 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2400
2401 if (cgroup_is_removed(cgrp))
2402 return -ENODEV;
2403 if (cft->write)
2404 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
2405 if (cft->write_u64 || cft->write_s64)
2406 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
2407 if (cft->write_string)
2408 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
2409 if (cft->trigger) {
2410 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
2411 return ret ? ret : nbytes;
2412 }
2413 return -EINVAL;
2414}
2415
2416static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
2417 struct file *file,
2418 char __user *buf, size_t nbytes,
2419 loff_t *ppos)
2420{
2421 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2422 u64 val = cft->read_u64(cgrp, cft);
2423 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
2424
2425 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2426}
2427
2428static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
2429 struct file *file,
2430 char __user *buf, size_t nbytes,
2431 loff_t *ppos)
2432{
2433 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
2434 s64 val = cft->read_s64(cgrp, cft);
2435 int len = sprintf(tmp, "%lld\n", (long long) val);
2436
2437 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
2438}
2439
2440static ssize_t cgroup_file_read(struct file *file, char __user *buf,
2441 size_t nbytes, loff_t *ppos)
2442{
2443 struct cftype *cft = __d_cft(file->f_dentry);
2444 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2445
2446 if (cgroup_is_removed(cgrp))
2447 return -ENODEV;
2448
2449 if (cft->read)
2450 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
2451 if (cft->read_u64)
2452 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
2453 if (cft->read_s64)
2454 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
2455 return -EINVAL;
2456}
2457
2458
2459
2460
2461
2462
/* seqfile private state: the control file's cftype plus its cgroup */
struct cgroup_seqfile_state {
	struct cftype *cft;
	struct cgroup *cgroup;
};
2467
/* read_map fill callback: emit one "key value" line into the seq_file */
static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
{
	struct seq_file *sf = cb->state;
	return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
}
2473
2474static int cgroup_seqfile_show(struct seq_file *m, void *arg)
2475{
2476 struct cgroup_seqfile_state *state = m->private;
2477 struct cftype *cft = state->cft;
2478 if (cft->read_map) {
2479 struct cgroup_map_cb cb = {
2480 .fill = cgroup_map_add,
2481 .state = m,
2482 };
2483 return cft->read_map(state->cgroup, cft, &cb);
2484 }
2485 return cft->read_seq_string(state->cgroup, cft, m);
2486}
2487
/* free the cgroup_seqfile_state allocated in cgroup_file_open() */
static int cgroup_seqfile_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	kfree(seq->private);
	return single_release(inode, file);
}
2494
/* fops for control files served via the seq_file interface */
static const struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.write = cgroup_file_write,
	.llseek = seq_lseek,
	.release = cgroup_seqfile_release,
};
2501
/*
 * Open a cgroup control file.  Files exposing read_map/read_seq_string
 * are rerouted to the seq_file operations with a freshly allocated
 * state; other files may supply their own open callback.
 */
static int cgroup_file_open(struct inode *inode, struct file *file)
{
	int err;
	struct cftype *cft;

	err = generic_file_open(inode, file);
	if (err)
		return err;
	cft = __d_cft(file->f_dentry);

	if (cft->read_map || cft->read_seq_string) {
		struct cgroup_seqfile_state *state =
			kzalloc(sizeof(*state), GFP_USER);
		if (!state)
			return -ENOMEM;
		state->cft = cft;
		state->cgroup = __d_cgrp(file->f_dentry->d_parent);
		/* swap in the seqfile fops for the rest of this file's life */
		file->f_op = &cgroup_seqfile_operations;
		err = single_open(file, cgroup_seqfile_show, state);
		if (err < 0)
			kfree(state);
	} else if (cft->open)
		err = cft->open(inode, file);
	else
		err = 0;

	return err;
}
2530
2531static int cgroup_file_release(struct inode *inode, struct file *file)
2532{
2533 struct cftype *cft = __d_cft(file->f_dentry);
2534 if (cft->release)
2535 return cft->release(inode, file);
2536 return 0;
2537}
2538
2539
2540
2541
/*
 * cgroup_rename - Only allow simple rename of directories in place.
 */
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
{
	/* only cgroup directories themselves may be renamed */
	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		return -ENOTDIR;
	/* the target name must not already exist */
	if (new_dentry->d_inode)
		return -EEXIST;
	/* a cgroup cannot be moved to a different parent */
	if (old_dir != new_dir)
		return -EIO;
	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
}
2553
/* default fops for cgroup control files */
static const struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};
2561
/* inode ops for cgroup directories: mkdir/rmdir create and destroy cgroups */
static const struct inode_operations cgroup_dir_inode_operations = {
	.lookup = cgroup_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
};
2568
/*
 * simple_lookup() clone: cgroup entries are created up front, so any
 * lookup miss simply instantiates a negative dentry.
 */
static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
	if (dentry->d_name.len > NAME_MAX)
		return ERR_PTR(-ENAMETOOLONG);
	d_add(dentry, NULL);
	return NULL;
}
2576
2577
2578
2579
/*
 * Check if a file is a cgroup control file; if so return its cftype,
 * otherwise -EINVAL.
 */
static inline struct cftype *__file_cft(struct file *file)
{
	if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
		return ERR_PTR(-EINVAL);
	return __d_cft(file->f_dentry);
}
2586
/* allocate and instantiate a new inode for a cgroup directory or file */
static int cgroup_create_file(struct dentry *dentry, mode_t mode,
			      struct super_block *sb)
{
	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* directories start with i_nlink == 2 (for the "." entry) */
		inc_nlink(inode);

		/* return with the directory's i_mutex held, so the new
		 * cgroup can be populated without racing another mkdir;
		 * nested class because the parent's i_mutex is also held */
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
	}
	d_instantiate(dentry, inode);
	dget(dentry);	/* extra count - pin the dentry in core */
	return 0;
}
2619
2620
2621
2622
2623
2624
2625
2626
/*
 * cgroup_create_dir - create a directory for an object.
 * @cgrp: the cgroup we create the directory for; must have a valid
 *        ->parent field, and its ->dentry field will be filled in here.
 * @dentry: dentry of the new cgroup
 * @mode: mode to set on the new directory.
 */
static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
			     mode_t mode)
{
	struct dentry *parent;
	int error = 0;

	parent = cgrp->parent->dentry;
	error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
	if (!error) {
		dentry->d_fsdata = cgrp;
		inc_nlink(parent->d_inode);	/* new subdir adds a ".." link */
		rcu_assign_pointer(cgrp->dentry, dentry);
		dget(dentry);	/* reference held via cgrp->dentry */
	}
	dput(dentry);

	return error;
}
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655static mode_t cgroup_file_mode(const struct cftype *cft)
2656{
2657 mode_t mode = 0;
2658
2659 if (cft->mode)
2660 return cft->mode;
2661
2662 if (cft->read || cft->read_u64 || cft->read_s64 ||
2663 cft->read_map || cft->read_seq_string)
2664 mode |= S_IRUGO;
2665
2666 if (cft->write || cft->write_u64 || cft->write_s64 ||
2667 cft->write_string || cft->trigger)
2668 mode |= S_IWUSR;
2669
2670 return mode;
2671}
2672
/*
 * Create one control file in @cgrp's directory.  The file is named
 * "<subsys>.<name>" unless @subsys is NULL or the hierarchy was mounted
 * with the noprefix option.  Caller must hold the directory's i_mutex.
 */
int cgroup_add_file(struct cgroup *cgrp,
		       struct cgroup_subsys *subsys,
		       const struct cftype *cft)
{
	struct dentry *dir = cgrp->dentry;
	struct dentry *dentry;
	int error;
	mode_t mode;

	/* +2: one for the '.' separator, one for the trailing NUL */
	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
		strcpy(name, subsys->name);
		strcat(name, ".");
	}
	strcat(name, cft->name);
	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
	dentry = lookup_one_len(name, dir, strlen(name));
	if (!IS_ERR(dentry)) {
		mode = cgroup_file_mode(cft);
		error = cgroup_create_file(dentry, mode | S_IFREG,
						cgrp->root->sb);
		if (!error)
			dentry->d_fsdata = (void *)cft;
		dput(dentry);
	} else
		error = PTR_ERR(dentry);
	return error;
}
EXPORT_SYMBOL_GPL(cgroup_add_file);
2702
2703int cgroup_add_files(struct cgroup *cgrp,
2704 struct cgroup_subsys *subsys,
2705 const struct cftype cft[],
2706 int count)
2707{
2708 int i, err;
2709 for (i = 0; i < count; i++) {
2710 err = cgroup_add_file(cgrp, subsys, &cft[i]);
2711 if (err)
2712 return err;
2713 }
2714 return 0;
2715}
2716EXPORT_SYMBOL_GPL(cgroup_add_files);
2717
2718
2719
2720
2721
2722
2723
/**
 * cgroup_task_count - count the number of tasks in a cgroup.
 * @cgrp: the cgroup in question
 *
 * Return the number of tasks in the cgroup, summed over the refcounts
 * of all css_sets linked to it.
 */
int cgroup_task_count(const struct cgroup *cgrp)
{
	int count = 0;
	struct cg_cgroup_link *link;

	read_lock(&css_set_lock);
	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
		count += atomic_read(&link->cg->refcount);
	}
	read_unlock(&css_set_lock);
	return count;
}
2736
2737
2738
2739
2740
/*
 * Advance a cgroup task iterator to the start of the next css_set that
 * has tasks, skipping empty css_sets.  Sets it->cg_link to NULL when
 * the iteration is exhausted.  Called with css_set_lock read-held.
 */
static void cgroup_advance_iter(struct cgroup *cgrp,
				struct cgroup_iter *it)
{
	struct list_head *l = it->cg_link;
	struct cg_cgroup_link *link;
	struct css_set *cg;

	/* Advance to the next non-empty css_set */
	do {
		l = l->next;
		if (l == &cgrp->css_sets) {
			it->cg_link = NULL;	/* wrapped: no more css_sets */
			return;
		}
		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
		cg = link->cg;
	} while (list_empty(&cg->tasks));
	it->cg_link = l;
	it->task = cg->tasks.next;
}
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
/*
 * To reduce fork() overhead on systems that never iterate cgroup tasks,
 * the per-css_set task lists are not maintained until the first call to
 * cgroup_iter_start().  This function turns them on and retroactively
 * links every existing task.  tasklist_lock is not needed here because
 * do_each_thread()/while_each_thread() are RCU-protected.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;
	write_lock(&css_set_lock);
	use_task_css_set_links = 1;
	do_each_thread(g, p) {
		task_lock(p);
		/*
		 * Skip exiting tasks: otherwise we would race with
		 * cgroup_exit() and the list entry would never be
		 * removed even though the task has exited.
		 */
		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
			list_add(&p->cg_list, &p->cgroups->tasks);
		task_unlock(p);
	} while_each_thread(g, p);
	write_unlock(&css_set_lock);
}
2789
/*
 * Begin iterating the tasks attached to @cgrp.  Leaves css_set_lock
 * read-held until the matching cgroup_iter_end().
 */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
{
	/*
	 * The first time anyone tries to iterate across a cgroup,
	 * we need to enable the list linking each css_set to its
	 * tasks, and fix up all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	read_lock(&css_set_lock);
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}
2804
/* Return the next task in the iteration, or NULL when exhausted. */
struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
					struct cgroup_iter *it)
{
	struct task_struct *res;
	struct list_head *l = it->task;
	struct cg_cgroup_link *link;

	/* If the iterator cg_link is NULL, there are no tasks left */
	if (!it->cg_link)
		return NULL;
	res = list_entry(l, struct task_struct, cg_list);
	/* Advance iterator to find the next entry */
	l = l->next;
	link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
	if (l == &link->cg->tasks) {
		/* We hit the end of this css_set's task list - move on
		 * to the next cg_cgroup_link */
		cgroup_advance_iter(cgrp, it);
	} else {
		it->task = l;
	}
	return res;
}
2828
/* Finish an iteration started by cgroup_iter_start(): drop css_set_lock. */
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
{
	read_unlock(&css_set_lock);
}
2833
2834static inline int started_after_time(struct task_struct *t1,
2835 struct timespec *time,
2836 struct task_struct *t2)
2837{
2838 int start_diff = timespec_compare(&t1->start_time, time);
2839 if (start_diff > 0) {
2840 return 1;
2841 } else if (start_diff < 0) {
2842 return 0;
2843 } else {
2844
2845
2846
2847
2848
2849
2850
2851
2852 return t1 > t2;
2853 }
2854}
2855
2856
2857
2858
2859
2860
2861static inline int started_after(void *p1, void *p2)
2862{
2863 struct task_struct *t1 = p1;
2864 struct task_struct *t2 = p2;
2865 return started_after_time(t1, &t2->start_time, t2);
2866}
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
/**
 * cgroup_scan_tasks - iterate through all the tasks in a cgroup
 * @scan: struct cgroup_scanner containing arguments for the scan
 *
 * For each task in scan->cg that passes scan->test_task() (if given),
 * call scan->process_task().  Unlike the plain cgroup iterator, the
 * process_task() callbacks run without css_set_lock held: candidate
 * tasks are gathered in a heap sorted by start time, processed, and the
 * scan repeats considering only tasks that started after the previous
 * pass's newest task - guaranteeing every member task is acted upon.
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;
	/* Never dereference latest_task, since it's not refcounted */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* The caller supplied our heap and pre-allocated its memory */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		/* We need to allocate our own heap memory */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* cannot allocate the heap */
			return retval;
	}

 again:
	/*
	 * Each pass gathers qualifying tasks into the heap.  If the
	 * fixed-size heap overflows, tasks that started later are
	 * dropped and picked up by a subsequent pass, which only looks
	 * at tasks newer than latest_time/latest_task from this pass.
	 */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		/*
		 * Only affect tasks that qualify per the caller's
		 * callback, if one was provided
		 */
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/*
		 * Only process tasks that started after the last task
		 * we processed
		 */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/* inserted; the heap wasn't previously full */
			get_task_struct(p);
		} else if (dropped != p) {
			/* inserted, and pushed out a different task */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/*
		 * Else the new task was newer than anything already in
		 * the heap and wasn't inserted
		 */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				/* remember the newest task of this pass */
				latest_time = q->start_time;
				latest_task = q;
			}
			/* Process the task per the caller's callback */
			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/*
		 * If we had to process any tasks at all, scan again in
		 * case some of them were in the middle of forking
		 * children that didn't get processed.  Not the most
		 * efficient scheme, but it avoids taking extra locks in
		 * the fork path.
		 */
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
/*
 * Allocate a pid array of @count entries; fall back to vmalloc when the
 * array is larger than kmalloc comfortably serves (see PIDLIST_TOO_LARGE).
 */
static void *pidlist_allocate(int count)
{
	if (PIDLIST_TOO_LARGE(count))
		return vmalloc(count * sizeof(pid_t));
	else
		return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
}
/* Free a pid array, matching whichever allocator pidlist_allocate() used. */
static void pidlist_free(void *p)
{
	if (!is_vmalloc_addr(p)) {
		kfree(p);
		return;
	}
	vfree(p);
}
/* resize a pid array to @newcount entries, preserving the allocator kind */
static void *pidlist_resize(void *p, int newcount)
{
	void *newlist;
	/* note: if the new alloc fails, old p remains valid either way */
	if (is_vmalloc_addr(p)) {
		newlist = vmalloc(newcount * sizeof(pid_t));
		if (!newlist)
			return NULL;
		/*
		 * NOTE(review): only newcount entries are copied, so this
		 * is only safe when shrinking (its sole caller,
		 * pidlist_uniq(), always shrinks); growing would need the
		 * old length to copy correctly -- confirm before adding
		 * new callers.
		 */
		memcpy(newlist, p, newcount * sizeof(pid_t));
		vfree(p);
	} else {
		newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
	}
	return newlist;
}
3039
3040
3041
3042
3043
3044
3045
3046
3047#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
/*
 * pidlist_uniq - strip out all duplicates from a sorted pid list, in place.
 * If the stripped list is sufficiently smaller (and a smaller buffer can
 * be allocated), the unneeded memory is released via pidlist_resize().
 * Returns the number of unique elements.
 */
static int pidlist_uniq(pid_t **p, int length)
{
	int src, dest = 1;
	pid_t *list = *p;
	pid_t *newlist;

	/*
	 * the 0th element is always kept as unique, so src/dest start at 1;
	 * lists of length 0 or 1 need no work at all.
	 */
	if (length == 0 || length == 1)
		return length;
	/* src scans the list, dest marks where the next unique value goes */
	for (src = 1; src < length; src++) {
		/* skip over a run of duplicates of the previous value */
		while (list[src] == list[src-1]) {
			src++;
			if (src == length)
				goto after;
		}
		/* found the next unique element; compact it down */
		list[dest] = list[src];
		dest++;
	}
after:
	/*
	 * if the length shrank enough, try to hand back the excess memory;
	 * if that allocation fails we just keep the larger buffer.
	 */
	if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
		newlist = pidlist_resize(list, dest);
		if (newlist)
			*p = newlist;
	}
	return dest;
}
3085
/* sort() comparator for pid arrays: ascending numeric order */
static int cmppid(const void *a, const void *b)
{
	pid_t lhs = *(const pid_t *)a;
	pid_t rhs = *(const pid_t *)b;

	return lhs - rhs;
}
3090
3091
3092
3093
3094
3095
3096
/*
 * Find the appropriate pidlist for our purpose (given procs vs tasks),
 * creating one if none exists.  Returns with the pidlist's rwsem held
 * for write, or NULL (no locks held) on allocation failure.
 */
static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
						  enum cgroup_filetype type)
{
	struct cgroup_pidlist *l;
	/* don't need task_nsproxy() if we're looking at ourself */
	struct pid_namespace *ns = current->nsproxy->pid_ns;

	/*
	 * We can't drop the pidlist_mutex before taking the l->mutex in
	 * case the last ref-holder is about to remove l from the list at
	 * the same time.  Holding the pidlist_mutex precludes somebody
	 * taking whichever list we find out from under us.
	 */
	mutex_lock(&cgrp->pidlist_mutex);
	list_for_each_entry(l, &cgrp->pidlists, links) {
		if (l->key.type == type && l->key.ns == ns) {
			/* make sure l doesn't vanish out from under us */
			down_write(&l->mutex);
			mutex_unlock(&cgrp->pidlist_mutex);
			return l;
		}
	}
	/* entry not found; create a new one */
	l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
	if (!l) {
		mutex_unlock(&cgrp->pidlist_mutex);
		return l;
	}
	init_rwsem(&l->mutex);
	down_write(&l->mutex);
	l->key.type = type;
	l->key.ns = get_pid_ns(ns);	/* pin the namespace for the key */
	l->use_count = 0;	/* caller increments after loading the array */
	l->list = NULL;
	l->owner = cgrp;
	list_add(&l->links, &cgrp->pidlists);
	mutex_unlock(&cgrp->pidlist_mutex);
	return l;
}
3136
3137
3138
3139
/*
 * Load a cgroup's pid array with either the member procs' tgids or
 * tasks' pids, sorted (and deduplicated for the procs file), and attach
 * it to the appropriate cgroup_pidlist via *@lp.
 */
static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
			      struct cgroup_pidlist **lp)
{
	pid_t *array;
	int length;
	int pid, n = 0;	/* n tracks how much of the array is populated */
	struct cgroup_iter it;
	struct task_struct *tsk;
	struct cgroup_pidlist *l;

	/*
	 * If the cgroup gains more users after we read the count, we
	 * won't have enough space - tough.  To the caller this race is
	 * indistinguishable from the extra users showing up a bit later.
	 */
	length = cgroup_task_count(cgrp);
	array = pidlist_allocate(length);
	if (!array)
		return -ENOMEM;
	/* now, populate the array */
	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		if (unlikely(n == length))
			break;
		/* get tgid or pid for procs or tasks file respectively */
		if (type == CGROUP_FILE_PROCS)
			pid = task_tgid_vnr(tsk);
		else
			pid = task_pid_vnr(tsk);
		if (pid > 0)	/* only record ids visible in our pid ns */
			array[n++] = pid;
	}
	cgroup_iter_end(cgrp, &it);
	length = n;
	/* now sort & (if procs) strip out duplicates */
	sort(array, length, sizeof(pid_t), cmppid, NULL);
	if (type == CGROUP_FILE_PROCS)
		length = pidlist_uniq(&array, length);
	l = cgroup_pidlist_find(cgrp, type);
	if (!l) {
		pidlist_free(array);
		return -ENOMEM;
	}
	/* store the array, freeing the old one - l->mutex already held */
	pidlist_free(l->list);
	l->list = array;
	l->length = length;
	l->use_count++;
	up_write(&l->mutex);
	*lp = l;
	return 0;
}
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
/**
 * cgroupstats_build - build and fill cgroupstats
 * @stats: cgroupstats to fill information into
 * @dentry: A dentry entry belonging to the cgroup for which stats have
 * been requested.
 *
 * Counts the tasks in the cgroup by scheduler state so that taskstats
 * can export the numbers to user space. Returns 0 on success, -EINVAL
 * if @dentry is not a cgroupfs directory.
 */
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
	int ret = -EINVAL;
	struct cgroup *cgrp;
	struct cgroup_iter it;
	struct task_struct *tsk;

	/*
	 * Validate dentry by checking the superblock operations,
	 * and make sure it's a directory.
	 */
	if (dentry->d_sb->s_op != &cgroup_ops ||
	    !S_ISDIR(dentry->d_inode->i_mode))
		goto err;

	ret = 0;
	cgrp = dentry->d_fsdata;

	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		switch (tsk->state) {
		case TASK_RUNNING:
			stats->nr_running++;
			break;
		case TASK_INTERRUPTIBLE:
			stats->nr_sleeping++;
			break;
		case TASK_UNINTERRUPTIBLE:
			stats->nr_uninterruptible++;
			break;
		case TASK_STOPPED:
			stats->nr_stopped++;
			break;
		default:
			/* anything else: count as io-wait if delayacct says so */
			if (delayacct_is_task_waiting_on_io(tsk))
				stats->nr_io_wait++;
			break;
		}
	}
	cgroup_iter_end(cgrp, &it);

err:
	return ret;
}
3247
3248
3249
3250
3251
3252
3253
3254
/*
 * seq_file methods for the tasks/procs files. The seq_file position is the
 * next pid to display; the seq_file iterator is a pointer to the pid
 * in the cgroup's pidlist array. Takes l->mutex for reading; released
 * again in cgroup_pidlist_stop().
 */
static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
{
	/*
	 * Initially we receive a position value that corresponds to
	 * one more than the last pid shown (or 0 on the first call or
	 * after a seek to the start). Use a binary-search to find the
	 * next pid to display, if any
	 */
	struct cgroup_pidlist *l = s->private;
	int index = 0, pid = *pos;
	int *iter;

	down_read(&l->mutex);
	if (pid) {
		int end = l->length;

		while (index < end) {
			int mid = (index + end) / 2;
			if (l->list[mid] == pid) {
				index = mid;
				break;
			} else if (l->list[mid] <= pid)
				index = mid + 1;
			else
				end = mid;
		}
	}
	/* If we're off the end of the array, we're done */
	if (index >= l->length)
		return NULL;
	/* Update the abstract position to be the actual pid that we found */
	iter = l->list + index;
	*pos = *iter;
	return iter;
}
3290
/* seq_file stop: drop the read lock taken in cgroup_pidlist_start() */
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
	struct cgroup_pidlist *l = s->private;
	up_read(&l->mutex);
}
3296
3297static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
3298{
3299 struct cgroup_pidlist *l = s->private;
3300 pid_t *p = v;
3301 pid_t *end = l->list + l->length;
3302
3303
3304
3305
3306 p++;
3307 if (p >= end) {
3308 return NULL;
3309 } else {
3310 *pos = *p;
3311 return p;
3312 }
3313}
3314
/* seq_file show: emit one pid per line */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	return seq_printf(s, "%d\n", *(int *)v);
}
3319
3320
3321
3322
3323
/*
 * seq_operations functions for iterating on pidlists through seq_file -
 * independent of whether it's tasks or procs
 */
static const struct seq_operations cgroup_pidlist_seq_operations = {
	.start = cgroup_pidlist_start,
	.stop = cgroup_pidlist_stop,
	.next = cgroup_pidlist_next,
	.show = cgroup_pidlist_show,
};
3330
/* drop one use count on @l; frees and unlinks it if we were the last user */
static void cgroup_release_pid_array(struct cgroup_pidlist *l)
{
	/*
	 * the case where we're the last user of this particular pidlist will
	 * have us remove it from the cgroup's list, which entails taking the
	 * mutex. since in pidlist_find the pidlist->lock depends on cgroup->
	 * pidlist_mutex, we have to take pidlist_mutex first.
	 */
	mutex_lock(&l->owner->pidlist_mutex);
	down_write(&l->mutex);
	BUG_ON(!l->use_count);
	if (!--l->use_count) {
		/* we're the last user if refcount is 0; remove and free */
		list_del(&l->links);
		mutex_unlock(&l->owner->pidlist_mutex);
		pidlist_free(l->list);
		put_pid_ns(l->key.ns);
		up_write(&l->mutex);
		kfree(l);
		return;
	}
	mutex_unlock(&l->owner->pidlist_mutex);
	up_write(&l->mutex);
}
3355
3356static int cgroup_pidlist_release(struct inode *inode, struct file *file)
3357{
3358 struct cgroup_pidlist *l;
3359 if (!(file->f_mode & FMODE_READ))
3360 return 0;
3361
3362
3363
3364
3365 l = ((struct seq_file *)file->private_data)->private;
3366 cgroup_release_pid_array(l);
3367 return seq_release(inode, file);
3368}
3369
/* file_operations shared by the "tasks" and "cgroup.procs" control files */
static const struct file_operations cgroup_pidlist_operations = {
	.read = seq_read,
	.llseek = seq_lseek,
	.write = cgroup_file_write,
	.release = cgroup_pidlist_release,
};
3376
3377
3378
3379
3380
3381
3382
/*
 * The following functions handle opens on a file that displays a pidlist
 * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
 * in the cgroup.
 */
/* helper function for the two below it */
static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
{
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
	struct cgroup_pidlist *l;
	int retval;

	/* Nothing to do for write-only files */
	if (!(file->f_mode & FMODE_READ))
		return 0;

	/* have the array populated; takes a use count on l */
	retval = pidlist_array_load(cgrp, type, &l);
	if (retval)
		return retval;
	/* configure file information */
	file->f_op = &cgroup_pidlist_operations;

	retval = seq_open(file, &cgroup_pidlist_seq_operations);
	if (retval) {
		/* drop the use count taken by pidlist_array_load() */
		cgroup_release_pid_array(l);
		return retval;
	}
	((struct seq_file *)file->private_data)->private = l;
	return 0;
}
/* open handler for the per-thread "tasks" control file */
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
}
/* open handler for the per-process "cgroup.procs" control file */
static int cgroup_procs_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
}
3416
/* read handler for "notify_on_release": report the flag as 0/1 */
static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
					    struct cftype *cft)
{
	return notify_on_release(cgrp);
}
3422
/*
 * write handler for "notify_on_release": set or clear the
 * CGRP_NOTIFY_ON_RELEASE flag according to @val.
 */
static int cgroup_write_notify_on_release(struct cgroup *cgrp,
					  struct cftype *cft,
					  u64 val)
{
	/* any write also resets the pending "releasable" state */
	clear_bit(CGRP_RELEASABLE, &cgrp->flags);
	if (val)
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
	else
		clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
	return 0;
}
3434
3435
3436
3437
3438
3439
/*
 * Unregister event and free resources.
 *
 * Gets called from workqueue.
 */
static void cgroup_event_remove(struct work_struct *work)
{
	struct cgroup_event *event = container_of(work, struct cgroup_event,
			remove);
	struct cgroup *cgrp = event->cgrp;

	event->cft->unregister_event(cgrp, event->cft, event->eventfd);

	eventfd_ctx_put(event->eventfd);
	kfree(event);
	/* drop the dentry reference taken when the event was registered */
	dput(cgrp->dentry);
}
3452
3453
3454
3455
3456
3457
/*
 * Gets called on POLLHUP on eventfd when user closes it.
 *
 * Called with wqh->lock held and interrupts disabled.
 */
static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
		int sync, void *key)
{
	struct cgroup_event *event = container_of(wait,
			struct cgroup_event, wait);
	struct cgroup *cgrp = event->cgrp;
	unsigned long flags = (unsigned long)key;

	if (flags & POLLHUP) {
		/* detach from the waitqueue and the cgroup's event list */
		__remove_wait_queue(event->wqh, &event->wait);
		spin_lock(&cgrp->event_list_lock);
		list_del(&event->list);
		spin_unlock(&cgrp->event_list_lock);
		/*
		 * We are in atomic context, but cgroup_event_remove() may
		 * sleep, so we have to call it in workqueue.
		 */
		schedule_work(&event->remove);
	}

	return 0;
}
3480
/*
 * poll_table queue callback: remember the eventfd's waitqueue head so
 * cgroup_event_wake() can later detach from it, then join the queue.
 */
static void cgroup_event_ptable_queue_proc(struct file *file,
		wait_queue_head_t *wqh, poll_table *pt)
{
	struct cgroup_event *event = container_of(pt,
			struct cgroup_event, pt);

	event->wqh = wqh;
	add_wait_queue(wqh, &event->wait);
}
3490
3491
3492
3493
3494
3495
3496
/*
 * Parse input and register new cgroup event handler.
 *
 * Input must be in format '<event_fd> <control_fd> <args>'.
 * Interpretation of args is defined by control file implementation.
 */
static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
				      const char *buffer)
{
	struct cgroup_event *event = NULL;
	unsigned int efd, cfd;
	struct file *efile = NULL;
	struct file *cfile = NULL;
	char *endp;
	int ret;

	efd = simple_strtoul(buffer, &endp, 10);
	if (*endp != ' ')
		return -EINVAL;
	buffer = endp + 1;

	cfd = simple_strtoul(buffer, &endp, 10);
	if ((*endp != ' ') && (*endp != '\0'))
		return -EINVAL;
	buffer = endp + 1;	/* rest of the line is the optional args */

	event = kzalloc(sizeof(*event), GFP_KERNEL);
	if (!event)
		return -ENOMEM;
	event->cgrp = cgrp;
	INIT_LIST_HEAD(&event->list);
	init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
	init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
	INIT_WORK(&event->remove, cgroup_event_remove);

	efile = eventfd_fget(efd);
	if (IS_ERR(efile)) {
		ret = PTR_ERR(efile);
		goto fail;
	}

	event->eventfd = eventfd_ctx_fileget(efile);
	if (IS_ERR(event->eventfd)) {
		ret = PTR_ERR(event->eventfd);
		goto fail;
	}

	cfile = fget(cfd);
	if (!cfile) {
		ret = -EBADF;
		goto fail;
	}

	/* the process need read permission on control file */
	ret = file_permission(cfile, MAY_READ);
	if (ret < 0)
		goto fail;

	event->cft = __file_cft(cfile);
	if (IS_ERR(event->cft)) {
		ret = PTR_ERR(event->cft);
		goto fail;
	}

	/* the control file must support eventfd notification */
	if (!event->cft->register_event || !event->cft->unregister_event) {
		ret = -EINVAL;
		goto fail;
	}

	ret = event->cft->register_event(cgrp, event->cft,
			event->eventfd, buffer);
	if (ret)
		goto fail;

	/* eventfd already hung up: undo registration, report success */
	if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
		event->cft->unregister_event(cgrp, event->cft, event->eventfd);
		ret = 0;
		goto fail;
	}

	/*
	 * Events should be removed after rmdir of cgroup directory, but before
	 * destroying subsystem state objects. Let's take reference to cgroup
	 * directory dentry to do that.
	 */
	dget(cgrp->dentry);

	spin_lock(&cgrp->event_list_lock);
	list_add(&event->list, &cgrp->event_list);
	spin_unlock(&cgrp->event_list_lock);

	fput(cfile);
	fput(efile);

	return 0;

fail:
	/* unwind whatever subset of the above we managed to acquire */
	if (cfile)
		fput(cfile);

	if (event && event->eventfd && !IS_ERR(event->eventfd))
		eventfd_ctx_put(event->eventfd);

	if (!IS_ERR_OR_NULL(efile))
		fput(efile);

	kfree(event);

	return ret;
}
3601
/* read handler for "cgroup.clone_children": report the flag as 0/1 */
static u64 cgroup_clone_children_read(struct cgroup *cgrp,
					    struct cftype *cft)
{
	return clone_children(cgrp);
}
3607
/* write handler for "cgroup.clone_children": set/clear per @val */
static int cgroup_clone_children_write(struct cgroup *cgrp,
				     struct cftype *cft,
				     u64 val)
{
	if (val)
		set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
	else
		clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
	return 0;
}
3618
3619
3620
3621
3622
/*
 * Base set of control files added to every cgroup directory by
 * cgroup_populate_dir(). Newer files carry the "cgroup." prefix.
 */
#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
static struct cftype files[] = {
	{
		.name = "tasks",
		.open = cgroup_tasks_open,
		.write_u64 = cgroup_tasks_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
		.open = cgroup_procs_open,
		.write_u64 = cgroup_procs_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
	},
	{
		.name = CGROUP_FILE_GENERIC_PREFIX "event_control",
		.write_string = cgroup_write_event_control,
		.mode = S_IWUGO,
	},
	{
		.name = "cgroup.clone_children",
		.read_u64 = cgroup_clone_children_read,
		.write_u64 = cgroup_clone_children_write,
	},
};
3655
/* "release_agent" file - added only in a hierarchy's root cgroup */
static struct cftype cft_release_agent = {
	.name = "release_agent",
	.read_seq_string = cgroup_release_agent_show,
	.write_string = cgroup_release_agent_write,
	.max_write_len = PATH_MAX,
};
3662
/* fill a cgroup directory with the base files plus each subsystem's files */
static int cgroup_populate_dir(struct cgroup *cgrp)
{
	int err;
	struct cgroup_subsys *ss;

	/* First clear out any existing files */
	cgroup_clear_directory(cgrp->dentry);

	err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
	if (err < 0)
		return err;

	/* release_agent exists only in the hierarchy's root cgroup */
	if (cgrp == cgrp->top_cgroup) {
		if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
			return err;
	}

	for_each_subsys(cgrp->root, ss) {
		if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
			return err;
	}
	/* This cgroup is ready now */
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		/*
		 * Update id->css pointer and make this css visible from
		 * CSS ID functions. This pointer will be dereferenced
		 * from RCU-read-side without locks.
		 */
		if (css->id)
			rcu_assign_pointer(css->id->css, css);
	}

	return 0;
}
3698
/* initialize a freshly-created css and attach it to @cgrp's subsys slot */
static void init_cgroup_css(struct cgroup_subsys_state *css,
			       struct cgroup_subsys *ss,
			       struct cgroup *cgrp)
{
	css->cgroup = cgrp;
	atomic_set(&css->refcnt, 1);	/* base reference, dropped at rmdir */
	css->flags = 0;
	css->id = NULL;
	/* the dummy top cgroup's css is the root of its hierarchy */
	if (cgrp == dummytop)
		set_bit(CSS_ROOT, &css->flags);
	BUG_ON(cgrp->subsys[ss->subsys_id]);
	cgrp->subsys[ss->subsys_id] = css;
}
3712
3713static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
3714{
3715
3716 int i;
3717
3718
3719
3720
3721
3722 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3723 struct cgroup_subsys *ss = subsys[i];
3724 if (ss == NULL)
3725 continue;
3726 if (ss->root == root)
3727 mutex_lock(&ss->hierarchy_mutex);
3728 }
3729}
3730
3731static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
3732{
3733 int i;
3734
3735 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3736 struct cgroup_subsys *ss = subsys[i];
3737 if (ss == NULL)
3738 continue;
3739 if (ss->root == root)
3740 mutex_unlock(&ss->hierarchy_mutex);
3741 }
3742}
3743
3744
3745
3746
3747
3748
3749
3750
3751
/*
 * cgroup_create - create a cgroup
 * @parent: cgroup that will be parent of the new cgroup
 * @dentry: dentry of the new cgroup
 * @mode: mode to set on new inode
 *
 * Must be called with the mutex on the parent inode held
 */
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			     mode_t mode)
{
	struct cgroup *cgrp;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
	struct super_block *sb = root->sb;

	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
	if (!cgrp)
		return -ENOMEM;

	/*
	 * Grab a reference on the superblock so the superblock doesn't
	 * vanish while this cgroup exists; released on the error path
	 * below or when the cgroup is finally destroyed.
	 */
	atomic_inc(&sb->s_active);

	mutex_lock(&cgroup_mutex);

	init_cgroup_housekeeping(cgrp);

	cgrp->parent = parent;
	cgrp->root = parent->root;
	cgrp->top_cgroup = parent->top_cgroup;

	/* children inherit the parent's notify/clone settings */
	if (notify_on_release(parent))
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);

	if (clone_children(parent))
		set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);

	for_each_subsys(root, ss) {
		struct cgroup_subsys_state *css = ss->create(ss, cgrp);

		if (IS_ERR(css)) {
			err = PTR_ERR(css);
			goto err_destroy;
		}
		init_cgroup_css(css, ss, cgrp);
		if (ss->use_id) {
			err = alloc_css_id(ss, parent, cgrp);
			if (err)
				goto err_destroy;
		}
		/* At error, ->destroy() callback has to free assigned ID. */
		if (clone_children(parent) && ss->post_clone)
			ss->post_clone(ss, cgrp);
	}

	cgroup_lock_hierarchy(root);
	list_add(&cgrp->sibling, &cgrp->parent->children);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups++;

	err = cgroup_create_dir(cgrp, dentry, mode);
	if (err < 0)
		goto err_remove;

	/* The cgroup directory was pre-locked for us */
	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));

	err = cgroup_populate_dir(cgrp);
	/* If err < 0, we have a half-filled directory - oh well ;) */

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	return 0;

 err_remove:

	/* unlink from the parent's child list again */
	cgroup_lock_hierarchy(root);
	list_del(&cgrp->sibling);
	cgroup_unlock_hierarchy(root);
	root->number_of_cgroups--;

 err_destroy:

	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	/* Release the reference count that we took on the superblock */
	deactivate_super(sb);

	kfree(cgrp);
	return err;
}
3846
/* mkdir(2) entry point for cgroupfs: create a child cgroup */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	struct cgroup *c_parent = dentry->d_parent->d_fsdata;

	/* the vfs holds inode->i_mutex already */
	return cgroup_create(c_parent, dentry, mode | S_IFDIR);
}
3854
3855static int cgroup_has_css_refs(struct cgroup *cgrp)
3856{
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866 int i;
3867
3868
3869
3870
3871
3872 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3873 struct cgroup_subsys *ss = subsys[i];
3874 struct cgroup_subsys_state *css;
3875
3876 if (ss == NULL || ss->root != cgrp->root)
3877 continue;
3878 css = cgrp->subsys[ss->subsys_id];
3879
3880
3881
3882
3883
3884
3885 if (css && (atomic_read(&css->refcnt) > 1))
3886 return 1;
3887 }
3888 return 0;
3889}
3890
3891
3892
3893
3894
3895
3896
/*
 * Atomically mark all (or else none) of the cgroup's CSS objects as
 * CSS_REMOVED. Return true on success, or false if the cgroup has
 * busy subsystems. Call with cgroup_mutex held
 */
static int cgroup_clear_css_refs(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	unsigned long flags;
	bool failed = false;
	local_irq_save(flags);
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		int refcnt;
		while (1) {
			/* We can only remove a CSS with a refcnt==1 */
			refcnt = atomic_read(&css->refcnt);
			if (refcnt > 1) {
				failed = true;
				goto done;
			}
			BUG_ON(!refcnt);
			/*
			 * Drop the refcnt to 0 while we check other
			 * subsystems. This will cause any racing
			 * css_tryget() to spin until we set the
			 * CSS_REMOVED bits or abort
			 */
			if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
				break;
			cpu_relax();
		}
	}
 done:
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		if (failed) {
			/*
			 * Restore old refcnt if we previously managed
			 * to clear it from 1 to 0
			 */
			if (!atomic_read(&css->refcnt))
				atomic_set(&css->refcnt, 1);
		} else {
			/* Commit the fact that the CSS is removed */
			set_bit(CSS_REMOVED, &css->flags);
		}
	}
	local_irq_restore(flags);
	return !failed;
}
3943
/* rmdir(2) entry point for cgroupfs: tear down an empty, unused cgroup */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	DEFINE_WAIT(wait);
	struct cgroup_event *event, *tmp;
	int ret;

	/* the vfs holds both inode->i_mutex already */
again:
	mutex_lock(&cgroup_mutex);
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	mutex_unlock(&cgroup_mutex);

	/*
	 * In general, subsystem has no css->refcnt after pre_destroy(). But
	 * in racy cases, subsystem may have to get css->refcnt after
	 * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
	 * makes rmdir return -EBUSY too often. To avoid that, we use waitqueue
	 * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
	 * and subsystem's reference count handling. Please see css_get/put
	 * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
	 */
	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	/*
	 * Call pre_destroy handlers of subsys. Notify subsystems
	 * that rmdir() request comes.
	 */
	ret = cgroup_call_pre_destroy(cgrp);
	if (ret) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		return ret;
	}

	mutex_lock(&cgroup_mutex);
	parent = cgrp->parent;
	/* re-check emptiness: pre_destroy ran without cgroup_mutex */
	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
	if (!cgroup_clear_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		/*
		 * Because someone may call cgroup_wakeup_rmdir_waiter() before
		 * prepare_to_wait(), we need to check this flag.
		 */
		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
			schedule();
		finish_wait(&cgroup_rmdir_waitq, &wait);
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		if (signal_pending(current))
			return -EINTR;
		goto again;
	}
	/* NO css_tryget() can succeed after here. */
	finish_wait(&cgroup_rmdir_waitq, &wait);
	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del_init(&cgrp->release_list);
	spin_unlock(&release_list_lock);

	cgroup_lock_hierarchy(cgrp->root);
	/* delete this cgroup from parent->children */
	list_del_init(&cgrp->sibling);
	cgroup_unlock_hierarchy(cgrp->root);

	d = dget(cgrp->dentry);

	cgroup_d_remove_dir(d);
	dput(d);

	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	/*
	 * Unregister events and notify userspace.
	 * Notify userspace about cgroup removing only after rmdir of cgroup
	 * directory to avoid race between userspace and kernelspace
	 */
	spin_lock(&cgrp->event_list_lock);
	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
		list_del(&event->list);
		remove_wait_queue(event->wqh, &event->wait);
		eventfd_signal(event->eventfd, 1);
		schedule_work(&event->remove);
	}
	spin_unlock(&cgrp->event_list_lock);

	mutex_unlock(&cgroup_mutex);
	return 0;
}
4049
/* boot-time registration of a built-in subsystem */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	/* Create the top cgroup state for this subsystem */
	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;
	css = ss->create(ss, dummytop);
	/* We don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_cgroup_css(css, ss, dummytop);

	/* Update the init_css_set to contain a subsys
	 * pointer to this state - since the subsystem is
	 * newly registered, all tasks and hence the
	 * init_css_set is in the subsystem's top cgroup. */
	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];

	need_forkexit_callback |= ss->fork || ss->exit;

	/* At system boot, before all subsystems have been
	 * registered, no tasks have been forked, so we don't
	 * need to invoke fork callbacks here. */
	BUG_ON(!list_empty(&init_task.tasks));

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	ss->active = 1;

	/* this function shouldn't be used with modular subsystems, since they
	 * need to register a subsys_id, among other things */
	BUG_ON(ss->module);
}
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
/**
 * cgroup_load_subsys: load and register a modular subsystem at runtime
 * @ss: the subsystem to load
 *
 * This function should be called in a modular subsystem's initcall. If the
 * subsystem is built as a module, it will be assigned a new subsys_id and set
 * up for use. If the subsystem is built-in, this has already been done by
 * cgroup_init_subsys and we only sanity-check. Returns 0 on success or a
 * negative errno.
 */
int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
{
	int i;
	struct cgroup_subsys_state *css;

	/* check name and function validity */
	if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
	    ss->create == NULL || ss->destroy == NULL)
		return -EINVAL;

	/*
	 * we don't support callbacks in modular subsystems. this check is
	 * before the ss->module check for consistency; a subsystem that could
	 * be a module should still have no callbacks even if the user isn't
	 * compiling it as one.
	 */
	if (ss->fork || ss->exit)
		return -EINVAL;

	/*
	 * an optionally modular subsystem is built-in: we want to do nothing,
	 * since cgroup_init_subsys will have already taken care of it.
	 */
	if (ss->module == NULL) {
		/* a few sanity checks */
		BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
		BUG_ON(subsys[ss->subsys_id] != ss);
		return 0;
	}

	/*
	 * need to register a subsys id before anything else - for example,
	 * init_cgroup_css needs it.
	 */
	mutex_lock(&cgroup_mutex);
	/* find the first empty slot in the array */
	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
		if (subsys[i] == NULL)
			break;
	}
	if (i == CGROUP_SUBSYS_COUNT) {
		/* maximum number of subsystems already registered! */
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	/* assign ourselves the subsys_id */
	ss->subsys_id = i;
	subsys[i] = ss;

	/*
	 * no ss->create seems to need anything important in the ss struct, so
	 * this can happen first (i.e. before the rootnode attachment).
	 */
	css = ss->create(ss, dummytop);
	if (IS_ERR(css)) {
		/* failure case - need to deassign the subsys[] slot. */
		subsys[i] = NULL;
		mutex_unlock(&cgroup_mutex);
		return PTR_ERR(css);
	}

	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;

	/* our new subsystem will be attached to the dummy hierarchy. */
	init_cgroup_css(css, ss, dummytop);
	/* init_idr must be after init_cgroup_css because it sets css->id. */
	if (ss->use_id) {
		int ret = cgroup_init_idr(ss, css);
		if (ret) {
			dummytop->subsys[ss->subsys_id] = NULL;
			ss->destroy(ss, dummytop);
			subsys[i] = NULL;
			mutex_unlock(&cgroup_mutex);
			return ret;
		}
	}

	/*
	 * Now we need to entangle the css into the existing css_sets. unlike
	 * in cgroup_init_subsys, there are now multiple css_sets, so each one
	 * will need a new pointer to it; done by iterating the css_set_table.
	 * furthermore, modifying the existing css_sets will corrupt the hash
	 * table state, so each changed css_set will need its hash recomputed.
	 * this is all done under the css_set_lock.
	 */
	write_lock(&css_set_lock);
	for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
		struct css_set *cg;
		struct hlist_node *node, *tmp;
		struct hlist_head *bucket = &css_set_table[i], *new_bucket;

		hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
			/* skip entries that we already rehashed */
			if (cg->subsys[ss->subsys_id])
				continue;
			/* remove existing entry */
			hlist_del(&cg->hlist);
			/* set new value */
			cg->subsys[ss->subsys_id] = css;
			/* recompute hash and restore entry */
			new_bucket = css_set_hash(cg->subsys);
			hlist_add_head(&cg->hlist, new_bucket);
		}
	}
	write_unlock(&css_set_lock);

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	ss->active = 1;

	/* success! */
	mutex_unlock(&cgroup_mutex);
	return 0;
}
4210EXPORT_SYMBOL_GPL(cgroup_load_subsys);
4211
4212
4213
4214
4215
4216
4217
4218
4219
/**
 * cgroup_unload_subsys: unload a modular subsystem
 * @ss: the subsystem to unload
 *
 * This function should be called in a modular subsystem's exitcall. When
 * it is invoked the subsystem must not be attached to any hierarchy other
 * than the dummy one (rootnode), i.e. not mounted/in use.
 */
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
	struct cg_cgroup_link *link;
	struct hlist_head *hhead;

	BUG_ON(ss->module == NULL);

	/*
	 * we shouldn't be called if the subsystem is in use, and the use of
	 * try_module_get in parse_cgroupfs_options should ensure that it
	 * doesn't start being used while we're killing it off.
	 */
	BUG_ON(ss->root != &rootnode);

	mutex_lock(&cgroup_mutex);
	/* deassign the subsys_id */
	BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
	subsys[ss->subsys_id] = NULL;

	/* remove subsystem from rootnode's list of subsystems */
	list_del_init(&ss->sibling);

	/*
	 * disentangle the css from all css_sets attached to the dummytop. as
	 * in loading, we need to pay our respects to the hashtable gods.
	 */
	write_lock(&css_set_lock);
	list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
		struct css_set *cg = link->cg;

		hlist_del(&cg->hlist);
		BUG_ON(!cg->subsys[ss->subsys_id]);
		cg->subsys[ss->subsys_id] = NULL;
		hhead = css_set_hash(cg->subsys);
		hlist_add_head(&cg->hlist, hhead);
	}
	write_unlock(&css_set_lock);

	/*
	 * remove subsystem's css from the dummytop and free it - need to free
	 * before clearing the pointer because ss->destroy needs the
	 * cgrp->subsys pointer to find its state.
	 */
	ss->destroy(ss, dummytop);
	dummytop->subsys[ss->subsys_id] = NULL;

	mutex_unlock(&cgroup_mutex);
}
4269EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
4270
4271
4272
4273
4274
4275
4276
/**
 * cgroup_init_early - cgroup initialization at system boot
 *
 * Initialize cgroups at system boot, and initialize any
 * subsystems that request early init.
 */
int __init cgroup_init_early(void)
{
	int i;
	atomic_set(&init_css_set.refcount, 1);
	INIT_LIST_HEAD(&init_css_set.cg_links);
	INIT_LIST_HEAD(&init_css_set.tasks);
	INIT_HLIST_NODE(&init_css_set.hlist);
	css_set_count = 1;
	init_cgroup_root(&rootnode);
	root_count = 1;
	init_task.cgroups = &init_css_set;

	/* link init_css_set to the dummy hierarchy's top cgroup */
	init_css_set_link.cg = &init_css_set;
	init_css_set_link.cgrp = dummytop;
	list_add(&init_css_set_link.cgrp_link_list,
		 &rootnode.top_cgroup.css_sets);
	list_add(&init_css_set_link.cg_link_list,
		 &init_css_set.cg_links);

	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&css_set_table[i]);

	/* at bootup time, we don't worry about modular subsystems */
	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		BUG_ON(!ss->name);
		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
		BUG_ON(!ss->create);
		BUG_ON(!ss->destroy);
		if (ss->subsys_id != i) {
			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
			       ss->name, ss->subsys_id);
			BUG();
		}

		if (ss->early_init)
			cgroup_init_subsys(ss);
	}
	return 0;
}
4318
4319
4320
4321
4322
4323
4324
/**
 * cgroup_init - cgroup initialization
 *
 * Register cgroup filesystem and /proc file, and initialize
 * any subsystems that didn't request early init.
 */
int __init cgroup_init(void)
{
	int err;
	int i;
	struct hlist_head *hhead;

	err = bdi_init(&cgroup_backing_dev_info);
	if (err)
		return err;

	/* at bootup time, we don't worry about modular subsystems */
	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		if (!ss->early_init)
			cgroup_init_subsys(ss);
		if (ss->use_id)
			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
	}

	/* Add init_css_set to the hash table */
	hhead = css_set_hash(init_css_set.subsys);
	hlist_add_head(&init_css_set.hlist, hhead);
	BUG_ON(!init_root_id(&rootnode));

	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
	if (!cgroup_kobj) {
		err = -ENOMEM;
		goto out;
	}

	err = register_filesystem(&cgroup_fs_type);
	if (err < 0) {
		kobject_put(cgroup_kobj);
		goto out;
	}

	/* /proc/cgroups reports per-subsystem hierarchy state */
	proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

out:
	if (err)
		bdi_destroy(&cgroup_backing_dev_info);

	return err;
}
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
/*
 * proc_cgroup_show()
 *  - Print task's cgroup paths into seq_file, one line for each hierarchy
 *  - Used for /proc/<pid>/cgroup.
 *  - Holds cgroup_mutex across the walk so hierarchies and the task's
 *    cgroup assignments can't change underneath us.
 */
static int proc_cgroup_show(struct seq_file *m, void *v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	int retval;
	struct cgroupfs_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		goto out;

	retval = -ESRCH;
	pid = m->private;
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk)
		goto out_free;

	retval = 0;

	mutex_lock(&cgroup_mutex);

	for_each_active_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int count = 0;

		/* format: hierarchy-id:subsys-list[,name=...]:path */
		seq_printf(m, "%d:", root->hierarchy_id);
		for_each_subsys(root, ss)
			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		seq_putc(m, ':');
		cgrp = task_cgroup_from_root(tsk, root);
		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
		if (retval < 0)
			goto out_unlock;
		seq_puts(m, buf);
		seq_putc(m, '\n');
	}

out_unlock:
	mutex_unlock(&cgroup_mutex);
	put_task_struct(tsk);
out_free:
	kfree(buf);
out:
	return retval;
}
4434
/* open handler for /proc/<pid>/cgroup; hands the pid to the seq_file */
static int cgroup_open(struct inode *inode, struct file *file)
{
	struct pid *pid = PROC_I(inode)->pid;
	return single_open(file, proc_cgroup_show, pid);
}
4440
/* file_operations for /proc/<pid>/cgroup */
const struct file_operations proc_cgroup_operations = {
	.open		= cgroup_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
4447
4448
/* Display information about each subsystem and each hierarchy */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
	int i;

	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
	/*
	 * ideally we don't want subsystems moving around while we do this.
	 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
	 * subsys/hierarchy state.
	 */
	mutex_lock(&cgroup_mutex);
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		/* modular subsystems may leave empty slots */
		if (ss == NULL)
			continue;
		seq_printf(m, "%s\t%d\t%d\t%d\n",
			   ss->name, ss->root->hierarchy_id,
			   ss->root->number_of_cgroups, !ss->disabled);
	}
	mutex_unlock(&cgroup_mutex);
	return 0;
}
4471
/* open handler for /proc/cgroups */
static int cgroupstats_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_cgroupstats_show, NULL);
}
4476
/* file_operations for /proc/cgroups */
static const struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
/**
 * cgroup_fork - attach newly forked task to its parents cgroup.
 * @child: pointer to task_struct of forking parent process.
 *
 * A task inherits its parent's css_set at fork(): take a reference on
 * the parent's current css_set and point the child at it. The child's
 * cg_list starts empty; it is linked up later in cgroup_post_fork().
 */
void cgroup_fork(struct task_struct *child)
{
	/* task_lock keeps current->cgroups stable while we copy and ref it */
	task_lock(current);
	child->cgroups = current->cgroups;
	get_css_set(child->cgroups);
	task_unlock(current);
	INIT_LIST_HEAD(&child->cg_list);
}
4508
4509
4510
4511
4512
4513
4514
4515
4516
/**
 * cgroup_fork_callbacks - run fork callbacks
 * @child: the new task
 *
 * Called on a new task very soon before adding it to the
 * tasklist. No need to take any locks since no-one can
 * be operating on this task.
 */
void cgroup_fork_callbacks(struct task_struct *child)
{
	if (need_forkexit_callback) {
		int i;
		/*
		 * forkexit callbacks are only supported for builtin
		 * subsystems, and the builtin section of the subsys array is
		 * immutable, so we don't need to lock the subsys array here.
		 */
		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->fork)
				ss->fork(ss, child);
		}
	}
}
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
/**
 * cgroup_post_fork - called on a new task after adding it to the task list
 * @child: the task in question
 *
 * Adds the task to the list running through its css_set if necessary.
 * Has to be after the task is visible on the task list in case we race
 * with the first call to cgroup_iter_start() - to guarantee that the
 * new task ends up on its list.
 */
void cgroup_post_fork(struct task_struct *child)
{
	if (use_task_css_set_links) {
		write_lock(&css_set_lock);
		task_lock(child);
		if (list_empty(&child->cg_list))
			list_add(&child->cg_list, &child->cgroups->tasks);
		task_unlock(child);
		write_unlock(&css_set_lock);
	}
}
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
/**
 * cgroup_exit - detach cgroup from exiting task
 * @tsk: pointer to task_struct of exiting process
 * @run_callbacks: run exit callbacks?
 *
 * Description: Detach cgroup from @tsk, reassign it to the init_css_set
 * and release the old css_set reference.
 */
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
	struct css_set *cg;
	int i;

	/*
	 * Unlink from the css_set task list if necessary.
	 * Optimistically check cg_list before taking
	 * css_set_lock
	 */
	if (!list_empty(&tsk->cg_list)) {
		write_lock(&css_set_lock);
		if (!list_empty(&tsk->cg_list))
			list_del_init(&tsk->cg_list);
		write_unlock(&css_set_lock);
	}

	/* Reassign the task to the init_css_set. */
	task_lock(tsk);
	cg = tsk->cgroups;
	tsk->cgroups = &init_css_set;

	if (run_callbacks && need_forkexit_callback) {
		/*
		 * modular subsystems can't use callbacks, so no need to lock
		 * the subsys array
		 */
		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->exit) {
				struct cgroup *old_cgrp =
					rcu_dereference_raw(cg->subsys[i])->cgroup;
				struct cgroup *cgrp = task_cgroup(tsk, i);
				ss->exit(ss, cgrp, old_cgrp, tsk);
			}
		}
	}
	task_unlock(tsk);

	if (cg)
		put_css_set_taskexit(cg);
}
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
4646{
4647 int ret;
4648 struct cgroup *target;
4649
4650 if (cgrp == dummytop)
4651 return 1;
4652
4653 target = task_cgroup_from_root(task, cgrp->root);
4654 while (cgrp != target && cgrp!= cgrp->top_cgroup)
4655 cgrp = cgrp->parent;
4656 ret = (cgrp == target);
4657 return ret;
4658}
4659
/* queue @cgrp for release-agent notification if it has become removable */
static void check_for_release(struct cgroup *cgrp)
{
	/* All of these checks rely on RCU to keep the cgroup
	 * structure alive */
	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
	    && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
		/* Control Group is currently removeable. If it's not
		 * already queued for a userspace notification, queue
		 * it now */
		int need_schedule_work = 0;
		spin_lock(&release_list_lock);
		if (!cgroup_is_removed(cgrp) &&
		    list_empty(&cgrp->release_list)) {
			list_add(&cgrp->release_list, &release_list);
			need_schedule_work = 1;
		}
		spin_unlock(&release_list_lock);
		if (need_schedule_work)
			schedule_work(&release_agent_work);
	}
}
4681
4682
/* Caller must verify that the css is not for root cgroup */
void __css_put(struct cgroup_subsys_state *css, int count)
{
	struct cgroup *cgrp = css->cgroup;
	int val;
	rcu_read_lock();
	val = atomic_sub_return(count, &css->refcnt);
	if (val == 1) {
		/* only the base reference remains: cgroup may be removable */
		if (notify_on_release(cgrp)) {
			set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}
		/* let a blocked rmdir() retry now that we dropped our ref */
		cgroup_wakeup_rmdir_waiter(cgrp);
	}
	rcu_read_unlock();
	WARN_ON_ONCE(val < 1);
}
4699EXPORT_SYMBOL_GPL(__css_put);
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
/*
 * Notify userspace when a cgroup is released, by running the
 * configured release agent with the name of the cgroup (path
 * relative to the root of cgroup file system) as the argument.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf = NULL, *agentbuf = NULL;
		struct cgroup *cgrp = list_entry(release_list.next,
						    struct cgroup,
						    release_list);
		list_del_init(&cgrp->release_list);
		spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf)
			goto continue_free;
		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
			goto continue_free;
		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
		if (!agentbuf)
			goto continue_free;

		i = 0;
		argv[i++] = agentbuf;
		argv[i++] = pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal command environment */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/* Drop the lock while we invoke the usermode helper,
		 * since the exec could involve hitting disk and hence
		 * be a slow process */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		mutex_lock(&cgroup_mutex);
 continue_free:
		kfree(pathbuf);
		kfree(agentbuf);
		spin_lock(&release_list_lock);
	}
	spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
4772
4773static int __init cgroup_disable(char *str)
4774{
4775 int i;
4776 char *token;
4777
4778 while ((token = strsep(&str, ",")) != NULL) {
4779 if (!*token)
4780 continue;
4781
4782
4783
4784
4785 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
4786 struct cgroup_subsys *ss = subsys[i];
4787
4788 if (!strcmp(token, ss->name)) {
4789 ss->disabled = 1;
4790 printk(KERN_INFO "Disabling %s control group"
4791 " subsystem\n", ss->name);
4792 break;
4793 }
4794 }
4795 }
4796 return 1;
4797}
4798__setup("cgroup_disable=", cgroup_disable);
4799
4800
4801
4802
4803
4804
4805
4806
4807unsigned short css_id(struct cgroup_subsys_state *css)
4808{
4809 struct css_id *cssid;
4810
4811
4812
4813
4814
4815
4816 cssid = rcu_dereference_check(css->id,
4817 rcu_read_lock_held() || atomic_read(&css->refcnt));
4818
4819 if (cssid)
4820 return cssid->id;
4821 return 0;
4822}
4823EXPORT_SYMBOL_GPL(css_id);
4824
4825unsigned short css_depth(struct cgroup_subsys_state *css)
4826{
4827 struct css_id *cssid;
4828
4829 cssid = rcu_dereference_check(css->id,
4830 rcu_read_lock_held() || atomic_read(&css->refcnt));
4831
4832 if (cssid)
4833 return cssid->depth;
4834 return 0;
4835}
4836EXPORT_SYMBOL_GPL(css_depth);
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851bool css_is_ancestor(struct cgroup_subsys_state *child,
4852 const struct cgroup_subsys_state *root)
4853{
4854 struct css_id *child_id;
4855 struct css_id *root_id;
4856 bool ret = true;
4857
4858 rcu_read_lock();
4859 child_id = rcu_dereference(child->id);
4860 root_id = rcu_dereference(root->id);
4861 if (!child_id
4862 || !root_id
4863 || (child_id->depth < root_id->depth)
4864 || (child_id->stack[root_id->depth] != root_id->id))
4865 ret = false;
4866 rcu_read_unlock();
4867 return ret;
4868}
4869
/*
 * free_css_id - detach and free the css_id of @css.
 * @ss:  owning subsystem (must have use_id set)
 * @css: subsystem state whose id is being torn down
 *
 * Severs both directions of the css <-> css_id link before removing
 * the id from the idr, so concurrent RCU readers see NULL rather than
 * a stale pointer; the id itself is freed after a grace period.
 */
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
	struct css_id *id = css->id;

	if (!id)
		return;

	BUG_ON(!ss->use_id);

	/* unlink under RCU semantics before the idr removal below */
	rcu_assign_pointer(id->css, NULL);
	rcu_assign_pointer(css->id, NULL);
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, id->id);
	spin_unlock(&ss->id_lock);
	/* defer the actual free until readers are done */
	kfree_rcu(id, rcu_head);
}
EXPORT_SYMBOL_GPL(free_css_id);
4887
4888
4889
4890
4891
4892
/*
 * get_new_cssid - allocate a css_id with room for a @depth+1 entry
 * ancestor-id stack and register it in @ss->idr.
 *
 * Ids start at 1 (0 is reserved to mean "no id") and are capped at
 * CSS_ID_MAX.  Returns the new css_id with ->id and ->depth filled in
 * (the stack is left for the caller to populate), or an ERR_PTR.
 */
static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
{
	struct css_id *newid;
	int myid, error, size;

	BUG_ON(!ss->use_id);

	/* flexible stack[]: one slot per level, including this one */
	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
	newid = kzalloc(size, GFP_KERNEL);
	if (!newid)
		return ERR_PTR(-ENOMEM);

	/* preload idr memory outside the spinlock (old idr API) */
	if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
		error = -ENOMEM;
		goto err_out;
	}
	spin_lock(&ss->id_lock);
	/* start from 1: id 0 means "no id" */
	error = idr_get_new_above(&ss->idr, newid, 1, &myid);
	spin_unlock(&ss->id_lock);

	/* NOTE(review): any idr failure is reported as -ENOSPC here */
	if (error) {
		error = -ENOSPC;
		goto err_out;
	}
	if (myid > CSS_ID_MAX)
		goto remove_idr;

	newid->id = myid;
	newid->depth = depth;
	return newid;
remove_idr:
	error = -ENOSPC;
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, myid);
	spin_unlock(&ss->id_lock);
err_out:
	kfree(newid);
	return ERR_PTR(error);

}
4935
/*
 * cgroup_init_idr - initialize @ss's css_id machinery and give the
 * root css (@rootcss) id/depth 0.  Called once per subsystem that
 * enables use_id.  Returns 0 or a negative errno.
 */
static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
					       struct cgroup_subsys_state *rootcss)
{
	struct css_id *newid;

	spin_lock_init(&ss->id_lock);
	idr_init(&ss->idr);

	newid = get_new_cssid(ss, 0);
	if (IS_ERR(newid))
		return PTR_ERR(newid);

	/* root's ancestor stack contains only itself */
	newid->stack[0] = newid->id;
	newid->css = rootcss;
	rootcss->id = newid;
	return 0;
}
4953
/*
 * alloc_css_id - allocate a css_id for @child's css under @ss, one
 * level deeper than @parent's, inheriting the parent's ancestor-id
 * stack.  Returns 0 or a negative errno.
 */
static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
			struct cgroup *child)
{
	int subsys_id, i, depth = 0;
	struct cgroup_subsys_state *parent_css, *child_css;
	struct css_id *child_id, *parent_id;

	subsys_id = ss->subsys_id;
	parent_css = parent->subsys[subsys_id];
	child_css = child->subsys[subsys_id];
	parent_id = parent_css->id;
	depth = parent_id->depth + 1;

	child_id = get_new_cssid(ss, depth);
	if (IS_ERR(child_id))
		return PTR_ERR(child_id);

	/* copy ancestors' ids, then append our own at the new depth */
	for (i = 0; i < depth; i++)
		child_id->stack[i] = parent_id->stack[i];
	child_id->stack[depth] = child_id->id;

	/*
	 * Publish the fully-initialized id; rcu_assign_pointer orders
	 * the stack writes above before the pointer becomes visible.
	 */
	rcu_assign_pointer(child_css->id, child_id);

	return 0;
}
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
4992{
4993 struct css_id *cssid = NULL;
4994
4995 BUG_ON(!ss->use_id);
4996 cssid = idr_find(&ss->idr, id);
4997
4998 if (unlikely(!cssid))
4999 return NULL;
5000
5001 return rcu_dereference(cssid->css);
5002}
5003EXPORT_SYMBOL_GPL(css_lookup);
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
/*
 * css_get_next - find the next live css under @root with id >= @id.
 * @ss:      subsystem to scan
 * @id:      id to start the scan from (inclusive)
 * @root:    only csses whose id stack passes through root's id qualify
 * @foundid: out-param, set to the id of the returned css
 *
 * Returns NULL when the scan runs past the last registered id.
 * Caller is expected to hold the RCU read lock across the call.
 */
struct cgroup_subsys_state *
css_get_next(struct cgroup_subsys *ss, int id,
	     struct cgroup_subsys_state *root, int *foundid)
{
	struct cgroup_subsys_state *ret = NULL;
	struct css_id *tmp;
	int tmpid;
	int rootid = css_id(root);
	int depth = css_depth(root);

	/* root with no id: nothing can be below it */
	if (!rootid)
		return NULL;

	BUG_ON(!ss->use_id);

	tmpid = id;
	while (1) {
		/*
		 * idr_get_next() scans forward from tmpid and updates
		 * it to the id actually found; the lock only protects
		 * the idr structure, not the returned entry.
		 */
		spin_lock(&ss->id_lock);
		tmp = idr_get_next(&ss->idr, &tmpid);
		spin_unlock(&ss->id_lock);

		if (!tmp)
			break;
		/* is this css at-or-below root in the hierarchy? */
		if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
			ret = rcu_dereference(tmp->css);
			if (ret) {
				*foundid = tmpid;
				break;
			}
		}
		/* skip dead or out-of-subtree entries and continue */
		tmpid = tmpid + 1;
	}
	return ret;
}
5054
5055
5056
5057
5058struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
5059{
5060 struct cgroup *cgrp;
5061 struct inode *inode;
5062 struct cgroup_subsys_state *css;
5063
5064 inode = f->f_dentry->d_inode;
5065
5066 if (inode->i_op != &cgroup_dir_inode_operations)
5067 return ERR_PTR(-EBADF);
5068
5069 if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
5070 return ERR_PTR(-EINVAL);
5071
5072
5073 cgrp = __d_cgrp(f->f_dentry);
5074 css = cgrp->subsys[id];
5075 return css ? css : ERR_PTR(-ENOENT);
5076}
5077
5078#ifdef CONFIG_CGROUP_DEBUG
5079static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
5080 struct cgroup *cont)
5081{
5082 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
5083
5084 if (!css)
5085 return ERR_PTR(-ENOMEM);
5086
5087 return css;
5088}
5089
5090static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
5091{
5092 kfree(cont->subsys[debug_subsys_id]);
5093}
5094
5095static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
5096{
5097 return atomic_read(&cont->count);
5098}
5099
5100static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
5101{
5102 return cgroup_task_count(cont);
5103}
5104
5105static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
5106{
5107 return (u64)(unsigned long)current->cgroups;
5108}
5109
5110static u64 current_css_set_refcount_read(struct cgroup *cont,
5111 struct cftype *cft)
5112{
5113 u64 count;
5114
5115 rcu_read_lock();
5116 count = atomic_read(¤t->cgroups->refcount);
5117 rcu_read_unlock();
5118 return count;
5119}
5120
/*
 * Dump, for current's css_set, one line per hierarchy it belongs to:
 * the hierarchy id and the cgroup's directory name in that hierarchy.
 */
static int current_css_set_cg_links_read(struct cgroup *cont,
					 struct cftype *cft,
					 struct seq_file *seq)
{
	struct cg_cgroup_link *link;
	struct css_set *cg;

	/* css_set_lock stabilizes the cg_links list while we walk it */
	read_lock(&css_set_lock);
	rcu_read_lock();
	cg = rcu_dereference(current->cgroups);
	list_for_each_entry(link, &cg->cg_links, cg_link_list) {
		struct cgroup *c = link->cgrp;
		const char *name;

		/* the root cgroup may have no dentry yet */
		if (c->dentry)
			name = c->dentry->d_name.name;
		else
			name = "?";
		seq_printf(seq, "Root %d group %s\n",
			   c->root->hierarchy_id, name);
	}
	rcu_read_unlock();
	read_unlock(&css_set_lock);
	return 0;
}
5146
/* Cap on how many tasks each css_set listing will enumerate. */
#define MAX_TASKS_SHOWN_PER_CSS 25
/*
 * Dump every css_set linked to this cgroup, with (up to
 * MAX_TASKS_SHOWN_PER_CSS) pids of tasks in each set.
 */
static int cgroup_css_links_read(struct cgroup *cont,
				 struct cftype *cft,
				 struct seq_file *seq)
{
	struct cg_cgroup_link *link;

	/* protects both the css_sets list and each set's task list */
	read_lock(&css_set_lock);
	list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
		struct css_set *cg = link->cg;
		struct task_struct *task;
		int count = 0;
		seq_printf(seq, "css_set %p\n", cg);
		list_for_each_entry(task, &cg->tasks, cg_list) {
			if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
				/* truncate overly long task lists */
				seq_puts(seq, "  ...\n");
				break;
			} else {
				seq_printf(seq, "  task %d\n",
					   task_pid_vnr(task));
			}
		}
	}
	read_unlock(&css_set_lock);
	return 0;
}
5173
5174static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
5175{
5176 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
5177}
5178
/*
 * Control files exported by the debug subsystem.  All are read-only
 * introspection aids; entries are created in array order.
 */
static struct cftype debug_files[] =  {
	{
		.name = "cgroup_refcount",
		.read_u64 = cgroup_refcount_read,
	},
	{
		.name = "taskcount",
		.read_u64 = debug_taskcount_read,
	},

	{
		.name = "current_css_set",
		.read_u64 = current_css_set_read,
	},

	{
		.name = "current_css_set_refcount",
		.read_u64 = current_css_set_refcount_read,
	},

	{
		.name = "current_css_set_cg_links",
		.read_seq_string = current_css_set_cg_links_read,
	},

	{
		.name = "cgroup_css_links",
		.read_seq_string = cgroup_css_links_read,
	},

	{
		.name = "releasable",
		.read_u64 = releasable_read,
	},
};
5214
5215static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
5216{
5217 return cgroup_add_files(cont, ss, debug_files,
5218 ARRAY_SIZE(debug_files));
5219}
5220
/* Registration record for the debug subsystem (CONFIG_CGROUP_DEBUG). */
struct cgroup_subsys debug_subsys = {
	.name = "debug",
	.create = debug_create,
	.destroy = debug_destroy,
	.populate = debug_populate,
	.subsys_id = debug_subsys_id,
};
5228#endif
5229