1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include <linux/cgroup.h>
26#include <linux/ctype.h>
27#include <linux/errno.h>
28#include <linux/fs.h>
29#include <linux/kernel.h>
30#include <linux/list.h>
31#include <linux/mm.h>
32#include <linux/mutex.h>
33#include <linux/mount.h>
34#include <linux/pagemap.h>
35#include <linux/proc_fs.h>
36#include <linux/rcupdate.h>
37#include <linux/sched.h>
38#include <linux/backing-dev.h>
39#include <linux/seq_file.h>
40#include <linux/slab.h>
41#include <linux/magic.h>
42#include <linux/spinlock.h>
43#include <linux/string.h>
44#include <linux/sort.h>
45#include <linux/kmod.h>
46#include <linux/delayacct.h>
47#include <linux/cgroupstats.h>
48#include <linux/hash.h>
49#include <linux/namei.h>
50#include <linux/smp_lock.h>
51#include <linux/pid_namespace.h>
52#include <linux/idr.h>
53#include <linux/vmalloc.h>
54
55#include <asm/atomic.h>
56
/* Serializes all mutating operations on the cgroup hierarchies. */
static DEFINE_MUTEX(cgroup_mutex);

/* Generate an array of cgroup subsystem pointers from cgroup_subsys.h. */
#define SUBSYS(_x) &_x ## _subsys,

static struct cgroup_subsys *subsys[] = {
#include <linux/cgroup_subsys.h>
};

/* Maximum length of a hierarchy's "name=" mount option, NUL included. */
#define MAX_CGROUP_ROOT_NAMELEN 64
67
68
69
70
71
72
/*
 * A cgroupfs_root represents the root of one mounted cgroup hierarchy:
 * it ties a superblock to the set of subsystems bound to it.
 */
struct cgroupfs_root {
	/* The superblock backing this hierarchy's mount. */
	struct super_block *sb;

	/*
	 * The bitmask of subsystems intended to be attached to this
	 * hierarchy (from the mount options).
	 */
	unsigned long subsys_bits;

	/* Unique id for this hierarchy, allocated from hierarchy_ida. */
	int hierarchy_id;

	/* The bitmask of subsystems currently attached to this hierarchy. */
	unsigned long actual_subsys_bits;

	/* A list running through the attached subsystems. */
	struct list_head subsys_list;

	/* The root cgroup for this hierarchy, embedded here. */
	struct cgroup top_cgroup;

	/* Tracks how many cgroups are currently defined in hierarchy. */
	int number_of_cgroups;

	/* Link into the global "roots" list of active hierarchies. */
	struct list_head root_list;

	/* Hierarchy-specific flags (e.g. ROOT_NOPREFIX). */
	unsigned long flags;

	/* Path of the helper used by the release-agent mechanism. */
	char release_agent_path[PATH_MAX];

	/* Optional name given via the name= mount option (may be empty). */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};
109
110
111
112
113
114
/*
 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for
 * subsystems that are not attached to any real mounted hierarchy
 * (rebind_subsystems() moves subsystems between a real root and this).
 */
static struct cgroupfs_root rootnode;

/*
 * CSS ID -- a per-subsystem identifier for a cgroup_subsys_state.
 */
#define CSS_ID_MAX	(65535)
struct css_id {
	/*
	 * The css this id refers to.
	 */
	struct cgroup_subsys_state *css;
	/*
	 * The id assigned to the css.
	 */
	unsigned short id;
	/*
	 * Depth in the tree.  NOTE(review): inferred from the name and
	 * the trailing stack[] array; confirm against alloc_css_id().
	 */
	unsigned short depth;
	/*
	 * RCU callback head used to defer freeing this structure.
	 */
	struct rcu_head rcu_head;
	/*
	 * Trailing flexible array, presumably holding ancestor ids
	 * indexed by depth -- sized at allocation time (not visible here).
	 */
	unsigned short stack[0];
};
148
149
150
151
/* The list of hierarchies currently mounted, and its length. */
static LIST_HEAD(roots);
static int root_count;

/* Allocator for cgroupfs_root.hierarchy_id, guarded by hierarchy_id_lock. */
static DEFINE_IDA(hierarchy_ida);
static int next_hierarchy_id;	/* hint: next id to try allocating */
static DEFINE_SPINLOCK(hierarchy_id_lock);

/* dummytop is a shorthand for the dummy hierarchy's top cgroup. */
#define dummytop (&rootnode.top_cgroup)

/*
 * Non-zero once fork/exit callbacks are needed.  NOTE(review): the
 * writer of this flag is outside this chunk; confirm its semantics.
 */
static int need_forkexit_callback __read_mostly;
168
169
170inline int cgroup_is_removed(const struct cgroup *cgrp)
171{
172 return test_bit(CGRP_REMOVED, &cgrp->flags);
173}
174
175
/* Per-hierarchy flag bits kept in cgroupfs_root.flags / opts.flags. */
enum {
	ROOT_NOPREFIX,	/* mounted subsystems use unprefixed file names */
};
179
180static int cgroup_is_releasable(const struct cgroup *cgrp)
181{
182 const int bits =
183 (1 << CGRP_RELEASABLE) |
184 (1 << CGRP_NOTIFY_ON_RELEASE);
185 return (cgrp->flags & bits) == bits;
186}
187
188static int notify_on_release(const struct cgroup *cgrp)
189{
190 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
191}
192
193
194
195
196
/* Iterate over the subsystems attached to hierarchy @_root. */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &_root->subsys_list, sibling)

/* Iterate over every active (mounted) hierarchy. */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)

/*
 * Release-agent machinery: cgroups queued on release_list (guarded by
 * release_list_lock) are handled by cgroup_release_agent() running from
 * release_agent_work.
 */
static LIST_HEAD(release_list);
static DEFINE_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
211
212
/*
 * A cg_cgroup_link is one edge in the many-to-many relation between
 * css_sets and cgroups: each link joins one cgroup to one css_set.
 */
struct cg_cgroup_link {
	/*
	 * Entry in the ->css_sets list of the cgroup pointed to below.
	 */
	struct list_head cgrp_link_list;
	struct cgroup *cgrp;
	/*
	 * Entry in the ->cg_links list of the css_set pointed to below.
	 */
	struct list_head cg_link_list;
	struct css_set *cg;
};
227
228
229
230
231
232
233
234
/*
 * The default css_set used before any subsystems are mounted, and the
 * static link tying it to the dummy hierarchy's top cgroup.
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);

/*
 * css_set_lock protects the css_set hash table, the per-css_set and
 * per-cgroup link lists, and css_set_count.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;

/*
 * Hash table of css_sets, keyed by the css_set's array of subsystem
 * state pointers (see css_set_hash()).
 */
#define CSS_SET_HASH_BITS 7
#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
254
255static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
256{
257 int i;
258 int index;
259 unsigned long tmp = 0UL;
260
261 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
262 tmp += (unsigned long)css[i];
263 tmp = (tmp >> 16) ^ tmp;
264
265 index = hash_long(tmp, CSS_SET_HASH_BITS);
266
267 return &css_set_table[index];
268}
269
270static void free_css_set_rcu(struct rcu_head *obj)
271{
272 struct css_set *cg = container_of(obj, struct css_set, rcu_head);
273 kfree(cg);
274}
275
276
277
278
279
/*
 * Once set, tasks are kept on css_set task lists.  NOTE(review): the
 * writers/readers of this flag are outside this chunk; confirm it is
 * the usual lazy-enable for per-css_set task lists.
 */
static int use_task_css_set_links __read_mostly;
281
/*
 * Drop a reference on @cg; on the final put, unhash it, unlink it from
 * every cgroup, and free it via RCU.  @taskexit marks emptied cgroups
 * releasable before checking for release.
 */
static void __put_css_set(struct css_set *cg, int taskexit)
{
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	/*
	 * Ensure that the refcount doesn't hit zero while any readers
	 * can see it. Unless we take the lock, the fast path only
	 * drops refs that cannot be the last one.
	 */
	if (atomic_add_unless(&cg->refcount, -1, 1))
		return;
	write_lock(&css_set_lock);
	/* Re-check under the lock: someone may have re-got a ref. */
	if (!atomic_dec_and_test(&cg->refcount)) {
		write_unlock(&css_set_lock);
		return;
	}

	/* This css_set is dead. Unlink it and release the cgroup refcounts */
	hlist_del(&cg->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
				 cg_link_list) {
		struct cgroup *cgrp = link->cgrp;
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		/* A cgroup that just lost its last css_set may be empty. */
		if (atomic_dec_and_test(&cgrp->count) &&
		    notify_on_release(cgrp)) {
			if (taskexit)
				set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}

		kfree(link);
	}

	write_unlock(&css_set_lock);
	/* Defer the actual free until RCU readers are done with cg. */
	call_rcu(&cg->rcu_head, free_css_set_rcu);
}
321
322
323
324
/* Take an additional reference on @cg. */
static inline void get_css_set(struct css_set *cg)
{
	atomic_inc(&cg->refcount);
}

/* Drop a reference; does not mark emptied cgroups releasable. */
static inline void put_css_set(struct css_set *cg)
{
	__put_css_set(cg, 0);
}

/* As put_css_set(), but marks emptied cgroups releasable (task exit). */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	__put_css_set(cg, 1);
}
339
340
341
342
343
344
345
346
347
348
349
/*
 * compare_css_sets - helper for find_existing_css_set().
 * @cg: candidate css_set being tested
 * @old_cg: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if @cg matches @old_cg except for the hierarchy that
 * @new_cgrp belongs to, for which it should match @new_cgrp instead.
 */
static bool compare_css_sets(struct css_set *cg,
			     struct css_set *old_cg,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct list_head *l1, *l2;

	if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
		/* Not all subsystems matched */
		return false;
	}

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies with no subsystems.  The
	 * cheap memcmp above rules out almost all non-matches before
	 * this more expensive list walk.
	 */
	l1 = &cg->cg_links;
	l2 = &old_cg->cg_links;
	while (1) {
		struct cg_cgroup_link *cgl1, *cgl2;
		struct cgroup *cg1, *cg2;

		l1 = l1->next;
		l2 = l2->next;
		/* Both lists have one link per hierarchy, so they end together. */
		if (l1 == &cg->cg_links) {
			BUG_ON(l2 != &old_cg->cg_links);
			break;
		} else {
			BUG_ON(l2 == &old_cg->cg_links);
		}
		/* Locate the cgroups associated with these links. */
		cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
		cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
		cg1 = cgl1->cgrp;
		cg2 = cgl2->cgrp;
		/* Hierarchies should be linked in the same order. */
		BUG_ON(cg1->root != cg2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cg1->root == new_cgrp->root) {
			if (cg1 != new_cgrp)
				return false;
		} else {
			if (cg1 != cg2)
				return false;
		}
	}
	return true;
}
411
412
413
414
415
416
417
418
419
420
421
422
423
424
/*
 * find_existing_css_set() is a helper for find_css_set(): it checks
 * whether an existing css_set is suitable, and also builds @template.
 *
 * @oldcg: the css_set that we're using before the cgroup transition
 * @cgrp: the cgroup that we're moving into
 * @template: location in which to build the desired set of subsystem
 *            state objects
 *
 * Caller must hold css_set_lock (for the hash-table walk).
 */
static struct css_set *find_existing_css_set(
	struct css_set *oldcg,
	struct cgroup *cgrp,
	struct cgroup_subsys_state *template[])
{
	int i;
	struct cgroupfs_root *root = cgrp->root;
	struct hlist_head *hhead;
	struct hlist_node *node;
	struct css_set *cg;

	/*
	 * Build the set of subsystem state objects that we expect to
	 * see in the new css_set.
	 */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		if (root->subsys_bits & (1UL << i)) {
			/*
			 * Subsystem is in this hierarchy, so we want
			 * the subsystem state from the new cgroup.
			 */
			template[i] = cgrp->subsys[i];
		} else {
			/*
			 * Subsystem is not in this hierarchy, so we
			 * don't want to change the subsystem state.
			 */
			template[i] = oldcg->subsys[i];
		}
	}

	hhead = css_set_hash(template);
	hlist_for_each_entry(cg, node, hhead, hlist) {
		if (!compare_css_sets(cg, oldcg, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cg;
	}

	/* No existing css_set matched */
	return NULL;
}
463
464static void free_cg_links(struct list_head *tmp)
465{
466 struct cg_cgroup_link *link;
467 struct cg_cgroup_link *saved_link;
468
469 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
470 list_del(&link->cgrp_link_list);
471 kfree(link);
472 }
473}
474
475
476
477
478
479
480static int allocate_cg_links(int count, struct list_head *tmp)
481{
482 struct cg_cgroup_link *link;
483 int i;
484 INIT_LIST_HEAD(tmp);
485 for (i = 0; i < count; i++) {
486 link = kmalloc(sizeof(*link), GFP_KERNEL);
487 if (!link) {
488 free_cg_links(tmp);
489 return -ENOMEM;
490 }
491 list_add(&link->cgrp_link_list, tmp);
492 }
493 return 0;
494}
495
496
497
498
499
500
501
/*
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_cg_links: cg_cgroup_link objects preallocated by allocate_cg_links()
 * @cg: the css_set to be linked
 * @cgrp: the destination cgroup
 *
 * Consumes one preallocated link from @tmp_cg_links.  Callers hold
 * css_set_lock for writing (the lists and cgrp->count are modified).
 */
static void link_css_set(struct list_head *tmp_cg_links,
			 struct css_set *cg, struct cgroup *cgrp)
{
	struct cg_cgroup_link *link;

	BUG_ON(list_empty(tmp_cg_links));
	link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
				cgrp_link_list);
	link->cg = cg;
	link->cgrp = cgrp;
	atomic_inc(&cgrp->count);
	list_move(&link->cgrp_link_list, &cgrp->css_sets);
	/*
	 * Always add links to the tail of the list so that the list is
	 * sorted by order of hierarchy creation (compare_css_sets()
	 * relies on both css_sets walking hierarchies in the same order).
	 */
	list_add_tail(&link->cg_link_list, &cg->cg_links);
}
520
521
522
523
524
525
526
527
/*
 * find_css_set() takes an existing css_set and a cgroup object, and
 * returns a css_set object that's equivalent to the old set, but with
 * the given cgroup substituted into the appropriate hierarchy.
 * Must be called with cgroup_mutex held.
 */
static struct css_set *find_css_set(
	struct css_set *oldcg, struct cgroup *cgrp)
{
	struct css_set *res;
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];

	struct list_head tmp_cg_links;

	struct hlist_head *hhead;
	struct cg_cgroup_link *link;

	/*
	 * First see if we already have a css_set that matches the
	 * desired set.
	 */
	read_lock(&css_set_lock);
	res = find_existing_css_set(oldcg, cgrp, template);
	if (res)
		get_css_set(res);
	read_unlock(&css_set_lock);

	if (res)
		return res;

	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return NULL;

	/* Allocate all the cg_cgroup_link objects that we'll need */
	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
		kfree(res);
		return NULL;
	}

	atomic_set(&res->refcount, 1);
	INIT_LIST_HEAD(&res->cg_links);
	INIT_LIST_HEAD(&res->tasks);
	INIT_HLIST_NODE(&res->hlist);

	/*
	 * Copy the set of subsystem state objects generated in
	 * find_existing_css_set().
	 */
	memcpy(res->subsys, template, sizeof(res->subsys));

	write_lock(&css_set_lock);
	/* Add reference counts and links from the new css_set. */
	list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
		struct cgroup *c = link->cgrp;
		/* Substitute the target cgroup in its own hierarchy. */
		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_cg_links, res, c);
	}

	/* One link per hierarchy was preallocated; all must be consumed. */
	BUG_ON(!list_empty(&tmp_cg_links));

	css_set_count++;

	/* Add this css_set to the hash table */
	hhead = css_set_hash(res->subsys);
	hlist_add_head(&res->hlist, hhead);

	write_unlock(&css_set_lock);

	return res;
}
590
591
592
593
594
/*
 * Return the cgroup for @task in the given hierarchy.  Must be called
 * with cgroup_mutex held.
 */
static struct cgroup *task_cgroup_from_root(struct task_struct *task,
					    struct cgroupfs_root *root)
{
	struct css_set *css;
	struct cgroup *res = NULL;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	read_lock(&css_set_lock);
	/*
	 * No need to lock the task - since we hold cgroup_mutex the
	 * task can't change groups, so reading task->cgroups once is
	 * safe here.
	 */
	css = task->cgroups;
	if (css == &init_css_set) {
		/* Default css_set maps to the hierarchy's root cgroup. */
		res = &root->top_cgroup;
	} else {
		/* Find the link belonging to this hierarchy. */
		struct cg_cgroup_link *link;
		list_for_each_entry(link, &css->cg_links, cg_link_list) {
			struct cgroup *c = link->cgrp;
			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	read_unlock(&css_set_lock);
	/* Every css_set has a link into every active hierarchy. */
	BUG_ON(!res);
	return res;
}
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
/**
 * cgroup_lock - lock out any changes to cgroup structures
 *
 * Takes the global cgroup_mutex on behalf of subsystem code.
 */
void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}

/**
 * cgroup_unlock - release lock on cgroup changes
 *
 * Undo the lock taken in a previous cgroup_lock() call.
 */
void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}
694
695
696
697
698
699
700
701
/*
 * Forward declarations: the directory inode/file operations and the
 * mkdir/rmdir/populate helpers reference each other.
 */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;

/* cgroupfs does no writeback accounting of its own. */
static struct backing_dev_info cgroup_backing_dev_info = {
	.name = "cgroup",
	.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
715
716static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
717{
718 struct inode *inode = new_inode(sb);
719
720 if (inode) {
721 inode->i_mode = mode;
722 inode->i_uid = current_fsuid();
723 inode->i_gid = current_fsgid();
724 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
725 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
726 }
727 return inode;
728}
729
730
731
732
733
734static int cgroup_call_pre_destroy(struct cgroup *cgrp)
735{
736 struct cgroup_subsys *ss;
737 int ret = 0;
738
739 for_each_subsys(cgrp->root, ss)
740 if (ss->pre_destroy) {
741 ret = ss->pre_destroy(ss, cgrp);
742 if (ret)
743 break;
744 }
745 return ret;
746}
747
748static void free_cgroup_rcu(struct rcu_head *obj)
749{
750 struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
751
752 kfree(cgrp);
753}
754
/* Final dentry release: tear down the cgroup behind a directory dentry. */
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
	/* is dentry a directory ? if so, kfree() associated cgroup */
	if (S_ISDIR(inode->i_mode)) {
		struct cgroup *cgrp = dentry->d_fsdata;
		struct cgroup_subsys *ss;
		BUG_ON(!(cgroup_is_removed(cgrp)));
		/*
		 * It's possible for external users to be holding css
		 * reference counts on a cgroup; css_put() needs to be
		 * able to access the cgroup after decrementing the
		 * reference count in order to know if it needs to
		 * queue the cgroup to be handled by the release agent.
		 */
		synchronize_rcu();

		mutex_lock(&cgroup_mutex);
		/*
		 * Release the subsystem state objects.
		 */
		for_each_subsys(cgrp->root, ss)
			ss->destroy(ss, cgrp);

		cgrp->root->number_of_cgroups--;
		mutex_unlock(&cgroup_mutex);

		/*
		 * Drop the active superblock reference that we took
		 * when we created the cgroup.
		 */
		deactivate_super(cgrp->root->sb);

		/*
		 * If we're getting rid of the cgroup, refcounting
		 * should ensure that there are no pidlists left.
		 */
		BUG_ON(!list_empty(&cgrp->pidlists));

		call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
	}
	iput(inode);
}
796
797static void remove_dir(struct dentry *d)
798{
799 struct dentry *parent = dget(d->d_parent);
800
801 d_delete(d);
802 simple_rmdir(parent->d_inode, d);
803 dput(parent);
804}
805
/*
 * Unlink every (non-directory) entry under @dentry.  Called with the
 * directory inode's i_mutex held; dcache_lock is dropped and retaken
 * around each d_delete/simple_unlink pair, so the child list is
 * re-read from the head after every unlink.
 */
static void cgroup_clear_directory(struct dentry *dentry)
{
	struct list_head *node;

	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
	spin_lock(&dcache_lock);
	node = dentry->d_subdirs.next;
	while (node != &dentry->d_subdirs) {
		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
		list_del_init(node);
		if (d->d_inode) {
			/*
			 * This should never be called on a cgroup
			 * directory with child cgroups.
			 */
			BUG_ON(d->d_inode->i_mode & S_IFDIR);
			d = dget_locked(d);
			spin_unlock(&dcache_lock);
			d_delete(d);
			simple_unlink(dentry->d_inode, d);
			dput(d);
			spin_lock(&dcache_lock);
		}
		node = dentry->d_subdirs.next;
	}
	spin_unlock(&dcache_lock);
}
831
832
833
834
/*
 * Remove a cgroup directory: clear its files, detach it from the
 * dcache child list, and remove the directory itself.
 * NOTE: the dentry must have been dget()'ed by the caller.
 */
static void cgroup_d_remove_dir(struct dentry *dentry)
{
	cgroup_clear_directory(dentry);

	spin_lock(&dcache_lock);
	list_del_init(&dentry->d_u.d_child);
	spin_unlock(&dcache_lock);
	remove_dir(dentry);
}
844
845
846
847
848
849
850
851
852
/*
 * Waitqueue used by cgroup_rmdir(): a waiter arms CGRP_WAIT_ON_RMDIR
 * and sleeps here; the helpers below wake it when subsystems drop
 * their transient css references.
 */
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);

static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
{
	/* Only wake when someone actually armed the wait bit. */
	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
		wake_up_all(&cgroup_rmdir_waitq);
}

/* Pin @css so a concurrent rmdir of its cgroup waits for us. */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
	css_get(css);
}

/* Drop the pin taken by cgroup_exclude_rmdir() and kick any waiter. */
void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
{
	cgroup_wakeup_rmdir_waiter(css->cgroup);
	css_put(css);
}
871
872
/*
 * Attach/detach subsystems so that exactly the set in @final_bits is
 * bound to @root.  Called with cgroup_mutex held.  Returns -EBUSY if a
 * wanted subsystem is bound elsewhere or @root already has child
 * cgroups; 0 on success.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			     unsigned long final_bits)
{
	unsigned long added_bits, removed_bits;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	removed_bits = root->actual_subsys_bits & ~final_bits;
	added_bits = final_bits & ~root->actual_subsys_bits;
	/* Check that any added subsystems are currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_bits))
			continue;
		if (ss->root != &rootnode) {
			/* Subsystem isn't free */
			return -EBUSY;
		}
	}

	/*
	 * Currently we don't handle adding/removing subsystems when
	 * any child cgroups exist. This is theoretically supportable
	 * but involves complex error handling, so it's being left
	 * until later.
	 */
	if (root->number_of_cgroups > 1)
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_bits) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			mutex_lock(&ss->hierarchy_mutex);
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			if (ss->bind)
				ss->bind(ss, cgrp);
			mutex_unlock(&ss->hierarchy_mutex);
		} else if (bit & removed_bits) {
			/* We're removing this subsystem */
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			mutex_lock(&ss->hierarchy_mutex);
			if (ss->bind)
				ss->bind(ss, dummytop);
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			subsys[i]->root = &rootnode;
			list_move(&ss->sibling, &rootnode.subsys_list);
			mutex_unlock(&ss->hierarchy_mutex);
		} else if (bit & final_bits) {
			/* Subsystem state should already exist */
			BUG_ON(!cgrp->subsys[i]);
		} else {
			/* Subsystem state shouldn't exist */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_bits = root->actual_subsys_bits = final_bits;
	/* Let RCU readers observe the rebound subsystem pointers. */
	synchronize_rcu();

	return 0;
}
943
/* Show mount options in /proc/mounts; mirrors parse_cgroupfs_options(). */
static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
	struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
	struct cgroup_subsys *ss;

	/* cgroup_mutex stabilizes subsys_list and the root fields. */
	mutex_lock(&cgroup_mutex);
	for_each_subsys(root, ss)
		seq_printf(seq, ",%s", ss->name);
	if (test_bit(ROOT_NOPREFIX, &root->flags))
		seq_puts(seq, ",noprefix");
	if (strlen(root->release_agent_path))
		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
	if (strlen(root->name))
		seq_printf(seq, ",name=%s", root->name);
	mutex_unlock(&cgroup_mutex);
	return 0;
}
961
/* Parsed mount options, filled in by parse_cgroupfs_options(). */
struct cgroup_sb_opts {
	unsigned long subsys_bits;	/* requested subsystems */
	unsigned long flags;		/* e.g. ROOT_NOPREFIX */
	char *release_agent;		/* kmalloc'd; may be NULL */
	char *name;			/* kmalloc'd; may be NULL */
	/* User explicitly requested an empty subsystem set ("none"). */
	bool none;

	/* Candidate root built by cgroup_get_sb(), consumed by sget(). */
	struct cgroupfs_root *new_root;

};
973
974
975
976static int parse_cgroupfs_options(char *data,
977 struct cgroup_sb_opts *opts)
978{
979 char *token, *o = data ?: "all";
980 unsigned long mask = (unsigned long)-1;
981
982#ifdef CONFIG_CPUSETS
983 mask = ~(1UL << cpuset_subsys_id);
984#endif
985
986 memset(opts, 0, sizeof(*opts));
987
988 while ((token = strsep(&o, ",")) != NULL) {
989 if (!*token)
990 return -EINVAL;
991 if (!strcmp(token, "all")) {
992
993 int i;
994 opts->subsys_bits = 0;
995 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
996 struct cgroup_subsys *ss = subsys[i];
997 if (!ss->disabled)
998 opts->subsys_bits |= 1ul << i;
999 }
1000 } else if (!strcmp(token, "none")) {
1001
1002 opts->none = true;
1003 } else if (!strcmp(token, "noprefix")) {
1004 set_bit(ROOT_NOPREFIX, &opts->flags);
1005 } else if (!strncmp(token, "release_agent=", 14)) {
1006
1007 if (opts->release_agent)
1008 return -EINVAL;
1009 opts->release_agent =
1010 kstrndup(token + 14, PATH_MAX, GFP_KERNEL);
1011 if (!opts->release_agent)
1012 return -ENOMEM;
1013 } else if (!strncmp(token, "name=", 5)) {
1014 int i;
1015 const char *name = token + 5;
1016
1017 if (!strlen(name))
1018 return -EINVAL;
1019
1020 for (i = 0; i < strlen(name); i++) {
1021 char c = name[i];
1022 if (isalnum(c))
1023 continue;
1024 if ((c == '.') || (c == '-') || (c == '_'))
1025 continue;
1026 return -EINVAL;
1027 }
1028
1029 if (opts->name)
1030 return -EINVAL;
1031 opts->name = kstrndup(name,
1032 MAX_CGROUP_ROOT_NAMELEN,
1033 GFP_KERNEL);
1034 if (!opts->name)
1035 return -ENOMEM;
1036 } else {
1037 struct cgroup_subsys *ss;
1038 int i;
1039 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1040 ss = subsys[i];
1041 if (!strcmp(token, ss->name)) {
1042 if (!ss->disabled)
1043 set_bit(i, &opts->subsys_bits);
1044 break;
1045 }
1046 }
1047 if (i == CGROUP_SUBSYS_COUNT)
1048 return -ENOENT;
1049 }
1050 }
1051
1052
1053
1054
1055
1056
1057
1058
1059 if (test_bit(ROOT_NOPREFIX, &opts->flags) &&
1060 (opts->subsys_bits & mask))
1061 return -EINVAL;
1062
1063
1064
1065 if (opts->subsys_bits && opts->none)
1066 return -EINVAL;
1067
1068
1069
1070
1071
1072 if (!opts->subsys_bits && !opts->name)
1073 return -EINVAL;
1074
1075 return 0;
1076}
1077
/*
 * Remount handler: only the bound subsystem set and the release agent
 * may change; flags and name must stay as they were at mount time.
 */
static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{
	int ret = 0;
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	struct cgroup_sb_opts opts;

	lock_kernel();
	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
	mutex_lock(&cgroup_mutex);

	/* See what subsystems are wanted */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

	/* Don't allow flags to change at remount */
	if (opts.flags != root->flags) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Don't allow name to change at remount */
	if (opts.name && strcmp(opts.name, root->name)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = rebind_subsystems(root, opts.subsys_bits);
	if (ret)
		goto out_unlock;

	/* (re)populate subsystem files */
	cgroup_populate_dir(cgrp);

	if (opts.release_agent)
		strcpy(root->release_agent_path, opts.release_agent);
 out_unlock:
	kfree(opts.release_agent);
	kfree(opts.name);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
	unlock_kernel();
	return ret;
}
1123
/* Superblock operations for cgroupfs; mostly generic helpers. */
static const struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
1130
/* Initialise the per-cgroup lists and locks embedded in @cgrp. */
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	INIT_LIST_HEAD(&cgrp->sibling);
	INIT_LIST_HEAD(&cgrp->children);
	INIT_LIST_HEAD(&cgrp->css_sets);
	INIT_LIST_HEAD(&cgrp->release_list);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
}

/* Set up a new hierarchy root and its embedded top cgroup. */
static void init_cgroup_root(struct cgroupfs_root *root)
{
	struct cgroup *cgrp = &root->top_cgroup;
	INIT_LIST_HEAD(&root->subsys_list);
	INIT_LIST_HEAD(&root->root_list);
	root->number_of_cgroups = 1;	/* the root cgroup itself */
	cgrp->root = root;
	cgrp->top_cgroup = cgrp;
	init_cgroup_housekeeping(cgrp);
}
1151
/*
 * Allocate a hierarchy id for @root, retrying on IDA preload races.
 * Ids are handed out in increasing order, wrapping to the lowest free
 * id once the space above next_hierarchy_id is exhausted.  Returns
 * false only on allocation failure.
 */
static bool init_root_id(struct cgroupfs_root *root)
{
	int ret = 0;

	do {
		if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
			return false;
		spin_lock(&hierarchy_id_lock);
		/* Try to allocate the next unused id */
		ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
					&root->hierarchy_id);
		if (ret == -ENOSPC)
			/* Try again from the beginning of the range */
			ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
		if (!ret) {
			next_hierarchy_id = root->hierarchy_id + 1;
		} else if (ret != -EAGAIN) {
			/* Any error other than a preload race is fatal. */
			BUG_ON(ret);
		}
		spin_unlock(&hierarchy_id_lock);
	} while (ret);
	return true;
}
1176
1177static int cgroup_test_super(struct super_block *sb, void *data)
1178{
1179 struct cgroup_sb_opts *opts = data;
1180 struct cgroupfs_root *root = sb->s_fs_info;
1181
1182
1183 if (opts->name && strcmp(opts->name, root->name))
1184 return 0;
1185
1186
1187
1188
1189
1190 if ((opts->subsys_bits || opts->none)
1191 && (opts->subsys_bits != root->subsys_bits))
1192 return 0;
1193
1194 return 1;
1195}
1196
1197static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1198{
1199 struct cgroupfs_root *root;
1200
1201 if (!opts->subsys_bits && !opts->none)
1202 return NULL;
1203
1204 root = kzalloc(sizeof(*root), GFP_KERNEL);
1205 if (!root)
1206 return ERR_PTR(-ENOMEM);
1207
1208 if (!init_root_id(root)) {
1209 kfree(root);
1210 return ERR_PTR(-ENOMEM);
1211 }
1212 init_cgroup_root(root);
1213
1214 root->subsys_bits = opts->subsys_bits;
1215 root->flags = opts->flags;
1216 if (opts->release_agent)
1217 strcpy(root->release_agent_path, opts->release_agent);
1218 if (opts->name)
1219 strcpy(root->name, opts->name);
1220 return root;
1221}
1222
1223static void cgroup_drop_root(struct cgroupfs_root *root)
1224{
1225 if (!root)
1226 return;
1227
1228 BUG_ON(!root->hierarchy_id);
1229 spin_lock(&hierarchy_id_lock);
1230 ida_remove(&hierarchy_ida, root->hierarchy_id);
1231 spin_unlock(&hierarchy_id_lock);
1232 kfree(root);
1233}
1234
/*
 * sget() setup callback: fill in a freshly-allocated superblock from
 * the candidate root stashed in opts->new_root by cgroup_get_sb().
 */
static int cgroup_set_super(struct super_block *sb, void *data)
{
	int ret;
	struct cgroup_sb_opts *opts = data;

	/* If we don't have a new root, we can't set up a new sb */
	if (!opts->new_root)
		return -EINVAL;

	BUG_ON(!opts->subsys_bits && !opts->none);

	ret = set_anon_super(sb, NULL);
	if (ret)
		return ret;

	/* Cross-link the superblock and the hierarchy root. */
	sb->s_fs_info = opts->new_root;
	opts->new_root->sb = sb;

	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = CGROUP_SUPER_MAGIC;
	sb->s_op = &cgroup_ops;

	return 0;
}
1260
1261static int cgroup_get_rootdir(struct super_block *sb)
1262{
1263 struct inode *inode =
1264 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
1265 struct dentry *dentry;
1266
1267 if (!inode)
1268 return -ENOMEM;
1269
1270 inode->i_fop = &simple_dir_operations;
1271 inode->i_op = &cgroup_dir_inode_operations;
1272
1273 inc_nlink(inode);
1274 dentry = d_alloc_root(inode);
1275 if (!dentry) {
1276 iput(inode);
1277 return -ENOMEM;
1278 }
1279 sb->s_root = dentry;
1280 return 0;
1281}
1282
/*
 * Mount entry point: either revives an existing hierarchy whose
 * options match (via sget()/cgroup_test_super()) or activates the
 * candidate root built by cgroup_root_from_opts().
 */
static int cgroup_get_sb(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data, struct vfsmount *mnt)
{
	struct cgroup_sb_opts opts;
	struct cgroupfs_root *root;
	int ret = 0;
	struct super_block *sb;
	struct cgroupfs_root *new_root;

	/* First find the desired set of subsystems */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_err;

	/*
	 * Allocate a new cgroup root. We may not need it if we're
	 * reusing an existing hierarchy.
	 */
	new_root = cgroup_root_from_opts(&opts);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto out_err;
	}
	opts.new_root = new_root;

	/* Locate an existing or new sb for this hierarchy */
	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		cgroup_drop_root(opts.new_root);
		goto out_err;
	}

	root = sb->s_fs_info;
	BUG_ON(!root);
	if (root == opts.new_root) {
		/* We used the new root structure, so this is a new hierarchy */
		struct list_head tmp_cg_links;
		struct cgroup *root_cgrp = &root->top_cgroup;
		struct inode *inode;
		struct cgroupfs_root *existing_root;
		int i;

		BUG_ON(sb->s_root != NULL);

		ret = cgroup_get_rootdir(sb);
		if (ret)
			goto drop_new_super;
		inode = sb->s_root->d_inode;

		mutex_lock(&inode->i_mutex);
		mutex_lock(&cgroup_mutex);

		if (strlen(root->name)) {
			/* Check for name clashes with existing mounts */
			for_each_active_root(existing_root) {
				if (!strcmp(existing_root->name, root->name)) {
					ret = -EBUSY;
					mutex_unlock(&cgroup_mutex);
					mutex_unlock(&inode->i_mutex);
					goto drop_new_super;
				}
			}
		}

		/*
		 * We're accessing css_set_count without locking
		 * css_set_lock here, but that's OK - it can only be
		 * increased by someone holding cgroup_lock, and
		 * that's us. The worst that can happen is that we
		 * have some link structures left over.
		 */
		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
		if (ret) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			goto drop_new_super;
		}

		ret = rebind_subsystems(root, root->subsys_bits);
		if (ret == -EBUSY) {
			mutex_unlock(&cgroup_mutex);
			mutex_unlock(&inode->i_mutex);
			free_cg_links(&tmp_cg_links);
			goto drop_new_super;
		}

		/* EBUSY should be the only error here */
		BUG_ON(ret);

		list_add(&root->root_list, &roots);
		root_count++;

		sb->s_root->d_fsdata = root_cgrp;
		root->top_cgroup.dentry = sb->s_root;

		/*
		 * Link the top cgroup in this hierarchy into all the
		 * css_set objects.
		 */
		write_lock(&css_set_lock);
		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
			struct hlist_head *hhead = &css_set_table[i];
			struct hlist_node *node;
			struct css_set *cg;

			hlist_for_each_entry(cg, node, hhead, hlist)
				link_css_set(&tmp_cg_links, cg, root_cgrp);
		}
		write_unlock(&css_set_lock);

		free_cg_links(&tmp_cg_links);

		BUG_ON(!list_empty(&root_cgrp->sibling));
		BUG_ON(!list_empty(&root_cgrp->children));
		BUG_ON(root->number_of_cgroups != 1);

		cgroup_populate_dir(root_cgrp);
		mutex_unlock(&cgroup_mutex);
		mutex_unlock(&inode->i_mutex);
	} else {
		/*
		 * We re-used an existing hierarchy - the new root (if
		 * any) is not needed.
		 */
		cgroup_drop_root(opts.new_root);
	}

	simple_set_mnt(mnt, sb);
	kfree(opts.release_agent);
	kfree(opts.name);
	return 0;

 drop_new_super:
	deactivate_locked_super(sb);
 out_err:
	kfree(opts.release_agent);
	kfree(opts.name);

	return ret;
}
1423
/* Tear down a hierarchy when its last mount reference goes away. */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	int ret;
	struct cg_cgroup_link *link;
	struct cg_cgroup_link *saved_link;

	BUG_ON(!root);

	/* Only the (empty) root cgroup may remain at this point. */
	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));
	BUG_ON(!list_empty(&cgrp->sibling));

	mutex_lock(&cgroup_mutex);

	/* Rebind all subsystems back to the default hierarchy */
	ret = rebind_subsystems(root, 0);
	/* Shouldn't be able to fail on an empty hierarchy... */
	BUG_ON(ret);

	/*
	 * Release all the links from css_sets to this hierarchy's
	 * root cgroup.
	 */
	write_lock(&css_set_lock);

	list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
				 cgrp_link_list) {
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		root_count--;
	}

	mutex_unlock(&cgroup_mutex);

	kill_litter_super(sb);
	cgroup_drop_root(root);
}
1468
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.get_sb = cgroup_get_sb,
	.kill_sb = cgroup_kill_sb,
};

/* The cgroup hanging off a cgroup directory dentry (via d_fsdata). */
static inline struct cgroup *__d_cgrp(struct dentry *dentry)
{
	return dentry->d_fsdata;
}

/* The cftype hanging off a cgroup control-file dentry (via d_fsdata). */
static inline struct cftype *__d_cft(struct dentry *dentry)
{
	return dentry->d_fsdata;
}
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1496{
1497 char *start;
1498 struct dentry *dentry = rcu_dereference(cgrp->dentry);
1499
1500 if (!dentry || cgrp == dummytop) {
1501
1502
1503
1504
1505 strcpy(buf, "/");
1506 return 0;
1507 }
1508
1509 start = buf + buflen;
1510
1511 *--start = '\0';
1512 for (;;) {
1513 int len = dentry->d_name.len;
1514 if ((start -= len) < buf)
1515 return -ENAMETOOLONG;
1516 memcpy(start, cgrp->dentry->d_name.name, len);
1517 cgrp = cgrp->parent;
1518 if (!cgrp)
1519 break;
1520 dentry = rcu_dereference(cgrp->dentry);
1521 if (!cgrp->parent)
1522 continue;
1523 if (--start < buf)
1524 return -ENAMETOOLONG;
1525 *start = '/';
1526 }
1527 memmove(buf, start, buf + buflen - start);
1528 return 0;
1529}
1530
1531
1532
1533
1534
1535
1536
1537
1538
/**
 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
 * @cgrp: the cgroup the task is attaching to
 * @tsk: the task to be attached
 *
 * Call holding cgroup_mutex.  May take task_lock of @tsk during call.
 * Returns 0 on success, a subsystem's can_attach() error, -ENOMEM, or
 * -ESRCH if @tsk starts exiting mid-move.
 */
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval = 0;
	struct cgroup_subsys *ss;
	struct cgroup *oldcgrp;
	struct css_set *cg;
	struct css_set *newcg;
	struct cgroupfs_root *root = cgrp->root;

	/* Nothing to do if the task is already in that cgroup */
	oldcgrp = task_cgroup_from_root(tsk, root);
	if (cgrp == oldcgrp)
		return 0;

	/* Give each attached subsystem a chance to veto the move. */
	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, tsk, false);
			if (retval)
				return retval;
		}
	}

	/* Pin the task's current css_set across the lookup below. */
	task_lock(tsk);
	cg = tsk->cgroups;
	get_css_set(cg);
	task_unlock(tsk);
	/*
	 * Locate or allocate a new css_set for this task, based on
	 * its final set of cgroups.
	 */
	newcg = find_css_set(cg, cgrp);
	put_css_set(cg);	/* drop the pin taken above */
	if (!newcg)
		return -ENOMEM;

	task_lock(tsk);
	if (tsk->flags & PF_EXITING) {
		/* Task began exiting; abandon the move. */
		task_unlock(tsk);
		put_css_set(newcg);
		return -ESRCH;
	}
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* Update the css_set linked lists if we're using them */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list)) {
		list_del(&tsk->cg_list);
		list_add(&tsk->cg_list, &newcg->tasks);
	}
	write_unlock(&css_set_lock);

	for_each_subsys(root, ss) {
		if (ss->attach)
			ss->attach(ss, cgrp, oldcgrp, tsk, false);
	}
	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	synchronize_rcu();
	/* Drop the reference the task itself held on its old css_set. */
	put_css_set(cg);

	/*
	 * Wake up rmdir() waiter - the rmdir should fail since the
	 * cgroup is no longer empty.
	 */
	cgroup_wakeup_rmdir_waiter(cgrp);
	return 0;
}
1606
1607
1608
1609
1610
/*
 * Attach the task with pid @pid (in the caller's pid namespace) to
 * @cgrp; pid 0 means the calling task.  Called with cgroup_mutex held
 * (via cgroup_lock_live_group() in the caller).
 */
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
{
	struct task_struct *tsk;
	const struct cred *cred = current_cred(), *tcred;
	int ret;

	if (pid) {
		rcu_read_lock();
		tsk = find_task_by_vpid(pid);
		if (!tsk || tsk->flags & PF_EXITING) {
			rcu_read_unlock();
			return -ESRCH;
		}

		/*
		 * Non-root callers (euid != 0) may only move tasks
		 * whose uid or suid matches their euid.
		 */
		tcred = __task_cred(tsk);
		if (cred->euid &&
		    cred->euid != tcred->uid &&
		    cred->euid != tcred->suid) {
			rcu_read_unlock();
			return -EACCES;
		}
		/* Hold the task across the attach, past rcu unlock. */
		get_task_struct(tsk);
		rcu_read_unlock();
	} else {
		tsk = current;
		get_task_struct(tsk);
	}

	ret = cgroup_attach_task(cgrp, tsk);
	put_task_struct(tsk);
	return ret;
}
1643
1644static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
1645{
1646 int ret;
1647 if (!cgroup_lock_live_group(cgrp))
1648 return -ENODEV;
1649 ret = attach_task_by_pid(cgrp, pid);
1650 cgroup_unlock();
1651 return ret;
1652}
1653
1654
1655
1656
1657
1658
1659
1660
1661bool cgroup_lock_live_group(struct cgroup *cgrp)
1662{
1663 mutex_lock(&cgroup_mutex);
1664 if (cgroup_is_removed(cgrp)) {
1665 mutex_unlock(&cgroup_mutex);
1666 return false;
1667 }
1668 return true;
1669}
1670
/*
 * Store a new release-agent path for the whole hierarchy.  The plain
 * strcpy() is safe here: cgroup_write_string() bounds @buffer by
 * cft->max_write_len (PATH_MAX for this file), and the BUILD_BUG_ON
 * proves the destination is at least that large.
 */
static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
				      const char *buffer)
{
	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
	if (!cgroup_lock_live_group(cgrp))
		return -ENODEV;
	strcpy(cgrp->root->release_agent_path, buffer);
	cgroup_unlock();
	return 0;
}
1681
1682static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
1683 struct seq_file *seq)
1684{
1685 if (!cgroup_lock_live_group(cgrp))
1686 return -ENODEV;
1687 seq_puts(seq, cgrp->root->release_agent_path);
1688 seq_putc(seq, '\n');
1689 cgroup_unlock();
1690 return 0;
1691}
1692
1693
1694#define CGROUP_LOCAL_BUFFER_SIZE 64
1695
/*
 * Common write handler for u64/s64 control files: copy a small decimal
 * (or 0x/0-prefixed) string from userspace, parse it, and hand the value
 * to the cftype's write_u64 or write_s64 callback.  Returns the number
 * of bytes consumed on success or a negative errno.
 */
static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
				struct file *file,
				const char __user *userbuf,
				size_t nbytes, loff_t *unused_ppos)
{
	char buffer[CGROUP_LOCAL_BUFFER_SIZE];
	int retval = 0;
	char *end;

	if (!nbytes)
		return -EINVAL;
	/* Leave room for the terminating NUL added below */
	if (nbytes >= sizeof(buffer))
		return -E2BIG;
	if (copy_from_user(buffer, userbuf, nbytes))
		return -EFAULT;

	buffer[nbytes] = 0;
	/* Dispatch assumes one of write_u64/write_s64 is set (see caller) */
	if (cft->write_u64) {
		u64 val = simple_strtoull(strstrip(buffer), &end, 0);
		/* Reject trailing garbage after the number */
		if (*end)
			return -EINVAL;
		retval = cft->write_u64(cgrp, cft, val);
	} else {
		s64 val = simple_strtoll(strstrip(buffer), &end, 0);
		if (*end)
			return -EINVAL;
		retval = cft->write_s64(cgrp, cft, val);
	}
	/* Success: report the whole write as consumed */
	if (!retval)
		retval = nbytes;
	return retval;
}
1728
/*
 * Write handler for string-valued control files: copy up to
 * cft->max_write_len bytes from userspace (using a small stack buffer
 * when possible, kmalloc otherwise), strip surrounding whitespace and
 * pass the result to cft->write_string.  Returns bytes consumed or a
 * negative errno.
 */
static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
				   struct file *file,
				   const char __user *userbuf,
				   size_t nbytes, loff_t *unused_ppos)
{
	char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
	int retval = 0;
	size_t max_bytes = cft->max_write_len;
	char *buffer = local_buffer;

	/* A max_write_len of 0 means "fits in the local buffer" */
	if (!max_bytes)
		max_bytes = sizeof(local_buffer) - 1;
	if (nbytes >= max_bytes)
		return -E2BIG;
	/* Allocate a heap buffer only when the write doesn't fit on-stack */
	if (nbytes >= sizeof(local_buffer)) {
		buffer = kmalloc(nbytes + 1, GFP_KERNEL);
		if (buffer == NULL)
			return -ENOMEM;
	}
	if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
		retval = -EFAULT;
		goto out;
	}

	buffer[nbytes] = 0;
	retval = cft->write_string(cgrp, cft, strstrip(buffer));
	if (!retval)
		retval = nbytes;
out:
	if (buffer != local_buffer)
		kfree(buffer);
	return retval;
}
1763
1764static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
1765 size_t nbytes, loff_t *ppos)
1766{
1767 struct cftype *cft = __d_cft(file->f_dentry);
1768 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1769
1770 if (cgroup_is_removed(cgrp))
1771 return -ENODEV;
1772 if (cft->write)
1773 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
1774 if (cft->write_u64 || cft->write_s64)
1775 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
1776 if (cft->write_string)
1777 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
1778 if (cft->trigger) {
1779 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
1780 return ret ? ret : nbytes;
1781 }
1782 return -EINVAL;
1783}
1784
1785static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
1786 struct file *file,
1787 char __user *buf, size_t nbytes,
1788 loff_t *ppos)
1789{
1790 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
1791 u64 val = cft->read_u64(cgrp, cft);
1792 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
1793
1794 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1795}
1796
1797static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
1798 struct file *file,
1799 char __user *buf, size_t nbytes,
1800 loff_t *ppos)
1801{
1802 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
1803 s64 val = cft->read_s64(cgrp, cft);
1804 int len = sprintf(tmp, "%lld\n", (long long) val);
1805
1806 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1807}
1808
1809static ssize_t cgroup_file_read(struct file *file, char __user *buf,
1810 size_t nbytes, loff_t *ppos)
1811{
1812 struct cftype *cft = __d_cft(file->f_dentry);
1813 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1814
1815 if (cgroup_is_removed(cgrp))
1816 return -ENODEV;
1817
1818 if (cft->read)
1819 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
1820 if (cft->read_u64)
1821 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
1822 if (cft->read_s64)
1823 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
1824 return -EINVAL;
1825}
1826
1827
1828
1829
1830
1831
/*
 * Per-open-file state for seq_file-based control files, stashed in
 * seq_file->private by cgroup_file_open() and freed on release.
 */
struct cgroup_seqfile_state {
	struct cftype *cft;	/* which control file this open refers to */
	struct cgroup *cgroup;	/* owning cgroup (the file's parent dir) */
};
1836
1837static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
1838{
1839 struct seq_file *sf = cb->state;
1840 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
1841}
1842
1843static int cgroup_seqfile_show(struct seq_file *m, void *arg)
1844{
1845 struct cgroup_seqfile_state *state = m->private;
1846 struct cftype *cft = state->cft;
1847 if (cft->read_map) {
1848 struct cgroup_map_cb cb = {
1849 .fill = cgroup_map_add,
1850 .state = m,
1851 };
1852 return cft->read_map(state->cgroup, cft, &cb);
1853 }
1854 return cft->read_seq_string(state->cgroup, cft, m);
1855}
1856
1857static int cgroup_seqfile_release(struct inode *inode, struct file *file)
1858{
1859 struct seq_file *seq = file->private_data;
1860 kfree(seq->private);
1861 return single_release(inode, file);
1862}
1863
/*
 * file_operations installed by cgroup_file_open() for control files
 * that use read_map/read_seq_string; writes still go through the
 * generic cgroup dispatcher.
 */
static const struct file_operations cgroup_seqfile_operations = {
	.read = seq_read,
	.write = cgroup_file_write,
	.llseek = seq_lseek,
	.release = cgroup_seqfile_release,
};
1870
/*
 * Open handler for cgroup control files.  For seq_file-style files
 * (read_map/read_seq_string) it allocates per-open state and swaps in
 * cgroup_seqfile_operations; otherwise it defers to the cftype's own
 * open callback, if any.
 */
static int cgroup_file_open(struct inode *inode, struct file *file)
{
	int err;
	struct cftype *cft;

	err = generic_file_open(inode, file);
	if (err)
		return err;
	cft = __d_cft(file->f_dentry);

	if (cft->read_map || cft->read_seq_string) {
		struct cgroup_seqfile_state *state =
			kzalloc(sizeof(*state), GFP_USER);
		if (!state)
			return -ENOMEM;
		state->cft = cft;
		state->cgroup = __d_cgrp(file->f_dentry->d_parent);
		/* From here on, reads/release go through the seqfile ops */
		file->f_op = &cgroup_seqfile_operations;
		err = single_open(file, cgroup_seqfile_show, state);
		if (err < 0)
			kfree(state);
	} else if (cft->open)
		err = cft->open(inode, file);
	else
		err = 0;

	return err;
}
1899
1900static int cgroup_file_release(struct inode *inode, struct file *file)
1901{
1902 struct cftype *cft = __d_cft(file->f_dentry);
1903 if (cft->release)
1904 return cft->release(inode, file);
1905 return 0;
1906}
1907
1908
1909
1910
/*
 * cgroup_rename - rename a cgroup directory.
 * Only directories (cgroups themselves) may be renamed, only to a
 * non-existing name, and only within the same parent; control files
 * cannot be renamed at all.  The check order fixes which errno wins.
 */
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
{
	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		return -ENOTDIR;
	if (new_dentry->d_inode)
		return -EEXIST;
	if (old_dir != new_dir)
		return -EIO;
	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
}
1922
/* Default file_operations for (non-seq_file) cgroup control files. */
static const struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};
1930
/* inode_operations for cgroup directories (mkdir/rmdir create/remove cgroups). */
static const struct inode_operations cgroup_dir_inode_operations = {
	.lookup = simple_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
};
1937
/*
 * Allocate an inode for @dentry and wire up the cgroup dir/file ops.
 * NOTE: on success for a directory, this returns with the new inode's
 * i_mutex held (I_MUTEX_CHILD) — callers rely on that (see
 * cgroup_create()/cgroup_populate_dir()).
 */
static int cgroup_create_file(struct dentry *dentry, mode_t mode,
			      struct super_block *sb)
{
	static const struct dentry_operations cgroup_dops = {
		.d_iput = cgroup_diput,
	};

	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* start off with i_nlink == 2 (for "." entry) */
		inc_nlink(inode);

		/*
		 * Control reaches here with cgroup_mutex held.
		 * @inode->i_mutex should nest outside cgroup_mutex but we
		 * want to populate it immediately without releasing
		 * cgroup_mutex.  As @inode isn't visible to anyone else
		 * yet, trylock will always succeed without affecting
		 * lockdep checks.
		 */
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
	}
	dentry->d_op = &cgroup_dops;
	d_instantiate(dentry, inode);
	dget(dentry);	/* Extra count - pin the dentry in core */
	return 0;
}
1975
1976
1977
1978
1979
1980
1981
1982
/*
 * cgroup_create_dir - create a directory for an object.
 * @cgrp: the cgroup we create the directory for.  It must have a valid
 *        ->parent field.  And we are going to fill its ->dentry field.
 * @dentry: dentry of the new cgroup
 * @mode: mode to set on new directory.
 */
static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
			     mode_t mode)
{
	struct dentry *parent;
	int error = 0;

	parent = cgrp->parent->dentry;
	error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
	if (!error) {
		dentry->d_fsdata = cgrp;
		inc_nlink(parent->d_inode);	/* new subdir => parent ".." link */
		rcu_assign_pointer(cgrp->dentry, dentry);
		dget(dentry);	/* reference held via cgrp->dentry */
	}
	/* drop the caller's lookup reference; cgrp->dentry keeps its own */
	dput(dentry);

	return error;
}
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011static mode_t cgroup_file_mode(const struct cftype *cft)
2012{
2013 mode_t mode = 0;
2014
2015 if (cft->mode)
2016 return cft->mode;
2017
2018 if (cft->read || cft->read_u64 || cft->read_s64 ||
2019 cft->read_map || cft->read_seq_string)
2020 mode |= S_IRUGO;
2021
2022 if (cft->write || cft->write_u64 || cft->write_s64 ||
2023 cft->write_string || cft->trigger)
2024 mode |= S_IWUSR;
2025
2026 return mode;
2027}
2028
/*
 * Create one control file in @cgrp's directory.  Unless the root was
 * mounted with "noprefix", files belonging to a subsystem are named
 * "<subsys>.<name>".  Caller must hold the directory inode's i_mutex.
 */
int cgroup_add_file(struct cgroup *cgrp,
		    struct cgroup_subsys *subsys,
		    const struct cftype *cft)
{
	struct dentry *dir = cgrp->dentry;
	struct dentry *dentry;
	int error;
	mode_t mode;

	/* room for "<subsys>." prefix, the name, and the NUL */
	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
		strcpy(name, subsys->name);
		strcat(name, ".");
	}
	strcat(name, cft->name);
	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
	dentry = lookup_one_len(name, dir, strlen(name));
	if (!IS_ERR(dentry)) {
		mode = cgroup_file_mode(cft);
		error = cgroup_create_file(dentry, mode | S_IFREG,
						cgrp->root->sb);
		if (!error)
			/* stash the cftype so __d_cft() can find it */
			dentry->d_fsdata = (void *)cft;
		dput(dentry);
	} else
		error = PTR_ERR(dentry);
	return error;
}
2057
2058int cgroup_add_files(struct cgroup *cgrp,
2059 struct cgroup_subsys *subsys,
2060 const struct cftype cft[],
2061 int count)
2062{
2063 int i, err;
2064 for (i = 0; i < count; i++) {
2065 err = cgroup_add_file(cgrp, subsys, &cft[i]);
2066 if (err)
2067 return err;
2068 }
2069 return 0;
2070}
2071
2072
2073
2074
2075
2076
2077
/**
 * cgroup_task_count - count the number of tasks in a cgroup.
 * @cgrp: the cgroup in question
 *
 * Sums the refcounts of all css_sets linked to this cgroup; each live
 * task holds one reference on its css_set, so this approximates the
 * task count without walking every task.
 */
int cgroup_task_count(const struct cgroup *cgrp)
{
	int count = 0;
	struct cg_cgroup_link *link;

	read_lock(&css_set_lock);
	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
		count += atomic_read(&link->cg->refcount);
	}
	read_unlock(&css_set_lock);
	return count;
}
2090
2091
2092
2093
2094
/*
 * Advance a cgroup iterator to the first css_set (at or after the
 * current position) that actually contains tasks, and point it->task
 * at that set's first task.  Called with css_set_lock held.
 */
static void cgroup_advance_iter(struct cgroup *cgrp,
				struct cgroup_iter *it)
{
	struct list_head *l = it->cg_link;
	struct cg_cgroup_link *link;
	struct css_set *cg;

	/* Advance to the next non-empty css_set */
	do {
		l = l->next;
		if (l == &cgrp->css_sets) {
			/* wrapped around: iteration is complete */
			it->cg_link = NULL;
			return;
		}
		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
		cg = link->cg;
	} while (list_empty(&cg->tasks));
	it->cg_link = l;
	it->task = cg->tasks.next;
}
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
/*
 * The css_set->tasks lists are not populated until the first cgroup
 * iteration is needed (to keep fork/exit overhead at zero until then).
 * This one-shot routine turns the lists on and links every existing
 * task into its css_set under css_set_lock.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;
	write_lock(&css_set_lock);
	use_task_css_set_links = 1;
	do_each_thread(g, p) {
		task_lock(p);
		/*
		 * We should check if the process is exiting, otherwise
		 * it will race with cgroup_exit() in that the list
		 * entry won't be deleted though the process has exited.
		 */
		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
			list_add(&p->cg_list, &p->cgroups->tasks);
		task_unlock(p);
	} while_each_thread(g, p);
	write_unlock(&css_set_lock);
}
2143
/*
 * Begin iterating over the tasks in @cgrp.  Takes css_set_lock (read),
 * which remains held until the matching cgroup_iter_end() — keep the
 * iteration short and don't sleep in between.
 */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
{
	/*
	 * The first time anyone tries to iterate across a cgroup,
	 * we need to enable the list linking each css_set to its
	 * tasks, and fix up all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	read_lock(&css_set_lock);
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}
2158
/*
 * Return the current task of the iteration and advance the iterator,
 * or NULL when the cgroup's tasks are exhausted.  Moves on to the next
 * css_set when the current one's task list is finished.
 */
struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
				     struct cgroup_iter *it)
{
	struct task_struct *res;
	struct list_head *l = it->task;
	struct cg_cgroup_link *link;

	/* If the iterator cg is NULL, we have no tasks */
	if (!it->cg_link)
		return NULL;
	res = list_entry(l, struct task_struct, cg_list);
	/* Advance iterator to find next entry */
	l = l->next;
	link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
	if (l == &link->cg->tasks) {
		/*
		 * We reached the end of this task list - move on to the
		 * next cg_cgroup_link.
		 */
		cgroup_advance_iter(cgrp, it);
	} else {
		it->task = l;
	}
	return res;
}
2182
/* Finish an iteration: drops the css_set_lock taken by cgroup_iter_start(). */
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
{
	read_unlock(&css_set_lock);
}
2187
2188static inline int started_after_time(struct task_struct *t1,
2189 struct timespec *time,
2190 struct task_struct *t2)
2191{
2192 int start_diff = timespec_compare(&t1->start_time, time);
2193 if (start_diff > 0) {
2194 return 1;
2195 } else if (start_diff < 0) {
2196 return 0;
2197 } else {
2198
2199
2200
2201
2202
2203
2204
2205
2206 return t1 > t2;
2207 }
2208}
2209
2210
2211
2212
2213
2214
/*
 * Heap comparator for cgroup_scan_tasks(): "greater than" means
 * "started after", using t2's own start_time as the reference.
 */
static inline int started_after(void *p1, void *p2)
{
	struct task_struct *t1 = p1;
	struct task_struct *t2 = p2;
	return started_after_time(t1, &t2->start_time, t2);
}
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
/**
 * cgroup_scan_tasks - iterate though all the tasks in a cgroup
 * @scan: struct cgroup_scanner containing arguments for the scan
 *
 * Runs scan->process_task() on every task in scan->cg that passes
 * scan->test_task().  Because process_task() may sleep, the tasks are
 * gathered into a bounded priority heap (ordered by start time) while
 * css_set_lock is held, then processed after the lock is dropped; the
 * scan repeats from tasks newer than the last batch until the heap
 * comes back empty, so tasks forked mid-scan are still visited.
 * Returns 0 on success or -ENOMEM if a temporary heap can't be made.
 */
int cgroup_scan_tasks(struct cgroup_scanner *scan)
{
	int retval, i;
	struct cgroup_iter it;
	struct task_struct *p, *dropped;
	/* Never dereference latest_task, since it's not refcounted */
	struct task_struct *latest_task = NULL;
	struct ptr_heap tmp_heap;
	struct ptr_heap *heap;
	struct timespec latest_time = { 0, 0 };

	if (scan->heap) {
		/* The caller supplied our heap and pre-allocated its memory */
		heap = scan->heap;
		heap->gt = &started_after;
	} else {
		/* We need to allocate our own heap memory */
		heap = &tmp_heap;
		retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
		if (retval)
			/* cannot allocate the heap */
			return retval;
	}

 again:
	/*
	 * Scan tasks in the cgroup, using the scanner's "test_task" callback
	 * to determine which are of interest, and using the scanner's
	 * "process_task" callback to process any of them that need an update.
	 * Since we don't want to hold any locks during the task updates,
	 * gather tasks to be processed in a heap structure.
	 * The heap is sorted by descending task start time.
	 * If the statically-sized heap fills up, we overflow tasks that
	 * started later, and in future iterations only consider tasks that
	 * started after the latest task in the previous pass. This
	 * guarantees forward progress and that we don't miss any tasks.
	 */
	heap->size = 0;
	cgroup_iter_start(scan->cg, &it);
	while ((p = cgroup_iter_next(scan->cg, &it))) {
		/*
		 * Only affect tasks that qualify per the caller's callback,
		 * if he provided one
		 */
		if (scan->test_task && !scan->test_task(p, scan))
			continue;
		/*
		 * Only process tasks that started after the last task
		 * we processed
		 */
		if (!started_after_time(p, &latest_time, latest_task))
			continue;
		dropped = heap_insert(heap, p);
		if (dropped == NULL) {
			/*
			 * The new task was inserted; the heap wasn't
			 * previously full
			 */
			get_task_struct(p);
		} else if (dropped != p) {
			/*
			 * The new task was inserted, and pushed out a
			 * different task
			 */
			get_task_struct(p);
			put_task_struct(dropped);
		}
		/*
		 * Else the new task was newer than anything already in
		 * the heap and wasn't inserted
		 */
	}
	cgroup_iter_end(scan->cg, &it);

	if (heap->size) {
		for (i = 0; i < heap->size; i++) {
			struct task_struct *q = heap->ptrs[i];
			if (i == 0) {
				/* heap[0] is oldest: the next pass resumes after it */
				latest_time = q->start_time;
				latest_task = q;
			}
			/* Process the task per the caller's callback */
			scan->process_task(q, scan);
			put_task_struct(q);
		}
		/*
		 * If we had to process any tasks at all, scan again
		 * in case some of them were in the middle of forking
		 * children that didn't get processed.
		 * Not the most efficient way to do it, but it avoids
		 * having to take callback_mutex in the fork path
		 */
		goto again;
	}
	if (heap == &tmp_heap)
		heap_free(&tmp_heap);
	return 0;
}
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
2364static void *pidlist_allocate(int count)
2365{
2366 if (PIDLIST_TOO_LARGE(count))
2367 return vmalloc(count * sizeof(pid_t));
2368 else
2369 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
2370}
/* Free a pidlist buffer with whichever allocator produced it. */
static void pidlist_free(void *p)
{
	if (is_vmalloc_addr(p))
		vfree(p);
	else
		kfree(p);
}
/*
 * Resize a pidlist buffer to hold @newcount entries, preserving its
 * allocator (vmalloc has no realloc, so that path copies by hand).
 * NOTE(review): the vmalloc path copies @newcount entries from the old
 * buffer, so this is only safe for *shrinking* — growing would read
 * past the old allocation.  The sole caller (pidlist_uniq) only
 * shrinks; keep it that way or add an oldcount parameter.
 */
static void *pidlist_resize(void *p, int newcount)
{
	void *newlist;

	if (is_vmalloc_addr(p)) {
		newlist = vmalloc(newcount * sizeof(pid_t));
		if (!newlist)
			return NULL;
		memcpy(newlist, p, newcount * sizeof(pid_t));
		vfree(p);
	} else {
		newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
	}
	return newlist;
}
2393
2394
2395
2396
2397
2398
2399
2400
2401#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
2402static int pidlist_uniq(pid_t **p, int length)
2403{
2404 int src, dest = 1;
2405 pid_t *list = *p;
2406 pid_t *newlist;
2407
2408
2409
2410
2411
2412 if (length == 0 || length == 1)
2413 return length;
2414
2415 for (src = 1; src < length; src++) {
2416
2417 while (list[src] == list[src-1]) {
2418 src++;
2419 if (src == length)
2420 goto after;
2421 }
2422
2423 list[dest] = list[src];
2424 dest++;
2425 }
2426after:
2427
2428
2429
2430
2431
2432 if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
2433 newlist = pidlist_resize(list, dest);
2434 if (newlist)
2435 *p = newlist;
2436 }
2437 return dest;
2438}
2439
/* sort() comparator: ascending numeric order of pid_t values. */
static int cmppid(const void *a, const void *b)
{
	pid_t lhs = *(const pid_t *)a;
	pid_t rhs = *(const pid_t *)b;

	return lhs - rhs;
}
2444
2445
2446
2447
2448
2449
2450
/*
 * Find or create the pidlist of the given @type for @cgrp in the
 * caller's pid namespace.  The returned list's rwsem is held for
 * writing; use_count is bumped only for an existing list — for a new
 * list the caller (pidlist_array_load) takes the first reference.
 * Returns NULL on allocation failure.
 */
static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
						  enum cgroup_filetype type)
{
	struct cgroup_pidlist *l;
	/* don't need task_nsproxy() if we're looking at ourself */
	struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns);

	/*
	 * We can't drop the pidlist_mutex before taking the l->mutex in case
	 * the last ref-holder is trying to remove l from the list at the same
	 * time. Holding the pidlist_mutex precludes somebody taking whichever
	 * list we find out from under us - compare release_pid_array().
	 */
	mutex_lock(&cgrp->pidlist_mutex);
	list_for_each_entry(l, &cgrp->pidlists, links) {
		if (l->key.type == type && l->key.ns == ns) {
			/* found a matching list - drop the extra refcount */
			put_pid_ns(ns);
			/* make sure l doesn't vanish out from under us */
			down_write(&l->mutex);
			mutex_unlock(&cgrp->pidlist_mutex);
			l->use_count++;
			return l;
		}
	}
	/* entry not found; create a new one */
	l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
	if (!l) {
		mutex_unlock(&cgrp->pidlist_mutex);
		put_pid_ns(ns);
		return l;
	}
	init_rwsem(&l->mutex);
	down_write(&l->mutex);
	l->key.type = type;
	l->key.ns = ns;		/* takes over the get_pid_ns() reference */
	l->use_count = 0;	/* don't increment here */
	l->list = NULL;
	l->owner = cgrp;
	list_add(&l->links, &cgrp->pidlists);
	mutex_unlock(&cgrp->pidlist_mutex);
	return l;
}
2493
2494
2495
2496
/*
 * Load a cgroup's pidlist (for "tasks" or "cgroup.procs") into a sorted
 * array, deduplicated for the procs case, and publish it via the shared
 * per-(type, pid-ns) pidlist entry.  On success *lp points at the entry
 * with its use_count raised.  Returns 0 or -ENOMEM.
 */
static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
			      struct cgroup_pidlist **lp)
{
	pid_t *array;
	int length;
	int pid, n = 0; /* used for populating the array */
	struct cgroup_iter it;
	struct task_struct *tsk;
	struct cgroup_pidlist *l;

	/*
	 * If cgroup gets more users after we read count, we won't have
	 * enough space - tough.  This race is indistinguishable to the
	 * caller from the case that the additional cgroup users didn't
	 * show up until sometime later on.
	 */
	length = cgroup_task_count(cgrp);
	array = pidlist_allocate(length);
	if (!array)
		return -ENOMEM;
	/* now, populate the array */
	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		if (unlikely(n == length))
			break;
		/* get tgid or pid for procs or tasks file respectively */
		if (type == CGROUP_FILE_PROCS)
			pid = task_tgid_vnr(tsk);
		else
			pid = task_pid_vnr(tsk);
		if (pid > 0) /* make sure to only use valid results */
			array[n++] = pid;
	}
	cgroup_iter_end(cgrp, &it);
	length = n;
	/* now sort & (if procs) strip out duplicates */
	sort(array, length, sizeof(pid_t), cmppid, NULL);
	if (type == CGROUP_FILE_PROCS)
		length = pidlist_uniq(&array, length);
	l = cgroup_pidlist_find(cgrp, type);
	if (!l) {
		pidlist_free(array);
		return -ENOMEM;
	}
	/* store array, freeing old if necessary - lock already held */
	pidlist_free(l->list);
	l->list = array;
	l->length = length;
	l->use_count++;
	up_write(&l->mutex);
	*lp = l;
	return 0;
}
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
/**
 * cgroupstats_build - build and fill cgroupstats
 * @stats: cgroupstats to fill information into
 * @dentry: A dentry entry belonging to the cgroup for which stats have
 * been requested.
 *
 * Walks every task in the cgroup and tallies its scheduler state into
 * @stats.  Returns 0, or -EINVAL when @dentry isn't a cgroup directory.
 */
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
	int ret = -EINVAL;
	struct cgroup *cgrp;
	struct cgroup_iter it;
	struct task_struct *tsk;

	/*
	 * Validate dentry by checking the superblock operations,
	 * and make sure it's a directory.
	 */
	if (dentry->d_sb->s_op != &cgroup_ops ||
	    !S_ISDIR(dentry->d_inode->i_mode))
		goto err;

	ret = 0;
	cgrp = dentry->d_fsdata;

	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		switch (tsk->state) {
		case TASK_RUNNING:
			stats->nr_running++;
			break;
		case TASK_INTERRUPTIBLE:
			stats->nr_sleeping++;
			break;
		case TASK_UNINTERRUPTIBLE:
			stats->nr_uninterruptible++;
			break;
		case TASK_STOPPED:
			stats->nr_stopped++;
			break;
		default:
			if (delayacct_is_task_waiting_on_io(tsk))
				stats->nr_io_wait++;
			break;
		}
	}
	cgroup_iter_end(cgrp, &it);

err:
	return ret;
}
2604
2605
2606
2607
2608
2609
2610
2611
/*
 * seq_file ->start() for pidlist files.  *pos is interpreted as the pid
 * to resume at (not an index), because the underlying array can change
 * between reads; a binary search finds that pid or the next higher one.
 * Takes l->mutex for reading — held until cgroup_pidlist_stop().
 */
static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
{
	/*
	 * Initially we receive a position value that corresponds to
	 * one more than the last pid shown (or 0 on the first call or
	 * after a seek to the start). Use a binary-search to find the
	 * next pid to display, if any
	 */
	struct cgroup_pidlist *l = s->private;
	int index = 0, pid = *pos;
	int *iter;

	down_read(&l->mutex);
	if (pid) {
		int end = l->length;

		while (index < end) {
			int mid = (index + end) / 2;
			if (l->list[mid] == pid) {
				index = mid;
				break;
			} else if (l->list[mid] <= pid)
				index = mid + 1;
			else
				end = mid;
		}
	}
	/* If we're off the end of the array, we're done */
	if (index >= l->length)
		return NULL;
	/* Update the abstract position to be the actual pid that we found */
	iter = l->list + index;
	*pos = *iter;
	return iter;
}
2647
/* seq_file ->stop(): drop the read lock taken in cgroup_pidlist_start(). */
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
	struct cgroup_pidlist *l = s->private;
	up_read(&l->mutex);
}
2653
2654static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
2655{
2656 struct cgroup_pidlist *l = s->private;
2657 pid_t *p = v;
2658 pid_t *end = l->list + l->length;
2659
2660
2661
2662
2663 p++;
2664 if (p >= end) {
2665 return NULL;
2666 } else {
2667 *pos = *p;
2668 return p;
2669 }
2670}
2671
/* seq_file ->show(): print one pid per line. */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	return seq_printf(s, "%d\n", *(int *)v);
}
2676
2677
2678
2679
2680
/*
 * seq_operations functions for iterating on pidlists through seq_file -
 * independent of whether it's tasks or procs
 */
static const struct seq_operations cgroup_pidlist_seq_operations = {
	.start = cgroup_pidlist_start,
	.stop = cgroup_pidlist_stop,
	.next = cgroup_pidlist_next,
	.show = cgroup_pidlist_show,
};
2687
/*
 * Drop one reference on a shared pidlist; the last reference unlinks
 * it from the cgroup and frees everything, including the pid-ns ref.
 */
static void cgroup_release_pid_array(struct cgroup_pidlist *l)
{
	/*
	 * The pidlist_mutex must be taken before the l->mutex here, to match
	 * the lock order in cgroup_pidlist_find(); otherwise a new-list
	 * lookup could race with this teardown.
	 */
	mutex_lock(&l->owner->pidlist_mutex);
	down_write(&l->mutex);
	BUG_ON(!l->use_count);
	if (!--l->use_count) {
		/* we're the last user if refcount is 0; remove and free */
		list_del(&l->links);
		mutex_unlock(&l->owner->pidlist_mutex);
		pidlist_free(l->list);
		put_pid_ns(l->key.ns);
		up_write(&l->mutex);
		kfree(l);
		return;
	}
	mutex_unlock(&l->owner->pidlist_mutex);
	up_write(&l->mutex);
}
2712
/*
 * Release handler for "tasks"/"cgroup.procs" files: drop the pidlist
 * reference taken at open, then tear down the seq_file.  Write-only
 * opens never attached a pidlist, so there is nothing to drop.
 */
static int cgroup_pidlist_release(struct inode *inode, struct file *file)
{
	struct cgroup_pidlist *l;
	if (!(file->f_mode & FMODE_READ))
		return 0;
	/*
	 * the seq_file will only be initialized if the file was opened for
	 * reading; hence we check if it's not null only in that case.
	 */
	l = ((struct seq_file *)file->private_data)->private;
	cgroup_release_pid_array(l);
	return seq_release(inode, file);
}
2726
/*
 * file_operations swapped in by cgroup_pidlist_open(); reads go through
 * the pidlist seq_file machinery, writes through the normal dispatcher.
 */
static const struct file_operations cgroup_pidlist_operations = {
	.read = seq_read,
	.llseek = seq_lseek,
	.write = cgroup_file_write,
	.release = cgroup_pidlist_release,
};
2733
2734
2735
2736
2737
2738
2739
/*
 * The following functions handle opens on a file that displays a pidlist
 * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
 * in the cgroup.
 */
/* which pidlist file are we talking about? */
static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
{
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
	struct cgroup_pidlist *l;
	int retval;

	/* Nothing to do for write-only files */
	if (!(file->f_mode & FMODE_READ))
		return 0;

	/* have the array populated */
	retval = pidlist_array_load(cgrp, type, &l);
	if (retval)
		return retval;
	/* configure file information */
	file->f_op = &cgroup_pidlist_operations;

	retval = seq_open(file, &cgroup_pidlist_seq_operations);
	if (retval) {
		cgroup_release_pid_array(l);
		return retval;
	}
	((struct seq_file *)file->private_data)->private = l;
	return 0;
}
/* Open handler for the "tasks" file: one entry per thread. */
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
}
/* Open handler for the "cgroup.procs" file: one entry per tgid. */
static int cgroup_procs_open(struct inode *unused, struct file *file)
{
	return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
}
2773
/* read_u64 handler for "notify_on_release": 1 if the flag is set. */
static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
					 struct cftype *cft)
{
	return notify_on_release(cgrp);
}
2779
/*
 * write_u64 handler for "notify_on_release".  Writing also clears
 * CGRP_RELEASABLE first, so a prior emptiness event doesn't trigger a
 * release-agent run purely because the flag was just enabled.
 */
static int cgroup_write_notify_on_release(struct cgroup *cgrp,
					  struct cftype *cft,
					  u64 val)
{
	clear_bit(CGRP_RELEASABLE, &cgrp->flags);
	if (val)
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
	else
		clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
	return 0;
}
2791
2792
2793
2794
2795
/*
 * for the common functions, 'private' gives the type of file
 */
#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
/* Base control files present in every cgroup directory. */
static struct cftype files[] = {
	{
		.name = "tasks",
		.open = cgroup_tasks_open,
		.write_u64 = cgroup_tasks_write,
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO | S_IWUSR,
	},
	{
		.name = CGROUP_FILE_GENERIC_PREFIX "procs",
		.open = cgroup_procs_open,
		/* .write_u64 = cgroup_procs_write, TODO */
		.release = cgroup_pidlist_release,
		.mode = S_IRUGO,
	},
	{
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
	},
};
2818
/* "release_agent" exists only in the hierarchy's top cgroup. */
static struct cftype cft_release_agent = {
	.name = "release_agent",
	.read_seq_string = cgroup_release_agent_show,
	.write_string = cgroup_release_agent_write,
	.max_write_len = PATH_MAX,
};
2825
/*
 * Populate a cgroup directory: the base files, the top-cgroup-only
 * release_agent file, and each bound subsystem's own files; finally
 * make each css's id point back at the now-visible css.
 */
static int cgroup_populate_dir(struct cgroup *cgrp)
{
	int err;
	struct cgroup_subsys *ss;

	/* First clear out any existing files */
	cgroup_clear_directory(cgrp->dentry);

	err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
	if (err < 0)
		return err;

	if (cgrp == cgrp->top_cgroup) {
		if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
			return err;
	}

	for_each_subsys(cgrp->root, ss) {
		if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
			return err;
	}
	/* This cgroup is ready now */
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		/*
		 * Update id->css pointer and make this css visible from
		 * CSS ID functions. This pointer will be dereferenced
		 * from RCU-read-side without locks.
		 */
		if (css->id)
			rcu_assign_pointer(css->id->css, css);
	}

	return 0;
}
2861
/*
 * Initialise a freshly created css and link it into its cgroup's
 * subsys slot.  The css starts with one reference; the root css (on
 * dummytop) is additionally flagged CSS_ROOT.
 */
static void init_cgroup_css(struct cgroup_subsys_state *css,
			       struct cgroup_subsys *ss,
			       struct cgroup *cgrp)
{
	css->cgroup = cgrp;
	atomic_set(&css->refcnt, 1);
	css->flags = 0;
	css->id = NULL;
	if (cgrp == dummytop)
		set_bit(CSS_ROOT, &css->flags);
	BUG_ON(cgrp->subsys[ss->subsys_id]);
	cgrp->subsys[ss->subsys_id] = css;
}
2875
2876static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
2877{
2878
2879 int i;
2880
2881 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2882 struct cgroup_subsys *ss = subsys[i];
2883 if (ss->root == root)
2884 mutex_lock(&ss->hierarchy_mutex);
2885 }
2886}
2887
2888static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
2889{
2890 int i;
2891
2892 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2893 struct cgroup_subsys *ss = subsys[i];
2894 if (ss->root == root)
2895 mutex_unlock(&ss->hierarchy_mutex);
2896 }
2897}
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
2908 mode_t mode)
2909{
2910 struct cgroup *cgrp;
2911 struct cgroupfs_root *root = parent->root;
2912 int err = 0;
2913 struct cgroup_subsys *ss;
2914 struct super_block *sb = root->sb;
2915
2916 cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
2917 if (!cgrp)
2918 return -ENOMEM;
2919
2920
2921
2922
2923
2924
2925 atomic_inc(&sb->s_active);
2926
2927 mutex_lock(&cgroup_mutex);
2928
2929 init_cgroup_housekeeping(cgrp);
2930
2931 cgrp->parent = parent;
2932 cgrp->root = parent->root;
2933 cgrp->top_cgroup = parent->top_cgroup;
2934
2935 if (notify_on_release(parent))
2936 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
2937
2938 for_each_subsys(root, ss) {
2939 struct cgroup_subsys_state *css = ss->create(ss, cgrp);
2940 if (IS_ERR(css)) {
2941 err = PTR_ERR(css);
2942 goto err_destroy;
2943 }
2944 init_cgroup_css(css, ss, cgrp);
2945 if (ss->use_id)
2946 if (alloc_css_id(ss, parent, cgrp))
2947 goto err_destroy;
2948
2949 }
2950
2951 cgroup_lock_hierarchy(root);
2952 list_add(&cgrp->sibling, &cgrp->parent->children);
2953 cgroup_unlock_hierarchy(root);
2954 root->number_of_cgroups++;
2955
2956 err = cgroup_create_dir(cgrp, dentry, mode);
2957 if (err < 0)
2958 goto err_remove;
2959
2960
2961 BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
2962
2963 err = cgroup_populate_dir(cgrp);
2964
2965
2966 mutex_unlock(&cgroup_mutex);
2967 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
2968
2969 return 0;
2970
2971 err_remove:
2972
2973 cgroup_lock_hierarchy(root);
2974 list_del(&cgrp->sibling);
2975 cgroup_unlock_hierarchy(root);
2976 root->number_of_cgroups--;
2977
2978 err_destroy:
2979
2980 for_each_subsys(root, ss) {
2981 if (cgrp->subsys[ss->subsys_id])
2982 ss->destroy(ss, cgrp);
2983 }
2984
2985 mutex_unlock(&cgroup_mutex);
2986
2987
2988 deactivate_super(sb);
2989
2990 kfree(cgrp);
2991 return err;
2992}
2993
/* VFS mkdir hook: create a child cgroup under the parent directory's cgroup. */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	struct cgroup *c_parent = dentry->d_parent->d_fsdata;

	/* the vfs holds inode->i_mutex already */
	return cgroup_create(c_parent, dentry, mode | S_IFDIR);
}
3001
/*
 * Return non-zero if any subsystem state attached to @cgrp still has
 * external references (refcnt > 1; the baseline reference is the
 * cgroup's own).  Used as a racy pre-check before removal.
 */
static int cgroup_has_css_refs(struct cgroup *cgrp)
{
	/*
	 * Check the reference count on each subsystem. Since we already
	 * established that there are no tasks in the cgroup, if the css
	 * refcount is also 1, then there should be no outstanding
	 * references, so the subsystem is safe to destroy. We scan across
	 * all the subsystems rather than using the per-hierarchy linked
	 * list of mounted subsystems since we can be called via
	 * check_for_release() with no synchronization other than RCU,
	 * and the subsystem linked list isn't RCU-safe.
	 */
	int i;
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		struct cgroup_subsys_state *css;
		/* Skip subsystems not in this hierarchy */
		if (ss->root != cgrp->root)
			continue;
		css = cgrp->subsys[ss->subsys_id];
		/*
		 * When called from check_for_release() it's possible
		 * that by this point the cgroup has been removed and the
		 * css deleted. But a false-positive doesn't matter,
		 * because it means that the check in
		 * cgroup_rmdir() will fail with -EBUSY when there are
		 * no outstanding references.
		 */
		if (css && (atomic_read(&css->refcnt) > 1))
			return 1;
	}
	return 0;
}
3032
3033
3034
3035
3036
3037
3038
/*
 * Atomically drop every css refcount on @cgrp from 1 to 0 so that no
 * new references can be taken, or roll back and fail if any css is
 * still externally referenced.  Runs with IRQs off so css_tryget()
 * spinning elsewhere can't deadlock against us.  Returns true when the
 * cgroup is now unreferencable (all csses marked CSS_REMOVED).
 */
static int cgroup_clear_css_refs(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	unsigned long flags;
	bool failed = false;
	local_irq_save(flags);
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		int refcnt;
		while (1) {
			/* We can only remove a CSS with a refcnt==1 */
			refcnt = atomic_read(&css->refcnt);
			if (refcnt > 1) {
				failed = true;
				goto done;
			}
			BUG_ON(!refcnt);
			/*
			 * Drop the refcnt to 0 while we check other
			 * subsystems. This will cause any racing
			 * css_tryget() to spin until we set the
			 * CSS_REMOVED bits or abort
			 */
			if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
				break;
			cpu_relax();
		}
	}
 done:
	for_each_subsys(cgrp->root, ss) {
		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
		if (failed) {
			/*
			 * Restore old refcnt if we previously managed
			 * to clear it from 1 to 0
			 */
			if (!atomic_read(&css->refcnt))
				atomic_set(&css->refcnt, 1);
		} else {
			/* Commit the fact that the CSS is removed */
			set_bit(CSS_REMOVED, &css->flags);
		}
	}
	local_irq_restore(flags);
	return !failed;
}
3085
/*
 * VFS rmdir hook for cgroup directories.  Fails with -EBUSY while the
 * cgroup has tasks or children; otherwise runs the subsystems'
 * pre_destroy callbacks, drives all css refcounts to zero (retrying,
 * with a CGRP_WAIT_ON_RMDIR sleep, while transient references exist)
 * and finally unlinks the cgroup from the hierarchy and the dcache.
 */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	DEFINE_WAIT(wait);
	int ret;

	/* the vfs holds both inode->i_mutex already */
again:
	mutex_lock(&cgroup_mutex);
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	mutex_unlock(&cgroup_mutex);

	/*
	 * In general, subsystem has no css->refcnt after pre_destroy(). But
	 * in racy cases, subsystem may have to get css->refcnt after
	 * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
	 * makes rmdir return -EBUSY too often. To avoid that, we use waitqueue
	 * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
	 * and subsystem's reference count handling. Please see css_get/put
	 * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
	 */
	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	/*
	 * Call pre_destroy handlers of subsys. Notify subsystems
	 * that rmdir() request comes.
	 */
	ret = cgroup_call_pre_destroy(cgrp);
	if (ret) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		return ret;
	}

	mutex_lock(&cgroup_mutex);
	parent = cgrp->parent;
	/* re-check emptiness: pre_destroy ran without cgroup_mutex held */
	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
	if (!cgroup_clear_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		/*
		 * Because someone may call cgroup_wakeup_rmdir_waiter() before
		 * prepare_to_wait(), we need to check this flag.
		 */
		if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
			schedule();
		finish_wait(&cgroup_rmdir_waitq, &wait);
		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
		if (signal_pending(current))
			return -EINTR;
		goto again;
	}
	/* NO css_tryget() can success after here. */
	finish_wait(&cgroup_rmdir_waitq, &wait);
	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

	spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del(&cgrp->release_list);
	spin_unlock(&release_list_lock);

	cgroup_lock_hierarchy(cgrp->root);
	/* delete this cgroup from parent->children */
	list_del(&cgrp->sibling);
	cgroup_unlock_hierarchy(cgrp->root);

	spin_lock(&cgrp->dentry->d_lock);
	d = dget(cgrp->dentry);
	spin_unlock(&d->d_lock);

	cgroup_d_remove_dir(d);
	dput(d);

	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	mutex_unlock(&cgroup_mutex);
	return 0;
}
3178
/*
 * Attach subsystem @ss to the dummy hierarchy and create its root
 * (dummytop) state.  Called once per subsystem at boot; the statement
 * order is significant.
 */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	/* Create the top cgroup state for this subsystem */
	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;
	css = ss->create(ss, dummytop);
	/* We don't handle early creation failures gracefully */
	BUG_ON(IS_ERR(css));
	init_cgroup_css(css, ss, dummytop);

	/* Update init_css_set to contain a subsys
	 * pointer to this state - since the subsystem is
	 * newly registered, all tasks (and hence init_css_set)
	 * are in the subsystem's top cgroup. */
	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];

	need_forkexit_callback |= ss->fork || ss->exit;

	/* At system boot, before all subsystems have been
	 * registered, no tasks have been forked, so we don't
	 * need to invoke fork callbacks here. */
	BUG_ON(!list_empty(&init_task.tasks));

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	ss->active = 1;
}
3210
3211
3212
3213
3214
3215
3216
/**
 * cgroup_init_early - cgroup initialization at system boot
 *
 * Initialize cgroups at system boot, and initialize any
 * subsystems that request early init.
 */
int __init cgroup_init_early(void)
{
	int i;
	atomic_set(&init_css_set.refcount, 1);
	INIT_LIST_HEAD(&init_css_set.cg_links);
	INIT_LIST_HEAD(&init_css_set.tasks);
	INIT_HLIST_NODE(&init_css_set.hlist);
	css_set_count = 1;
	init_cgroup_root(&rootnode);
	root_count = 1;
	init_task.cgroups = &init_css_set;

	/* link init_css_set into the dummy hierarchy's top cgroup */
	init_css_set_link.cg = &init_css_set;
	init_css_set_link.cgrp = dummytop;
	list_add(&init_css_set_link.cgrp_link_list,
		 &rootnode.top_cgroup.css_sets);
	list_add(&init_css_set_link.cg_link_list,
		 &init_css_set.cg_links);

	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&css_set_table[i]);

	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		/* sanity-check the statically generated subsys[] table */
		BUG_ON(!ss->name);
		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
		BUG_ON(!ss->create);
		BUG_ON(!ss->destroy);
		if (ss->subsys_id != i) {
			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
			       ss->name, ss->subsys_id);
			BUG();
		}

		if (ss->early_init)
			cgroup_init_subsys(ss);
	}
	return 0;
}
3257
3258
3259
3260
3261
3262
3263
/**
 * cgroup_init - cgroup initialization
 *
 * Register cgroup filesystem and /proc file, and initialize
 * any subsystems that didn't request early init.
 */
int __init cgroup_init(void)
{
	int err;
	int i;
	struct hlist_head *hhead;

	err = bdi_init(&cgroup_backing_dev_info);
	if (err)
		return err;

	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		if (!ss->early_init)
			cgroup_init_subsys(ss);
		/* css_id support is opt-in per subsystem */
		if (ss->use_id)
			cgroup_subsys_init_idr(ss);
	}

	/* Add init_css_set to the hash table */
	hhead = css_set_hash(init_css_set.subsys);
	hlist_add_head(&init_css_set.hlist, hhead);
	BUG_ON(!init_root_id(&rootnode));
	err = register_filesystem(&cgroup_fs_type);
	if (err < 0)
		goto out;

	proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

out:
	/* unwind the bdi on any failure above */
	if (err)
		bdi_destroy(&cgroup_backing_dev_info);

	return err;
}
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
/*
 * proc_cgroup_show - show /proc/<pid>/cgroup.
 *
 * Prints one line per active hierarchy in the form
 * "<hierarchy-id>:<subsys-list>[,name=<name>]:<cgroup-path>".
 * cgroup_mutex keeps the roots and the task's cgroups stable while
 * they are formatted.
 */
static int proc_cgroup_show(struct seq_file *m, void *v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	int retval;
	struct cgroupfs_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		goto out;

	retval = -ESRCH;
	pid = m->private;
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk)
		goto out_free;

	retval = 0;

	mutex_lock(&cgroup_mutex);

	for_each_active_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int count = 0;

		seq_printf(m, "%d:", root->hierarchy_id);
		for_each_subsys(root, ss)
			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
		/* named hierarchies additionally print "name=<name>" */
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		seq_putc(m, ':');
		cgrp = task_cgroup_from_root(tsk, root);
		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
		if (retval < 0)
			goto out_unlock;
		seq_puts(m, buf);
		seq_putc(m, '\n');
	}

out_unlock:
	mutex_unlock(&cgroup_mutex);
	put_task_struct(tsk);
out_free:
	kfree(buf);
out:
	return retval;
}
3363
3364static int cgroup_open(struct inode *inode, struct file *file)
3365{
3366 struct pid *pid = PROC_I(inode)->pid;
3367 return single_open(file, proc_cgroup_show, pid);
3368}
3369
/* seq_file boilerplate for /proc/<pid>/cgroup */
const struct file_operations proc_cgroup_operations = {
	.open = cgroup_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
3376
3377
3378static int proc_cgroupstats_show(struct seq_file *m, void *v)
3379{
3380 int i;
3381
3382 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
3383 mutex_lock(&cgroup_mutex);
3384 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3385 struct cgroup_subsys *ss = subsys[i];
3386 seq_printf(m, "%s\t%d\t%d\t%d\n",
3387 ss->name, ss->root->hierarchy_id,
3388 ss->root->number_of_cgroups, !ss->disabled);
3389 }
3390 mutex_unlock(&cgroup_mutex);
3391 return 0;
3392}
3393
/* open handler for /proc/cgroups; no per-open private data is needed */
static int cgroupstats_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_cgroupstats_show, NULL);
}
3398
/* seq_file boilerplate for /proc/cgroups */
static const struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
/**
 * cgroup_fork - attach a newly forked task to its parent's css_set
 * @child: the new task
 *
 * The child starts out sharing (and holding a reference on) the
 * parent's current css_set; task_lock(current) keeps that pointer
 * stable across the copy.  The child's cg_list starts empty - it is
 * linked later in cgroup_post_fork() if needed.
 */
void cgroup_fork(struct task_struct *child)
{
	task_lock(current);
	child->cgroups = current->cgroups;
	get_css_set(child->cgroups);
	task_unlock(current);
	INIT_LIST_HEAD(&child->cg_list);
}
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439void cgroup_fork_callbacks(struct task_struct *child)
3440{
3441 if (need_forkexit_callback) {
3442 int i;
3443 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3444 struct cgroup_subsys *ss = subsys[i];
3445 if (ss->fork)
3446 ss->fork(ss, child);
3447 }
3448 }
3449}
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
/**
 * cgroup_post_fork - finish cgroup setup for a new task
 * @child: the task in question
 *
 * If use_task_css_set_links has been enabled, link the child onto its
 * css_set's task list.  Lock order as taken here: css_set_lock (write)
 * outside task_lock(child).  The list_empty() check makes the link
 * idempotent.
 */
void cgroup_post_fork(struct task_struct *child)
{
	if (use_task_css_set_links) {
		write_lock(&css_set_lock);
		task_lock(child);
		if (list_empty(&child->cg_list))
			list_add(&child->cg_list, &child->cgroups->tasks);
		task_unlock(child);
		write_unlock(&css_set_lock);
	}
}
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
/**
 * cgroup_exit - detach cgroup state from an exiting task
 * @tsk: the task in question
 * @run_callbacks: run each subsystem's ->exit hook first?
 *
 * Unlinks @tsk from its css_set's task list, repoints it at
 * init_css_set, and drops the reference it held on its old css_set.
 */
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
	int i;
	struct css_set *cg;

	if (run_callbacks && need_forkexit_callback) {
		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->exit)
				ss->exit(ss, tsk);
		}
	}

	/*
	 * Unlink from the css_set task list if necessary.  The unlocked
	 * list_empty() check is a cheap fast path; the state is
	 * re-checked under css_set_lock before actually deleting,
	 * since it may have changed concurrently.
	 */
	if (!list_empty(&tsk->cg_list)) {
		write_lock(&css_set_lock);
		if (!list_empty(&tsk->cg_list))
			list_del(&tsk->cg_list);
		write_unlock(&css_set_lock);
	}

	/* Reassign the task to init_css_set and drop the old reference. */
	task_lock(tsk);
	cg = tsk->cgroups;
	tsk->cgroups = &init_css_set;
	task_unlock(tsk);
	if (cg)
		put_css_set_taskexit(cg);
}
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
/**
 * cgroup_clone - clone the cgroup the given subsystem is attached to
 * @tsk: the task to be moved
 * @subsys: the given subsystem
 * @nodename: the name for the new cgroup
 *
 * Duplicate @tsk's current cgroup in the hierarchy that @subsys is
 * attached to, and move @tsk into the new child.  Returns 0 on
 * success (including the no-op cases) or a negative errno.
 */
int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
		 char *nodename)
{
	struct dentry *dentry;
	int ret = 0;
	struct cgroup *parent, *child;
	struct inode *inode;
	struct css_set *cg;
	struct cgroupfs_root *root;
	struct cgroup_subsys *ss;

	/* We shouldn't be called by an inactive subsystem */
	BUG_ON(!subsys->active);

	/* First figure out what hierarchy and cgroup we're dealing
	 * with, and pin them so we can drop cgroup_mutex */
	mutex_lock(&cgroup_mutex);
 again:
	root = subsys->root;
	if (root == &rootnode) {
		/* subsystem is only on the dummy hierarchy: nothing to do */
		mutex_unlock(&cgroup_mutex);
		return 0;
	}

	/* Pin the hierarchy's superblock */
	if (!atomic_inc_not_zero(&root->sb->s_active)) {
		/* We race with the final deactivate_super() */
		mutex_unlock(&cgroup_mutex);
		return 0;
	}

	/* Keep the source css_set alive across the unlocked section */
	task_lock(tsk);
	parent = task_cgroup(tsk, subsys->subsys_id);
	cg = tsk->cgroups;
	get_css_set(cg);
	task_unlock(tsk);

	mutex_unlock(&cgroup_mutex);

	/* Now do the VFS work to create the new cgroup directory */
	inode = parent->dentry->d_inode;

	/* Hold the parent directory mutex across this operation to
	 * stop anyone else deleting the new cgroup */
	mutex_lock(&inode->i_mutex);
	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
	if (IS_ERR(dentry)) {
		printk(KERN_INFO
		       "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
		       PTR_ERR(dentry));
		ret = PTR_ERR(dentry);
		goto out_release;
	}

	/* Create the cgroup directory, which also creates the cgroup */
	ret = vfs_mkdir(inode, dentry, 0755);
	child = __d_cgrp(dentry);
	dput(dentry);
	if (ret) {
		printk(KERN_INFO
		       "Failed to create cgroup %s: %d\n", nodename,
		       ret);
		goto out_release;
	}

	/* The cgroup now exists.  Retake cgroup_mutex and check that
	 * the hierarchy and the task's cgroup are still what we
	 * thought they were. */
	mutex_lock(&cgroup_mutex);
	if ((root != subsys->root) ||
	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
		/* We raced with a remount or a concurrent migration */
		mutex_unlock(&inode->i_mutex);
		put_css_set(cg);

		deactivate_super(root->sb);
		/* The new cgroup is still accessible in the VFS, but
		 * we're not going to try to rmdir() it at this
		 * point. */
		printk(KERN_INFO
		       "Race in cgroup_clone() - leaking cgroup %s\n",
		       nodename);
		goto again;
	}

	/* do any required auto-setup */
	for_each_subsys(root, ss) {
		if (ss->post_clone)
			ss->post_clone(ss, child);
	}

	/* All seems fine.  Finish by moving the task into the new cgroup */
	ret = cgroup_attach_task(child, tsk);
	mutex_unlock(&cgroup_mutex);

 out_release:
	mutex_unlock(&inode->i_mutex);

	/* drop the extra css_set reference and the superblock pin */
	mutex_lock(&cgroup_mutex);
	put_css_set(cg);
	mutex_unlock(&cgroup_mutex);
	deactivate_super(root->sb);
	return ret;
}
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
3670{
3671 int ret;
3672 struct cgroup *target;
3673
3674 if (cgrp == dummytop)
3675 return 1;
3676
3677 target = task_cgroup_from_root(task, cgrp->root);
3678 while (cgrp != target && cgrp!= cgrp->top_cgroup)
3679 cgrp = cgrp->parent;
3680 ret = (cgrp == target);
3681 return ret;
3682}
3683
3684static void check_for_release(struct cgroup *cgrp)
3685{
3686
3687
3688 if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
3689 && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
3690
3691
3692
3693 int need_schedule_work = 0;
3694 spin_lock(&release_list_lock);
3695 if (!cgroup_is_removed(cgrp) &&
3696 list_empty(&cgrp->release_list)) {
3697 list_add(&cgrp->release_list, &release_list);
3698 need_schedule_work = 1;
3699 }
3700 spin_unlock(&release_list_lock);
3701 if (need_schedule_work)
3702 schedule_work(&release_agent_work);
3703 }
3704}
3705
/*
 * Drop a css reference.  When the count falls back to its base value
 * of 1 (no external users left), the owning cgroup may have become
 * releasable and any rmdir waiter is woken so it can retry.  The RCU
 * read section keeps the css/cgroup valid while they are inspected.
 */
void __css_put(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	int val;
	rcu_read_lock();
	val = atomic_dec_return(&css->refcnt);
	if (val == 1) {
		if (notify_on_release(cgrp)) {
			set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}
		cgroup_wakeup_rmdir_waiter(cgrp);
	}
	rcu_read_unlock();
	/* val < 1 means more puts than gets: refcount underflow */
	WARN_ON_ONCE(val < 1);
}
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
/*
 * Work handler that notifies userspace about released cgroups: for
 * each queued cgroup, invoke the hierarchy's release agent with the
 * cgroup's path as argv[1].
 *
 * release_list_lock is dropped around the per-entry work (allocation,
 * path formatting) and cgroup_mutex is dropped around
 * call_usermodehelper(), since the helper may itself re-enter cgroup
 * code (e.g. to rmdir the now-empty cgroup) and would otherwise
 * deadlock against us.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf = NULL, *agentbuf = NULL;
		struct cgroup *cgrp = list_entry(release_list.next,
						 struct cgroup,
						 release_list);
		list_del_init(&cgrp->release_list);
		spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf)
			goto continue_free;
		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
			goto continue_free;
		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
		if (!agentbuf)
			goto continue_free;

		i = 0;
		argv[i++] = agentbuf;
		argv[i++] = pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal environment for the usermode helper */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/* Drop the lock while we invoke the usermode helper,
		 * since the exec could involve hitting disk and hence
		 * be a slow process */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		mutex_lock(&cgroup_mutex);
 continue_free:
		kfree(pathbuf);
		kfree(agentbuf);
		spin_lock(&release_list_lock);
	}
	spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
3794
3795static int __init cgroup_disable(char *str)
3796{
3797 int i;
3798 char *token;
3799
3800 while ((token = strsep(&str, ",")) != NULL) {
3801 if (!*token)
3802 continue;
3803
3804 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3805 struct cgroup_subsys *ss = subsys[i];
3806
3807 if (!strcmp(token, ss->name)) {
3808 ss->disabled = 1;
3809 printk(KERN_INFO "Disabling %s control group"
3810 " subsystem\n", ss->name);
3811 break;
3812 }
3813 }
3814 }
3815 return 1;
3816}
3817__setup("cgroup_disable=", cgroup_disable);
3818
3819
3820
3821
3822
3823
3824
3825
3826unsigned short css_id(struct cgroup_subsys_state *css)
3827{
3828 struct css_id *cssid = rcu_dereference(css->id);
3829
3830 if (cssid)
3831 return cssid->id;
3832 return 0;
3833}
3834
3835unsigned short css_depth(struct cgroup_subsys_state *css)
3836{
3837 struct css_id *cssid = rcu_dereference(css->id);
3838
3839 if (cssid)
3840 return cssid->depth;
3841 return 0;
3842}
3843
3844bool css_is_ancestor(struct cgroup_subsys_state *child,
3845 const struct cgroup_subsys_state *root)
3846{
3847 struct css_id *child_id = rcu_dereference(child->id);
3848 struct css_id *root_id = rcu_dereference(root->id);
3849
3850 if (!child_id || !root_id || (child_id->depth < root_id->depth))
3851 return false;
3852 return child_id->stack[root_id->depth] == root_id->id;
3853}
3854
3855static void __free_css_id_cb(struct rcu_head *head)
3856{
3857 struct css_id *id;
3858
3859 id = container_of(head, struct css_id, rcu_head);
3860 kfree(id);
3861}
3862
/*
 * free_css_id - detach and release the css_id attached to @css.
 *
 * Ordering matters: the id<->css pointers are cleared (via
 * rcu_assign_pointer) and the idr entry removed under ss->id_lock
 * before the structure is handed to call_rcu, so the memory is only
 * freed after concurrent RCU readers are done with it.
 */
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
	struct css_id *id = css->id;

	/* css may never have been assigned an id */
	if (!id)
		return;

	BUG_ON(!ss->use_id);

	rcu_assign_pointer(id->css, NULL);
	rcu_assign_pointer(css->id, NULL);
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, id->id);
	spin_unlock(&ss->id_lock);
	call_rcu(&id->rcu_head, __free_css_id_cb);
}
3879
3880
3881
3882
3883
3884
/*
 * get_new_cssid - allocate a css_id with room for @depth + 1 ancestor
 * stack entries and register it in ss->idr.
 *
 * Returns the new (zeroed, id/depth filled in) css_id or an ERR_PTR.
 * ss->id_lock serializes idr modification; idr_pre_get() preloads
 * memory outside the lock.
 */
static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
{
	struct css_id *newid;
	int myid, error, size;

	BUG_ON(!ss->use_id);

	/* flexible-array tail: one slot per ancestor level plus self */
	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
	newid = kzalloc(size, GFP_KERNEL);
	if (!newid)
		return ERR_PTR(-ENOMEM);

	/* preload idr memory (may sleep) before taking the spinlock */
	if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
		error = -ENOMEM;
		goto err_out;
	}
	spin_lock(&ss->id_lock);
	/* id 0 is reserved; allocate starting from 1 */
	error = idr_get_new_above(&ss->idr, newid, 1, &myid);
	spin_unlock(&ss->id_lock);

	/* idr allocation failed: report as "no space for a new ID" */
	if (error) {
		error = -ENOSPC;
		goto err_out;
	}
	/* ids are stored in an unsigned short; reject anything larger */
	if (myid > CSS_ID_MAX)
		goto remove_idr;

	newid->id = myid;
	newid->depth = depth;
	return newid;
remove_idr:
	error = -ENOSPC;
	spin_lock(&ss->id_lock);
	idr_remove(&ss->idr, myid);
	spin_unlock(&ss->id_lock);
err_out:
	kfree(newid);
	return ERR_PTR(error);

}
3927
/*
 * Boot-time setup of the css_id machinery for @ss: initialize the
 * lock and idr, then allocate a depth-0 id for the subsystem's root
 * css and wire the two together.
 */
static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss)
{
	struct css_id *newid;
	struct cgroup_subsys_state *rootcss;

	spin_lock_init(&ss->id_lock);
	idr_init(&ss->idr);

	rootcss = init_css_set.subsys[ss->subsys_id];
	newid = get_new_cssid(ss, 0);
	if (IS_ERR(newid))
		return PTR_ERR(newid);

	/* the root's ancestor stack contains only itself */
	newid->stack[0] = newid->id;
	newid->css = rootcss;
	rootcss->id = newid;
	return 0;
}
3946
/*
 * alloc_css_id - allocate and install a css_id for @child's css in
 * @ss, one level deeper than @parent's.  The ancestor-id stack is
 * copied from the parent and extended with the child's own id, which
 * is what css_is_ancestor() relies on for its O(1) test.
 */
static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
			struct cgroup *child)
{
	int subsys_id, i, depth = 0;
	struct cgroup_subsys_state *parent_css, *child_css;
	struct css_id *child_id, *parent_id = NULL;

	subsys_id = ss->subsys_id;
	parent_css = parent->subsys[subsys_id];
	child_css = child->subsys[subsys_id];
	depth = css_depth(parent_css) + 1;
	parent_id = parent_css->id;

	child_id = get_new_cssid(ss, depth);
	if (IS_ERR(child_id))
		return PTR_ERR(child_id);

	/* inherit the parent's ancestor stack, then append ourselves */
	for (i = 0; i < depth; i++)
		child_id->stack[i] = parent_id->stack[i];
	child_id->stack[depth] = child_id->id;
	/*
	 * Publish last with rcu_assign_pointer so that lockless readers
	 * only see a fully initialized css_id.
	 */
	rcu_assign_pointer(child_css->id, child_id);

	return 0;
}
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
3985{
3986 struct css_id *cssid = NULL;
3987
3988 BUG_ON(!ss->use_id);
3989 cssid = idr_find(&ss->idr, id);
3990
3991 if (unlikely(!cssid))
3992 return NULL;
3993
3994 return rcu_dereference(cssid->css);
3995}
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
/**
 * css_get_next - look up the next css under a specified hierarchy
 * @ss: pointer to subsystem (must have use_id enabled)
 * @id: start the scan at ids >= @id
 * @root: the search is restricted to css's under this one
 * @foundid: out parameter, set to the id of the returned css
 *
 * Returns the next live css under @root with id >= @id, or NULL when
 * the iteration is exhausted.  Callers continue from *foundid + 1.
 */
struct cgroup_subsys_state *
css_get_next(struct cgroup_subsys *ss, int id,
	     struct cgroup_subsys_state *root, int *foundid)
{
	struct cgroup_subsys_state *ret = NULL;
	struct css_id *tmp;
	int tmpid;
	int rootid = css_id(root);
	int depth = css_depth(root);

	/* root without an id has nothing registered under it */
	if (!rootid)
		return NULL;

	BUG_ON(!ss->use_id);

	tmpid = id;
	while (1) {
		/*
		 * Scan the next registered entry; idr_get_next() advances
		 * tmpid to the id it actually found.
		 */
		spin_lock(&ss->id_lock);
		tmp = idr_get_next(&ss->idr, &tmpid);
		spin_unlock(&ss->id_lock);

		if (!tmp)
			break;
		/* entry is under @root iff rootid sits at @depth in its stack */
		if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
			ret = rcu_dereference(tmp->css);
			if (ret) {
				*foundid = tmpid;
				break;
			}
		}
		/* css was dead or not under root: continue from next id */
		tmpid = tmpid + 1;
	}
	return ret;
}
4046
4047#ifdef CONFIG_CGROUP_DEBUG
4048static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
4049 struct cgroup *cont)
4050{
4051 struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
4052
4053 if (!css)
4054 return ERR_PTR(-ENOMEM);
4055
4056 return css;
4057}
4058
4059static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
4060{
4061 kfree(cont->subsys[debug_subsys_id]);
4062}
4063
4064static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
4065{
4066 return atomic_read(&cont->count);
4067}
4068
4069static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
4070{
4071 return cgroup_task_count(cont);
4072}
4073
4074static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
4075{
4076 return (u64)(unsigned long)current->cgroups;
4077}
4078
4079static u64 current_css_set_refcount_read(struct cgroup *cont,
4080 struct cftype *cft)
4081{
4082 u64 count;
4083
4084 rcu_read_lock();
4085 count = atomic_read(¤t->cgroups->refcount);
4086 rcu_read_unlock();
4087 return count;
4088}
4089
/*
 * "current_css_set_cg_links" debug file: one line per (hierarchy,
 * cgroup) pair that the calling task's css_set is linked to.  Lock
 * order as taken here: css_set_lock (read) outside rcu_read_lock.
 */
static int current_css_set_cg_links_read(struct cgroup *cont,
					 struct cftype *cft,
					 struct seq_file *seq)
{
	struct cg_cgroup_link *link;
	struct css_set *cg;

	read_lock(&css_set_lock);
	rcu_read_lock();
	cg = rcu_dereference(current->cgroups);
	list_for_each_entry(link, &cg->cg_links, cg_link_list) {
		struct cgroup *c = link->cgrp;
		const char *name;

		/* a cgroup without a dentry is shown as "?" */
		if (c->dentry)
			name = c->dentry->d_name.name;
		else
			name = "?";
		seq_printf(seq, "Root %d group %s\n",
			   c->root->hierarchy_id, name);
	}
	rcu_read_unlock();
	read_unlock(&css_set_lock);
	return 0;
}
4115
4116#define MAX_TASKS_SHOWN_PER_CSS 25
4117static int cgroup_css_links_read(struct cgroup *cont,
4118 struct cftype *cft,
4119 struct seq_file *seq)
4120{
4121 struct cg_cgroup_link *link;
4122
4123 read_lock(&css_set_lock);
4124 list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
4125 struct css_set *cg = link->cg;
4126 struct task_struct *task;
4127 int count = 0;
4128 seq_printf(seq, "css_set %p\n", cg);
4129 list_for_each_entry(task, &cg->tasks, cg_list) {
4130 if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
4131 seq_puts(seq, " ...\n");
4132 break;
4133 } else {
4134 seq_printf(seq, " task %d\n",
4135 task_pid_vnr(task));
4136 }
4137 }
4138 }
4139 read_unlock(&css_set_lock);
4140 return 0;
4141}
4142
4143static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
4144{
4145 return test_bit(CGRP_RELEASABLE, &cgrp->flags);
4146}
4147
/* control files exported by the debug subsystem (see readers above) */
static struct cftype debug_files[] =  {
	{
		.name = "cgroup_refcount",
		.read_u64 = cgroup_refcount_read,
	},
	{
		.name = "taskcount",
		.read_u64 = debug_taskcount_read,
	},

	{
		.name = "current_css_set",
		.read_u64 = current_css_set_read,
	},

	{
		.name = "current_css_set_refcount",
		.read_u64 = current_css_set_refcount_read,
	},

	{
		.name = "current_css_set_cg_links",
		.read_seq_string = current_css_set_cg_links_read,
	},

	{
		.name = "cgroup_css_links",
		.read_seq_string = cgroup_css_links_read,
	},

	{
		.name = "releasable",
		.read_u64 = releasable_read,
	},
};
4183
4184static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
4185{
4186 return cgroup_add_files(cont, ss, debug_files,
4187 ARRAY_SIZE(debug_files));
4188}
4189
/* "debug" subsystem: exposes internal cgroup state for debugging */
struct cgroup_subsys debug_subsys = {
	.name = "debug",
	.create = debug_create,
	.destroy = debug_destroy,
	.populate = debug_populate,
	.subsys_id = debug_subsys_id,
};
4197#endif
4198