1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include <linux/cgroup.h>
26#include <linux/errno.h>
27#include <linux/fs.h>
28#include <linux/kernel.h>
29#include <linux/list.h>
30#include <linux/mm.h>
31#include <linux/mutex.h>
32#include <linux/mount.h>
33#include <linux/pagemap.h>
34#include <linux/proc_fs.h>
35#include <linux/rcupdate.h>
36#include <linux/sched.h>
37#include <linux/backing-dev.h>
38#include <linux/seq_file.h>
39#include <linux/slab.h>
40#include <linux/magic.h>
41#include <linux/spinlock.h>
42#include <linux/string.h>
43#include <linux/sort.h>
44#include <linux/kmod.h>
45#include <linux/delayacct.h>
46#include <linux/cgroupstats.h>
47
48#include <asm/atomic.h>
49
/*
 * Guards all hierarchy/subsystem state below: mounts, subsystem
 * rebinding, and the cgroup trees themselves.
 */
static DEFINE_MUTEX(cgroup_mutex);

/* Generate an array of cgroup subsystem pointers */
#define SUBSYS(_x) &_x ## _subsys,

static struct cgroup_subsys *subsys[] = {
#include <linux/cgroup_subsys.h>
};
58
59
60
61
62
63
/*
 * A cgroupfs_root represents the root of a cgroup hierarchy, and may be
 * associated with a superblock to form an active hierarchy.
 */
struct cgroupfs_root {
	struct super_block *sb;

	/*
	 * The bitmask of subsystems intended to be attached to this
	 * hierarchy (may differ from actual_subsys_bits while a remount
	 * is in progress).
	 */
	unsigned long subsys_bits;

	/* The bitmask of subsystems currently attached to this hierarchy */
	unsigned long actual_subsys_bits;

	/* A list running through the attached subsystems */
	struct list_head subsys_list;

	/* The root cgroup for this hierarchy */
	struct cgroup top_cgroup;

	/* Tracks how many cgroups are currently defined in hierarchy. */
	int number_of_cgroups;

	/* A list running through the active hierarchies */
	struct list_head root_list;

	/* Hierarchy-specific flags (ROOT_* bits) */
	unsigned long flags;

	/*
	 * Path to the release agent helper spawned for notify-on-release
	 * cgroups; empty string means "no agent configured".
	 */
	char release_agent_path[PATH_MAX];
};
98
99
100
101
102
103
104
/*
 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
 * subsystems that are otherwise unattached - it never has more than a
 * single cgroup, and all tasks are part of that cgroup.
 */
static struct cgroupfs_root rootnode;

/* The list of hierarchy roots */
static LIST_HEAD(roots);
static int root_count;

/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
#define dummytop (&rootnode.top_cgroup)

/*
 * Set if a subsystem needs fork/exit callbacks; lets the fork and exit
 * paths skip the cgroup work entirely when no subsystem cares.
 */
static int need_forkexit_callback;

/* bits in struct cgroup flags field */
enum {
	/* Control Group is dead */
	CGRP_REMOVED,
	/*
	 * Control Group has ever had a child cgroup or a task, but no
	 * longer (only meaningful with CGRP_NOTIFY_ON_RELEASE)
	 */
	CGRP_RELEASABLE,
	/* Control Group requires release notifications to userspace */
	CGRP_NOTIFY_ON_RELEASE,
};
132
133
/* Convenient test for the CGRP_REMOVED flag bit */
inline int cgroup_is_removed(const struct cgroup *cgrp)
{
	return test_bit(CGRP_REMOVED, &cgrp->flags);
}
138
139
/* bits in struct cgroupfs_root flags field */
enum {
	ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
};
143
144inline int cgroup_is_releasable(const struct cgroup *cgrp)
145{
146 const int bits =
147 (1 << CGRP_RELEASABLE) |
148 (1 << CGRP_NOTIFY_ON_RELEASE);
149 return (cgrp->flags & bits) == bits;
150}
151
/* Test whether userspace asked for release notifications on this cgroup */
inline int notify_on_release(const struct cgroup *cgrp)
{
	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}
156
157
158
159
160
/*
 * for_each_subsys() allows you to iterate on each subsystem attached to
 * an active hierarchy
 */
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &_root->subsys_list, sibling)

/* for_each_root() allows you to iterate across the active hierarchies */
#define for_each_root(_root) \
list_for_each_entry(_root, &roots, root_list)

/*
 * The list of cgroups eligible for automatic release, and the work item
 * that runs the release agent.  Protected by release_list_lock.
 */
static LIST_HEAD(release_list);
static DEFINE_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
175
176
/* Link structure for associating css_set objects with cgroups */
struct cg_cgroup_link {
	/*
	 * List running through cg_cgroup_links associated with a
	 * cgroup, anchored on cgroup->css_sets
	 */
	struct list_head cgrp_link_list;
	/*
	 * List running through cg_cgroup_links pointing at a
	 * single css_set object, anchored on css_set->cg_links
	 */
	struct list_head cg_link_list;
	struct css_set *cg;
};

/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted. It contains a pointer to the root state
 * for each subsystem. Also used to anchor the list of css_sets. Not
 * reference-counted, to improve performance when child cgroups
 * haven't been created.
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;

/*
 * css_set_lock protects the list of css_set objects, and the
 * chain of tasks off each css_set.
 */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;

/*
 * We don't maintain the lists running through each css_set to its
 * task until after the first call to cgroup_iter_start(). This
 * reduces the fork()/exit() overhead for people who have cgroups
 * compiled into their kernel but not actually in use.
 */
static int use_task_css_set_links;
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
/*
 * Unlink a css_set from the global list, and free all the
 * cg_cgroup_link objects tying it to its cgroups.  Takes css_set_lock
 * for writing; caller must not hold it.
 */
static void unlink_css_set(struct css_set *cg)
{
	write_lock(&css_set_lock);
	list_del(&cg->list);
	css_set_count--;
	while (!list_empty(&cg->cg_links)) {
		struct cg_cgroup_link *link;
		link = list_entry(cg->cg_links.next,
				  struct cg_cgroup_link, cg_link_list);
		/* Remove the link from both lists before freeing it */
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);
}
245
/*
 * Final kref release path for a css_set: unlink it, drop the cgroup
 * refcount held via each subsystem state, and free it.  If @taskexit
 * is set (release triggered by a task exiting), dying cgroups are also
 * marked CGRP_RELEASABLE before the release check.
 */
static void __release_css_set(struct kref *k, int taskexit)
{
	int i;
	struct css_set *cg = container_of(k, struct css_set, ref);

	unlink_css_set(cg);

	/* RCU protects the cgroup pointers read through cg->subsys[] */
	rcu_read_lock();
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup *cgrp = cg->subsys[i]->cgroup;
		if (atomic_dec_and_test(&cgrp->count) &&
		    notify_on_release(cgrp)) {
			if (taskexit)
				set_bit(CGRP_RELEASABLE, &cgrp->flags);
			check_for_release(cgrp);
		}
	}
	rcu_read_unlock();
	kfree(cg);
}
266
/* kref release callback used on ordinary css_set put */
static void release_css_set(struct kref *k)
{
	__release_css_set(k, 0);
}

/* kref release callback used when the put happens from task exit */
static void release_css_set_taskexit(struct kref *k)
{
	__release_css_set(k, 1);
}
276
277
278
279
/*
 * refcounted get/put for css_set objects
 */
static inline void get_css_set(struct css_set *cg)
{
	kref_get(&cg->ref);
}

static inline void put_css_set(struct css_set *cg)
{
	kref_put(&cg->ref, release_css_set);
}

/* As put_css_set(), but marks dying cgroups releasable (task exit path) */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	kref_put(&cg->ref, release_css_set_taskexit);
}
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
/*
 * find_existing_css_set() is a helper for find_css_set(): it looks for
 * an existing css_set matching what @oldcg would become if the task
 * moved into @cgrp.  As a side effect it fills @template with the
 * desired set of subsystem states.  Returns the matching css_set, or
 * NULL if none exists.  Caller must hold css_set_lock.
 */
static struct css_set *find_existing_css_set(
	struct css_set *oldcg,
	struct cgroup *cgrp,
	struct cgroup_subsys_state *template[])
{
	int i;
	struct cgroupfs_root *root = cgrp->root;
	struct list_head *l = &init_css_set.list;

	/*
	 * Build the set of subsystem state objects that we want to
	 * see in the new css_set.
	 */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		if (root->subsys_bits & (1ull << i)) {
			/*
			 * Subsystem is in this hierarchy. So we want
			 * the subsystem state from the new cgroup.
			 */
			template[i] = cgrp->subsys[i];
		} else {
			/*
			 * Subsystem is not in this hierarchy, so we
			 * don't want to change the subsystem state.
			 */
			template[i] = oldcg->subsys[i];
		}
	}

	/*
	 * Walk the css_set list, which is anchored at init_css_set
	 * (itself a valid css_set, hence the do/while starting there).
	 */
	do {
		struct css_set *cg =
			list_entry(l, struct css_set, list);

		if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
			/* All subsystems matched */
			return cg;
		}

		l = l->next;
	} while (l != &init_css_set.list);

	/* No existing cgroup group matched */
	return NULL;
}
351
352
353
354
355
356
357
358static int allocate_cg_links(int count, struct list_head *tmp)
359{
360 struct cg_cgroup_link *link;
361 int i;
362 INIT_LIST_HEAD(tmp);
363 for (i = 0; i < count; i++) {
364 link = kmalloc(sizeof(*link), GFP_KERNEL);
365 if (!link) {
366 while (!list_empty(tmp)) {
367 link = list_entry(tmp->next,
368 struct cg_cgroup_link,
369 cgrp_link_list);
370 list_del(&link->cgrp_link_list);
371 kfree(link);
372 }
373 return -ENOMEM;
374 }
375 list_add(&link->cgrp_link_list, tmp);
376 }
377 return 0;
378}
379
/* Free every cg_cgroup_link still chained on *tmp */
static void free_cg_links(struct list_head *tmp)
{
	while (!list_empty(tmp)) {
		struct cg_cgroup_link *link;
		link = list_entry(tmp->next,
				  struct cg_cgroup_link,
				  cgrp_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
}
391
392
393
394
395
396
397
398
399
400static struct css_set *find_css_set(
401 struct css_set *oldcg, struct cgroup *cgrp)
402{
403 struct css_set *res;
404 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
405 int i;
406
407 struct list_head tmp_cg_links;
408 struct cg_cgroup_link *link;
409
410
411
412 write_lock(&css_set_lock);
413 res = find_existing_css_set(oldcg, cgrp, template);
414 if (res)
415 get_css_set(res);
416 write_unlock(&css_set_lock);
417
418 if (res)
419 return res;
420
421 res = kmalloc(sizeof(*res), GFP_KERNEL);
422 if (!res)
423 return NULL;
424
425
426 if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
427 kfree(res);
428 return NULL;
429 }
430
431 kref_init(&res->ref);
432 INIT_LIST_HEAD(&res->cg_links);
433 INIT_LIST_HEAD(&res->tasks);
434
435
436
437 memcpy(res->subsys, template, sizeof(res->subsys));
438
439 write_lock(&css_set_lock);
440
441 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
442 struct cgroup *cgrp = res->subsys[i]->cgroup;
443 struct cgroup_subsys *ss = subsys[i];
444 atomic_inc(&cgrp->count);
445
446
447
448
449
450 if (ss->root->subsys_list.next == &ss->sibling) {
451 BUG_ON(list_empty(&tmp_cg_links));
452 link = list_entry(tmp_cg_links.next,
453 struct cg_cgroup_link,
454 cgrp_link_list);
455 list_del(&link->cgrp_link_list);
456 list_add(&link->cgrp_link_list, &cgrp->css_sets);
457 link->cg = res;
458 list_add(&link->cg_link_list, &res->cg_links);
459 }
460 }
461 if (list_empty(&rootnode.subsys_list)) {
462 link = list_entry(tmp_cg_links.next,
463 struct cg_cgroup_link,
464 cgrp_link_list);
465 list_del(&link->cgrp_link_list);
466 list_add(&link->cgrp_link_list, &dummytop->css_sets);
467 link->cg = res;
468 list_add(&link->cg_link_list, &res->cg_links);
469 }
470
471 BUG_ON(!list_empty(&tmp_cg_links));
472
473
474 list_add(&res->list, &init_css_set.list);
475 css_set_count++;
476 INIT_LIST_HEAD(&res->tasks);
477 write_unlock(&css_set_lock);
478
479 return res;
480}
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
/*
 * cgroup_lock - lock out any changes to cgroup structures
 */
void cgroup_lock(void)
{
	mutex_lock(&cgroup_mutex);
}

/*
 * cgroup_unlock - release lock on cgroup changes
 *
 * Undo the lock taken in a previous cgroup_lock() call.
 */
void cgroup_unlock(void)
{
	mutex_unlock(&cgroup_mutex);
}
556
557
558
559
560
561
562
563
/* Forward declarations for the cgroup filesystem implementation below */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static struct inode_operations cgroup_dir_inode_operations;
static struct file_operations proc_cgroupstats_operations;

/* cgroupfs pages are never dirtied or written back */
static struct backing_dev_info cgroup_backing_dev_info = {
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};
573
574static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
575{
576 struct inode *inode = new_inode(sb);
577
578 if (inode) {
579 inode->i_mode = mode;
580 inode->i_uid = current->fsuid;
581 inode->i_gid = current->fsgid;
582 inode->i_blocks = 0;
583 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
584 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
585 }
586 return inode;
587}
588
/* d_iput callback: free the cgroup when its directory dentry goes away */
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
	/* is dentry a directory ? if so, kfree() associated cgroup */
	if (S_ISDIR(inode->i_mode)) {
		struct cgroup *cgrp = dentry->d_fsdata;
		/* Only removed cgroups may reach final dput */
		BUG_ON(!(cgroup_is_removed(cgrp)));
		/*
		 * Wait for any concurrent RCU readers of the cgroup to
		 * finish before we free it.
		 */
		synchronize_rcu();
		kfree(cgrp);
	}
	iput(inode);
}
606
/* Unhash and rmdir a cgroup directory dentry from its parent */
static void remove_dir(struct dentry *d)
{
	/* Hold the parent across the rmdir so its inode stays valid */
	struct dentry *parent = dget(d->d_parent);

	d_delete(d);
	simple_rmdir(parent->d_inode, d);
	dput(parent);
}
615
/*
 * Remove every (non-directory) control file under @dentry.  Caller
 * must hold the directory inode's i_mutex.  dcache_lock is dropped and
 * retaken around each unlink, so the child list is re-read from the
 * start each time.
 */
static void cgroup_clear_directory(struct dentry *dentry)
{
	struct list_head *node;

	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
	spin_lock(&dcache_lock);
	node = dentry->d_subdirs.next;
	while (node != &dentry->d_subdirs) {
		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
		list_del_init(node);
		if (d->d_inode) {
			/*
			 * This should never be called on a cgroup
			 * directory with child cgroups
			 */
			BUG_ON(d->d_inode->i_mode & S_IFDIR);
			d = dget_locked(d);
			spin_unlock(&dcache_lock);
			d_delete(d);
			simple_unlink(dentry->d_inode, d);
			dput(d);
			spin_lock(&dcache_lock);
		}
		/* Restart from the head; the list may have changed */
		node = dentry->d_subdirs.next;
	}
	spin_unlock(&dcache_lock);
}
641
642
643
644
/*
 * NOTE : the dentry must have been dget()'ed
 */
static void cgroup_d_remove_dir(struct dentry *dentry)
{
	/* First empty the directory of its control files */
	cgroup_clear_directory(dentry);

	/* Detach from the parent's child list, then remove the dir */
	spin_lock(&dcache_lock);
	list_del_init(&dentry->d_u.d_child);
	spin_unlock(&dcache_lock);
	remove_dir(dentry);
}
654
/*
 * Rebind @root so that exactly the subsystems in @final_bits are
 * attached.  Newly-added subsystems are moved over from the dummy
 * hierarchy; removed ones are returned to it.  Returns 0, or -EBUSY if
 * an added subsystem is busy elsewhere or the hierarchy already has
 * child cgroups.  Caller must hold cgroup_mutex.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			     unsigned long final_bits)
{
	unsigned long added_bits, removed_bits;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	removed_bits = root->actual_subsys_bits & ~final_bits;
	added_bits = final_bits & ~root->actual_subsys_bits;

	/* Check that any added subsystems are currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long long bit = 1ull << i;
		struct cgroup_subsys *ss = subsys[i];
		if (!(bit & added_bits))
			continue;
		if (ss->root != &rootnode) {
			/* Subsystem isn't free */
			return -EBUSY;
		}
	}

	/*
	 * Currently we don't handle adding/removing subsystems when
	 * any child cgroups exist. This is theoretically supportable
	 * but involves complex error handling, so it's being left until
	 * later.
	 */
	if (!list_empty(&cgrp->children))
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;
		if (bit & added_bits) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_add(&ss->sibling, &root->subsys_list);
			rcu_assign_pointer(ss->root, root);
			if (ss->bind)
				ss->bind(ss, cgrp);

		} else if (bit & removed_bits) {
			/* We're removing this subsystem */
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			if (ss->bind)
				ss->bind(ss, dummytop);
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			rcu_assign_pointer(subsys[i]->root, &rootnode);
			list_del(&ss->sibling);
		} else if (bit & final_bits) {
			/* Subsystem state should already exist */
			BUG_ON(!cgrp->subsys[i]);
		} else {
			/* Subsystem state shouldn't exist */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_bits = root->actual_subsys_bits = final_bits;
	/* Let RCU readers of the old root pointers drain */
	synchronize_rcu();

	return 0;
}
722
/* Show the mount options of a cgroup hierarchy in /proc/mounts */
static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
	struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
	struct cgroup_subsys *ss;

	/* cgroup_mutex guards subsys_list and release_agent_path */
	mutex_lock(&cgroup_mutex);
	for_each_subsys(root, ss)
		seq_printf(seq, ",%s", ss->name);
	if (test_bit(ROOT_NOPREFIX, &root->flags))
		seq_puts(seq, ",noprefix");
	if (strlen(root->release_agent_path))
		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
	mutex_unlock(&cgroup_mutex);
	return 0;
}
738
/* Parsed mount options for a cgroup hierarchy */
struct cgroup_sb_opts {
	unsigned long subsys_bits;	/* requested subsystems */
	unsigned long flags;		/* ROOT_* flag bits */
	char *release_agent;		/* kmalloc'd; caller must free */
};
744
745
746
/*
 * Convert a hierarchy specifier into a bitmask of subsystems and
 * flags.  A NULL @data is treated as "all".  On success opts may own a
 * kmalloc'd release_agent string which the caller must free - even on
 * some error returns the string may already have been allocated.
 */
static int parse_cgroupfs_options(char *data,
				  struct cgroup_sb_opts *opts)
{
	char *token, *o = data ?: "all";

	opts->subsys_bits = 0;
	opts->flags = 0;
	opts->release_agent = NULL;

	while ((token = strsep(&o, ",")) != NULL) {
		if (!*token)
			return -EINVAL;
		if (!strcmp(token, "all")) {
			opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1;
		} else if (!strcmp(token, "noprefix")) {
			set_bit(ROOT_NOPREFIX, &opts->flags);
		} else if (!strncmp(token, "release_agent=", 14)) {
			/* Specifying two release agents is forbidden */
			if (opts->release_agent)
				return -EINVAL;
			opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);
			if (!opts->release_agent)
				return -ENOMEM;
			/* kzalloc keeps the copy NUL-terminated */
			strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
			opts->release_agent[PATH_MAX - 1] = 0;
		} else {
			/* Otherwise the token must name a subsystem */
			struct cgroup_subsys *ss;
			int i;
			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
				ss = subsys[i];
				if (!strcmp(token, ss->name)) {
					set_bit(i, &opts->subsys_bits);
					break;
				}
			}
			if (i == CGROUP_SUBSYS_COUNT)
				return -ENOENT;
		}
	}

	/* We can't have an empty hierarchy */
	if (!opts->subsys_bits)
		return -EINVAL;

	return 0;
}
793
/*
 * Remount a cgroup hierarchy: re-parse the options and rebind
 * subsystems accordingly.  Flags cannot be changed on remount.
 */
static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{
	int ret = 0;
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	struct cgroup_sb_opts opts;

	/* i_mutex before cgroup_mutex - same order as cgroup_get_sb() */
	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
	mutex_lock(&cgroup_mutex);

	/* See what subsystems are wanted */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

	/* Don't allow flags to change at remount */
	if (opts.flags != root->flags) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = rebind_subsystems(root, opts.subsys_bits);

	/* (re)populate subsystem files */
	if (!ret)
		cgroup_populate_dir(cgrp);

	if (opts.release_agent)
		strcpy(root->release_agent_path, opts.release_agent);
 out_unlock:
	if (opts.release_agent)
		kfree(opts.release_agent);
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
	return ret;
}
830
/* Superblock operations for cgroupfs */
static struct super_operations cgroup_ops = {
	.statfs = simple_statfs,
	.drop_inode = generic_delete_inode,
	.show_options = cgroup_show_options,
	.remount_fs = cgroup_remount,
};
837
838static void init_cgroup_root(struct cgroupfs_root *root)
839{
840 struct cgroup *cgrp = &root->top_cgroup;
841 INIT_LIST_HEAD(&root->subsys_list);
842 INIT_LIST_HEAD(&root->root_list);
843 root->number_of_cgroups = 1;
844 cgrp->root = root;
845 cgrp->top_cgroup = cgrp;
846 INIT_LIST_HEAD(&cgrp->sibling);
847 INIT_LIST_HEAD(&cgrp->children);
848 INIT_LIST_HEAD(&cgrp->css_sets);
849 INIT_LIST_HEAD(&cgrp->release_list);
850}
851
852static int cgroup_test_super(struct super_block *sb, void *data)
853{
854 struct cgroupfs_root *new = data;
855 struct cgroupfs_root *root = sb->s_fs_info;
856
857
858 if (new->subsys_bits != root->subsys_bits)
859 return 0;
860
861
862 if (new->flags != root->flags)
863 return 0;
864
865 return 1;
866}
867
/* sget() setup callback: attach our root to a brand-new superblock */
static int cgroup_set_super(struct super_block *sb, void *data)
{
	int ret;
	struct cgroupfs_root *root = data;

	ret = set_anon_super(sb, NULL);
	if (ret)
		return ret;

	/* Cross-link the root and the superblock */
	sb->s_fs_info = root;
	root->sb = sb;

	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	sb->s_magic = CGROUP_SUPER_MAGIC;
	sb->s_op = &cgroup_ops;

	return 0;
}
887
888static int cgroup_get_rootdir(struct super_block *sb)
889{
890 struct inode *inode =
891 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
892 struct dentry *dentry;
893
894 if (!inode)
895 return -ENOMEM;
896
897 inode->i_op = &simple_dir_inode_operations;
898 inode->i_fop = &simple_dir_operations;
899 inode->i_op = &cgroup_dir_inode_operations;
900
901 inc_nlink(inode);
902 dentry = d_alloc_root(inode);
903 if (!dentry) {
904 iput(inode);
905 return -ENOMEM;
906 }
907 sb->s_root = dentry;
908 return 0;
909}
910
911static int cgroup_get_sb(struct file_system_type *fs_type,
912 int flags, const char *unused_dev_name,
913 void *data, struct vfsmount *mnt)
914{
915 struct cgroup_sb_opts opts;
916 int ret = 0;
917 struct super_block *sb;
918 struct cgroupfs_root *root;
919 struct list_head tmp_cg_links, *l;
920 INIT_LIST_HEAD(&tmp_cg_links);
921
922
923 ret = parse_cgroupfs_options(data, &opts);
924 if (ret) {
925 if (opts.release_agent)
926 kfree(opts.release_agent);
927 return ret;
928 }
929
930 root = kzalloc(sizeof(*root), GFP_KERNEL);
931 if (!root)
932 return -ENOMEM;
933
934 init_cgroup_root(root);
935 root->subsys_bits = opts.subsys_bits;
936 root->flags = opts.flags;
937 if (opts.release_agent) {
938 strcpy(root->release_agent_path, opts.release_agent);
939 kfree(opts.release_agent);
940 }
941
942 sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);
943
944 if (IS_ERR(sb)) {
945 kfree(root);
946 return PTR_ERR(sb);
947 }
948
949 if (sb->s_fs_info != root) {
950
951 BUG_ON(sb->s_root == NULL);
952 kfree(root);
953 root = NULL;
954 } else {
955
956 struct cgroup *cgrp = &root->top_cgroup;
957 struct inode *inode;
958
959 BUG_ON(sb->s_root != NULL);
960
961 ret = cgroup_get_rootdir(sb);
962 if (ret)
963 goto drop_new_super;
964 inode = sb->s_root->d_inode;
965
966 mutex_lock(&inode->i_mutex);
967 mutex_lock(&cgroup_mutex);
968
969
970
971
972
973
974
975
976 ret = allocate_cg_links(css_set_count, &tmp_cg_links);
977 if (ret) {
978 mutex_unlock(&cgroup_mutex);
979 mutex_unlock(&inode->i_mutex);
980 goto drop_new_super;
981 }
982
983 ret = rebind_subsystems(root, root->subsys_bits);
984 if (ret == -EBUSY) {
985 mutex_unlock(&cgroup_mutex);
986 mutex_unlock(&inode->i_mutex);
987 goto drop_new_super;
988 }
989
990
991 BUG_ON(ret);
992
993 list_add(&root->root_list, &roots);
994 root_count++;
995
996 sb->s_root->d_fsdata = &root->top_cgroup;
997 root->top_cgroup.dentry = sb->s_root;
998
999
1000
1001 write_lock(&css_set_lock);
1002 l = &init_css_set.list;
1003 do {
1004 struct css_set *cg;
1005 struct cg_cgroup_link *link;
1006 cg = list_entry(l, struct css_set, list);
1007 BUG_ON(list_empty(&tmp_cg_links));
1008 link = list_entry(tmp_cg_links.next,
1009 struct cg_cgroup_link,
1010 cgrp_link_list);
1011 list_del(&link->cgrp_link_list);
1012 link->cg = cg;
1013 list_add(&link->cgrp_link_list,
1014 &root->top_cgroup.css_sets);
1015 list_add(&link->cg_link_list, &cg->cg_links);
1016 l = l->next;
1017 } while (l != &init_css_set.list);
1018 write_unlock(&css_set_lock);
1019
1020 free_cg_links(&tmp_cg_links);
1021
1022 BUG_ON(!list_empty(&cgrp->sibling));
1023 BUG_ON(!list_empty(&cgrp->children));
1024 BUG_ON(root->number_of_cgroups != 1);
1025
1026 cgroup_populate_dir(cgrp);
1027 mutex_unlock(&inode->i_mutex);
1028 mutex_unlock(&cgroup_mutex);
1029 }
1030
1031 return simple_set_mnt(mnt, sb);
1032
1033 drop_new_super:
1034 up_write(&sb->s_umount);
1035 deactivate_super(sb);
1036 free_cg_links(&tmp_cg_links);
1037 return ret;
1038}
1039
/*
 * Tear down a hierarchy when the last mount goes away: return all
 * subsystems to the dummy hierarchy, drop every css_set link to the
 * top cgroup, and free the root.
 */
static void cgroup_kill_sb(struct super_block *sb) {
	struct cgroupfs_root *root = sb->s_fs_info;
	struct cgroup *cgrp = &root->top_cgroup;
	int ret;

	BUG_ON(!root);

	BUG_ON(root->number_of_cgroups != 1);
	BUG_ON(!list_empty(&cgrp->children));
	BUG_ON(!list_empty(&cgrp->sibling));

	mutex_lock(&cgroup_mutex);

	/* Rebind all subsystems back to the default hierarchy */
	ret = rebind_subsystems(root, 0);
	/* Shouldn't be able to fail ... */
	BUG_ON(ret);

	/*
	 * Release all the links from css_sets to this hierarchy's
	 * root cgroup
	 */
	write_lock(&css_set_lock);
	while (!list_empty(&cgrp->css_sets)) {
		struct cg_cgroup_link *link;
		link = list_entry(cgrp->css_sets.next,
				  struct cg_cgroup_link, cgrp_link_list);
		list_del(&link->cg_link_list);
		list_del(&link->cgrp_link_list);
		kfree(link);
	}
	write_unlock(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		root_count--;
	}
	mutex_unlock(&cgroup_mutex);

	kfree(root);
	kill_litter_super(sb);
}
1082
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.get_sb = cgroup_get_sb,
	.kill_sb = cgroup_kill_sb,
};

/* Directory dentries store the cgroup in d_fsdata ... */
static inline struct cgroup *__d_cgrp(struct dentry *dentry)
{
	return dentry->d_fsdata;
}

/* ... while control-file dentries store the cftype there. */
static inline struct cftype *__d_cft(struct dentry *dentry)
{
	return dentry->d_fsdata;
}
1098
1099
1100
1101
1102
/*
 * Called with cgroup_mutex held. Writes path of cgroup into buf.
 * Returns 0 on success, -errno on error.  The path is built backwards
 * from the end of @buf, then moved to the front.
 */
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
	char *start;

	if (cgrp == dummytop) {
		/*
		 * Inactive subsystems have no dentry for their root
		 * cgroup
		 */
		strcpy(buf, "/");
		return 0;
	}

	start = buf + buflen;

	/* Build the path back-to-front, component by component */
	*--start = '\0';
	for (;;) {
		int len = cgrp->dentry->d_name.len;
		if ((start -= len) < buf)
			return -ENAMETOOLONG;
		memcpy(start, cgrp->dentry->d_name.name, len);
		cgrp = cgrp->parent;
		if (!cgrp)
			break;
		/* Skip the separator before the root component */
		if (!cgrp->parent)
			continue;
		if (--start < buf)
			return -ENAMETOOLONG;
		*start = '/';
	}
	/* Shift the finished path to the start of the buffer */
	memmove(buf, start, buf + buflen - start);
	return 0;
}
1136
1137
1138
1139
1140
1141
/*
 * Return the first subsystem attached to a cgroup's hierarchy, and
 * its subsystem id.  Either output pointer may be NULL.  The hierarchy
 * must have at least one subsystem attached.
 */
static void get_first_subsys(const struct cgroup *cgrp,
			struct cgroup_subsys_state **css, int *subsys_id)
{
	const struct cgroupfs_root *root = cgrp->root;
	const struct cgroup_subsys *test_ss;
	BUG_ON(list_empty(&root->subsys_list));
	test_ss = list_entry(root->subsys_list.next,
			     struct cgroup_subsys, sibling);
	if (css) {
		*css = cgrp->subsys[test_ss->subsys_id];
		BUG_ON(!*css);
	}
	if (subsys_id)
		*subsys_id = test_ss->subsys_id;
}
1157
1158
1159
1160
1161
1162
1163
/*
 * attach_task - attach task 'tsk' to cgroup 'cgrp'
 *
 * Call holding cgroup_mutex.  May take task_lock of
 * the task 'tsk' during call.
 */
static int attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
	int retval = 0;
	struct cgroup_subsys *ss;
	struct cgroup *oldcgrp;
	struct css_set *cg = tsk->cgroups;
	struct css_set *newcg;
	struct cgroupfs_root *root = cgrp->root;
	int subsys_id;

	get_first_subsys(cgrp, NULL, &subsys_id);

	/* Nothing to do if the task is already in that cgroup */
	oldcgrp = task_cgroup(tsk, subsys_id);
	if (cgrp == oldcgrp)
		return 0;

	/* Give every attached subsystem a chance to veto the move */
	for_each_subsys(root, ss) {
		if (ss->can_attach) {
			retval = ss->can_attach(ss, cgrp, tsk);
			if (retval) {
				return retval;
			}
		}
	}

	/*
	 * Locate or allocate a new css_set for this task,
	 * based on its final set of cgroups
	 */
	newcg = find_css_set(cg, cgrp);
	if (!newcg) {
		return -ENOMEM;
	}

	task_lock(tsk);
	/* A task already past exit cannot be moved */
	if (tsk->flags & PF_EXITING) {
		task_unlock(tsk);
		put_css_set(newcg);
		return -ESRCH;
	}
	rcu_assign_pointer(tsk->cgroups, newcg);
	task_unlock(tsk);

	/* Update the css_set linked lists if we're using them */
	write_lock(&css_set_lock);
	if (!list_empty(&tsk->cg_list)) {
		list_del(&tsk->cg_list);
		list_add(&tsk->cg_list, &newcg->tasks);
	}
	write_unlock(&css_set_lock);

	for_each_subsys(root, ss) {
		if (ss->attach) {
			ss->attach(ss, cgrp, oldcgrp, tsk);
		}
	}
	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	/* Wait out RCU readers of the old css_set before dropping it */
	synchronize_rcu();
	put_css_set(cg);
	return 0;
}
1226
1227
1228
1229
1230
/*
 * Attach task with pid 'pid' to cgroup 'cgrp'. Call with
 * cgroup_mutex, may take task_lock of task.  A pid of 0 means
 * "current".
 */
static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
{
	pid_t pid;
	struct task_struct *tsk;
	int ret;

	if (sscanf(pidbuf, "%d", &pid) != 1)
		return -EIO;

	if (pid) {
		rcu_read_lock();
		tsk = find_task_by_pid(pid);
		if (!tsk || tsk->flags & PF_EXITING) {
			rcu_read_unlock();
			return -ESRCH;
		}
		/* Pin the task before leaving the RCU read section */
		get_task_struct(tsk);
		rcu_read_unlock();

		/* Non-root callers may only move their own tasks */
		if ((current->euid) && (current->euid != tsk->uid)
		    && (current->euid != tsk->suid)) {
			put_task_struct(tsk);
			return -EACCES;
		}
	} else {
		tsk = current;
		get_task_struct(tsk);
	}

	ret = attach_task(cgrp, tsk);
	put_task_struct(tsk);
	return ret;
}
1264
1265
1266
/* The various types of files and directories in a cgroup file system */
enum cgroup_filetype {
	FILE_ROOT,
	FILE_DIR,
	FILE_TASKLIST,
	FILE_NOTIFY_ON_RELEASE,
	FILE_RELEASABLE,
	FILE_RELEASE_AGENT,
};
1275
/*
 * Parse a userspace write as an unsigned 64-bit integer and hand it to
 * the cftype's write_uint handler.  Returns bytes consumed or -errno.
 */
static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft,
				 struct file *file,
				 const char __user *userbuf,
				 size_t nbytes, loff_t *unused_ppos)
{
	char buffer[64];
	int retval = 0;
	u64 val;
	char *end;

	if (!nbytes)
		return -EINVAL;
	if (nbytes >= sizeof(buffer))
		return -E2BIG;
	if (copy_from_user(buffer, userbuf, nbytes))
		return -EFAULT;

	buffer[nbytes] = 0;

	/* strip newline if present */
	if (nbytes && (buffer[nbytes-1] == '\n'))
		buffer[nbytes-1] = 0;
	val = simple_strtoull(buffer, &end, 0);
	/* Reject anything but a clean integer */
	if (*end)
		return -EINVAL;

	/* Pass to subsystem */
	retval = cft->write_uint(cgrp, cft, val);
	if (!retval)
		retval = nbytes;
	return retval;
}
1308
/*
 * Write handler for the control files common to all hierarchies
 * (tasks, notify_on_release, release_agent).  Dispatches on the
 * cftype's private filetype.  Returns bytes consumed or -errno.
 */
static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
					struct cftype *cft,
					struct file *file,
					const char __user *userbuf,
					size_t nbytes, loff_t *unused_ppos)
{
	enum cgroup_filetype type = cft->private;
	char *buffer;
	int retval = 0;

	if (nbytes >= PATH_MAX)
		return -E2BIG;

	/* +1 for nul-terminator */
	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
	if (buffer == NULL)
		return -ENOMEM;

	if (copy_from_user(buffer, userbuf, nbytes)) {
		retval = -EFAULT;
		goto out1;
	}
	buffer[nbytes] = 0;	/* nul-terminate */

	mutex_lock(&cgroup_mutex);

	/* The cgroup may have been removed since the file was opened */
	if (cgroup_is_removed(cgrp)) {
		retval = -ENODEV;
		goto out2;
	}

	switch (type) {
	case FILE_TASKLIST:
		retval = attach_task_by_pid(cgrp, buffer);
		break;
	case FILE_NOTIFY_ON_RELEASE:
		clear_bit(CGRP_RELEASABLE, &cgrp->flags);
		if (simple_strtoul(buffer, NULL, 10) != 0)
			set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
		else
			clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
		break;
	case FILE_RELEASE_AGENT:
	{
		struct cgroupfs_root *root = cgrp->root;
		/* Strip trailing newline */
		if (nbytes && (buffer[nbytes-1] == '\n')) {
			buffer[nbytes-1] = 0;
		}
		if (nbytes < sizeof(root->release_agent_path)) {
			/*
			 * We never write anything other than '\0'
			 * into the last char of release_agent_path,
			 * so it always remains a NUL-terminated
			 * string
			 */
			strncpy(root->release_agent_path, buffer, nbytes);
			root->release_agent_path[nbytes] = 0;
		} else {
			retval = -ENOSPC;
		}
		break;
	}
	default:
		retval = -EINVAL;
		goto out2;
	}

	if (retval == 0)
		retval = nbytes;
out2:
	mutex_unlock(&cgroup_mutex);
out1:
	kfree(buffer);
	return retval;
}
1383
/* Top-level write dispatcher: route to the cftype's write handler */
static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
						size_t nbytes, loff_t *ppos)
{
	struct cftype *cft = __d_cft(file->f_dentry);
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);

	if (!cft)
		return -ENODEV;
	if (cft->write)
		return cft->write(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->write_uint)
		return cgroup_write_uint(cgrp, cft, file, buf, nbytes, ppos);
	return -EINVAL;
}
1398
1399static ssize_t cgroup_read_uint(struct cgroup *cgrp, struct cftype *cft,
1400 struct file *file,
1401 char __user *buf, size_t nbytes,
1402 loff_t *ppos)
1403{
1404 char tmp[64];
1405 u64 val = cft->read_uint(cgrp, cft);
1406 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
1407
1408 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1409}
1410
1411static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
1412 struct cftype *cft,
1413 struct file *file,
1414 char __user *buf,
1415 size_t nbytes, loff_t *ppos)
1416{
1417 enum cgroup_filetype type = cft->private;
1418 char *page;
1419 ssize_t retval = 0;
1420 char *s;
1421
1422 if (!(page = (char *)__get_free_page(GFP_KERNEL)))
1423 return -ENOMEM;
1424
1425 s = page;
1426
1427 switch (type) {
1428 case FILE_RELEASE_AGENT:
1429 {
1430 struct cgroupfs_root *root;
1431 size_t n;
1432 mutex_lock(&cgroup_mutex);
1433 root = cgrp->root;
1434 n = strnlen(root->release_agent_path,
1435 sizeof(root->release_agent_path));
1436 n = min(n, (size_t) PAGE_SIZE);
1437 strncpy(s, root->release_agent_path, n);
1438 mutex_unlock(&cgroup_mutex);
1439 s += n;
1440 break;
1441 }
1442 default:
1443 retval = -EINVAL;
1444 goto out;
1445 }
1446 *s++ = '\n';
1447
1448 retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
1449out:
1450 free_page((unsigned long)page);
1451 return retval;
1452}
1453
/* Top-level read dispatcher: route to the cftype's read handler */
static ssize_t cgroup_file_read(struct file *file, char __user *buf,
				   size_t nbytes, loff_t *ppos)
{
	struct cftype *cft = __d_cft(file->f_dentry);
	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);

	if (!cft)
		return -ENODEV;

	if (cft->read)
		return cft->read(cgrp, cft, file, buf, nbytes, ppos);
	if (cft->read_uint)
		return cgroup_read_uint(cgrp, cft, file, buf, nbytes, ppos);
	return -EINVAL;
}
1469
/* Open a cgroup control file, delegating to the cftype's open if any */
static int cgroup_file_open(struct inode *inode, struct file *file)
{
	int err;
	struct cftype *cft;

	err = generic_file_open(inode, file);
	if (err)
		return err;

	cft = __d_cft(file->f_dentry);
	if (!cft)
		return -ENODEV;
	if (cft->open)
		err = cft->open(inode, file);
	else
		err = 0;

	return err;
}
1489
/*
 * Release a cgroup control file.  cft is not NULL-checked here: if it
 * were NULL, cgroup_file_open() would have failed with -ENODEV and
 * release would never be called.
 */
static int cgroup_file_release(struct inode *inode, struct file *file)
{
	struct cftype *cft = __d_cft(file->f_dentry);
	if (cft->release)
		return cft->release(inode, file);
	return 0;
}
1497
1498
1499
1500
/*
 * cgroup_rename - rename a cgroup
 * Only directory renames within the same parent are allowed.
 */
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
			    struct inode *new_dir, struct dentry *new_dentry)
{
	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		return -ENOTDIR;
	if (new_dentry->d_inode)
		return -EEXIST;
	if (old_dir != new_dir)
		return -EIO;
	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
}
1512
/* File operations for cgroup control files */
static struct file_operations cgroup_file_operations = {
	.read = cgroup_file_read,
	.write = cgroup_file_write,
	.llseek = generic_file_llseek,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
};

/* Inode operations for cgroup directories */
static struct inode_operations cgroup_dir_inode_operations = {
	.lookup = simple_lookup,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.rename = cgroup_rename,
};
1527
/*
 * Create and instantiate a (regular or directory) inode for @dentry.
 * Directories are returned with their i_mutex held (I_MUTEX_CHILD).
 */
static int cgroup_create_file(struct dentry *dentry, int mode,
				struct super_block *sb)
{
	static struct dentry_operations cgroup_dops = {
		.d_iput = cgroup_diput,
	};

	struct inode *inode;

	if (!dentry)
		return -ENOENT;
	if (dentry->d_inode)
		return -EEXIST;

	inode = cgroup_new_inode(mode, sb);
	if (!inode)
		return -ENOMEM;

	if (S_ISDIR(mode)) {
		inode->i_op = &cgroup_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;

		/* start off with i_nlink == 2 (for "." entry) */
		inc_nlink(inode);

		/*
		 * start with the directory inode held, so that we can
		 * populate it without racing with another mkdir
		 */
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
	} else if (S_ISREG(mode)) {
		inode->i_size = 0;
		inode->i_fop = &cgroup_file_operations;
	}
	dentry->d_op = &cgroup_dops;
	d_instantiate(dentry, inode);
	dget(dentry);	/* Extra count - pin the dentry in core */
	return 0;
}
1565
1566
1567
1568
1569
1570
1571
1572
1573
/*
 * cgroup_create_dir - create the directory backing a cgroup.
 *
 * @cgrp: the cgroup being instantiated (cgrp->parent must be set)
 * @dentry: negative dentry for the new directory
 * @mode: permission bits for the directory
 *
 * On success the dentry is pinned and cross-linked with @cgrp.  The
 * caller's reference on @dentry is dropped in every case.
 */
static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
				int mode)
{
	struct dentry *parent;
	int error = 0;

	parent = cgrp->parent->dentry;
	error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
	if (!error) {
		dentry->d_fsdata = cgrp;
		/* The subdirectory's ".." adds a link to the parent. */
		inc_nlink(parent->d_inode);
		cgrp->dentry = dentry;
		dget(dentry);
	}
	dput(dentry);

	return error;
}
1592
1593int cgroup_add_file(struct cgroup *cgrp,
1594 struct cgroup_subsys *subsys,
1595 const struct cftype *cft)
1596{
1597 struct dentry *dir = cgrp->dentry;
1598 struct dentry *dentry;
1599 int error;
1600
1601 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
1602 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
1603 strcpy(name, subsys->name);
1604 strcat(name, ".");
1605 }
1606 strcat(name, cft->name);
1607 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
1608 dentry = lookup_one_len(name, dir, strlen(name));
1609 if (!IS_ERR(dentry)) {
1610 error = cgroup_create_file(dentry, 0644 | S_IFREG,
1611 cgrp->root->sb);
1612 if (!error)
1613 dentry->d_fsdata = (void *)cft;
1614 dput(dentry);
1615 } else
1616 error = PTR_ERR(dentry);
1617 return error;
1618}
1619
1620int cgroup_add_files(struct cgroup *cgrp,
1621 struct cgroup_subsys *subsys,
1622 const struct cftype cft[],
1623 int count)
1624{
1625 int i, err;
1626 for (i = 0; i < count; i++) {
1627 err = cgroup_add_file(cgrp, subsys, &cft[i]);
1628 if (err)
1629 return err;
1630 }
1631 return 0;
1632}
1633
1634
1635
1636int cgroup_task_count(const struct cgroup *cgrp)
1637{
1638 int count = 0;
1639 struct list_head *l;
1640
1641 read_lock(&css_set_lock);
1642 l = cgrp->css_sets.next;
1643 while (l != &cgrp->css_sets) {
1644 struct cg_cgroup_link *link =
1645 list_entry(l, struct cg_cgroup_link, cgrp_link_list);
1646 count += atomic_read(&link->cg->ref.refcount);
1647 l = l->next;
1648 }
1649 read_unlock(&css_set_lock);
1650 return count;
1651}
1652
1653
1654
1655
1656
/*
 * cgroup_advance_iter - advance a cgroup task iterator to the next
 * css_set that has member tasks, or mark the iteration finished.
 *
 * Called with css_set_lock held for read.  On return, either
 * it->cg_link is NULL (no more tasks) or it->task points at the first
 * task of a non-empty css_set.
 */
static void cgroup_advance_iter(struct cgroup *cgrp,
				  struct cgroup_iter *it)
{
	struct list_head *l = it->cg_link;
	struct cg_cgroup_link *link;
	struct css_set *cg;

	/* Advance to the next non-empty css_set linked to this cgroup */
	do {
		l = l->next;
		if (l == &cgrp->css_sets) {
			/* Walked the entire link list - iteration done. */
			it->cg_link = NULL;
			return;
		}
		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
		cg = link->cg;
	} while (list_empty(&cg->tasks));
	it->cg_link = l;
	it->task = cg->tasks.next;
}
1677
/*
 * cgroup_iter_start - begin iterating over the tasks in a cgroup.
 *
 * Takes css_set_lock for read; cgroup_iter_end() drops it, so the
 * caller must pair the two and must not sleep in between.
 */
void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
{
	/*
	 * The first time anyone tries to iterate over a cgroup, enable
	 * the linking of each task onto its css_set's task list and
	 * retro-link every existing task, since fork skips that
	 * bookkeeping while use_task_css_set_links is off.
	 */
	if (!use_task_css_set_links) {
		struct task_struct *p, *g;
		write_lock(&css_set_lock);
		use_task_css_set_links = 1;
		do_each_thread(g, p) {
			task_lock(p);
			if (list_empty(&p->cg_list))
				list_add(&p->cg_list, &p->cgroups->tasks);
			task_unlock(p);
		} while_each_thread(g, p);
		write_unlock(&css_set_lock);
	}
	read_lock(&css_set_lock);
	it->cg_link = &cgrp->css_sets;
	cgroup_advance_iter(cgrp, it);
}
1701
/*
 * cgroup_iter_next - return the next task in the iteration, or NULL
 * once the cgroup's tasks are exhausted.
 *
 * Must be called between cgroup_iter_start() and cgroup_iter_end(),
 * i.e. with css_set_lock held for read.
 */
struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
					struct cgroup_iter *it)
{
	struct task_struct *res;
	struct list_head *l = it->task;

	/* A NULL cg_link means the iteration already finished. */
	if (!it->cg_link)
		return NULL;
	res = list_entry(l, struct task_struct, cg_list);
	/* Advance to the next entry before returning this one. */
	l = l->next;
	if (l == &res->cgroups->tasks) {
		/*
		 * Reached the end of this css_set's task list - move
		 * on to the next cg_cgroup_link.
		 */
		cgroup_advance_iter(cgrp, it);
	} else {
		it->task = l;
	}
	return res;
}
1723
/* Finish an iteration begun by cgroup_iter_start(): drop the lock. */
void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
{
	read_unlock(&css_set_lock);
}
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
/*
 * Per-open state for the "tasks" control file: a preformatted,
 * newline-separated pid listing and its length in bytes.
 */
struct ctr_struct {
	char *buf;
	int bufsz;
};
1747
1748
1749
1750
1751
1752
1753
1754
1755static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
1756{
1757 int n = 0;
1758 struct cgroup_iter it;
1759 struct task_struct *tsk;
1760 cgroup_iter_start(cgrp, &it);
1761 while ((tsk = cgroup_iter_next(cgrp, &it))) {
1762 if (unlikely(n == npids))
1763 break;
1764 pidarray[n++] = task_pid_nr(tsk);
1765 }
1766 cgroup_iter_end(cgrp, &it);
1767 return n;
1768}
1769
1770
1771
1772
1773
1774
1775
1776
1777
/*
 * cgroupstats_build - fill @stats with per-state task counts for the
 * cgroup that @dentry belongs to.
 *
 * Returns 0 on success, or -EINVAL if @dentry is not from a cgroup
 * filesystem.  Counters are incremented, not reset, so the caller
 * should start from a zeroed cgroupstats.
 */
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
	int ret = -EINVAL;
	struct cgroup *cgrp;
	struct cgroup_iter it;
	struct task_struct *tsk;

	/* Validate the dentry: does it belong to a cgroup mount? */
	if (dentry->d_sb->s_op != &cgroup_ops)
		goto err;

	ret = 0;
	cgrp = dentry->d_fsdata;
	rcu_read_lock();

	/* Bucket every task in the cgroup by scheduler state. */
	cgroup_iter_start(cgrp, &it);
	while ((tsk = cgroup_iter_next(cgrp, &it))) {
		switch (tsk->state) {
		case TASK_RUNNING:
			stats->nr_running++;
			break;
		case TASK_INTERRUPTIBLE:
			stats->nr_sleeping++;
			break;
		case TASK_UNINTERRUPTIBLE:
			stats->nr_uninterruptible++;
			break;
		case TASK_STOPPED:
			stats->nr_stopped++;
			break;
		default:
			/* Any other state: check for I/O wait via delayacct. */
			if (delayacct_is_task_waiting_on_io(tsk))
				stats->nr_io_wait++;
			break;
		}
	}
	cgroup_iter_end(cgrp, &it);

	rcu_read_unlock();
err:
	return ret;
}
1821
/*
 * cmppid - sort() comparator ordering pids ascending.
 *
 * Returns a negative, zero or positive value as *a is less than,
 * equal to or greater than *b.  Plain subtraction is fine here since
 * pids are small non-negative values.
 */
static int cmppid(const void *a, const void *b)
{
	pid_t lhs = *(const pid_t *)a;
	pid_t rhs = *(const pid_t *)b;

	return lhs - rhs;
}
1826
1827
1828
1829
1830
1831
/*
 * pid_array_to_buf - format @npids pids as newline-separated decimal
 * text into @buf (at most @sz bytes including the terminator).
 *
 * Returns the number of characters that would have been written with
 * unlimited space, so a sizing pass with a tiny buffer yields the
 * exact length needed.  The max() clamp keeps the remaining-space
 * argument non-negative once truncation has pushed cnt past sz.
 */
static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
{
	int cnt = 0;
	int i;

	for (i = 0; i < npids; i++)
		cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
	return cnt;
}
1841
1842
1843
1844
1845
1846
1847
1848static int cgroup_tasks_open(struct inode *unused, struct file *file)
1849{
1850 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1851 struct ctr_struct *ctr;
1852 pid_t *pidarray;
1853 int npids;
1854 char c;
1855
1856 if (!(file->f_mode & FMODE_READ))
1857 return 0;
1858
1859 ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
1860 if (!ctr)
1861 goto err0;
1862
1863
1864
1865
1866
1867
1868
1869 npids = cgroup_task_count(cgrp);
1870 if (npids) {
1871 pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
1872 if (!pidarray)
1873 goto err1;
1874
1875 npids = pid_array_load(pidarray, npids, cgrp);
1876 sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
1877
1878
1879 ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
1880 ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
1881 if (!ctr->buf)
1882 goto err2;
1883 ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
1884
1885 kfree(pidarray);
1886 } else {
1887 ctr->buf = 0;
1888 ctr->bufsz = 0;
1889 }
1890 file->private_data = ctr;
1891 return 0;
1892
1893err2:
1894 kfree(pidarray);
1895err1:
1896 kfree(ctr);
1897err0:
1898 return -ENOMEM;
1899}
1900
/*
 * cgroup_tasks_read - copy the pid listing built at open time out to
 * userspace.  Relies on cgroup_tasks_open() having populated
 * file->private_data for every read open.
 */
static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
				    struct cftype *cft,
				    struct file *file, char __user *buf,
				    size_t nbytes, loff_t *ppos)
{
	struct ctr_struct *ctr = file->private_data;

	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
}
1910
1911static int cgroup_tasks_release(struct inode *unused_inode,
1912 struct file *file)
1913{
1914 struct ctr_struct *ctr;
1915
1916 if (file->f_mode & FMODE_READ) {
1917 ctr = file->private_data;
1918 kfree(ctr->buf);
1919 kfree(ctr);
1920 }
1921 return 0;
1922}
1923
/* Read handler for the "notify_on_release" control file. */
static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
					    struct cftype *cft)
{
	return notify_on_release(cgrp);
}
1929
/* Read handler for the "releasable" control file. */
static u64 cgroup_read_releasable(struct cgroup *cgrp, struct cftype *cft)
{
	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
}
1934
1935
1936
1937
/* Control files present in every cgroup directory. */
static struct cftype files[] = {
	{
		.name = "tasks",
		.open = cgroup_tasks_open,
		.read = cgroup_tasks_read,
		.write = cgroup_common_file_write,
		.release = cgroup_tasks_release,
		.private = FILE_TASKLIST,
	},

	{
		.name = "notify_on_release",
		.read_uint = cgroup_read_notify_on_release,
		.write = cgroup_common_file_write,
		.private = FILE_NOTIFY_ON_RELEASE,
	},

	{
		.name = "releasable",
		.read_uint = cgroup_read_releasable,
		.private = FILE_RELEASABLE,
	}
};
1961
/* "release_agent" is created only in a hierarchy's root directory. */
static struct cftype cft_release_agent = {
	.name = "release_agent",
	.read = cgroup_common_file_read,
	.write = cgroup_common_file_write,
	.private = FILE_RELEASE_AGENT,
};
1968
1969static int cgroup_populate_dir(struct cgroup *cgrp)
1970{
1971 int err;
1972 struct cgroup_subsys *ss;
1973
1974
1975 cgroup_clear_directory(cgrp->dentry);
1976
1977 err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
1978 if (err < 0)
1979 return err;
1980
1981 if (cgrp == cgrp->top_cgroup) {
1982 if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
1983 return err;
1984 }
1985
1986 for_each_subsys(cgrp->root, ss) {
1987 if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
1988 return err;
1989 }
1990
1991 return 0;
1992}
1993
/*
 * init_cgroup_css - wire a freshly created subsystem state into its
 * cgroup.  CSS_ROOT marks states that belong to the dummy top cgroup.
 * BUGs if the cgroup already has a state for this subsystem.
 */
static void init_cgroup_css(struct cgroup_subsys_state *css,
			       struct cgroup_subsys *ss,
			       struct cgroup *cgrp)
{
	css->cgroup = cgrp;
	atomic_set(&css->refcnt, 0);
	css->flags = 0;
	if (cgrp == dummytop)
		set_bit(CSS_ROOT, &css->flags);
	BUG_ON(cgrp->subsys[ss->subsys_id]);
	cgrp->subsys[ss->subsys_id] = css;
}
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
/*
 * cgroup_create - create a child cgroup.
 *
 * @parent: cgroup that will be the parent of the new cgroup
 * @dentry: dentry of the new cgroup's directory
 * @mode: permission bits for the new directory
 *
 * Called from mkdir with the parent directory's i_mutex held.  On
 * success returns 0 with cgroup_mutex and the new directory's i_mutex
 * both released; on failure everything taken here is unwound.
 */
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
			     int mode)
{
	struct cgroup *cgrp;
	struct cgroupfs_root *root = parent->root;
	int err = 0;
	struct cgroup_subsys *ss;
	struct super_block *sb = root->sb;

	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
	if (!cgrp)
		return -ENOMEM;

	/*
	 * Pin the superblock so the hierarchy cannot be torn down
	 * while the cgroup is being created; released on the error
	 * path below, or much later when the cgroup dies.
	 */
	atomic_inc(&sb->s_active);

	mutex_lock(&cgroup_mutex);

	cgrp->flags = 0;
	INIT_LIST_HEAD(&cgrp->sibling);
	INIT_LIST_HEAD(&cgrp->children);
	INIT_LIST_HEAD(&cgrp->css_sets);
	INIT_LIST_HEAD(&cgrp->release_list);

	cgrp->parent = parent;
	cgrp->root = parent->root;
	cgrp->top_cgroup = parent->top_cgroup;

	/* Ask every bound subsystem to create its state for this cgroup. */
	for_each_subsys(root, ss) {
		struct cgroup_subsys_state *css = ss->create(ss, cgrp);
		if (IS_ERR(css)) {
			err = PTR_ERR(css);
			goto err_destroy;
		}
		init_cgroup_css(css, ss, cgrp);
	}

	list_add(&cgrp->sibling, &cgrp->parent->children);
	root->number_of_cgroups++;

	err = cgroup_create_dir(cgrp, dentry, mode);
	if (err < 0)
		goto err_remove;

	/* The cgroup directory was pre-locked for us */
	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));

	err = cgroup_populate_dir(cgrp);
	/* If err < 0, we have a half-filled directory - oh well ;) */

	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);

	return 0;

 err_remove:
	/* Undo the sibling linkage done just above. */
	list_del(&cgrp->sibling);
	root->number_of_cgroups--;

 err_destroy:
	/* Tear down whichever subsystem states got created. */
	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	/* Release the reference count we took on the superblock */
	deactivate_super(sb);

	kfree(cgrp);
	return err;
}
2095
2096static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2097{
2098 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
2099
2100
2101 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
2102}
2103
2104static inline int cgroup_has_css_refs(struct cgroup *cgrp)
2105{
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115 int i;
2116 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2117 struct cgroup_subsys *ss = subsys[i];
2118 struct cgroup_subsys_state *css;
2119
2120 if (ss->root != cgrp->root)
2121 continue;
2122 css = cgrp->subsys[ss->subsys_id];
2123
2124
2125
2126
2127
2128
2129 if (css && atomic_read(&css->refcnt)) {
2130 return 1;
2131 }
2132 }
2133 return 0;
2134}
2135
/*
 * cgroup_rmdir - remove an empty, unreferenced cgroup directory.
 *
 * Fails with -EBUSY while the cgroup still has a non-zero count,
 * children, or live subsystem-state references.  On success the
 * subsystem states are destroyed, the cgroup is unhashed from the
 * VFS, and the parent is checked for release-agent notification.
 */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *cgrp = dentry->d_fsdata;
	struct dentry *d;
	struct cgroup *parent;
	struct cgroup_subsys *ss;
	struct super_block *sb;
	struct cgroupfs_root *root;

	/* the vfs holds both inode->i_mutex already */

	mutex_lock(&cgroup_mutex);
	if (atomic_read(&cgrp->count) != 0) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	if (!list_empty(&cgrp->children)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}

	parent = cgrp->parent;
	root = cgrp->root;
	sb = root->sb;

	if (cgroup_has_css_refs(cgrp)) {
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}

	for_each_subsys(root, ss) {
		if (cgrp->subsys[ss->subsys_id])
			ss->destroy(ss, cgrp);
	}

	/*
	 * Mark the cgroup removed and drop any queued release
	 * notification under release_list_lock, so the release agent
	 * never operates on a dying cgroup.
	 */
	spin_lock(&release_list_lock);
	set_bit(CGRP_REMOVED, &cgrp->flags);
	if (!list_empty(&cgrp->release_list))
		list_del(&cgrp->release_list);
	spin_unlock(&release_list_lock);

	/* Unlink from the parent's children list. */
	list_del(&cgrp->sibling);
	spin_lock(&cgrp->dentry->d_lock);
	d = dget(cgrp->dentry);
	cgrp->dentry = NULL;
	spin_unlock(&d->d_lock);

	cgroup_d_remove_dir(d);
	dput(d);
	root->number_of_cgroups--;

	/* An empty parent may itself now be releasable. */
	set_bit(CGRP_RELEASABLE, &parent->flags);
	check_for_release(parent);

	mutex_unlock(&cgroup_mutex);
	/* Drop the superblock reference taken when the cgroup was created. */
	deactivate_super(sb);
	return 0;
}
2196
/*
 * cgroup_init_subsys - boot-time initialization of one subsystem.
 *
 * Creates the subsystem's root state on the dummy hierarchy, points
 * every existing css_set at it, replays fork events for all existing
 * tasks if requested, and marks the subsystem active.  Only called
 * from cgroup_init_early()/cgroup_init().
 */
static void cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;
	struct list_head *l;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	/* Create the top cgroup state for this subsystem */
	ss->root = &rootnode;
	css = ss->create(ss, dummytop);
	/* We don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_cgroup_css(css, ss, dummytop);

	/*
	 * Point all existing css_sets at the new state.  NOTE(review):
	 * the do/while deliberately starts at &init_css_set.list itself
	 * - init_css_set is a real css_set whose 'list' member is the
	 * list head, so list_entry() on the head yields init_css_set
	 * and updates it too; a plain list_for_each_entry() would skip
	 * the head entry.
	 */
	write_lock(&css_set_lock);
	l = &init_css_set.list;
	do {
		struct css_set *cg =
			list_entry(l, struct css_set, list);
		cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
		l = l->next;
	} while (l != &init_css_set.list);
	write_unlock(&css_set_lock);

	/*
	 * If this subsystem asked for fork notifications, send one now
	 * for every task already in the system.
	 */
	if (ss->fork) {
		struct task_struct *g, *p;

		read_lock(&tasklist_lock);
		do_each_thread(g, p) {
			ss->fork(ss, p);
		} while_each_thread(g, p);
		read_unlock(&tasklist_lock);
	}

	need_forkexit_callback |= ss->fork || ss->exit;

	ss->active = 1;
}
2242
2243
2244
2245
2246
/*
 * cgroup_init_early - cgroup initialization at system boot, before
 * any non-init kernel thread exists.  Sets up init_css_set, the dummy
 * hierarchy, and any subsystem that requested early init.
 */
int __init cgroup_init_early(void)
{
	int i;
	kref_init(&init_css_set.ref);
	kref_get(&init_css_set.ref);
	INIT_LIST_HEAD(&init_css_set.list);
	INIT_LIST_HEAD(&init_css_set.cg_links);
	INIT_LIST_HEAD(&init_css_set.tasks);
	css_set_count = 1;
	init_cgroup_root(&rootnode);
	list_add(&rootnode.root_list, &roots);
	root_count = 1;
	init_task.cgroups = &init_css_set;

	/* Link init_css_set into the dummy hierarchy's top cgroup. */
	init_css_set_link.cg = &init_css_set;
	list_add(&init_css_set_link.cgrp_link_list,
		 &rootnode.top_cgroup.css_sets);
	list_add(&init_css_set_link.cg_link_list,
		 &init_css_set.cg_links);

	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		/* Sanity-check every statically registered subsystem. */
		BUG_ON(!ss->name);
		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
		BUG_ON(!ss->create);
		BUG_ON(!ss->destroy);
		if (ss->subsys_id != i) {
			printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
			       ss->name, ss->subsys_id);
			BUG();
		}

		if (ss->early_init)
			cgroup_init_subsys(ss);
	}
	return 0;
}
2285
2286
2287
2288
2289
/*
 * cgroup_init - register the cgroup filesystem and /proc/cgroups, and
 * initialize any subsystem that didn't request early init.
 */
int __init cgroup_init(void)
{
	int err;
	int i;
	struct proc_dir_entry *entry;

	err = bdi_init(&cgroup_backing_dev_info);
	if (err)
		return err;

	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		if (!ss->early_init)
			cgroup_init_subsys(ss);
	}

	err = register_filesystem(&cgroup_fs_type);
	if (err < 0)
		goto out;

	/* /proc/cgroups is informational; failing to create it is not fatal. */
	entry = create_proc_entry("cgroups", 0, NULL);
	if (entry)
		entry->proc_fops = &proc_cgroupstats_operations;

out:
	if (err)
		bdi_destroy(&cgroup_backing_dev_info);

	return err;
}
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
/*
 * proc_cgroup_show - implement /proc/<pid>/cgroup.
 *
 * For each active hierarchy, print the comma-separated bound
 * subsystem names, a ':', and the path of the cgroup the task belongs
 * to in that hierarchy.
 */
static int proc_cgroup_show(struct seq_file *m, void *v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	int retval;
	struct cgroupfs_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		goto out;

	retval = -ESRCH;
	pid = m->private;
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk)
		goto out_free;

	retval = 0;

	mutex_lock(&cgroup_mutex);

	for_each_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int subsys_id;
		int count = 0;

		/* Skip the dummy hierarchy - no subsystems are bound to it. */
		if (!root->actual_subsys_bits)
			continue;
		for_each_subsys(root, ss)
			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
		seq_putc(m, ':');
		get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
		cgrp = task_cgroup(tsk, subsys_id);
		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
		if (retval < 0)
			goto out_unlock;
		seq_puts(m, buf);
		seq_putc(m, '\n');
	}

out_unlock:
	mutex_unlock(&cgroup_mutex);
	put_task_struct(tsk);
out_free:
	kfree(buf);
out:
	return retval;
}
2386
2387static int cgroup_open(struct inode *inode, struct file *file)
2388{
2389 struct pid *pid = PROC_I(inode)->pid;
2390 return single_open(file, proc_cgroup_show, pid);
2391}
2392
/* File operations for /proc/<pid>/cgroup (single-shot seq_file). */
struct file_operations proc_cgroup_operations = {
	.open = cgroup_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
2399
2400
/*
 * proc_cgroupstats_show - implement /proc/cgroups: one line per
 * subsystem with its hierarchy bits and the number of cgroups in that
 * hierarchy.
 */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
	int i;

	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\n");
	mutex_lock(&cgroup_mutex);
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		seq_printf(m, "%s\t%lu\t%d\n",
			   ss->name, ss->root->subsys_bits,
			   ss->root->number_of_cgroups);
	}
	mutex_unlock(&cgroup_mutex);
	return 0;
}
2416
2417static int cgroupstats_open(struct inode *inode, struct file *file)
2418{
2419 return single_open(file, proc_cgroupstats_show, 0);
2420}
2421
/* File operations for /proc/cgroups (single-shot seq_file). */
static struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
/*
 * cgroup_fork - attach a newly forked task to its parent's css_set.
 *
 * Called on every fork.  task_lock(current) keeps current->cgroups
 * stable while it is copied and pinned.  The child's cg_list is left
 * empty here; cgroup_post_fork() links it once css_set/task linking
 * has been enabled.
 */
void cgroup_fork(struct task_struct *child)
{
	task_lock(current);
	child->cgroups = current->cgroups;
	get_css_set(child->cgroups);
	task_unlock(current);
	INIT_LIST_HEAD(&child->cg_list);
}
2453
2454
2455
2456
2457
2458
2459void cgroup_fork_callbacks(struct task_struct *child)
2460{
2461 if (need_forkexit_callback) {
2462 int i;
2463 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2464 struct cgroup_subsys *ss = subsys[i];
2465 if (ss->fork)
2466 ss->fork(ss, child);
2467 }
2468 }
2469}
2470
2471
2472
2473
2474
2475
2476
/*
 * cgroup_post_fork - add the new task to its css_set's task list,
 * but only once an iterator has enabled css_set/task linking.
 */
void cgroup_post_fork(struct task_struct *child)
{
	if (use_task_css_set_links) {
		write_lock(&css_set_lock);
		if (list_empty(&child->cg_list))
			list_add(&child->cg_list, &child->cgroups->tasks);
		write_unlock(&css_set_lock);
	}
}
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
/*
 * cgroup_exit - detach an exiting task from cgroups.
 *
 * @tsk: the exiting task
 * @run_callbacks: whether to run the subsystems' exit hooks
 *
 * Reassigns the task to init_css_set and drops the reference it held
 * on its old css_set.
 */
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
	int i;
	struct css_set *cg;

	if (run_callbacks && need_forkexit_callback) {
		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];
			if (ss->exit)
				ss->exit(ss, tsk);
		}
	}

	/*
	 * Unlink from the css_set task list if linked.  The unlocked
	 * list_empty() is a cheap pre-filter; re-check under
	 * css_set_lock before deleting, since another CPU may race.
	 */
	if (!list_empty(&tsk->cg_list)) {
		write_lock(&css_set_lock);
		if (!list_empty(&tsk->cg_list))
			list_del(&tsk->cg_list);
		write_unlock(&css_set_lock);
	}

	/* Reassign the task to init_css_set under task_lock. */
	task_lock(tsk);
	cg = tsk->cgroups;
	tsk->cgroups = &init_css_set;
	task_unlock(tsk);
	if (cg)
		put_css_set_taskexit(cg);
}
2554
2555
2556
2557
2558
2559
/*
 * cgroup_clone - clone the current task's cgroup in the hierarchy
 * that @subsys is attached to, and move @tsk into the new cgroup.
 *
 * Returns 0 on success (including the no-op case where the subsystem
 * is not bound to any real hierarchy) or a negative errno.  If the
 * hierarchy changes under us we retry, deliberately leaking the
 * cgroup created in the old hierarchy (see the "Race" printk below).
 */
int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
{
	struct dentry *dentry;
	int ret = 0;
	char nodename[MAX_CGROUP_TYPE_NAMELEN];
	struct cgroup *parent, *child;
	struct inode *inode;
	struct css_set *cg;
	struct cgroupfs_root *root;
	struct cgroup_subsys *ss;

	/* We shouldn't be called by an unregistered subsystem */
	BUG_ON(!subsys->active);

	/*
	 * First figure out what hierarchy and cgroup we're dealing
	 * with, and pin them so we can drop cgroup_mutex.
	 */
	mutex_lock(&cgroup_mutex);
 again:
	root = subsys->root;
	if (root == &rootnode) {
		/* Subsystem only on the dummy hierarchy: nothing to clone. */
		printk(KERN_INFO
		       "Not cloning cgroup for unused subsystem %s\n",
		       subsys->name);
		mutex_unlock(&cgroup_mutex);
		return 0;
	}
	cg = tsk->cgroups;
	parent = task_cgroup(tsk, subsys->subsys_id);

	snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "node_%d", tsk->pid);

	/* Pin the hierarchy */
	atomic_inc(&parent->root->sb->s_active);

	/* Keep the cgroup alive */
	get_css_set(cg);
	mutex_unlock(&cgroup_mutex);

	/* Now do the VFS work to create a cgroup */
	inode = parent->dentry->d_inode;

	/*
	 * Hold the parent directory mutex across this operation to
	 * stop anyone else deleting the new cgroup.
	 */
	mutex_lock(&inode->i_mutex);
	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
	if (IS_ERR(dentry)) {
		printk(KERN_INFO
		       "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
		       PTR_ERR(dentry));
		ret = PTR_ERR(dentry);
		goto out_release;
	}

	/* Create the cgroup directory, which also creates the cgroup */
	ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
	child = __d_cgrp(dentry);
	dput(dentry);
	if (ret) {
		printk(KERN_INFO
		       "Failed to create cgroup %s: %d\n", nodename,
		       ret);
		goto out_release;
	}

	if (!child) {
		printk(KERN_INFO
		       "Couldn't find new cgroup %s\n", nodename);
		ret = -ENOMEM;
		goto out_release;
	}

	/*
	 * The cgroup now exists.  Retake cgroup_mutex and check that
	 * the subsystem is still attached to the same hierarchy and
	 * the task is still in the same cgroup as before.
	 */
	mutex_lock(&cgroup_mutex);
	if ((root != subsys->root) ||
	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
		/* We raced with remount or attach - drop everything and retry. */
		mutex_unlock(&inode->i_mutex);
		put_css_set(cg);

		deactivate_super(parent->root->sb);
		/*
		 * The cgroup is still accessible in the VFS, but we're
		 * not going to try to rmdir() it at this point.
		 */
		printk(KERN_INFO
		       "Race in cgroup_clone() - leaking cgroup %s\n",
		       nodename);
		goto again;
	}

	/* do any required auto-setup */
	for_each_subsys(root, ss) {
		if (ss->post_clone)
			ss->post_clone(ss, child);
	}

	/* All seems fine. Finish by moving the task into the new cgroup */
	ret = attach_task(child, tsk);
	mutex_unlock(&cgroup_mutex);

 out_release:
	mutex_unlock(&inode->i_mutex);

	/* Drop the pinned css_set and the superblock reference. */
	mutex_lock(&cgroup_mutex);
	put_css_set(cg);
	mutex_unlock(&cgroup_mutex);
	deactivate_super(parent->root->sb);
	return ret;
}
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680int cgroup_is_descendant(const struct cgroup *cgrp)
2681{
2682 int ret;
2683 struct cgroup *target;
2684 int subsys_id;
2685
2686 if (cgrp == dummytop)
2687 return 1;
2688
2689 get_first_subsys(cgrp, NULL, &subsys_id);
2690 target = task_cgroup(current, subsys_id);
2691 while (cgrp != target && cgrp!= cgrp->top_cgroup)
2692 cgrp = cgrp->parent;
2693 ret = (cgrp == target);
2694 return ret;
2695}
2696
/*
 * check_for_release - queue @cgrp for release-agent notification if
 * it has become empty and releasable.
 *
 * NOTE(review): callers reach here holding either cgroup_mutex
 * (cgroup_rmdir) or just rcu_read_lock (__css_put) - confirm the
 * emptiness checks tolerate that.  The release_list manipulation
 * itself is protected by release_list_lock.
 */
static void check_for_release(struct cgroup *cgrp)
{
	/* All of these checks rely on RCU or cgroup_mutex */
	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
	    && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
		/*
		 * The cgroup is currently removeable.  If it's not
		 * already queued for a userspace notification, queue
		 * it now and kick the workqueue.
		 */
		int need_schedule_work = 0;
		spin_lock(&release_list_lock);
		if (!cgroup_is_removed(cgrp) &&
		    list_empty(&cgrp->release_list)) {
			list_add(&cgrp->release_list, &release_list);
			need_schedule_work = 1;
		}
		spin_unlock(&release_list_lock);
		if (need_schedule_work)
			schedule_work(&release_agent_work);
	}
}
2718
/*
 * __css_put - drop a reference on a subsystem state.  If it was the
 * last one and the cgroup wants release notifications, mark the
 * cgroup releasable and see whether the release agent should run.
 */
void __css_put(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	rcu_read_lock();
	if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
		set_bit(CGRP_RELEASABLE, &cgrp->flags);
		check_for_release(cgrp);
	}
	rcu_read_unlock();
}
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
/*
 * cgroup_release_agent - workqueue handler that invokes the
 * userspace release agent for every cgroup queued on release_list.
 *
 * release_list_lock is dropped around allocation, path construction
 * and the usermode-helper call (each continue path re-takes it before
 * the loop condition re-tests the list); cgroup_mutex is likewise
 * dropped across the sleeping call_usermodehelper().
 */
static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf;
		struct cgroup *cgrp = list_entry(release_list.next,
						    struct cgroup,
						    release_list);
		/* Dequeue before dropping the lock. */
		list_del_init(&cgrp->release_list);
		spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf) {
			/* No memory for the path: skip this cgroup. */
			spin_lock(&release_list_lock);
			continue;
		}

		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) {
			kfree(pathbuf);
			spin_lock(&release_list_lock);
			continue;
		}

		i = 0;
		argv[i++] = cgrp->root->release_agent_path;
		argv[i++] = (char *)pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal command environment */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/*
		 * Drop cgroup_mutex while we invoke the usermode
		 * helper, since the exec could hit disk and hence be
		 * slow.
		 */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		kfree(pathbuf);
		mutex_lock(&cgroup_mutex);
		spin_lock(&release_list_lock);
	}
	spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
2804