#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cgroup.h>
#include <linux/cred.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/init_task.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/magic.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/percpu-rwsem.h>
#include <linux/string.h>
#include <linux/sort.h>
#include <linux/kmod.h>
#include <linux/delayacct.h>
#include <linux/cgroupstats.h>
#include <linux/hashtable.h>
#include <linux/pid_namespace.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/cpuset.h>
#include <linux/proc_ns.h>
#include <linux/nsproxy.h>
#include <net/sock.h>

/*
 * pidlists linger the following amount before being destroyed.  The goal
 * is avoiding frequent destruction in the middle of consecutive read calls
 * so that pidlists aren't recreated for each read.
 */
#define CGROUP_PIDLIST_DESTROY_DELAY	HZ

#define CGROUP_FILE_NAME_MAX		(MAX_CGROUP_TYPE_NAMELEN +	\
					 MAX_CFTYPE_NAME + 2)

/*
 * cgroup_mutex is the master lock.  Any modification to cgroup or its
 * hierarchy must be performed while holding it.
 *
 * css_set_lock protects task->cgroups pointer, the list of css_set
 * objects, and the chain of tasks off each css_set.
 *
 * These locks are exported if CONFIG_PROVE_RCU so that accessors in
 * cgroup.h can use them for lockdep annotations.
 */
#ifdef CONFIG_PROVE_RCU
DEFINE_MUTEX(cgroup_mutex);
DEFINE_SPINLOCK(css_set_lock);
EXPORT_SYMBOL_GPL(cgroup_mutex);
EXPORT_SYMBOL_GPL(css_set_lock);
#else
static DEFINE_MUTEX(cgroup_mutex);
static DEFINE_SPINLOCK(css_set_lock);
#endif
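
/*
 * As used throughout this file, cgroup_mutex nests outside css_set_lock:
 * paths such as find_css_set() take cgroup_mutex first and then acquire
 * css_set_lock (irq-disabling) inside it, so any sleeping work such as
 * memory allocation has to be done before css_set_lock is taken.
 */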

/*
 * Protects cgroup_idr and css_idr so that IDs can be released without
 * grabbing cgroup_mutex.
 */
static DEFINE_SPINLOCK(cgroup_idr_lock);

/*
 * Protects cgroup_file->kn for !self csses.  It synchronizes notifications
 * against file removal.
 */
static DEFINE_SPINLOCK(cgroup_file_kn_lock);

/*
 * Protects cgroup_root->release_agent_path.  Modifying it also requires
 * cgroup_mutex.  Reading requires either cgroup_mutex or this spinlock.
 */
static DEFINE_SPINLOCK(release_agent_path_lock);

struct percpu_rw_semaphore cgroup_threadgroup_rwsem;

#define cgroup_assert_mutex_or_rcu_locked()				\
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
			   !lockdep_is_held(&cgroup_mutex),		\
			   "cgroup_mutex or RCU read lock required");

/*
 * cgroup destruction makes heavy use of work items and there can be a lot
 * of concurrent destructions.  Use a separate workqueue so that cgroup
 * destruction work items don't end up filling up max_active of system_wq
 * which may lead to deadlock.
 */
static struct workqueue_struct *cgroup_destroy_wq;

/*
 * pidlist destructions need to be flushed on cgroup destruction.  Use a
 * separate workqueue as flush domain.
 */
static struct workqueue_struct *cgroup_pidlist_destroy_wq;

/* generate an array of cgroup subsystem pointers */
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
static struct cgroup_subsys *cgroup_subsys[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/* array of cgroup subsystem names */
#define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
static const char *cgroup_subsys_name[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/* array of static keys for cgroup_subsys_enabled() and cgroup_subsys_on_dfl() */
#define SUBSYS(_x)							\
	DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key);		\
	DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key);		\
	EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key);		\
	EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
#include <linux/cgroup_subsys.h>
#undef SUBSYS

#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
static struct static_key_true *cgroup_subsys_enabled_key[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS
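
/*
 * The SUBSYS() blocks above are an x-macro pattern: linux/cgroup_subsys.h
 * contains one SUBSYS(name) entry per compiled-in controller, so each
 * re-inclusion with a different SUBSYS() definition stamps out a parallel
 * table (pointers, names, static keys) indexed by <name>_cgrp_id.
 */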

/*
 * The default hierarchy, reserved for the subsystems that are otherwise
 * unattached - it never has more than a single cgroup, and all tasks are
 * part of that cgroup.
 */
struct cgroup_root cgrp_dfl_root;
EXPORT_SYMBOL_GPL(cgrp_dfl_root);

/*
 * The default hierarchy always exists but is hidden until mounted for the
 * first time.  This is for backward compatibility.
 */
static bool cgrp_dfl_visible;

/* Controllers blocked by the commandline in v1 */
static u16 cgroup_no_v1_mask;

/* some controllers are not supported in the default hierarchy */
static u16 cgrp_dfl_inhibit_ss_mask;

/* some controllers are implicitly enabled on the default hierarchy */
static unsigned long cgrp_dfl_implicit_ss_mask;

/* The list of hierarchy roots */
static LIST_HEAD(cgroup_roots);
static int cgroup_root_count;

/* hierarchy ID allocation and mapping, protected by cgroup_mutex */
static DEFINE_IDR(cgroup_hierarchy_idr);

/*
 * Assign a monotonically increasing serial number to csses.  It guarantees
 * cgroups with bigger numbers are newer than those with smaller numbers.
 * Also, as csses are always appended to the parent's ->children list, it
 * guarantees that sibling csses are sorted in the ascending serial number
 * order on the list.  Protected by cgroup_mutex.
 */
static u64 css_serial_nr_next = 1;

/*
 * These bitmasks identify subsystems with specific features to avoid
 * having to do iterative checks repeatedly in the fork/exit paths.
 */
static u16 have_fork_callback __read_mostly;
static u16 have_exit_callback __read_mostly;
static u16 have_free_callback __read_mostly;

/* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = {
	.count		= { .counter = 2, },
	.user_ns	= &init_user_ns,
	.ns.ops		= &cgroupns_operations,
	.ns.inum	= PROC_CGROUP_INIT_INO,
	.root_cset	= &init_css_set,
};

/* Ditto for the can_fork callback. */
static u16 have_canfork_callback __read_mostly;

static struct file_system_type cgroup2_fs_type;
static struct cftype cgroup_dfl_base_files[];
static struct cftype cgroup_legacy_base_files[];

static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
static void cgroup_lock_and_drain_offline(struct cgroup *cgrp);
static int cgroup_apply_control(struct cgroup *cgrp);
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
static void css_task_iter_advance(struct css_task_iter *it);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
					      struct cgroup_subsys *ss);
static void css_release(struct percpu_ref *ref);
static void kill_css(struct cgroup_subsys_state *css);
static int cgroup_addrm_files(struct cgroup_subsys_state *css,
			      struct cgroup *cgrp, struct cftype cfts[],
			      bool is_add);

/**
 * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
 * @ssid: subsys ID of interest
 *
 * cgroup_subsys_enabled() can only be used with literal subsys names which
 * is fine for individual subsystems but unsuitable for cgroup core.  This
 * is slower static_key_enabled() based test indexed by @ssid.
 */
static bool cgroup_ssid_enabled(int ssid)
{
	if (CGROUP_SUBSYS_COUNT == 0)
		return false;

	return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
}

static bool cgroup_ssid_no_v1(int ssid)
{
	return cgroup_no_v1_mask & (1 << ssid);
}

/**
 * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
 * @cgrp: the cgroup of interest
 *
 * The default hierarchy is the v2 interface of cgroup and this function
 * can be used to test whether a cgroup is on the default hierarchy for
 * cases where a subsystem should behave differently depending on the
 * interface version.  Among the changed behaviors:
 *
 * - Mount options "noprefix", "xattr", "clone_children",
 *   "release_agent" and "name" are disallowed.
 *
 * - rename(2) is disallowed.
 *
 * - "tasks" is removed.  Everything should be at process granularity.
 *   Use "cgroup.procs" instead.
 *
 * - "cgroup.procs" is not sorted.  pids will be unique unless they got
 *   recycled in-between reads.
 *
 * - "release_agent" and "notify_on_release" are removed.
 *
 * - "cgroup.clone_children" is removed.
 *
 * - "cgroup.subtree_populated" is available.  Its value is 0 if the
 *   cgroup and its descendants contain no task; otherwise, 1.  The file
 *   also generates kernfs notification when the value changes.
 */
static bool cgroup_on_dfl(const struct cgroup *cgrp)
{
	return cgrp->root == &cgrp_dfl_root;
}

/* IDR wrappers which synchronize using cgroup_idr_lock */
static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
			    gfp_t gfp_mask)
{
	int ret;

	idr_preload(gfp_mask);
	spin_lock_bh(&cgroup_idr_lock);
	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
	spin_unlock_bh(&cgroup_idr_lock);
	idr_preload_end();
	return ret;
}
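
/*
 * The wrapper above preloads the IDR with the caller's gfp_mask outside
 * the spinlock, then clears __GFP_DIRECT_RECLAIM for the allocation made
 * under the lock: idr_alloc() itself must not sleep there, so any node it
 * needs should come from the preload cache.
 */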

static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
{
	void *ret;

	spin_lock_bh(&cgroup_idr_lock);
	ret = idr_replace(idr, ptr, id);
	spin_unlock_bh(&cgroup_idr_lock);
	return ret;
}

static void cgroup_idr_remove(struct idr *idr, int id)
{
	spin_lock_bh(&cgroup_idr_lock);
	idr_remove(idr, id);
	spin_unlock_bh(&cgroup_idr_lock);
}

static struct cgroup *cgroup_parent(struct cgroup *cgrp)
{
	struct cgroup_subsys_state *parent_css = cgrp->self.parent;

	if (parent_css)
		return container_of(parent_css, struct cgroup, self);
	return NULL;
}

/* subsystems visibly enabled on a cgroup */
static u16 cgroup_control(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	u16 root_ss_mask = cgrp->root->subsys_mask;

	if (parent)
		return parent->subtree_control;

	if (cgroup_on_dfl(cgrp))
		root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
				  cgrp_dfl_implicit_ss_mask);
	return root_ss_mask;
}

/* subsystems enabled on a cgroup */
static u16 cgroup_ss_mask(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);

	if (parent)
		return parent->subtree_ss_mask;

	return cgrp->root->subsys_mask;
}

/**
 * cgroup_css - obtain a cgroup's css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
 *
 * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
 * function must be called either under cgroup_mutex or rcu_read_lock() and
 * the caller is responsible for pinning the returned css if it wants to
 * keep accessing it outside the said locks.  This function may return
 * %NULL if @cgrp doesn't have @ss enabled.
 */
static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
					      struct cgroup_subsys *ss)
{
	if (ss)
		return rcu_dereference_check(cgrp->subsys[ss->id],
					lockdep_is_held(&cgroup_mutex));
	else
		return &cgrp->self;
}

/**
 * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
 *
 * Similar to cgroup_css() but returns the effective css, which is defined
 * as the matching css of the nearest ancestor including self which has @ss
 * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
 * function is guaranteed to return non-NULL css.
 */
static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
						struct cgroup_subsys *ss)
{
	lockdep_assert_held(&cgroup_mutex);

	if (!ss)
		return &cgrp->self;

	/*
	 * This function is used while updating css associations and thus
	 * can't test the csses directly.  Test ss_mask.
	 */
	while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
		cgrp = cgroup_parent(cgrp);
		if (!cgrp)
			return NULL;
	}

	return cgroup_css(cgrp, ss);
}

/**
 * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get the effective css of @cgrp for @ss.  The effective css is
 * defined as the matching css of the nearest ancestor including self which
 * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
 * the root css is returned, so this function always returns a valid css.
 * The returned css must be put using css_put().
 */
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
					     struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();

	do {
		css = cgroup_css(cgrp, ss);

		if (css && css_tryget_online(css))
			goto out_unlock;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);

	css = init_css_set.subsys[ss->id];
	css_get(css);
out_unlock:
	rcu_read_unlock();
	return css;
}

/* check whether a cgroup has been marked dead (taken offline) */
static inline bool cgroup_is_dead(const struct cgroup *cgrp)
{
	return !(cgrp->self.flags & CSS_ONLINE);
}

static void cgroup_get(struct cgroup *cgrp)
{
	WARN_ON_ONCE(cgroup_is_dead(cgrp));
	css_get(&cgrp->self);
}

static bool cgroup_tryget(struct cgroup *cgrp)
{
	return css_tryget(&cgrp->self);
}

struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
{
	struct cgroup *cgrp = of->kn->parent->priv;
	struct cftype *cft = of_cft(of);

	/*
	 * This is open and unprotected implementation of cgroup_css().
	 * This function is only called from a kernfs file operation which
	 * takes an active reference of the kernfs file, and a kernfs file
	 * can only be removed while holding cgroup_mutex, so the css can't
	 * go away while the file is being accessed.  The raw dereference
	 * below is therefore safe.
	 */
	if (cft->ss)
		return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
	else
		return &cgrp->self;
}
EXPORT_SYMBOL_GPL(of_css);

static int notify_on_release(const struct cgroup *cgrp)
{
	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}

/**
 * for_each_css - iterate all css's of a cgroup
 * @css: the iteration cursor
 * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
 * @cgrp: the target cgroup to iterate css's of
 *
 * Should be called under cgroup_mutex.
 */
#define for_each_css(css, ssid, cgrp)					\
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
		if (!((css) = rcu_dereference_check(			\
				(cgrp)->subsys[(ssid)],			\
				lockdep_is_held(&cgroup_mutex)))) { }	\
		else

/**
 * for_each_e_css - iterate all effective css's of a cgroup
 * @css: the iteration cursor
 * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
 * @cgrp: the target cgroup to iterate css's of
 *
 * Should be called under cgroup_mutex.
 */
#define for_each_e_css(css, ssid, cgrp)					\
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
		if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
			;						\
		else

/**
 * for_each_subsys - iterate all enabled cgroup subsystems
 * @ss: the iteration cursor
 * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
 */
#define for_each_subsys(ss, ssid)					\
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT &&		\
	     (((ss) = cgroup_subsys[ssid]) || true); (ssid)++)

/**
 * do_each_subsys_mask - filter for_each_subsys with a bitmask
 * @ss: the iteration cursor
 * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
 * @ss_mask: the bitmask
 *
 * The block will only run for cases where the ssid-th bit (1 << ssid) of
 * @ss_mask is set.
 */
#define do_each_subsys_mask(ss, ssid, ss_mask) do {			\
	unsigned long __ss_mask = (ss_mask);				\
	if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */	\
		(ssid) = 0;						\
		break;							\
	}								\
	for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) {	\
		(ss) = cgroup_subsys[ssid];				\
		{

#define while_each_subsys_mask()					\
		}							\
	}								\
} while (false)
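
/*
 * Usage sketch for the pair above (the braces opened by
 * do_each_subsys_mask() are closed by while_each_subsys_mask()):
 *
 *	do_each_subsys_mask(ss, ssid, ss_mask) {
 *		...use @ss and @ssid...
 *	} while_each_subsys_mask();
 */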

/* iterate across the hierarchies */
#define for_each_root(root)						\
	list_for_each_entry((root), &cgroup_roots, root_list)

/* iterate over child cgrps, lock should be held throughout the iteration */
#define cgroup_for_each_live_child(child, cgrp)				\
	list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       cgroup_is_dead(child); }))			\
			;						\
		else

/* walk live descendants in preorder */
#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)		\
	css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))	\
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       (dsct) = (d_css)->cgroup;			\
		       cgroup_is_dead(dsct); }))			\
			;						\
		else

/* walk live descendants in postorder */
#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp)	\
	css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       (dsct) = (d_css)->cgroup;			\
		       cgroup_is_dead(dsct); }))			\
			;						\
		else

static void cgroup_release_agent(struct work_struct *work);
static void check_for_release(struct cgroup *cgrp);

/*
 * A cgroup can be associated with multiple css_sets as different tasks may
 * belong to different cgroups on different hierarchies.  In the other
 * direction, a css_set is naturally associated with multiple cgroups.
 * This M:N relationship is represented by the following link structure
 * which exists for each association and allows traversing the associations
 * from both sides.
 */
struct cgrp_cset_link {
	/* the cgroup and css_set this link associates */
	struct cgroup		*cgrp;
	struct css_set		*cset;

	/* list of cgrp_cset_links anchored at cgrp->cset_links */
	struct list_head	cset_link;

	/* list of cgrp_cset_links anchored at css_set->cgrp_links */
	struct list_head	cgrp_link;
};

/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted.  It contains a pointer to the root state
 * for each subsystem.  Also used to anchor the list of css_sets.
 */
struct css_set init_css_set = {
	.refcount		= ATOMIC_INIT(1),
	.cgrp_links		= LIST_HEAD_INIT(init_css_set.cgrp_links),
	.tasks			= LIST_HEAD_INIT(init_css_set.tasks),
	.mg_tasks		= LIST_HEAD_INIT(init_css_set.mg_tasks),
	.mg_preload_node	= LIST_HEAD_INIT(init_css_set.mg_preload_node),
	.mg_node		= LIST_HEAD_INIT(init_css_set.mg_node),
	.task_iters		= LIST_HEAD_INIT(init_css_set.task_iters),
};

static int css_set_count	= 1;	/* 1 for init_css_set */

/**
 * css_set_populated - does a css_set contain any tasks?
 * @cset: target css_set
 */
static bool css_set_populated(struct css_set *cset)
{
	lockdep_assert_held(&css_set_lock);

	return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks);
}

/**
 * cgroup_update_populated - update populated count of a cgroup
 * @cgrp: the target cgroup
 * @populated: inc or dec populated count
 *
 * One of the css_sets associated with @cgrp is either getting its first
 * task or losing the last.  Update @cgrp->populated_cnt accordingly.  The
 * count is propagated towards root so that a given cgroup's populated_cnt
 * is zero iff the cgroup and all its descendants don't contain any tasks.
 *
 * When @cgrp->populated_cnt changes from or to zero, userland is notified
 * through the "events" interface file so that it can detect when @cgrp and
 * its descendants become populated or empty.
 */
static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
{
	lockdep_assert_held(&css_set_lock);

	do {
		bool trigger;

		if (populated)
			trigger = !cgrp->populated_cnt++;
		else
			trigger = !--cgrp->populated_cnt;

		if (!trigger)
			break;

		check_for_release(cgrp);
		cgroup_file_notify(&cgrp->events_file);

		cgrp = cgroup_parent(cgrp);
	} while (cgrp);
}

/**
 * css_set_update_populated - update populated state of a css_set
 * @cset: target css_set
 * @populated: whether @cset is populated or depopulated
 *
 * @cset is either getting the first task or losing the last.  Update the
 * ->populated_cnt of all associated cgroups accordingly.
 */
static void css_set_update_populated(struct css_set *cset, bool populated)
{
	struct cgrp_cset_link *link;

	lockdep_assert_held(&css_set_lock);

	list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
		cgroup_update_populated(link->cgrp, populated);
}

/**
 * css_set_move_task - move a task from one css_set to another
 * @task: task being moved
 * @from_cset: css_set @task currently belongs to (may be NULL)
 * @to_cset: new css_set @task is being moved to (may be NULL)
 * @use_mg_tasks: move to @to_cset->mg_tasks instead of ->tasks
 *
 * Move @task from @from_cset to @to_cset.  If @task didn't belong to any
 * css_set, @from_cset can be NULL.  If @task is being disassociated
 * instead of moved, @to_cset can be NULL.
 *
 * This function automatically handles populated_cnt updates and
 * css_task_iter adjustments but the caller is responsible for managing
 * @from_cset and @to_cset's reference counts.
 */
static void css_set_move_task(struct task_struct *task,
			      struct css_set *from_cset, struct css_set *to_cset,
			      bool use_mg_tasks)
{
	lockdep_assert_held(&css_set_lock);

	if (to_cset && !css_set_populated(to_cset))
		css_set_update_populated(to_cset, true);

	if (from_cset) {
		struct css_task_iter *it, *pos;

		WARN_ON_ONCE(list_empty(&task->cg_list));

		/*
		 * @task is leaving, advance task iterators which are
		 * pointing to it so that they can resume at the next
		 * position.  Advancing an iterator might remove it from
		 * the list, use safe walk.
		 */
		list_for_each_entry_safe(it, pos, &from_cset->task_iters,
					 iters_node)
			if (it->task_pos == &task->cg_list)
				css_task_iter_advance(it);

		list_del_init(&task->cg_list);
		if (!css_set_populated(from_cset))
			css_set_update_populated(from_cset, false);
	} else {
		WARN_ON_ONCE(!list_empty(&task->cg_list));
	}

	if (to_cset) {
		/*
		 * We are synchronized through cgroup_threadgroup_rwsem
		 * against PF_EXITING setting such that we can't race
		 * against cgroup_exit() changing the css_set to
		 * init_css_set and dropping the old one.
		 */
		WARN_ON_ONCE(task->flags & PF_EXITING);

		rcu_assign_pointer(task->cgroups, to_cset);
		list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
							     &to_cset->tasks);
	}
}

/*
 * hash table for cgroup groups.  This improves the performance to find
 * an existing css_set.  This hash doesn't (currently) take into
 * account cgroups in empty hierarchies.
 */
#define CSS_SET_HASH_BITS	7
static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);

static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
{
	unsigned long key = 0UL;
	struct cgroup_subsys *ss;
	int i;

	for_each_subsys(ss, i)
		key += (unsigned long)css[i];
	key = (key >> 16) ^ key;

	return key;
}
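
/*
 * The hash key above is just the sum of the css pointers with the high
 * bits folded in.  Two css_sets are equal only if every per-subsystem css
 * pointer matches, so summing the pointers is a cheap way to pick a
 * bucket before the full memcmp() in compare_css_sets() below.
 */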

static void put_css_set_locked(struct css_set *cset)
{
	struct cgrp_cset_link *link, *tmp_link;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&css_set_lock);

	if (!atomic_dec_and_test(&cset->refcount))
		return;

	/* This css_set is dead. unlink it and release cgroup and css refs */
	for_each_subsys(ss, ssid) {
		list_del(&cset->e_cset_node[ssid]);
		css_put(cset->subsys[ssid]);
	}
	hash_del(&cset->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		if (cgroup_parent(link->cgrp))
			cgroup_put(link->cgrp);
		kfree(link);
	}

	kfree_rcu(cset, rcu_head);
}

static void put_css_set(struct css_set *cset)
{
	unsigned long flags;

	/*
	 * Ensure that the refcount doesn't hit zero while any readers
	 * can see it.  Similar to atomic_dec_and_lock() - the lock is
	 * taken only when the count may actually drop to zero.
	 */
	if (atomic_add_unless(&cset->refcount, -1, 1))
		return;

	spin_lock_irqsave(&css_set_lock, flags);
	put_css_set_locked(cset);
	spin_unlock_irqrestore(&css_set_lock, flags);
}

/*
 * refcounted get/put for css_set objects
 */
static inline void get_css_set(struct css_set *cset)
{
	atomic_inc(&cset->refcount);
}

/**
 * compare_css_sets - helper function for find_existing_css_set().
 * @cset: candidate css_set being tested
 * @old_cset: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if "cset" matches "old_cset" except for the hierarchy
 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
 */
static bool compare_css_sets(struct css_set *cset,
			     struct css_set *old_cset,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct list_head *l1, *l2;

	/*
	 * On the default hierarchy, there can be csets which are
	 * associated with the same set of cgroups but different csses.
	 * Let's first ensure that csses match.
	 */
	if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
		return false;

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies.  As different cgroups may
	 * share the same effective css, this comparison is always
	 * necessary.
	 */
	l1 = &cset->cgrp_links;
	l2 = &old_cset->cgrp_links;
	while (1) {
		struct cgrp_cset_link *link1, *link2;
		struct cgroup *cgrp1, *cgrp2;

		l1 = l1->next;
		l2 = l2->next;
		/* See if we reached the end - both lists are equal length */
		if (l1 == &cset->cgrp_links) {
			BUG_ON(l2 != &old_cset->cgrp_links);
			break;
		} else {
			BUG_ON(l2 == &old_cset->cgrp_links);
		}
		/* Locate the cgroups associated with these links */
		link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
		link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
		cgrp1 = link1->cgrp;
		cgrp2 = link2->cgrp;
		/* Hierarchies should be linked in the same order */
		BUG_ON(cgrp1->root != cgrp2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cgrp1->root == new_cgrp->root) {
			if (cgrp1 != new_cgrp)
				return false;
		} else {
			if (cgrp1 != cgrp2)
				return false;
		}
	}
	return true;
}

/**
 * find_existing_css_set - init css array and find the matching css_set
 * @old_cset: the css_set that we're using before the cgroup transition
 * @cgrp: the cgroup that we're moving into
 * @template: out param for the new set of csses, should be clear on entry
 */
static struct css_set *find_existing_css_set(struct css_set *old_cset,
					struct cgroup *cgrp,
					struct cgroup_subsys_state *template[])
{
	struct cgroup_root *root = cgrp->root;
	struct cgroup_subsys *ss;
	struct css_set *cset;
	unsigned long key;
	int i;

	/*
	 * Build the set of subsystem state objects that we want to see in the
	 * new css_set.  While subsystems can change globally, the entries here
	 * won't change, so no need for locking.
	 */
	for_each_subsys(ss, i) {
		if (root->subsys_mask & (1UL << i)) {
			/*
			 * @ss is in this hierarchy, so we want the
			 * effective css from @cgrp.
			 */
			template[i] = cgroup_e_css(cgrp, ss);
		} else {
			/*
			 * @ss is not in this hierarchy, so we don't want
			 * to change the css.
			 */
			template[i] = old_cset->subsys[i];
		}
	}

	key = css_set_hash(template);
	hash_for_each_possible(css_set_table, cset, hlist, key) {
		if (!compare_css_sets(cset, old_cset, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cset;
	}

	/* No existing cgroup group matched */
	return NULL;
}

static void free_cgrp_cset_links(struct list_head *links_to_free)
{
	struct cgrp_cset_link *link, *tmp_link;

	list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
		list_del(&link->cset_link);
		kfree(link);
	}
}

/**
 * allocate_cgrp_cset_links - allocate cgrp_cset_links
 * @count: the number of links to allocate
 * @tmp_links: list_head the allocated links are put on
 *
 * Allocate @count cgrp_cset_link structures and chain them on @tmp_links
 * through ->cset_link.  Returns 0 on success or -errno.
 */
static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
{
	struct cgrp_cset_link *link;
	int i;

	INIT_LIST_HEAD(tmp_links);

	for (i = 0; i < count; i++) {
		link = kzalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			free_cgrp_cset_links(tmp_links);
			return -ENOMEM;
		}
		list_add(&link->cset_link, tmp_links);
	}
	return 0;
}
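
/*
 * allocate_cgrp_cset_links() and link_css_set() below implement a common
 * kernel pattern: preallocate everything with GFP_KERNEL while sleeping is
 * still allowed, then consume the preallocated links one by one under the
 * non-sleeping css_set_lock (see find_css_set() and cgroup_setup_root()).
 */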

/**
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
 * @cset: the css_set to be linked
 * @cgrp: the destination cgroup
 */
static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
			 struct cgroup *cgrp)
{
	struct cgrp_cset_link *link;

	BUG_ON(list_empty(tmp_links));

	if (cgroup_on_dfl(cgrp))
		cset->dfl_cgrp = cgrp;

	link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
	link->cset = cset;
	link->cgrp = cgrp;

	/*
	 * Always add links to the tail of the lists so that the lists are
	 * in chronological order.
	 */
	list_move_tail(&link->cset_link, &cgrp->cset_links);
	list_add_tail(&link->cgrp_link, &cset->cgrp_links);

	if (cgroup_parent(cgrp))
		cgroup_get(cgrp);
}

/**
 * find_css_set - return a new css_set with one cgroup updated
 * @old_cset: the baseline css_set
 * @cgrp: the cgroup to be updated
 *
 * Return a new css_set that's equivalent to @old_cset, but with @cgrp
 * substituted into the appropriate hierarchy.
 */
static struct css_set *find_css_set(struct css_set *old_cset,
				    struct cgroup *cgrp)
{
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
	struct css_set *cset;
	struct list_head tmp_links;
	struct cgrp_cset_link *link;
	struct cgroup_subsys *ss;
	unsigned long key;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	/* First see if we already have a cgroup group that matches
	 * the desired set */
	spin_lock_irq(&css_set_lock);
	cset = find_existing_css_set(old_cset, cgrp, template);
	if (cset)
		get_css_set(cset);
	spin_unlock_irq(&css_set_lock);

	if (cset)
		return cset;

	cset = kzalloc(sizeof(*cset), GFP_KERNEL);
	if (!cset)
		return NULL;

	/* Allocate all the cgrp_cset_link objects that we'll need */
	if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
		kfree(cset);
		return NULL;
	}

	atomic_set(&cset->refcount, 1);
	INIT_LIST_HEAD(&cset->cgrp_links);
	INIT_LIST_HEAD(&cset->tasks);
	INIT_LIST_HEAD(&cset->mg_tasks);
	INIT_LIST_HEAD(&cset->mg_preload_node);
	INIT_LIST_HEAD(&cset->mg_node);
	INIT_LIST_HEAD(&cset->task_iters);
	INIT_HLIST_NODE(&cset->hlist);

	/* Copy the set of subsystem state objects generated in
	 * find_existing_css_set() */
	memcpy(cset->subsys, template, sizeof(cset->subsys));

	spin_lock_irq(&css_set_lock);
	/* Add reference counts and links from the new css_set */
	list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
		struct cgroup *c = link->cgrp;

		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_links, cset, c);
	}

	BUG_ON(!list_empty(&tmp_links));

	css_set_count++;

	/* Add @cset to the hash table */
	key = css_set_hash(cset->subsys);
	hash_add(css_set_table, &cset->hlist, key);

	for_each_subsys(ss, ssid) {
		struct cgroup_subsys_state *css = cset->subsys[ssid];

		list_add_tail(&cset->e_cset_node[ssid],
			      &css->cgroup->e_csets[ssid]);
		css_get(css);
	}

	spin_unlock_irq(&css_set_lock);

	return cset;
}

static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
{
	struct cgroup *root_cgrp = kf_root->kn->priv;

	return root_cgrp->root;
}

static int cgroup_init_root_id(struct cgroup_root *root)
{
	int id;

	lockdep_assert_held(&cgroup_mutex);

	id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
	if (id < 0)
		return id;

	root->hierarchy_id = id;
	return 0;
}

static void cgroup_exit_root_id(struct cgroup_root *root)
{
	lockdep_assert_held(&cgroup_mutex);

	if (root->hierarchy_id) {
		idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
		root->hierarchy_id = 0;
	}
}

static void cgroup_free_root(struct cgroup_root *root)
{
	if (root) {
		/* hierarchy ID should already have been released */
		WARN_ON_ONCE(root->hierarchy_id);

		idr_destroy(&root->cgroup_idr);
		kfree(root);
	}
}

static void cgroup_destroy_root(struct cgroup_root *root)
{
	struct cgroup *cgrp = &root->cgrp;
	struct cgrp_cset_link *link, *tmp_link;

	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

	BUG_ON(atomic_read(&root->nr_cgrps));
	BUG_ON(!list_empty(&cgrp->self.children));

	/* Rebind all subsystems back to the default hierarchy */
	WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));

	/*
	 * Release all the links from cset_links to this hierarchy's
	 * root cgroup
	 */
	spin_lock_irq(&css_set_lock);

	list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		kfree(link);
	}

	spin_unlock_irq(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		cgroup_root_count--;
	}

	cgroup_exit_root_id(root);

	mutex_unlock(&cgroup_mutex);

	kernfs_destroy_root(root->kf_root);
	cgroup_free_root(root);
}

/*
 * look up cgroup associated with current task's cgroup namespace on the
 * specified hierarchy
 */
static struct cgroup *
current_cgns_cgroup_from_root(struct cgroup_root *root)
{
	struct cgroup *res = NULL;
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	rcu_read_lock();

	cset = current->nsproxy->cgroup_ns->root_cset;
	if (cset == &init_css_set) {
		res = &root->cgrp;
	} else {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	rcu_read_unlock();

	BUG_ON(!res);
	return res;
}

/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
					    struct cgroup_root *root)
{
	struct cgroup *res = NULL;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&css_set_lock);

	if (cset == &init_css_set) {
		res = &root->cgrp;
	} else {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}

	BUG_ON(!res);
	return res;
}

/*
 * Return the cgroup for "task" from the given hierarchy.  Must be
 * called with cgroup_mutex and css_set_lock held.
 */
static struct cgroup *task_cgroup_from_root(struct task_struct *task,
					    struct cgroup_root *root)
{
	/*
	 * No need to lock the task - since we hold cgroup_mutex the
	 * task can't change groups, so the only thing that can happen
	 * is that it exits and its css is set back to init_css_set.
	 */
	return cset_cgroup_from_root(task_css_set(task), root);
}

/*
 * A task must hold cgroup_mutex to modify cgroups.  css_set reference
 * counts may be bumped and dropped locklessly, and RCU guards updates of
 * rcu_assign_pointer(task->cgroups, ...), so readers either hold
 * cgroup_mutex/css_set_lock or rcu_read_lock() depending on what they
 * dereference.
 */
static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
static const struct file_operations proc_cgroupstats_operations;

static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
			      char *buf)
{
	struct cgroup_subsys *ss = cft->ss;

	if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
	    !(cgrp->root->flags & CGRP_ROOT_NOPREFIX))
		snprintf(buf, CGROUP_FILE_NAME_MAX, "%s.%s",
			 cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
			 cft->name);
	else
		strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
	return buf;
}

/**
 * cgroup_file_mode - deduce file mode of a control file
 * @cft: the control file in question
 *
 * S_IRUGO for read, S_IWUSR for write.
 */
static umode_t cgroup_file_mode(const struct cftype *cft)
{
	umode_t mode = 0;

	if (cft->read_u64 || cft->read_s64 || cft->seq_show)
		mode |= S_IRUGO;

	if (cft->write_u64 || cft->write_s64 || cft->write) {
		if (cft->flags & CFTYPE_WORLD_WRITABLE)
			mode |= S_IWUGO;
		else
			mode |= S_IWUSR;
	}

	return mode;
}

/**
 * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask
 * @subtree_control: the new subtree_control mask to consider
 * @this_ss_mask: available subsystems
 *
 * On the default hierarchy, a subsystem may request other subsystems to be
 * enabled together through its ->depends_on mask.  In such cases, more
 * subsystems than specified in "cgroup.subtree_control" may be enabled.
 *
 * This function calculates which subsystems need to be enabled if
 * @subtree_control is to be applied while restricted to @this_ss_mask.
 */
static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
{
	u16 cur_ss_mask = subtree_control;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	cur_ss_mask |= cgrp_dfl_implicit_ss_mask;

	while (true) {
		u16 new_ss_mask = cur_ss_mask;

		do_each_subsys_mask(ss, ssid, cur_ss_mask) {
			new_ss_mask |= ss->depends_on;
		} while_each_subsys_mask();

		/*
		 * Mask out subsystems which aren't available.  This can
		 * happen only if some depended-upon subsystems were bound
		 * to non-default hierarchies.
		 */
		new_ss_mask &= this_ss_mask;

		if (new_ss_mask == cur_ss_mask)
			break;
		cur_ss_mask = new_ss_mask;
	}

	return cur_ss_mask;
}

/**
 * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
 * @kn: the kernfs_node being serviced
 *
 * This helper undoes cgroup_kn_lock_live() and should be invoked before
 * the method finishes if locking succeeded.  Note that once this function
 * returns the cgroup returned by cgroup_kn_lock_live() may become
 * inaccessible any time.  If the caller intends to continue to access the
 * cgroup, it should pin it before invoking this function.
 */
static void cgroup_kn_unlock(struct kernfs_node *kn)
{
	struct cgroup *cgrp;

	if (kernfs_type(kn) == KERNFS_DIR)
		cgrp = kn->priv;
	else
		cgrp = kn->parent->priv;

	mutex_unlock(&cgroup_mutex);

	kernfs_unbreak_active_protection(kn);
	cgroup_put(cgrp);
}

/**
 * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
 * @kn: the kernfs_node being serviced
 * @drain_offline: perform offline draining on the cgroup
 *
 * This helper is to be used by a cgroup kernfs method currently servicing
 * @kn.  It breaks the active protection, performs cgroup locking and
 * verifies that the associated cgroup is alive.  Returns the cgroup if
 * alive; otherwise, %NULL.  A successful return should be undone by a
 * matching cgroup_kn_unlock() invocation.  If @drain_offline is %true, the
 * cgroup is drained of offlining csses before return.
 *
 * Any cgroup kernfs method implementation which requires locking the
 * associated cgroup should use this helper.  It avoids nesting cgroup
 * locking under kernfs active protection and allows all kernfs operations
 * including self-removal.
 */
static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn,
					  bool drain_offline)
{
	struct cgroup *cgrp;

	if (kernfs_type(kn) == KERNFS_DIR)
		cgrp = kn->priv;
	else
		cgrp = kn->parent->priv;

	/*
	 * We're gonna grab cgroup_mutex which nests outside kernfs
	 * active_ref.  cgroup liveliness check alone provides enough
	 * protection against removal.  Ensure @cgrp stays accessible and
	 * break the active_ref protection.
	 */
	if (!cgroup_tryget(cgrp))
		return NULL;
	kernfs_break_active_protection(kn);

	if (drain_offline)
		cgroup_lock_and_drain_offline(cgrp);
	else
		mutex_lock(&cgroup_mutex);

	if (!cgroup_is_dead(cgrp))
		return cgrp;

	cgroup_kn_unlock(kn);
	return NULL;
}
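
/*
 * Typical shape of a kernfs method body using the pair above (cf.
 * __cgroup_procs_write() and cgroup_release_agent_write() below):
 *
 *	cgrp = cgroup_kn_lock_live(of->kn, false);
 *	if (!cgrp)
 *		return -ENODEV;
 *	...operate on @cgrp under cgroup_mutex...
 *	cgroup_kn_unlock(of->kn);
 */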

static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
{
	char name[CGROUP_FILE_NAME_MAX];

	lockdep_assert_held(&cgroup_mutex);

	if (cft->file_offset) {
		struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
		struct cgroup_file *cfile = (void *)css + cft->file_offset;

		spin_lock_irq(&cgroup_file_kn_lock);
		cfile->kn = NULL;
		spin_unlock_irq(&cgroup_file_kn_lock);
	}

	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
}

/**
 * css_clear_dir - remove subsys files in a cgroup directory
 * @css: target css
 */
static void css_clear_dir(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cftype *cfts;

	if (!(css->flags & CSS_VISIBLE))
		return;

	css->flags &= ~CSS_VISIBLE;

	list_for_each_entry(cfts, &css->ss->cfts, node)
		cgroup_addrm_files(css, cgrp, cfts, false);
}

/**
 * css_populate_dir - create subsys files in a cgroup directory
 * @css: target css
 *
 * On failure, no file is added.
 */
static int css_populate_dir(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cftype *cfts, *failed_cfts;
	int ret;

	if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
		return 0;

	if (!css->ss) {
		if (cgroup_on_dfl(cgrp))
			cfts = cgroup_dfl_base_files;
		else
			cfts = cgroup_legacy_base_files;

		return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
	}

	list_for_each_entry(cfts, &css->ss->cfts, node) {
		ret = cgroup_addrm_files(css, cgrp, cfts, true);
		if (ret < 0) {
			failed_cfts = cfts;
			goto err;
		}
	}

	css->flags |= CSS_VISIBLE;

	return 0;
err:
	list_for_each_entry(cfts, &css->ss->cfts, node) {
		if (cfts == failed_cfts)
			break;
		cgroup_addrm_files(css, cgrp, cfts, false);
	}
	return ret;
}

static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
{
	struct cgroup *dcgrp = &dst_root->cgrp;
	struct cgroup_subsys *ss;
	int ssid, i, ret;

	lockdep_assert_held(&cgroup_mutex);

	do_each_subsys_mask(ss, ssid, ss_mask) {
		/*
		 * If @ss has non-root csses attached to it, can't move.
		 * If @ss is an implicit controller, it is exempt from this
		 * rule and can be stolen.
		 */
		if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
		    !ss->implicit_on_dfl)
			return -EBUSY;

		/* can't move between two non-dummy roots either */
		if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
			return -EBUSY;
	} while_each_subsys_mask();

	do_each_subsys_mask(ss, ssid, ss_mask) {
		struct cgroup_root *src_root = ss->root;
		struct cgroup *scgrp = &src_root->cgrp;
		struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
		struct css_set *cset;

		WARN_ON(!css || cgroup_css(dcgrp, ss));

		/* disable from the source */
		src_root->subsys_mask &= ~(1 << ssid);
		WARN_ON(cgroup_apply_control(scgrp));
		cgroup_finalize_control(scgrp, 0);

		/* rebind */
		RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
		rcu_assign_pointer(dcgrp->subsys[ssid], css);
		ss->root = dst_root;
		css->cgroup = dcgrp;

		spin_lock_irq(&css_set_lock);
		hash_for_each(css_set_table, i, cset, hlist)
			list_move_tail(&cset->e_cset_node[ss->id],
				       &dcgrp->e_csets[ss->id]);
		spin_unlock_irq(&css_set_lock);

		/* default hierarchy doesn't enable controllers by default */
		dst_root->subsys_mask |= 1 << ssid;
		if (dst_root == &cgrp_dfl_root) {
			static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
		} else {
			dcgrp->subtree_control |= 1 << ssid;
			static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
		}

		ret = cgroup_apply_control(dcgrp);
		if (ret)
			pr_warn("partial failure to rebind %s controller (err=%d)\n",
				ss->name, ret);

		if (ss->bind)
			ss->bind(css);
	} while_each_subsys_mask();

	kernfs_activate(dcgrp->kn);
	return 0;
}

static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
			    struct kernfs_root *kf_root)
{
	int len = 0;
	char *buf = NULL;
	struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
	struct cgroup *ns_cgroup;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	spin_lock_irq(&css_set_lock);
	ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
	len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
	spin_unlock_irq(&css_set_lock);

	if (len >= PATH_MAX)
		len = -ERANGE;
	else if (len > 0) {
		seq_escape(sf, buf, " \t\n\\");
		len = 0;
	}
	kfree(buf);
	return len;
}

static int cgroup_show_options(struct seq_file *seq,
			       struct kernfs_root *kf_root)
{
	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
	struct cgroup_subsys *ss;
	int ssid;

	if (root != &cgrp_dfl_root)
		for_each_subsys(ss, ssid)
			if (root->subsys_mask & (1 << ssid))
				seq_show_option(seq, ss->legacy_name, NULL);
	if (root->flags & CGRP_ROOT_NOPREFIX)
		seq_puts(seq, ",noprefix");
	if (root->flags & CGRP_ROOT_XATTR)
		seq_puts(seq, ",xattr");

	spin_lock(&release_agent_path_lock);
	if (strlen(root->release_agent_path))
		seq_show_option(seq, "release_agent",
				root->release_agent_path);
	spin_unlock(&release_agent_path_lock);

	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
		seq_puts(seq, ",clone_children");
	if (strlen(root->name))
		seq_show_option(seq, "name", root->name);
	return 0;
}

struct cgroup_sb_opts {
	u16 subsys_mask;
	unsigned int flags;
	char *release_agent;
	bool cpuset_clone_children;
	char *name;
	/* User explicitly requested empty subsystem */
	bool none;
};

static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
{
	char *token, *o = data;
	bool all_ss = false, one_ss = false;
	u16 mask = U16_MAX;
	struct cgroup_subsys *ss;
	int nr_opts = 0;
	int i;

#ifdef CONFIG_CPUSETS
	mask = ~((u16)1 << cpuset_cgrp_id);
#endif

	memset(opts, 0, sizeof(*opts));

	while ((token = strsep(&o, ",")) != NULL) {
		nr_opts++;

		if (!*token)
			return -EINVAL;
		if (!strcmp(token, "none")) {
			/* Explicitly have no subsystems */
			opts->none = true;
			continue;
		}
		if (!strcmp(token, "all")) {
			/* Mutually exclusive option 'all' + subsystem name */
			if (one_ss)
				return -EINVAL;
			all_ss = true;
			continue;
		}
		if (!strcmp(token, "noprefix")) {
			opts->flags |= CGRP_ROOT_NOPREFIX;
			continue;
		}
		if (!strcmp(token, "clone_children")) {
			opts->cpuset_clone_children = true;
			continue;
		}
		if (!strcmp(token, "xattr")) {
			opts->flags |= CGRP_ROOT_XATTR;
			continue;
		}
		if (!strncmp(token, "release_agent=", 14)) {
			/* Specifying two release agents is forbidden */
			if (opts->release_agent)
				return -EINVAL;
			opts->release_agent =
				kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
			if (!opts->release_agent)
				return -ENOMEM;
			continue;
		}
		if (!strncmp(token, "name=", 5)) {
			const char *name = token + 5;
			/* Can't specify an empty name */
			if (!strlen(name))
				return -EINVAL;
			/* Must match [\w.-]+ */
			for (i = 0; i < strlen(name); i++) {
				char c = name[i];
				if (isalnum(c))
					continue;
				if ((c == '.') || (c == '-') || (c == '_'))
					continue;
				return -EINVAL;
			}
			/* Specifying two names is forbidden */
			if (opts->name)
				return -EINVAL;
			opts->name = kstrndup(name,
					      MAX_CGROUP_ROOT_NAMELEN - 1,
					      GFP_KERNEL);
			if (!opts->name)
				return -ENOMEM;

			continue;
		}

		for_each_subsys(ss, i) {
			if (strcmp(token, ss->legacy_name))
				continue;
			if (!cgroup_ssid_enabled(i))
				continue;
			if (cgroup_ssid_no_v1(i))
				continue;

			/* Mutually exclusive option 'all' + subsystem name */
			if (all_ss)
				return -EINVAL;
			opts->subsys_mask |= (1 << i);
			one_ss = true;

			break;
		}
		if (i == CGROUP_SUBSYS_COUNT)
			return -ENOENT;
	}

	/*
	 * If the 'all' option was specified select all the subsystems,
	 * otherwise if 'none', 'name' and a subsystem name options were
	 * not specified, let's default the same behavior as if 'all' is
	 * specified.
	 */
	if (all_ss || (!one_ss && !opts->none && !opts->name))
		for_each_subsys(ss, i)
			if (cgroup_ssid_enabled(i) && !cgroup_ssid_no_v1(i))
				opts->subsys_mask |= (1 << i);

	/*
	 * We either have to specify by name or by subsystems. (So all
	 * empty hierarchies must have a name).
	 */
	if (!opts->subsys_mask && !opts->name)
		return -EINVAL;

	/*
	 * Option noprefix was introduced just for backward compatibility
	 * with the old cpuset, so we allow noprefix only if mounting just
	 * the cpuset subsystem.
	 */
	if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
		return -EINVAL;

	/* Can't specify "none" and some subsystems */
	if (opts->subsys_mask && opts->none)
		return -EINVAL;

	return 0;
}

static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
{
	int ret = 0;
	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
	struct cgroup_sb_opts opts;
	u16 added_mask, removed_mask;

	if (root == &cgrp_dfl_root) {
		pr_err("remount is not allowed\n");
		return -EINVAL;
	}

	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

	/* See what subsystems are wanted */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

	if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
		pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
			task_tgid_nr(current), current->comm);

	added_mask = opts.subsys_mask & ~root->subsys_mask;
	removed_mask = root->subsys_mask & ~opts.subsys_mask;

	/* Don't allow flags or name to change at remount */
	if ((opts.flags ^ root->flags) ||
	    (opts.name && strcmp(opts.name, root->name))) {
		pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n",
		       opts.flags, opts.name ?: "", root->flags, root->name);
		ret = -EINVAL;
		goto out_unlock;
	}

	/* remounting is not allowed for populated hierarchies */
	if (!list_empty(&root->cgrp.self.children)) {
		ret = -EBUSY;
		goto out_unlock;
	}

	ret = rebind_subsystems(root, added_mask);
	if (ret)
		goto out_unlock;

	WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));

	if (opts.release_agent) {
		spin_lock(&release_agent_path_lock);
		strcpy(root->release_agent_path, opts.release_agent);
		spin_unlock(&release_agent_path_lock);
	}
 out_unlock:
	kfree(opts.release_agent);
	kfree(opts.name);
	mutex_unlock(&cgroup_mutex);
	return ret;
}

/*
 * To reduce the fork() overhead for systems that are not actually using
 * their cgroups capability, we don't maintain the lists running through
 * each css_set to its tasks until we see the list actually used - in other
 * words after the first mount.
 */
static bool use_task_css_set_links __read_mostly;

static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;

	spin_lock_irq(&css_set_lock);

	if (use_task_css_set_links)
		goto out_unlock;

	use_task_css_set_links = true;

	/*
	 * We need tasklist_lock because RCU is not safe against
	 * while_each_thread().  Besides, a forking task that has passed
	 * cgroup_post_fork() without seeing use_task_css_set_links = 1
	 * is not guaranteed to have its child immediately visible in the
	 * tasklist if we walk through it with RCU.
	 */
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		WARN_ON_ONCE(!list_empty(&p->cg_list) ||
			     task_css_set(p) != &init_css_set);

		/*
		 * We should check if the process is exiting, otherwise
		 * it will race with cgroup_exit() in that the list_add_tail()
		 * will happen after the cgroup_exit().
		 */
		spin_lock(&p->sighand->siglock);
		if (!(p->flags & PF_EXITING)) {
			struct css_set *cset = task_css_set(p);

			if (!css_set_populated(cset))
				css_set_update_populated(cset, true);
			list_add_tail(&p->cg_list, &cset->tasks);
			get_css_set(cset);
		}
		spin_unlock(&p->sighand->siglock);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
out_unlock:
	spin_unlock_irq(&css_set_lock);
}

static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	int ssid;

	INIT_LIST_HEAD(&cgrp->self.sibling);
	INIT_LIST_HEAD(&cgrp->self.children);
	INIT_LIST_HEAD(&cgrp->cset_links);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	cgrp->self.cgroup = cgrp;
	cgrp->self.flags |= CSS_ONLINE;

	for_each_subsys(ss, ssid)
		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);

	init_waitqueue_head(&cgrp->offline_waitq);
	INIT_WORK(&cgrp->release_agent_work, cgroup_release_agent);
}

static void init_cgroup_root(struct cgroup_root *root,
			     struct cgroup_sb_opts *opts)
{
	struct cgroup *cgrp = &root->cgrp;

	INIT_LIST_HEAD(&root->root_list);
	atomic_set(&root->nr_cgrps, 1);
	cgrp->root = root;
	init_cgroup_housekeeping(cgrp);
	idr_init(&root->cgroup_idr);

	root->flags = opts->flags;
	if (opts->release_agent)
		strcpy(root->release_agent_path, opts->release_agent);
	if (opts->name)
		strcpy(root->name, opts->name);
	if (opts->cpuset_clone_children)
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
}

static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
{
	LIST_HEAD(tmp_links);
	struct cgroup *root_cgrp = &root->cgrp;
	struct css_set *cset;
	int i, ret;

	lockdep_assert_held(&cgroup_mutex);

	ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
	if (ret < 0)
		goto out;
	root_cgrp->id = ret;
	root_cgrp->ancestor_ids[0] = ret;

	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0,
			      GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We're accessing css_set_count without locking css_set_lock here,
	 * but that's OK - it can only be increased by someone holding
	 * cgroup_lock, and that's us.  Later rebinding may disable
	 * controllers on the default hierarchy and thus create new csets,
	 * which can't be more than the existing ones.  Allocate 2x.
	 */
	ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
	if (ret)
		goto cancel_ref;

	ret = cgroup_init_root_id(root);
	if (ret)
		goto cancel_ref;

	root->kf_root = kernfs_create_root(&cgroup_kf_syscall_ops,
					   KERNFS_ROOT_CREATE_DEACTIVATED,
					   root_cgrp);
	if (IS_ERR(root->kf_root)) {
		ret = PTR_ERR(root->kf_root);
		goto exit_root_id;
	}
	root_cgrp->kn = root->kf_root->kn;

	ret = css_populate_dir(&root_cgrp->self);
	if (ret)
		goto destroy_root;

	ret = rebind_subsystems(root, ss_mask);
	if (ret)
		goto destroy_root;

	/*
	 * There must be no failure case after here, since rebinding takes
	 * care of subsystems' refcounts, which are explicitly dropped in
	 * the failure exit path.
	 */
	list_add(&root->root_list, &cgroup_roots);
	cgroup_root_count++;

	/*
	 * Link the root cgroup in this hierarchy into all the css_set
	 * objects.
	 */
	spin_lock_irq(&css_set_lock);
	hash_for_each(css_set_table, i, cset, hlist) {
		link_css_set(&tmp_links, cset, root_cgrp);
		if (css_set_populated(cset))
			cgroup_update_populated(root_cgrp, true);
	}
	spin_unlock_irq(&css_set_lock);

	BUG_ON(!list_empty(&root_cgrp->self.children));
	BUG_ON(atomic_read(&root->nr_cgrps) != 1);

	kernfs_activate(root_cgrp->kn);
	ret = 0;
	goto out;

destroy_root:
	kernfs_destroy_root(root->kf_root);
	root->kf_root = NULL;
exit_root_id:
	cgroup_exit_root_id(root);
cancel_ref:
	percpu_ref_exit(&root_cgrp->self.refcnt);
out:
	free_cgrp_cset_links(&tmp_links);
	return ret;
}

static struct dentry *cgroup_mount(struct file_system_type *fs_type,
				   int flags, const char *unused_dev_name,
				   void *data)
{
	bool is_v2 = fs_type == &cgroup2_fs_type;
	struct super_block *pinned_sb = NULL;
	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
	struct cgroup_subsys *ss;
	struct cgroup_root *root;
	struct cgroup_sb_opts opts;
	struct dentry *dentry;
	int ret;
	int i;
	bool new_sb;

	get_cgroup_ns(ns);

	/* Check if the caller has permission to mount. */
	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
		put_cgroup_ns(ns);
		return ERR_PTR(-EPERM);
	}

	/*
	 * The first time anyone tries to mount a cgroup, enable the list
	 * linking each css_set to its tasks and fix up all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	if (is_v2) {
		if (data) {
			pr_err("cgroup2: unknown option \"%s\"\n", (char *)data);
			put_cgroup_ns(ns);
			return ERR_PTR(-EINVAL);
		}
		cgrp_dfl_visible = true;
		root = &cgrp_dfl_root;
		cgroup_get(&root->cgrp);
		goto out_mount;
	}

	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

	/* First find the desired set of subsystems */
	ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

	/*
	 * Destruction of cgroup root is asynchronous, so subsystems may
	 * still be dying after the previous unmount.  Let's drain them
	 * before proceeding: restart the syscall until every requested
	 * subsystem's previous root has finished dying.
	 */
	for_each_subsys(ss, i) {
		if (!(opts.subsys_mask & (1 << i)) ||
		    ss->root == &cgrp_dfl_root)
			continue;

		if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
			mutex_unlock(&cgroup_mutex);
			msleep(10);
			ret = restart_syscall();
			goto out_free;
		}
		cgroup_put(&ss->root->cgrp);
	}

	for_each_root(root) {
		bool name_match = false;

		if (root == &cgrp_dfl_root)
			continue;

		/*
		 * If we asked for a name then it must match.  Also, if
		 * name matches but subsys_mask doesn't, we should fail.
		 * Remember whether name matched.
		 */
		if (opts.name) {
			if (strcmp(opts.name, root->name))
				continue;
			name_match = true;
		}

		/*
		 * If we asked for subsystems (or explicitly for no
		 * subsystems) then they must match.
		 */
		if ((opts.subsys_mask || opts.none) &&
		    (opts.subsys_mask != root->subsys_mask)) {
			if (!name_match)
				continue;
			ret = -EBUSY;
			goto out_unlock;
		}

		if (root->flags ^ opts.flags)
			pr_warn("new mount options do not match the existing superblock, will be ignored\n");

		/*
		 * We want to reuse @root whose lifetime is governed by its
		 * ->cgrp.  Let's check whether @root is alive and keep it
		 * that way.  As cgroup_kill_sb() can happen anytime, we
		 * want to block it by pinning the sb so that @root doesn't
		 * get killed before mount is complete.
		 *
		 * With the sb pinned, tryget_live can reliably indicate
		 * whether @root can be reused.  If it's being killed,
		 * drain it.  We can use wait_queue for the wait but this
		 * path is super cold.  Let's just sleep a bit and retry.
		 */
		pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
		if (IS_ERR(pinned_sb) ||
		    !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
			mutex_unlock(&cgroup_mutex);
			if (!IS_ERR_OR_NULL(pinned_sb))
				deactivate_super(pinned_sb);
			msleep(10);
			ret = restart_syscall();
			goto out_free;
		}

		ret = 0;
		goto out_unlock;
	}

	/*
	 * No such thing, create a new one.  name= matching without subsys
	 * specification is allowed for already existing hierarchies but we
	 * can't create new one without subsys specification.
	 */
	if (!opts.subsys_mask && !opts.none) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/*
	 * We know this subsystem has not yet been bound.  Users in a non-init
	 * user namespace may only mount hierarchies with no bound subsystems,
	 * i.e. 'none,name=user1'
	 */
	if (!opts.none && !capable(CAP_SYS_ADMIN)) {
		ret = -EPERM;
		goto out_unlock;
	}

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	init_cgroup_root(root, &opts);

	ret = cgroup_setup_root(root, opts.subsys_mask);
	if (ret)
		cgroup_free_root(root);

out_unlock:
	mutex_unlock(&cgroup_mutex);
out_free:
	kfree(opts.release_agent);
	kfree(opts.name);

	if (ret) {
		put_cgroup_ns(ns);
		return ERR_PTR(ret);
	}
out_mount:
	dentry = kernfs_mount(fs_type, flags, root->kf_root,
			      is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC,
			      &new_sb);

	/*
	 * In non-init cgroup namespace, instead of root cgroup's dentry,
	 * we return the dentry corresponding to the cgroupns->root_cgrp.
	 */
	if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
		struct dentry *nsdentry;
		struct cgroup *cgrp;

		mutex_lock(&cgroup_mutex);
		spin_lock_irq(&css_set_lock);

		cgrp = cset_cgroup_from_root(ns->root_cset, root);

		spin_unlock_irq(&css_set_lock);
		mutex_unlock(&cgroup_mutex);

		nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
		dput(dentry);
		dentry = nsdentry;
	}

	if (IS_ERR(dentry) || !new_sb)
		cgroup_put(&root->cgrp);

	/*
	 * If @pinned_sb, we're reusing an existing root and holding an
	 * extra ref on its sb.  Mount is complete.  Put the extra ref.
	 */
	if (pinned_sb) {
		WARN_ON(new_sb);
		deactivate_super(pinned_sb);
	}

	put_cgroup_ns(ns);
	return dentry;
}

static void cgroup_kill_sb(struct super_block *sb)
{
	struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
	struct cgroup_root *root = cgroup_root_from_kf(kf_root);

	/*
	 * If @root doesn't have any mounts or children, start killing it.
	 * This prevents new mounts by disabling percpu_ref_tryget_live().
	 * cgroup_mount() may wait for @root's release.
	 *
	 * And don't kill the default root.
	 */
	if (!list_empty(&root->cgrp.self.children) ||
	    root == &cgrp_dfl_root)
		cgroup_put(&root->cgrp);
	else
		percpu_ref_kill(&root->cgrp.self.refcnt);

	kernfs_kill_sb(sb);
}

static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
	.fs_flags = FS_USERNS_MOUNT,
};

static struct file_system_type cgroup2_fs_type = {
	.name = "cgroup2",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
	.fs_flags = FS_USERNS_MOUNT,
};

static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
				   struct cgroup_namespace *ns)
{
	struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
	int ret;

	ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
	if (ret < 0 || ret >= buflen)
		return NULL;
	return buf;
}

char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
		     struct cgroup_namespace *ns)
{
	char *ret;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(cgroup_path_ns);

/**
 * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
 * @task: target task
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
 * Determine @task's cgroup on the first (the one with the lowest non-zero
 * hierarchy ID) cgroup hierarchy and copy its path into @buf.  This
 * function grabs cgroup_mutex and shouldn't be used inside locks used by
 * cgroup controller callbacks.
 *
 * Return value is the same as kernfs_path().
 */
char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
	struct cgroup_root *root;
	struct cgroup *cgrp;
	int hierarchy_id = 1;
	char *path = NULL;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);

	if (root) {
		cgrp = task_cgroup_from_root(task, root);
		path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
	} else {
		/* if no hierarchy exists, everyone is in "/" */
		if (strlcpy(buf, "/", buflen) < buflen)
			path = buf;
	}

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);
	return path;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);

/* used to track tasks and other necessary states during migration */
struct cgroup_taskset {
	/* the src and dst cset list running through cset->mg_node */
	struct list_head	src_csets;
	struct list_head	dst_csets;

	/* the subsys currently being processed */
	int			ssid;

	/*
	 * Fields for cgroup_taskset_*() iteration.
	 *
	 * Before migration is committed, the target migration tasks are on
	 * ->mg_tasks of the csets on ->src_csets.  After, on ->mg_tasks of
	 * the csets on ->dst_csets.  ->csets point to either ->src_csets
	 * or ->dst_csets depending on whether migration is committed.
	 *
	 * ->cur_csets and ->cur_task point to the current task position
	 * during iteration.
	 */
	struct list_head	*csets;
	struct css_set		*cur_cset;
	struct task_struct	*cur_task;
};

#define CGROUP_TASKSET_INIT(tset)	(struct cgroup_taskset){	\
	.src_csets		= LIST_HEAD_INIT(tset.src_csets),	\
	.dst_csets		= LIST_HEAD_INIT(tset.dst_csets),	\
	.csets			= &tset.src_csets,			\
}
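
/*
 * CGROUP_TASKSET_INIT() is a compound-literal initializer, so a taskset
 * can be declared and initialized in one statement, e.g. as
 * cgroup_migrate() does below:
 *
 *	struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
 */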

/**
 * cgroup_taskset_add - try to add a migration target task to a taskset
 * @task: target task
 * @tset: target taskset
 *
 * Add @task, which is a migration target, to @tset.  This function becomes
 * noop if @task doesn't need to be migrated.  @task's css_set should have
 * been added as a migration source and @task->cg_list will be moved from
 * the css_set's tasks list to mg_tasks one.
 */
static void cgroup_taskset_add(struct task_struct *task,
			       struct cgroup_taskset *tset)
{
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	/* @task either already exited or can't exit until the end */
	if (task->flags & PF_EXITING)
		return;

	/* leave @task alone if post_fork() hasn't linked it yet */
	if (list_empty(&task->cg_list))
		return;

	cset = task_css_set(task);
	if (!cset->mg_src_cgrp)
		return;

	list_move_tail(&task->cg_list, &cset->mg_tasks);
	if (list_empty(&cset->mg_node))
		list_add_tail(&cset->mg_node, &tset->src_csets);
	if (list_empty(&cset->mg_dst_cset->mg_node))
		list_move_tail(&cset->mg_dst_cset->mg_node,
			       &tset->dst_csets);
}

/**
 * cgroup_taskset_first - reset taskset and return the first task
 * @tset: taskset of interest
 * @dst_cssp: output variable for the destination css
 *
 * @tset iteration is initialized and the first task is returned.
 */
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
					 struct cgroup_subsys_state **dst_cssp)
{
	tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
	tset->cur_task = NULL;

	return cgroup_taskset_next(tset, dst_cssp);
}

/**
 * cgroup_taskset_next - iterate to the next task in taskset
 * @tset: taskset of interest
 * @dst_cssp: output variable for the destination css
 *
 * Return the next task in @tset.  Iteration must have been initialized
 * with cgroup_taskset_first().
 */
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
					struct cgroup_subsys_state **dst_cssp)
{
	struct css_set *cset = tset->cur_cset;
	struct task_struct *task = tset->cur_task;

	while (&cset->mg_node != tset->csets) {
		if (!task)
			task = list_first_entry(&cset->mg_tasks,
						struct task_struct, cg_list);
		else
			task = list_next_entry(task, cg_list);

		if (&task->cg_list != &cset->mg_tasks) {
			tset->cur_cset = cset;
			tset->cur_task = task;

			/*
			 * This function may be called both before and
			 * after cgroup_taskset_migrate().  The two cases
			 * can be distinguished by looking at whether @cset
			 * has its ->mg_dst_cset set.
			 */
			if (cset->mg_dst_cset)
				*dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
			else
				*dst_cssp = cset->subsys[tset->ssid];

			return task;
		}

		cset = list_next_entry(cset, mg_node);
		task = NULL;
	}

	return NULL;
}
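
/*
 * Controllers normally consume the two functions above through the
 * cgroup_taskset_for_each() helper defined in linux/cgroup.h, roughly:
 *
 *	struct task_struct *task;
 *	struct cgroup_subsys_state *css;
 *
 *	cgroup_taskset_for_each(task, css, tset)
 *		...examine @task against its destination @css...
 */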

/**
 * cgroup_taskset_migrate - migrate a taskset
 * @tset: taskset to migrate
 * @root: cgroup root the migration is taking place on
 *
 * Migrate tasks in @tset as setup by migration preparation functions.
 * This function fails iff one of the ->can_attach callbacks fails and
 * guarantees that either all or none of the tasks in @tset are migrated.
 * @tset is consumed regardless of success.
 */
static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
				  struct cgroup_root *root)
{
	struct cgroup_subsys *ss;
	struct task_struct *task, *tmp_task;
	struct css_set *cset, *tmp_cset;
	int ssid, failed_ssid, ret;

	/* methods shouldn't be called if no task is actually migrating */
	if (list_empty(&tset->src_csets))
		return 0;

	/* check that we can legitimately attach to the cgroup */
	do_each_subsys_mask(ss, ssid, root->subsys_mask) {
		if (ss->can_attach) {
			tset->ssid = ssid;
			ret = ss->can_attach(tset);
			if (ret) {
				failed_ssid = ssid;
				goto out_cancel_attach;
			}
		}
	} while_each_subsys_mask();

	/*
	 * Now that we're guaranteed success, proceed to move all tasks to
	 * the new cgroup.  There are no failure cases after here, so this
	 * is the commit point.
	 */
	spin_lock_irq(&css_set_lock);
	list_for_each_entry(cset, &tset->src_csets, mg_node) {
		list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
			struct css_set *from_cset = task_css_set(task);
			struct css_set *to_cset = cset->mg_dst_cset;

			get_css_set(to_cset);
			css_set_move_task(task, from_cset, to_cset, true);
			put_css_set_locked(from_cset);
		}
	}
	spin_unlock_irq(&css_set_lock);

	/*
	 * Migration is committed, all target tasks are now on dst_csets.
	 * Nothing is sensitive to fork() after this point.  Notify
	 * controllers that migration is complete.
	 */
	tset->csets = &tset->dst_csets;

	do_each_subsys_mask(ss, ssid, root->subsys_mask) {
		if (ss->attach) {
			tset->ssid = ssid;
			ss->attach(tset);
		}
	} while_each_subsys_mask();

	ret = 0;
	goto out_release_tset;

out_cancel_attach:
	do_each_subsys_mask(ss, ssid, root->subsys_mask) {
		if (ssid == failed_ssid)
			break;
		if (ss->cancel_attach) {
			tset->ssid = ssid;
			ss->cancel_attach(tset);
		}
	} while_each_subsys_mask();
out_release_tset:
	spin_lock_irq(&css_set_lock);
	list_splice_init(&tset->dst_csets, &tset->src_csets);
	list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
		list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
		list_del_init(&cset->mg_node);
	}
	spin_unlock_irq(&css_set_lock);
	return ret;
}

/**
 * cgroup_may_migrate_to - verify whether a cgroup can be migration destination
 * @dst_cgrp: destination cgroup to test
 *
 * On the default hierarchy, except for the root, subtree_control must be
 * zero for migration destination cgroups with tasks so that child cgroups
 * don't compete against tasks.
 */
static bool cgroup_may_migrate_to(struct cgroup *dst_cgrp)
{
	return !cgroup_on_dfl(dst_cgrp) || !cgroup_parent(dst_cgrp) ||
		!dst_cgrp->subtree_control;
}

/**
 * cgroup_migrate_finish - cleanup after attach
 * @preloaded_csets: list of preloaded css_sets
 *
 * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst().  See
 * those functions for details.
 */
static void cgroup_migrate_finish(struct list_head *preloaded_csets)
{
	struct css_set *cset, *tmp_cset;

	lockdep_assert_held(&cgroup_mutex);

	spin_lock_irq(&css_set_lock);
	list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
		cset->mg_src_cgrp = NULL;
		cset->mg_dst_cgrp = NULL;
		cset->mg_dst_cset = NULL;
		list_del_init(&cset->mg_preload_node);
		put_css_set_locked(cset);
	}
	spin_unlock_irq(&css_set_lock);
}

/**
 * cgroup_migrate_add_src - add a migration source css_set
 * @src_cset: the source css_set to add
 * @dst_cgrp: the destination cgroup
 * @preloaded_csets: list of preloaded css_sets
 *
 * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp.  Pin
 * @src_cset and add it to @preloaded_csets, which should later be cleaned
 * up by cgroup_migrate_finish().
 *
 * This function may be called without holding cgroup_threadgroup_rwsem
 * even if the target is a process.  Threads may be created and destroyed
 * but as long as cgroup_mutex is not dropped, no new css_set can be put
 * into play and the preloaded css_sets are guaranteed to cover all
 * migrations.
 */
static void cgroup_migrate_add_src(struct css_set *src_cset,
				   struct cgroup *dst_cgrp,
				   struct list_head *preloaded_csets)
{
	struct cgroup *src_cgrp;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&css_set_lock);

	/*
	 * If ->dead, @src_set is associated with one or more dead cgroups
	 * and doesn't contain any migratable tasks.  Ignore it early so
	 * that the rest of migration path doesn't get confused by it.
	 */
	if (src_cset->dead)
		return;

	src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);

	if (!list_empty(&src_cset->mg_preload_node))
		return;

	WARN_ON(src_cset->mg_src_cgrp);
	WARN_ON(src_cset->mg_dst_cgrp);
	WARN_ON(!list_empty(&src_cset->mg_tasks));
	WARN_ON(!list_empty(&src_cset->mg_node));

	src_cset->mg_src_cgrp = src_cgrp;
	src_cset->mg_dst_cgrp = dst_cgrp;
	get_css_set(src_cset);
	list_add(&src_cset->mg_preload_node, preloaded_csets);
}

/**
 * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
 * @preloaded_csets: list of preloaded source css_sets
 *
 * Tasks are about to be moved and all the source css_sets have been
 * preloaded to @preloaded_csets.  This function looks up and pins all
 * destination css_sets, links each to its source, and appends them to
 * @preloaded_csets.
 *
 * This function must be called after cgroup_migrate_add_src() has been
 * called on each migration source css_set.  After migration is performed
 * using cgroup_migrate(), cgroup_migrate_finish() must be called on
 * @preloaded_csets.
 */
static int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets)
{
	LIST_HEAD(csets);
	struct css_set *src_cset, *tmp_cset;

	lockdep_assert_held(&cgroup_mutex);

	/* look up the dst cset for each src cset and link it to src */
	list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) {
		struct css_set *dst_cset;

		dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
		if (!dst_cset)
			goto err;

		WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);

		/*
		 * If src cset equals dst, it's noop.  Drop the src.
		 * cgroup_migrate() will skip the cset too.  Note that we
		 * can't handle src == dst as some nodes are used by both.
		 */
		if (src_cset == dst_cset) {
			src_cset->mg_src_cgrp = NULL;
			src_cset->mg_dst_cgrp = NULL;
			list_del_init(&src_cset->mg_preload_node);
			put_css_set(src_cset);
			put_css_set(dst_cset);
			continue;
		}

		src_cset->mg_dst_cset = dst_cset;

		if (list_empty(&dst_cset->mg_preload_node))
			list_add(&dst_cset->mg_preload_node, &csets);
		else
			put_css_set(dst_cset);
	}

	list_splice_tail(&csets, preloaded_csets);
	return 0;
err:
	cgroup_migrate_finish(&csets);
	return -ENOMEM;
}

/**
 * cgroup_migrate - migrate a process or task to a cgroup
 * @leader: the leader of the process or the task to migrate
 * @threadgroup: whether @leader points to the whole process or a single task
 * @root: cgroup root migration is taking place on
 *
 * Migrate a process or task denoted by @leader.  If migrating a process,
 * the caller must be holding cgroup_threadgroup_rwsem.  The caller is also
 * responsible for invoking cgroup_migrate_add_src() and
 * cgroup_migrate_prepare_dst() on the targets before invoking this
 * function and following up with cgroup_migrate_finish().
 *
 * As long as a controller's ->can_attach() doesn't fail, this function is
 * guaranteed to succeed.  This means that, excluding ->can_attach()
 * failure, when migrating multiple targets, the success or failure can be
 * decided for all targets by invoking cgroup_migrate_prepare_dst() before
 * actually starting migrating.
 */
static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
			  struct cgroup_root *root)
{
	struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
	struct task_struct *task;

	/*
	 * Prevent freeing of tasks while we take a snapshot.  Tasks that
	 * are already PF_EXITING could be freed from underneath us unless
	 * we take an rcu_read_lock.
	 */
	spin_lock_irq(&css_set_lock);
	rcu_read_lock();
	task = leader;
	do {
		cgroup_taskset_add(task, &tset);
		if (!threadgroup)
			break;
	} while_each_thread(leader, task);
	rcu_read_unlock();
	spin_unlock_irq(&css_set_lock);

	return cgroup_taskset_migrate(&tset, root);
}

/**
 * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
 * @dst_cgrp: the cgroup to attach to
 * @leader: the task or the leader of the threadgroup to be attached
 * @threadgroup: attach the whole threadgroup?
 *
 * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
 */
static int cgroup_attach_task(struct cgroup *dst_cgrp,
			      struct task_struct *leader, bool threadgroup)
{
	LIST_HEAD(preloaded_csets);
	struct task_struct *task;
	int ret;

	if (!cgroup_may_migrate_to(dst_cgrp))
		return -EBUSY;

	/* look up all src csets */
	spin_lock_irq(&css_set_lock);
	rcu_read_lock();
	task = leader;
	do {
		cgroup_migrate_add_src(task_css_set(task), dst_cgrp,
				       &preloaded_csets);
		if (!threadgroup)
			break;
	} while_each_thread(leader, task);
	rcu_read_unlock();
	spin_unlock_irq(&css_set_lock);

	/* prepare dst csets and commit */
	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
	if (!ret)
		ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);

	cgroup_migrate_finish(&preloaded_csets);
	return ret;
}
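
/*
 * Putting the pieces together, a full migration as driven by
 * cgroup_attach_task() above is a four-step protocol:
 *
 *	cgroup_migrate_add_src()	- pin and preload source css_sets
 *	cgroup_migrate_prepare_dst()	- allocate/pin destination css_sets
 *	cgroup_migrate()		- ->can_attach(), commit, ->attach()
 *	cgroup_migrate_finish()		- drop the preloaded references
 *
 * All allocation happens in the prepare step, so the commit in
 * cgroup_taskset_migrate() cannot fail once every ->can_attach() agrees.
 */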
2845
2846static int cgroup_procs_write_permission(struct task_struct *task,
2847 struct cgroup *dst_cgrp,
2848 struct kernfs_open_file *of)
2849{
2850 const struct cred *cred = current_cred();
2851 const struct cred *tcred = get_task_cred(task);
2852 int ret = 0;
2853
2854
2855
2856
2857
2858 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
2859 !uid_eq(cred->euid, tcred->uid) &&
2860 !uid_eq(cred->euid, tcred->suid))
2861 ret = -EACCES;
2862
2863 if (!ret && cgroup_on_dfl(dst_cgrp)) {
2864 struct super_block *sb = of->file->f_path.dentry->d_sb;
2865 struct cgroup *cgrp;
2866 struct inode *inode;
2867
2868 spin_lock_irq(&css_set_lock);
2869 cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
2870 spin_unlock_irq(&css_set_lock);
2871
2872 while (!cgroup_is_descendant(dst_cgrp, cgrp))
2873 cgrp = cgroup_parent(cgrp);
2874
2875 ret = -ENOMEM;
2876 inode = kernfs_get_inode(sb, cgrp->procs_file.kn);
2877 if (inode) {
2878 ret = inode_permission(inode, MAY_WRITE);
2879 iput(inode);
2880 }
2881 }
2882
2883 put_cred(tcred);
2884 return ret;
2885}
2886
/*
 * Find the task_struct of the task to attach by vpid and pass it along to
 * the function to attach either it or all tasks in its threadgroup.  Will
 * lock cgroup_mutex and the threadgroup.
 */
2892static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
2893 size_t nbytes, loff_t off, bool threadgroup)
2894{
2895 struct task_struct *tsk;
2896 struct cgroup_subsys *ss;
2897 struct cgroup *cgrp;
2898 pid_t pid;
2899 int ssid, ret;
2900
2901 if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
2902 return -EINVAL;
2903
2904 cgrp = cgroup_kn_lock_live(of->kn, false);
2905 if (!cgrp)
2906 return -ENODEV;
2907
2908 percpu_down_write(&cgroup_threadgroup_rwsem);
2909 rcu_read_lock();
2910 if (pid) {
2911 tsk = find_task_by_vpid(pid);
2912 if (!tsk) {
2913 ret = -ESRCH;
2914 goto out_unlock_rcu;
2915 }
2916 } else {
2917 tsk = current;
2918 }
2919
2920 if (threadgroup)
2921 tsk = tsk->group_leader;
2922
 /*
 * Workqueue threads may acquire PF_NO_SETAFFINITY and become
 * trapped in a cpuset, or RT worker may be born in a cgroup
 * with no rt_runtime allocated.  Just say no.
 */
2928 if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
2929 ret = -EINVAL;
2930 goto out_unlock_rcu;
2931 }
2932
2933 get_task_struct(tsk);
2934 rcu_read_unlock();
2935
2936 ret = cgroup_procs_write_permission(tsk, cgrp, of);
2937 if (!ret)
2938 ret = cgroup_attach_task(cgrp, tsk, threadgroup);
2939
2940 put_task_struct(tsk);
2941 goto out_unlock_threadgroup;
2942
2943out_unlock_rcu:
2944 rcu_read_unlock();
2945out_unlock_threadgroup:
2946 percpu_up_write(&cgroup_threadgroup_rwsem);
2947 for_each_subsys(ss, ssid)
2948 if (ss->post_attach)
2949 ss->post_attach();
2950 cgroup_kn_unlock(of->kn);
2951 return ret ?: nbytes;
2952}
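/*
 * Illustrative usage sketch (not part of this file): the handlers above
 * are reached by writing a single PID to the interface files, e.g.:
 *
 *	echo 1234 > /sys/fs/cgroup/.../cgroup.procs	// move thread group
 *	echo 0 > /sys/fs/cgroup/.../cgroup.procs	// 0 means current task
 *
 * The mount path is an assumption about the admin's setup; the parsing
 * above accepts any single non-negative integer in the buffer.
 */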
2953
/**
 * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
 * @from: attach to all cgroups of a given task
 * @tsk: the task to be attached
 */
2959int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
2960{
2961 struct cgroup_root *root;
2962 int retval = 0;
2963
2964 mutex_lock(&cgroup_mutex);
2965 for_each_root(root) {
2966 struct cgroup *from_cgrp;
2967
2968 if (root == &cgrp_dfl_root)
2969 continue;
2970
2971 spin_lock_irq(&css_set_lock);
2972 from_cgrp = task_cgroup_from_root(from, root);
2973 spin_unlock_irq(&css_set_lock);
2974
2975 retval = cgroup_attach_task(from_cgrp, tsk, false);
2976 if (retval)
2977 break;
2978 }
2979 mutex_unlock(&cgroup_mutex);
2980
2981 return retval;
2982}
2983EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
2984
2985static ssize_t cgroup_tasks_write(struct kernfs_open_file *of,
2986 char *buf, size_t nbytes, loff_t off)
2987{
2988 return __cgroup_procs_write(of, buf, nbytes, off, false);
2989}
2990
2991static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
2992 char *buf, size_t nbytes, loff_t off)
2993{
2994 return __cgroup_procs_write(of, buf, nbytes, off, true);
2995}
2996
2997static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
2998 char *buf, size_t nbytes, loff_t off)
2999{
3000 struct cgroup *cgrp;
3001
3002 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
3003
3004 cgrp = cgroup_kn_lock_live(of->kn, false);
3005 if (!cgrp)
3006 return -ENODEV;
3007 spin_lock(&release_agent_path_lock);
3008 strlcpy(cgrp->root->release_agent_path, strstrip(buf),
3009 sizeof(cgrp->root->release_agent_path));
3010 spin_unlock(&release_agent_path_lock);
3011 cgroup_kn_unlock(of->kn);
3012 return nbytes;
3013}
3014
3015static int cgroup_release_agent_show(struct seq_file *seq, void *v)
3016{
3017 struct cgroup *cgrp = seq_css(seq)->cgroup;
3018
3019 spin_lock(&release_agent_path_lock);
3020 seq_puts(seq, cgrp->root->release_agent_path);
3021 spin_unlock(&release_agent_path_lock);
3022 seq_putc(seq, '\n');
3023 return 0;
3024}
3025
3026static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
3027{
3028 seq_puts(seq, "0\n");
3029 return 0;
3030}
3031
3032static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
3033{
3034 struct cgroup_subsys *ss;
3035 bool printed = false;
3036 int ssid;
3037
3038 do_each_subsys_mask(ss, ssid, ss_mask) {
3039 if (printed)
3040 seq_putc(seq, ' ');
3041 seq_printf(seq, "%s", ss->name);
3042 printed = true;
3043 } while_each_subsys_mask();
3044 if (printed)
3045 seq_putc(seq, '\n');
3046}
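/*
 * Illustrative output sketch: for a mask with the cpu, io and memory bits
 * set (controller names are examples), the function above prints
 *
 *	"cpu io memory\n"
 *
 * while an empty mask produces no output at all, not even a newline.
 */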
3047
3048
3049static int cgroup_controllers_show(struct seq_file *seq, void *v)
3050{
3051 struct cgroup *cgrp = seq_css(seq)->cgroup;
3052
3053 cgroup_print_ss_mask(seq, cgroup_control(cgrp));
3054 return 0;
3055}
3056
3057
3058static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
3059{
3060 struct cgroup *cgrp = seq_css(seq)->cgroup;
3061
3062 cgroup_print_ss_mask(seq, cgrp->subtree_control);
3063 return 0;
3064}
3065
/**
 * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
 * @cgrp: root of the subtree to update csses for
 *
 * @cgrp's control masks have changed and its subtree's css associations
 * need to be updated accordingly.  This function looks up all css_sets
 * which are attached to the subtree, creates the matching updated
 * css_sets and migrates the tasks to the new ones.
 */
3075static int cgroup_update_dfl_csses(struct cgroup *cgrp)
3076{
3077 LIST_HEAD(preloaded_csets);
3078 struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
3079 struct cgroup_subsys_state *d_css;
3080 struct cgroup *dsct;
3081 struct css_set *src_cset;
3082 int ret;
3083
3084 lockdep_assert_held(&cgroup_mutex);
3085
3086 percpu_down_write(&cgroup_threadgroup_rwsem);
3087
3088
3089 spin_lock_irq(&css_set_lock);
3090 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3091 struct cgrp_cset_link *link;
3092
3093 list_for_each_entry(link, &dsct->cset_links, cset_link)
3094 cgroup_migrate_add_src(link->cset, dsct,
3095 &preloaded_csets);
3096 }
3097 spin_unlock_irq(&css_set_lock);
3098
3099
3100 ret = cgroup_migrate_prepare_dst(&preloaded_csets);
3101 if (ret)
3102 goto out_finish;
3103
3104 spin_lock_irq(&css_set_lock);
3105 list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
3106 struct task_struct *task, *ntask;
3107
3108
3109 if (!src_cset->mg_src_cgrp)
3110 break;
3111
3112
3113 list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
3114 cgroup_taskset_add(task, &tset);
3115 }
3116 spin_unlock_irq(&css_set_lock);
3117
3118 ret = cgroup_taskset_migrate(&tset, cgrp->root);
3119out_finish:
3120 cgroup_migrate_finish(&preloaded_csets);
3121 percpu_up_write(&cgroup_threadgroup_rwsem);
3122 return ret;
3123}
3124
/**
 * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
 * @cgrp: root of the target subtree
 *
 * Because css offlining is asynchronous, userland may try to re-enable a
 * controller while the previous css is still around.  This function grabs
 * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
 */
3133static void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
3134 __acquires(&cgroup_mutex)
3135{
3136 struct cgroup *dsct;
3137 struct cgroup_subsys_state *d_css;
3138 struct cgroup_subsys *ss;
3139 int ssid;
3140
3141restart:
3142 mutex_lock(&cgroup_mutex);
3143
3144 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3145 for_each_subsys(ss, ssid) {
3146 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3147 DEFINE_WAIT(wait);
3148
3149 if (!css || !percpu_ref_is_dying(&css->refcnt))
3150 continue;
3151
3152 cgroup_get(dsct);
3153 prepare_to_wait(&dsct->offline_waitq, &wait,
3154 TASK_UNINTERRUPTIBLE);
3155
3156 mutex_unlock(&cgroup_mutex);
3157 schedule();
3158 finish_wait(&dsct->offline_waitq, &wait);
3159
3160 cgroup_put(dsct);
3161 goto restart;
3162 }
3163 }
3164}
3165
/**
 * cgroup_save_control - save control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * Save ->subtree_control and ->subtree_ss_mask to the respective old_
 * prefixed fields for @cgrp's subtree including @cgrp itself.
 */
3173static void cgroup_save_control(struct cgroup *cgrp)
3174{
3175 struct cgroup *dsct;
3176 struct cgroup_subsys_state *d_css;
3177
3178 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3179 dsct->old_subtree_control = dsct->subtree_control;
3180 dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
3181 }
3182}
3183
/**
 * cgroup_propagate_control - refresh control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * For @cgrp and its subtree, ensure ->subtree_ss_mask matches
 * ->subtree_control and propagate controller availability through the
 * subtree so that descendants don't have unavailable controllers enabled.
 */
3192static void cgroup_propagate_control(struct cgroup *cgrp)
3193{
3194 struct cgroup *dsct;
3195 struct cgroup_subsys_state *d_css;
3196
3197 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3198 dsct->subtree_control &= cgroup_control(dsct);
3199 dsct->subtree_ss_mask =
3200 cgroup_calc_subtree_ss_mask(dsct->subtree_control,
3201 cgroup_ss_mask(dsct));
3202 }
3203}
3204
/**
 * cgroup_restore_control - restore control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * Restore ->subtree_control and ->subtree_ss_mask from the respective old_
 * prefixed fields for @cgrp's subtree including @cgrp itself.
 */
3212static void cgroup_restore_control(struct cgroup *cgrp)
3213{
3214 struct cgroup *dsct;
3215 struct cgroup_subsys_state *d_css;
3216
3217 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3218 dsct->subtree_control = dsct->old_subtree_control;
3219 dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
3220 }
3221}
3222
3223static bool css_visible(struct cgroup_subsys_state *css)
3224{
3225 struct cgroup_subsys *ss = css->ss;
3226 struct cgroup *cgrp = css->cgroup;
3227
3228 if (cgroup_control(cgrp) & (1 << ss->id))
3229 return true;
3230 if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
3231 return false;
3232 return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
3233}
3234
/**
 * cgroup_apply_control_enable - create or show csses according to control
 * @cgrp: root of the target subtree
 *
 * Walk @cgrp's subtree and create new csses or make the existing ones
 * visible.  A css is created invisible if it's being implicitly enabled
 * through dependency.  An invisible css is made visible when the userland
 * explicitly enables it.
 *
 * Returns 0 on success, -errno on failure.  On failure, csses which have
 * been processed already aren't cleaned up.  The caller is responsible for
 * cleaning up with cgroup_apply_control_disable().
 */
3248static int cgroup_apply_control_enable(struct cgroup *cgrp)
3249{
3250 struct cgroup *dsct;
3251 struct cgroup_subsys_state *d_css;
3252 struct cgroup_subsys *ss;
3253 int ssid, ret;
3254
3255 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3256 for_each_subsys(ss, ssid) {
3257 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3258
3259 WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
3260
3261 if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
3262 continue;
3263
3264 if (!css) {
3265 css = css_create(dsct, ss);
3266 if (IS_ERR(css))
3267 return PTR_ERR(css);
3268 }
3269
3270 if (css_visible(css)) {
3271 ret = css_populate_dir(css);
3272 if (ret)
3273 return ret;
3274 }
3275 }
3276 }
3277
3278 return 0;
3279}
3280
/**
 * cgroup_apply_control_disable - kill or hide csses according to control
 * @cgrp: root of the target subtree
 *
 * Walk @cgrp's subtree and kill and hide csses so that they match
 * cgroup_ss_mask() and css_visible().
 *
 * A css is hidden when the userland requests it to be disabled while
 * other subsystems are still depending on it.  The css must not actively
 * control resources and be in the vanilla state if it's made visible
 * again later.  Controllers which may be depended upon should provide
 * ->css_reset() for this purpose.
 */
3294static void cgroup_apply_control_disable(struct cgroup *cgrp)
3295{
3296 struct cgroup *dsct;
3297 struct cgroup_subsys_state *d_css;
3298 struct cgroup_subsys *ss;
3299 int ssid;
3300
3301 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3302 for_each_subsys(ss, ssid) {
3303 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3304
3305 WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
3306
3307 if (!css)
3308 continue;
3309
3310 if (css->parent &&
3311 !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
3312 kill_css(css);
3313 } else if (!css_visible(css)) {
3314 css_clear_dir(css);
3315 if (ss->css_reset)
3316 ss->css_reset(css);
3317 }
3318 }
3319 }
3320}
3321
/**
 * cgroup_apply_control - apply control mask updates to the subtree
 * @cgrp: root of the target subtree
 *
 * subsystems can be enabled and disabled in a subtree using the following
 * steps.
 *
 * 1. Call cgroup_save_control() to stash the current state.
 * 2. Update ->subtree_control masks in the subtree as desired.
 * 3. Call cgroup_apply_control() to apply the changes.
 * 4. Optionally perform other related operations.
 * 5. Call cgroup_finalize_control() to finish up.
 *
 * This function implements step 3 and propagates the mask changes
 * throughout @cgrp's subtree, updates csses accordingly and performs
 * process migrations.
 */
3339static int cgroup_apply_control(struct cgroup *cgrp)
3340{
3341 int ret;
3342
3343 cgroup_propagate_control(cgrp);
3344
3345 ret = cgroup_apply_control_enable(cgrp);
3346 if (ret)
3347 return ret;
3348
 /*
 * At this point, cgroup_e_css() results reflect the new csses
 * making the following cgroup_update_dfl_csses() properly update
 * css associations of all tasks in the subtree.
 */
3354 ret = cgroup_update_dfl_csses(cgrp);
3355 if (ret)
3356 return ret;
3357
3358 return 0;
3359}
3360
/**
 * cgroup_finalize_control - finalize control mask update
 * @cgrp: root of the target subtree
 * @ret: the result of the update
 *
 * Finalize the control mask update.  See cgroup_apply_control() for more
 * info.
 */
3368static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
3369{
3370 if (ret) {
3371 cgroup_restore_control(cgrp);
3372 cgroup_propagate_control(cgrp);
3373 }
3374
3375 cgroup_apply_control_disable(cgrp);
3376}
3377
3378
3379static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
3380 char *buf, size_t nbytes,
3381 loff_t off)
3382{
3383 u16 enable = 0, disable = 0;
3384 struct cgroup *cgrp, *child;
3385 struct cgroup_subsys *ss;
3386 char *tok;
3387 int ssid, ret;
3388
3389
3390
3391
3392
3393 buf = strstrip(buf);
3394 while ((tok = strsep(&buf, " "))) {
3395 if (tok[0] == '\0')
3396 continue;
3397 do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
3398 if (!cgroup_ssid_enabled(ssid) ||
3399 strcmp(tok + 1, ss->name))
3400 continue;
3401
3402 if (*tok == '+') {
3403 enable |= 1 << ssid;
3404 disable &= ~(1 << ssid);
3405 } else if (*tok == '-') {
3406 disable |= 1 << ssid;
3407 enable &= ~(1 << ssid);
3408 } else {
3409 return -EINVAL;
3410 }
3411 break;
3412 } while_each_subsys_mask();
3413 if (ssid == CGROUP_SUBSYS_COUNT)
3414 return -EINVAL;
3415 }
3416
3417 cgrp = cgroup_kn_lock_live(of->kn, true);
3418 if (!cgrp)
3419 return -ENODEV;
3420
3421 for_each_subsys(ss, ssid) {
3422 if (enable & (1 << ssid)) {
3423 if (cgrp->subtree_control & (1 << ssid)) {
3424 enable &= ~(1 << ssid);
3425 continue;
3426 }
3427
3428 if (!(cgroup_control(cgrp) & (1 << ssid))) {
3429 ret = -ENOENT;
3430 goto out_unlock;
3431 }
3432 } else if (disable & (1 << ssid)) {
3433 if (!(cgrp->subtree_control & (1 << ssid))) {
3434 disable &= ~(1 << ssid);
3435 continue;
3436 }
3437
3438
3439 cgroup_for_each_live_child(child, cgrp) {
3440 if (child->subtree_control & (1 << ssid)) {
3441 ret = -EBUSY;
3442 goto out_unlock;
3443 }
3444 }
3445 }
3446 }
3447
3448 if (!enable && !disable) {
3449 ret = 0;
3450 goto out_unlock;
3451 }
3452
 /*
 * Except for the root, subtree_control must be zero for a cgroup
 * with tasks so that child cgroups don't compete against tasks.
 */
3457 if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
3458 ret = -EBUSY;
3459 goto out_unlock;
3460 }
3461
3462
3463 cgroup_save_control(cgrp);
3464
3465 cgrp->subtree_control |= enable;
3466 cgrp->subtree_control &= ~disable;
3467
3468 ret = cgroup_apply_control(cgrp);
3469
3470 cgroup_finalize_control(cgrp, ret);
3471
3472 kernfs_activate(cgrp->kn);
3473 ret = 0;
3474out_unlock:
3475 cgroup_kn_unlock(of->kn);
3476 return ret ?: nbytes;
3477}
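/*
 * Illustrative usage sketch: enabling and disabling child controllers
 * from userland maps directly onto the parser above, e.g.:
 *
 *	echo "+memory -io" > cgroup.subtree_control
 *
 * Each token must be a controller name prefixed with '+' or '-'; a later
 * token overrides an earlier one for the same controller.
 */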
3478
3479static int cgroup_events_show(struct seq_file *seq, void *v)
3480{
3481 seq_printf(seq, "populated %d\n",
3482 cgroup_is_populated(seq_css(seq)->cgroup));
3483 return 0;
3484}
3485
3486static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
3487 size_t nbytes, loff_t off)
3488{
3489 struct cgroup *cgrp = of->kn->parent->priv;
3490 struct cftype *cft = of->kn->priv;
3491 struct cgroup_subsys_state *css;
3492 int ret;
3493
3494 if (cft->write)
3495 return cft->write(of, buf, nbytes, off);
3496
3497
3498
3499
3500
3501
3502
3503 rcu_read_lock();
3504 css = cgroup_css(cgrp, cft->ss);
3505 rcu_read_unlock();
3506
3507 if (cft->write_u64) {
3508 unsigned long long v;
3509 ret = kstrtoull(buf, 0, &v);
3510 if (!ret)
3511 ret = cft->write_u64(css, cft, v);
3512 } else if (cft->write_s64) {
3513 long long v;
3514 ret = kstrtoll(buf, 0, &v);
3515 if (!ret)
3516 ret = cft->write_s64(css, cft, v);
3517 } else {
3518 ret = -EINVAL;
3519 }
3520
3521 return ret ?: nbytes;
3522}
3523
3524static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
3525{
3526 return seq_cft(seq)->seq_start(seq, ppos);
3527}
3528
3529static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
3530{
3531 return seq_cft(seq)->seq_next(seq, v, ppos);
3532}
3533
3534static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
3535{
3536 seq_cft(seq)->seq_stop(seq, v);
3537}
3538
3539static int cgroup_seqfile_show(struct seq_file *m, void *arg)
3540{
3541 struct cftype *cft = seq_cft(m);
3542 struct cgroup_subsys_state *css = seq_css(m);
3543
3544 if (cft->seq_show)
3545 return cft->seq_show(m, arg);
3546
3547 if (cft->read_u64)
3548 seq_printf(m, "%llu\n", cft->read_u64(css, cft));
3549 else if (cft->read_s64)
3550 seq_printf(m, "%lld\n", cft->read_s64(css, cft));
3551 else
3552 return -EINVAL;
3553 return 0;
3554}
3555
3556static struct kernfs_ops cgroup_kf_single_ops = {
3557 .atomic_write_len = PAGE_SIZE,
3558 .write = cgroup_file_write,
3559 .seq_show = cgroup_seqfile_show,
3560};
3561
3562static struct kernfs_ops cgroup_kf_ops = {
3563 .atomic_write_len = PAGE_SIZE,
3564 .write = cgroup_file_write,
3565 .seq_start = cgroup_seqfile_start,
3566 .seq_next = cgroup_seqfile_next,
3567 .seq_stop = cgroup_seqfile_stop,
3568 .seq_show = cgroup_seqfile_show,
3569};
3570
3571
3572
3573
3574static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
3575 const char *new_name_str)
3576{
3577 struct cgroup *cgrp = kn->priv;
3578 int ret;
3579
3580 if (kernfs_type(kn) != KERNFS_DIR)
3581 return -ENOTDIR;
3582 if (kn->parent != new_parent)
3583 return -EIO;
3584
3585
3586
3587
3588
3589 if (cgroup_on_dfl(cgrp))
3590 return -EPERM;
3591
3592
3593
3594
3595
3596
3597 kernfs_break_active_protection(new_parent);
3598 kernfs_break_active_protection(kn);
3599
3600 mutex_lock(&cgroup_mutex);
3601
3602 ret = kernfs_rename(kn, new_parent, new_name_str);
3603
3604 mutex_unlock(&cgroup_mutex);
3605
3606 kernfs_unbreak_active_protection(kn);
3607 kernfs_unbreak_active_protection(new_parent);
3608 return ret;
3609}
3610
3611
3612static int cgroup_kn_set_ugid(struct kernfs_node *kn)
3613{
3614 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
3615 .ia_uid = current_fsuid(),
3616 .ia_gid = current_fsgid(), };
3617
3618 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
3619 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
3620 return 0;
3621
3622 return kernfs_setattr(kn, &iattr);
3623}
3624
3625static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
3626 struct cftype *cft)
3627{
3628 char name[CGROUP_FILE_NAME_MAX];
3629 struct kernfs_node *kn;
3630 struct lock_class_key *key = NULL;
3631 int ret;
3632
3633#ifdef CONFIG_DEBUG_LOCK_ALLOC
3634 key = &cft->lockdep_key;
3635#endif
3636 kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
3637 cgroup_file_mode(cft), 0, cft->kf_ops, cft,
3638 NULL, key);
3639 if (IS_ERR(kn))
3640 return PTR_ERR(kn);
3641
3642 ret = cgroup_kn_set_ugid(kn);
3643 if (ret) {
3644 kernfs_remove(kn);
3645 return ret;
3646 }
3647
3648 if (cft->file_offset) {
3649 struct cgroup_file *cfile = (void *)css + cft->file_offset;
3650
3651 spin_lock_irq(&cgroup_file_kn_lock);
3652 cfile->kn = kn;
3653 spin_unlock_irq(&cgroup_file_kn_lock);
3654 }
3655
3656 return 0;
3657}
3658
/**
 * cgroup_addrm_files - add or remove files to a cgroup directory
 * @css: the target css
 * @cgrp: the target cgroup (usually css->cgroup)
 * @cfts: array of cftypes to be added
 * @is_add: whether to add or remove
 *
 * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
 * For removals, this function never fails.
 */
3669static int cgroup_addrm_files(struct cgroup_subsys_state *css,
3670 struct cgroup *cgrp, struct cftype cfts[],
3671 bool is_add)
3672{
3673 struct cftype *cft, *cft_end = NULL;
3674 int ret = 0;
3675
3676 lockdep_assert_held(&cgroup_mutex);
3677
3678restart:
3679 for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
3680
3681 if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
3682 continue;
3683 if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
3684 continue;
3685 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
3686 continue;
3687 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
3688 continue;
3689
3690 if (is_add) {
3691 ret = cgroup_add_file(css, cgrp, cft);
3692 if (ret) {
3693 pr_warn("%s: failed to add %s, err=%d\n",
3694 __func__, cft->name, ret);
3695 cft_end = cft;
3696 is_add = false;
3697 goto restart;
3698 }
3699 } else {
3700 cgroup_rm_file(cgrp, cft);
3701 }
3702 }
3703 return ret;
3704}
3705
3706static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
3707{
3708 LIST_HEAD(pending);
3709 struct cgroup_subsys *ss = cfts[0].ss;
3710 struct cgroup *root = &ss->root->cgrp;
3711 struct cgroup_subsys_state *css;
3712 int ret = 0;
3713
3714 lockdep_assert_held(&cgroup_mutex);
3715
3716
3717 css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
3718 struct cgroup *cgrp = css->cgroup;
3719
3720 if (!(css->flags & CSS_VISIBLE))
3721 continue;
3722
3723 ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
3724 if (ret)
3725 break;
3726 }
3727
3728 if (is_add && !ret)
3729 kernfs_activate(root->kn);
3730 return ret;
3731}
3732
3733static void cgroup_exit_cftypes(struct cftype *cfts)
3734{
3735 struct cftype *cft;
3736
3737 for (cft = cfts; cft->name[0] != '\0'; cft++) {
3738
3739 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
3740 kfree(cft->kf_ops);
3741 cft->kf_ops = NULL;
3742 cft->ss = NULL;
3743
3744
3745 cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
3746 }
3747}
3748
3749static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3750{
3751 struct cftype *cft;
3752
3753 for (cft = cfts; cft->name[0] != '\0'; cft++) {
3754 struct kernfs_ops *kf_ops;
3755
3756 WARN_ON(cft->ss || cft->kf_ops);
3757
3758 if (cft->seq_start)
3759 kf_ops = &cgroup_kf_ops;
3760 else
3761 kf_ops = &cgroup_kf_single_ops;
3762
3763
3764
3765
3766
3767 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
3768 kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
3769 if (!kf_ops) {
3770 cgroup_exit_cftypes(cfts);
3771 return -ENOMEM;
3772 }
3773 kf_ops->atomic_write_len = cft->max_write_len;
3774 }
3775
3776 cft->kf_ops = kf_ops;
3777 cft->ss = ss;
3778 }
3779
3780 return 0;
3781}
3782
3783static int cgroup_rm_cftypes_locked(struct cftype *cfts)
3784{
3785 lockdep_assert_held(&cgroup_mutex);
3786
3787 if (!cfts || !cfts[0].ss)
3788 return -ENOENT;
3789
3790 list_del(&cfts->node);
3791 cgroup_apply_cftypes(cfts, false);
3792 cgroup_exit_cftypes(cfts);
3793 return 0;
3794}
3795
/**
 * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Unregister @cfts.  Files described by @cfts are removed from all
 * existing cgroups and all future cgroups won't have them either.  This
 * function can be called anytime whether @cfts' subsys is attached or not.
 *
 * Returns 0 on successful unregistration, -ENOENT if @cfts is not
 * registered.
 */
3807int cgroup_rm_cftypes(struct cftype *cfts)
3808{
3809 int ret;
3810
3811 mutex_lock(&cgroup_mutex);
3812 ret = cgroup_rm_cftypes_locked(cfts);
3813 mutex_unlock(&cgroup_mutex);
3814 return ret;
3815}
3816
/**
 * cgroup_add_cftypes - add an array of cftypes to a subsystem
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Register @cfts to @ss.  Files described by @cfts are created for all
 * existing cgroups to which @ss is attached and all future cgroups will
 * have them too.  This function can be called anytime whether @ss is
 * attached or not.
 *
 * Returns 0 on successful registration, -errno on failure.  Note that this
 * function currently returns 0 as long as @cfts registration is successful
 * even if some file creation attempts on existing cgroups fail.
 */
3831static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3832{
3833 int ret;
3834
3835 if (!cgroup_ssid_enabled(ss->id))
3836 return 0;
3837
3838 if (!cfts || cfts[0].name[0] == '\0')
3839 return 0;
3840
3841 ret = cgroup_init_cftypes(ss, cfts);
3842 if (ret)
3843 return ret;
3844
3845 mutex_lock(&cgroup_mutex);
3846
3847 list_add_tail(&cfts->node, &ss->cfts);
3848 ret = cgroup_apply_cftypes(cfts, true);
3849 if (ret)
3850 cgroup_rm_cftypes_locked(cfts);
3851
3852 mutex_unlock(&cgroup_mutex);
3853 return ret;
3854}
3855
/**
 * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Similar to cgroup_add_cftypes() but the added files are only used for
 * the default hierarchy.
 */
3864int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3865{
3866 struct cftype *cft;
3867
3868 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
3869 cft->flags |= __CFTYPE_ONLY_ON_DFL;
3870 return cgroup_add_cftypes(ss, cfts);
3871}
3872
/**
 * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Similar to cgroup_add_cftypes() but the added files are only used for
 * the legacy hierarchies.
 */
3881int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3882{
3883 struct cftype *cft;
3884
3885 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
3886 cft->flags |= __CFTYPE_NOT_ON_DFL;
3887 return cgroup_add_cftypes(ss, cfts);
3888}
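/*
 * Illustrative registration sketch; ex_usage_show(), ex_cftypes and
 * ex_cgrp_subsys are hypothetical names, not part of this file:
 *
 *	static int ex_usage_show(struct seq_file *sf, void *v)
 *	{
 *		seq_printf(sf, "%llu\n", 0ULL);
 *		return 0;
 *	}
 *
 *	static struct cftype ex_cftypes[] = {
 *		{
 *			.name = "example.usage",
 *			.seq_show = ex_usage_show,
 *		},
 *		{ }	// zero-length name terminates the array
 *	};
 *
 * followed by cgroup_add_dfl_cftypes(&ex_cgrp_subsys, ex_cftypes) and/or
 * cgroup_add_legacy_cftypes(&ex_cgrp_subsys, ex_cftypes) depending on
 * which hierarchies should show the files.
 */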
3889
/**
 * cgroup_file_notify - generate a file modified event for a cgroup_file
 * @cfile: target cgroup_file
 *
 * @cfile must have been obtained by setting cftype->file_offset.
 */
3896void cgroup_file_notify(struct cgroup_file *cfile)
3897{
3898 unsigned long flags;
3899
3900 spin_lock_irqsave(&cgroup_file_kn_lock, flags);
3901 if (cfile->kn)
3902 kernfs_notify(cfile->kn);
3903 spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
3904}
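/*
 * Illustrative usage sketch: a controller that declares a cgroup_file via
 * cftype->file_offset (as the "cgroup.events" entry in
 * cgroup_dfl_base_files does with offsetof(struct cgroup, events_file))
 * can kick poll/inotify waiters after its state changes:
 *
 *	cgroup_file_notify(&cgrp->events_file);
 */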
3905
/**
 * cgroup_task_count - count the number of tasks in a cgroup.
 * @cgrp: the cgroup in question
 *
 * Return the number of tasks in the cgroup.  The count is derived from
 * css_set refcounts and can be higher than the actual number of tasks due
 * to temporary references pinning the css_sets.
 */
3912static int cgroup_task_count(const struct cgroup *cgrp)
3913{
3914 int count = 0;
3915 struct cgrp_cset_link *link;
3916
3917 spin_lock_irq(&css_set_lock);
3918 list_for_each_entry(link, &cgrp->cset_links, cset_link)
3919 count += atomic_read(&link->cset->refcount);
3920 spin_unlock_irq(&css_set_lock);
3921 return count;
3922}
3923
/**
 * css_next_child - find the next child of a given css
 * @pos: the current position (%NULL to initiate traversal)
 * @parent: css whose children to walk
 *
 * This function returns the next child of @parent and should be called
 * under either cgroup_mutex or RCU read lock.  The only requirement is
 * that @parent and @pos are accessible.  The next sibling is guaranteed to
 * be returned regardless of their states.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 */
3941struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
3942 struct cgroup_subsys_state *parent)
3943{
3944 struct cgroup_subsys_state *next;
3945
3946 cgroup_assert_mutex_or_rcu_locked();
 /*
 * @pos could already have been unlinked from the sibling list.
 * Once a cgroup is removed, its ->sibling.next is no longer
 * updated when its next sibling changes.  CSS_RELEASED is set when
 * @pos is taken off list, at which time its next pointer is valid,
 * and, as releases are serialized, the one pointed to by the next
 * pointer is guaranteed to not have started release yet.  This
 * implies that if we observe !CSS_RELEASED on @pos in this RCU
 * critical section, the one pointed to by its next pointer is
 * guaranteed to not have finished its RCU grace period even if we
 * have dropped rcu_read_lock() in-between iterations.
 *
 * If @pos has CSS_RELEASED set, its next pointer can't be
 * dereferenced; however, as each css is given a monotonically
 * increasing unique serial number and always appended to the
 * sibling list, the next one can be found by walking the parent's
 * children until the first css with higher serial number than
 * @pos's.  While this path can be slower, it happens iff iteration
 * races against release and the race window is very small.
 */
3968 if (!pos) {
3969 next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
3970 } else if (likely(!(pos->flags & CSS_RELEASED))) {
3971 next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
3972 } else {
3973 list_for_each_entry_rcu(next, &parent->children, sibling)
3974 if (next->serial_nr > pos->serial_nr)
3975 break;
3976 }
3977
3978
3979
3980
3981
3982 if (&next->sibling != &parent->children)
3983 return next;
3984 return NULL;
3985}
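/*
 * Illustrative sketch: the usual consumer is the css_for_each_child()
 * wrapper from cgroup.h, under RCU read lock; parent_css is assumed to be
 * pinned by the caller:
 *
 *	struct cgroup_subsys_state *child;
 *	int nr = 0;
 *
 *	rcu_read_lock();
 *	css_for_each_child(child, parent_css)
 *		nr++;
 *	rcu_read_unlock();
 */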
3986
/**
 * css_next_descendant_pre - find the next descendant for pre-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * To be used by css_for_each_descendant_pre().  Find the next descendant
 * to visit for pre-order traversal of @root's descendants.  @root is
 * included in the iteration and the first node to be visited.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.  This function will return the correct next descendant as long
 * as both @pos and @root are accessible and @pos is a descendant of @root.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 */
4008struct cgroup_subsys_state *
4009css_next_descendant_pre(struct cgroup_subsys_state *pos,
4010 struct cgroup_subsys_state *root)
4011{
4012 struct cgroup_subsys_state *next;
4013
4014 cgroup_assert_mutex_or_rcu_locked();
4015
4016
4017 if (!pos)
4018 return root;
4019
4020
4021 next = css_next_child(NULL, pos);
4022 if (next)
4023 return next;
4024
4025
4026 while (pos != root) {
4027 next = css_next_child(pos, pos->parent);
4028 if (next)
4029 return next;
4030 pos = pos->parent;
4031 }
4032
4033 return NULL;
4034}
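/*
 * Illustrative sketch: a pre-order walk via the
 * css_for_each_descendant_pre() wrapper visits a parent before any of its
 * children, so state can be propagated top-down.  Under cgroup_mutex or
 * RCU read lock:
 *
 *	struct cgroup_subsys_state *pos;
 *
 *	css_for_each_descendant_pre(pos, root_css)
 *		do_something(pos);	// do_something() is a placeholder
 *
 * For the tree root->{A, B}, A->{AA, AB} the visit order is
 * root, A, AA, AB, B.
 */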
4035
/**
 * css_rightmost_descendant - return the rightmost descendant of a css
 * @pos: css of interest
 *
 * Return the rightmost descendant of @pos.  If there's no descendant, @pos
 * is returned.  This can be used during pre-order traversal to skip the
 * subtree of @pos.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.  This function will return the correct rightmost descendant as
 * long as @pos is accessible.
 */
4049struct cgroup_subsys_state *
4050css_rightmost_descendant(struct cgroup_subsys_state *pos)
4051{
4052 struct cgroup_subsys_state *last, *tmp;
4053
4054 cgroup_assert_mutex_or_rcu_locked();
4055
4056 do {
4057 last = pos;
4058
4059 pos = NULL;
4060 css_for_each_child(tmp, last)
4061 pos = tmp;
4062 } while (pos);
4063
4064 return last;
4065}
4066
4067static struct cgroup_subsys_state *
4068css_leftmost_descendant(struct cgroup_subsys_state *pos)
4069{
4070 struct cgroup_subsys_state *last;
4071
4072 do {
4073 last = pos;
4074 pos = css_next_child(NULL, pos);
4075 } while (pos);
4076
4077 return last;
4078}
4079
/**
 * css_next_descendant_post - find the next descendant for post-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * To be used by css_for_each_descendant_post().  Find the next descendant
 * to visit for post-order traversal of @root's descendants.  @root is
 * included in the iteration and the last node to be visited.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.  This function will return the correct next descendant as long
 * as both @pos and @root are accessible and @pos is a descendant of @root.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 */
4102struct cgroup_subsys_state *
4103css_next_descendant_post(struct cgroup_subsys_state *pos,
4104 struct cgroup_subsys_state *root)
4105{
4106 struct cgroup_subsys_state *next;
4107
4108 cgroup_assert_mutex_or_rcu_locked();
4109
4110
4111 if (!pos)
4112 return css_leftmost_descendant(root);
4113
4114
4115 if (pos == root)
4116 return NULL;
4117
4118
4119 next = css_next_child(pos, pos->parent);
4120 if (next)
4121 return css_leftmost_descendant(next);
4122
4123
4124 return pos->parent;
4125}
4126
/**
 * css_has_online_children - does a css have online children
 * @css: the target css
 *
 * Returns %true if @css has any online children; otherwise, %false.  This
 * function can be called from any context but the caller is responsible
 * for synchronizing against on/offlining as necessary.
 */
4135bool css_has_online_children(struct cgroup_subsys_state *css)
4136{
4137 struct cgroup_subsys_state *child;
4138 bool ret = false;
4139
4140 rcu_read_lock();
4141 css_for_each_child(child, css) {
4142 if (child->flags & CSS_ONLINE) {
4143 ret = true;
4144 break;
4145 }
4146 }
4147 rcu_read_unlock();
4148 return ret;
4149}
4150
4151
4152
4153
4154
4155
4156
4157static void css_task_iter_advance_css_set(struct css_task_iter *it)
4158{
4159 struct list_head *l = it->cset_pos;
4160 struct cgrp_cset_link *link;
4161 struct css_set *cset;
4162
4163 lockdep_assert_held(&css_set_lock);
4164
4165
4166 do {
4167 l = l->next;
4168 if (l == it->cset_head) {
4169 it->cset_pos = NULL;
4170 it->task_pos = NULL;
4171 return;
4172 }
4173
4174 if (it->ss) {
4175 cset = container_of(l, struct css_set,
4176 e_cset_node[it->ss->id]);
4177 } else {
4178 link = list_entry(l, struct cgrp_cset_link, cset_link);
4179 cset = link->cset;
4180 }
4181 } while (!css_set_populated(cset));
4182
4183 it->cset_pos = l;
4184
4185 if (!list_empty(&cset->tasks))
4186 it->task_pos = cset->tasks.next;
4187 else
4188 it->task_pos = cset->mg_tasks.next;
4189
4190 it->tasks_head = &cset->tasks;
4191 it->mg_tasks_head = &cset->mg_tasks;
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208 if (it->cur_cset) {
4209 list_del(&it->iters_node);
4210 put_css_set_locked(it->cur_cset);
4211 }
4212 get_css_set(cset);
4213 it->cur_cset = cset;
4214 list_add(&it->iters_node, &cset->task_iters);
4215}
4216
4217static void css_task_iter_advance(struct css_task_iter *it)
4218{
4219 struct list_head *l = it->task_pos;
4220
4221 lockdep_assert_held(&css_set_lock);
4222 WARN_ON_ONCE(!l);
4223
4224
4225
4226
4227
4228
4229 l = l->next;
4230
4231 if (l == it->tasks_head)
4232 l = it->mg_tasks_head->next;
4233
4234 if (l == it->mg_tasks_head)
4235 css_task_iter_advance_css_set(it);
4236 else
4237 it->task_pos = l;
4238}
4239
/**
 * css_task_iter_start - initiate task iteration
 * @css: the css to walk tasks of
 * @it: the task iterator to use
 *
 * Initiate iteration through the tasks of @css.  The caller can call
 * css_task_iter_next() to walk through the tasks until the function
 * returns NULL.  On completion of iteration, css_task_iter_end() must be
 * called.
 */
4250void css_task_iter_start(struct cgroup_subsys_state *css,
4251 struct css_task_iter *it)
4252{
4253
4254 WARN_ON_ONCE(!use_task_css_set_links);
4255
4256 memset(it, 0, sizeof(*it));
4257
4258 spin_lock_irq(&css_set_lock);
4259
4260 it->ss = css->ss;
4261
4262 if (it->ss)
4263 it->cset_pos = &css->cgroup->e_csets[css->ss->id];
4264 else
4265 it->cset_pos = &css->cgroup->cset_links;
4266
4267 it->cset_head = it->cset_pos;
4268
4269 css_task_iter_advance_css_set(it);
4270
4271 spin_unlock_irq(&css_set_lock);
4272}
4273
/**
 * css_task_iter_next - return the next task for the iterator
 * @it: the task iterator being iterated
 *
 * The "next" function for task iteration.  @it should have been
 * initialized via css_task_iter_start().  Returns NULL when the iteration
 * reaches the end.
 */
4282struct task_struct *css_task_iter_next(struct css_task_iter *it)
4283{
4284 if (it->cur_task) {
4285 put_task_struct(it->cur_task);
4286 it->cur_task = NULL;
4287 }
4288
4289 spin_lock_irq(&css_set_lock);
4290
4291 if (it->task_pos) {
4292 it->cur_task = list_entry(it->task_pos, struct task_struct,
4293 cg_list);
4294 get_task_struct(it->cur_task);
4295 css_task_iter_advance(it);
4296 }
4297
4298 spin_unlock_irq(&css_set_lock);
4299
4300 return it->cur_task;
4301}
4302
/**
 * css_task_iter_end - finish task iteration
 * @it: the task iterator to finish
 *
 * Finish task iteration started by css_task_iter_start().
 */
4309void css_task_iter_end(struct css_task_iter *it)
4310{
4311 if (it->cur_cset) {
4312 spin_lock_irq(&css_set_lock);
4313 list_del(&it->iters_node);
4314 put_css_set_locked(it->cur_cset);
4315 spin_unlock_irq(&css_set_lock);
4316 }
4317
4318 if (it->cur_task)
4319 put_task_struct(it->cur_task);
4320}
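/*
 * Illustrative sketch: walking every task in a css with the iterator
 * triplet above.  css_task_iter_next() returns each task with a reference
 * held on it; the reference is dropped on the following call or by
 * css_task_iter_end().  do_something() is a placeholder:
 *
 *	struct css_task_iter it;
 *	struct task_struct *task;
 *
 *	css_task_iter_start(css, &it);
 *	while ((task = css_task_iter_next(&it)))
 *		do_something(task);
 *	css_task_iter_end(&it);
 */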
4321
/**
 * cgroup_transfer_tasks - move tasks from one cgroup to another
 * @to: cgroup to which the tasks will be moved
 * @from: cgroup in which the tasks currently reside
 *
 * Locking rules between cgroup_post_fork() and the migration path
 * guarantee that, if a task is forking while being migrated, the new child
 * is guaranteed to be either visible in the source cgroup after the
 * parent's migration is complete or put into the target cgroup.  No task
 * can slip out of migration through forking.
 */
4333int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
4334{
4335 LIST_HEAD(preloaded_csets);
4336 struct cgrp_cset_link *link;
4337 struct css_task_iter it;
4338 struct task_struct *task;
4339 int ret;
4340
4341 if (!cgroup_may_migrate_to(to))
4342 return -EBUSY;
4343
4344 mutex_lock(&cgroup_mutex);
4345
4346
4347 spin_lock_irq(&css_set_lock);
4348 list_for_each_entry(link, &from->cset_links, cset_link)
4349 cgroup_migrate_add_src(link->cset, to, &preloaded_csets);
4350 spin_unlock_irq(&css_set_lock);
4351
4352 ret = cgroup_migrate_prepare_dst(&preloaded_csets);
4353 if (ret)
4354 goto out_err;
4355
4356
4357
4358
4359
4360 do {
4361 css_task_iter_start(&from->self, &it);
4362 task = css_task_iter_next(&it);
4363 if (task)
4364 get_task_struct(task);
4365 css_task_iter_end(&it);
4366
4367 if (task) {
4368 ret = cgroup_migrate(task, false, to->root);
4369 put_task_struct(task);
4370 }
4371 } while (task && !ret);
4372out_err:
4373 cgroup_migrate_finish(&preloaded_csets);
4374 mutex_unlock(&cgroup_mutex);
4375 return ret;
4376}
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389enum cgroup_filetype {
4390 CGROUP_FILE_PROCS,
4391 CGROUP_FILE_TASKS,
4392};
4393
4394
4395
4396
4397
4398
4399
4400struct cgroup_pidlist {
4401
4402
4403
4404
4405 struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
4406
4407 pid_t *list;
4408
4409 int length;
4410
4411 struct list_head links;
4412
4413 struct cgroup *owner;
4414
4415 struct delayed_work destroy_dwork;
4416};
4417
4418
4419
4420
4421
4422
4423#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
4424static void *pidlist_allocate(int count)
4425{
4426 if (PIDLIST_TOO_LARGE(count))
4427 return vmalloc(count * sizeof(pid_t));
4428 else
4429 return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
4430}
4431
4432static void pidlist_free(void *p)
4433{
4434 kvfree(p);
4435}
4436
4437
4438
4439
4440
4441static void cgroup_pidlist_destroy_all(struct cgroup *cgrp)
4442{
4443 struct cgroup_pidlist *l, *tmp_l;
4444
4445 mutex_lock(&cgrp->pidlist_mutex);
4446 list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links)
4447 mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0);
4448 mutex_unlock(&cgrp->pidlist_mutex);
4449
4450 flush_workqueue(cgroup_pidlist_destroy_wq);
4451 BUG_ON(!list_empty(&cgrp->pidlists));
4452}
4453
4454static void cgroup_pidlist_destroy_work_fn(struct work_struct *work)
4455{
4456 struct delayed_work *dwork = to_delayed_work(work);
4457 struct cgroup_pidlist *l = container_of(dwork, struct cgroup_pidlist,
4458 destroy_dwork);
4459 struct cgroup_pidlist *tofree = NULL;
4460
4461 mutex_lock(&l->owner->pidlist_mutex);
4462
4463
4464
4465
4466
4467 if (!delayed_work_pending(dwork)) {
4468 list_del(&l->links);
4469 pidlist_free(l->list);
4470 put_pid_ns(l->key.ns);
4471 tofree = l;
4472 }
4473
4474 mutex_unlock(&l->owner->pidlist_mutex);
4475 kfree(tofree);
4476}
4477
4478
4479
4480
4481
4482static int pidlist_uniq(pid_t *list, int length)
4483{
4484 int src, dest = 1;
4485
4486
4487
4488
4489
4490 if (length == 0 || length == 1)
4491 return length;
4492
4493 for (src = 1; src < length; src++) {
4494
4495 while (list[src] == list[src-1]) {
4496 src++;
4497 if (src == length)
4498 goto after;
4499 }
4500
4501 list[dest] = list[src];
4502 dest++;
4503 }
4504after:
4505 return dest;
4506}
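/*
 * Worked example (illustrative): for the sorted input
 *
 *	{1, 1, 2, 3, 3, 3, 4}, length == 7
 *
 * pidlist_uniq() compacts the array in place to {1, 2, 3, 4, ...} and
 * returns 4; entries past the returned length are stale and ignored.
 */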
4507
/*
 * The two pid files - task and cgroup.procs - guaranteed that the result
 * is sorted, which forced this whole pidlist fiasco.  As pid order is
 * different per namespace, each namespace needs differently sorted list,
 * making it impossible to use, for example, single rbtree of member tasks
 * sorted by task pointer.  As pidlists can be fairly large, allocating one
 * per open file is dangerous, so cgroup had to implement shared pool of
 * pidlists keyed by cgroup and namespace.
 *
 * All this extra complexity was caused by the original implementation
 * committing to an entirely unnecessary property.  In the long term, we
 * want to do away with it.  Explicitly scramble sort order if on the
 * default hierarchy so that no such expectation exists in the new
 * interface.
 *
 * Scrambling is done by swapping every two consecutive bits, which is
 * non-trivial to detect while achieving good-enough result.
 */
4526static pid_t pid_fry(pid_t pid)
4527{
4528 unsigned a = pid & 0x55555555;
4529 unsigned b = pid & 0xAAAAAAAA;
4530
4531 return (a << 1) | (b >> 1);
4532}
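/*
 * Worked example (illustrative): pid_fry() swaps every two adjacent bits,
 * so
 *
 *	pid_fry(1) == 2		// 0b01 -> 0b10
 *	pid_fry(2) == 1		// 0b10 -> 0b01
 *	pid_fry(3) == 3		// 0b11 -> 0b11
 *
 * Applying it twice is an identity.  On the default hierarchy the pidlist
 * keeps real pids but sorts them by their fried value (fried_cmppid()
 * below), so reads return pids in an order userland can't rely on, and
 * the fried value doubles as the stable seq_file position used by the
 * binary search in cgroup_pidlist_start().
 */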
4533
4534static pid_t cgroup_pid_fry(struct cgroup *cgrp, pid_t pid)
4535{
4536 if (cgroup_on_dfl(cgrp))
4537 return pid_fry(pid);
4538 else
4539 return pid;
4540}
4541
4542static int cmppid(const void *a, const void *b)
4543{
4544 return *(pid_t *)a - *(pid_t *)b;
4545}
4546
4547static int fried_cmppid(const void *a, const void *b)
4548{
4549 return pid_fry(*(pid_t *)a) - pid_fry(*(pid_t *)b);
4550}
4551
4552static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
4553 enum cgroup_filetype type)
4554{
4555 struct cgroup_pidlist *l;
4556
4557 struct pid_namespace *ns = task_active_pid_ns(current);
4558
4559 lockdep_assert_held(&cgrp->pidlist_mutex);
4560
4561 list_for_each_entry(l, &cgrp->pidlists, links)
4562 if (l->key.type == type && l->key.ns == ns)
4563 return l;
4564 return NULL;
4565}
4566
4567
4568
4569
4570
4571
4572
4573static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
4574 enum cgroup_filetype type)
4575{
4576 struct cgroup_pidlist *l;
4577
4578 lockdep_assert_held(&cgrp->pidlist_mutex);
4579
4580 l = cgroup_pidlist_find(cgrp, type);
4581 if (l)
4582 return l;
4583
4584
4585 l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
4586 if (!l)
4587 return l;
4588
4589 INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn);
4590 l->key.type = type;
4591
4592 l->key.ns = get_pid_ns(task_active_pid_ns(current));
4593 l->owner = cgrp;
4594 list_add(&l->links, &cgrp->pidlists);
4595 return l;
4596}
4597
4598
4599
4600
4601static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
4602 struct cgroup_pidlist **lp)
4603{
4604 pid_t *array;
4605 int length;
4606 int pid, n = 0;
4607 struct css_task_iter it;
4608 struct task_struct *tsk;
4609 struct cgroup_pidlist *l;
4610
4611 lockdep_assert_held(&cgrp->pidlist_mutex);
4612
4613
4614
4615
4616
4617
4618
4619 length = cgroup_task_count(cgrp);
4620 array = pidlist_allocate(length);
4621 if (!array)
4622 return -ENOMEM;
4623
4624 css_task_iter_start(&cgrp->self, &it);
4625 while ((tsk = css_task_iter_next(&it))) {
4626 if (unlikely(n == length))
4627 break;
4628
4629 if (type == CGROUP_FILE_PROCS)
4630 pid = task_tgid_vnr(tsk);
4631 else
4632 pid = task_pid_vnr(tsk);
4633 if (pid > 0)
4634 array[n++] = pid;
4635 }
4636 css_task_iter_end(&it);
4637 length = n;
4638
4639 if (cgroup_on_dfl(cgrp))
4640 sort(array, length, sizeof(pid_t), fried_cmppid, NULL);
4641 else
4642 sort(array, length, sizeof(pid_t), cmppid, NULL);
4643 if (type == CGROUP_FILE_PROCS)
4644 length = pidlist_uniq(array, length);
4645
4646 l = cgroup_pidlist_find_create(cgrp, type);
4647 if (!l) {
4648 pidlist_free(array);
4649 return -ENOMEM;
4650 }
4651
4652
4653 pidlist_free(l->list);
4654 l->list = array;
4655 l->length = length;
4656 *lp = l;
4657 return 0;
4658}
4659
4660
/**
 * cgroupstats_build - build and fill cgroupstats
 * @stats: cgroupstats to fill information into
 * @dentry: A dentry entry belonging to the cgroup for which stats have
 * been requested.
 *
 * Build and fill cgroupstats so that taskstats can export it to user
 * space.
 */
4669int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
4670{
4671 struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
4672 struct cgroup *cgrp;
4673 struct css_task_iter it;
4674 struct task_struct *tsk;
4675
4676
4677 if (dentry->d_sb->s_type != &cgroup_fs_type || !kn ||
4678 kernfs_type(kn) != KERNFS_DIR)
4679 return -EINVAL;
4680
4681 mutex_lock(&cgroup_mutex);
4682
4683
4684
4685
4686
4687
4688 rcu_read_lock();
4689 cgrp = rcu_dereference(kn->priv);
4690 if (!cgrp || cgroup_is_dead(cgrp)) {
4691 rcu_read_unlock();
4692 mutex_unlock(&cgroup_mutex);
4693 return -ENOENT;
4694 }
4695 rcu_read_unlock();
4696
4697 css_task_iter_start(&cgrp->self, &it);
4698 while ((tsk = css_task_iter_next(&it))) {
4699 switch (tsk->state) {
4700 case TASK_RUNNING:
4701 stats->nr_running++;
4702 break;
4703 case TASK_INTERRUPTIBLE:
4704 stats->nr_sleeping++;
4705 break;
4706 case TASK_UNINTERRUPTIBLE:
4707 stats->nr_uninterruptible++;
4708 break;
4709 case TASK_STOPPED:
4710 stats->nr_stopped++;
4711 break;
4712 default:
4713 if (delayacct_is_task_waiting_on_io(tsk))
4714 stats->nr_io_wait++;
4715 break;
4716 }
4717 }
4718 css_task_iter_end(&it);
4719
4720 mutex_unlock(&cgroup_mutex);
4721 return 0;
4722}
4723
4724
4725
4726
4727
4728
4729
4730
4731static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
4732{
4733
4734
4735
4736
4737
4738
4739 struct kernfs_open_file *of = s->private;
4740 struct cgroup *cgrp = seq_css(s)->cgroup;
4741 struct cgroup_pidlist *l;
4742 enum cgroup_filetype type = seq_cft(s)->private;
4743 int index = 0, pid = *pos;
4744 int *iter, ret;
4745
4746 mutex_lock(&cgrp->pidlist_mutex);
4747
4748
4749
4750
4751
4752
4753
4754 if (of->priv)
4755 of->priv = cgroup_pidlist_find(cgrp, type);
4756
4757
4758
4759
4760
4761 if (!of->priv) {
4762 ret = pidlist_array_load(cgrp, type,
4763 (struct cgroup_pidlist **)&of->priv);
4764 if (ret)
4765 return ERR_PTR(ret);
4766 }
4767 l = of->priv;
4768
4769 if (pid) {
4770 int end = l->length;
4771
4772 while (index < end) {
4773 int mid = (index + end) / 2;
4774 if (cgroup_pid_fry(cgrp, l->list[mid]) == pid) {
4775 index = mid;
4776 break;
4777 } else if (cgroup_pid_fry(cgrp, l->list[mid]) <= pid)
4778 index = mid + 1;
4779 else
4780 end = mid;
4781 }
4782 }
4783
4784 if (index >= l->length)
4785 return NULL;
4786
4787 iter = l->list + index;
4788 *pos = cgroup_pid_fry(cgrp, *iter);
4789 return iter;
4790}
4791
4792static void cgroup_pidlist_stop(struct seq_file *s, void *v)
4793{
4794 struct kernfs_open_file *of = s->private;
4795 struct cgroup_pidlist *l = of->priv;
4796
4797 if (l)
4798 mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
4799 CGROUP_PIDLIST_DESTROY_DELAY);
4800 mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex);
4801}
4802
4803static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
4804{
4805 struct kernfs_open_file *of = s->private;
4806 struct cgroup_pidlist *l = of->priv;
4807 pid_t *p = v;
4808 pid_t *end = l->list + l->length;
4809
4810
4811
4812
4813 p++;
4814 if (p >= end) {
4815 return NULL;
4816 } else {
4817 *pos = cgroup_pid_fry(seq_css(s)->cgroup, *p);
4818 return p;
4819 }
4820}
4821
4822static int cgroup_pidlist_show(struct seq_file *s, void *v)
4823{
4824 seq_printf(s, "%d\n", *(int *)v);
4825
4826 return 0;
4827}
4828
4829static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
4830 struct cftype *cft)
4831{
4832 return notify_on_release(css->cgroup);
4833}
4834
4835static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
4836 struct cftype *cft, u64 val)
4837{
4838 if (val)
4839 set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
4840 else
4841 clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
4842 return 0;
4843}
4844
4845static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
4846 struct cftype *cft)
4847{
4848 return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
4849}
4850
4851static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
4852 struct cftype *cft, u64 val)
4853{
4854 if (val)
4855 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
4856 else
4857 clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
4858 return 0;
4859}
4860
4861
4862static struct cftype cgroup_dfl_base_files[] = {
4863 {
4864 .name = "cgroup.procs",
4865 .file_offset = offsetof(struct cgroup, procs_file),
4866 .seq_start = cgroup_pidlist_start,
4867 .seq_next = cgroup_pidlist_next,
4868 .seq_stop = cgroup_pidlist_stop,
4869 .seq_show = cgroup_pidlist_show,
4870 .private = CGROUP_FILE_PROCS,
4871 .write = cgroup_procs_write,
4872 },
4873 {
4874 .name = "cgroup.controllers",
4875 .seq_show = cgroup_controllers_show,
4876 },
4877 {
4878 .name = "cgroup.subtree_control",
4879 .seq_show = cgroup_subtree_control_show,
4880 .write = cgroup_subtree_control_write,
4881 },
4882 {
4883 .name = "cgroup.events",
4884 .flags = CFTYPE_NOT_ON_ROOT,
4885 .file_offset = offsetof(struct cgroup, events_file),
4886 .seq_show = cgroup_events_show,
4887 },
4888 { }
4889};
4890
4891
4892static struct cftype cgroup_legacy_base_files[] = {
4893 {
4894 .name = "cgroup.procs",
4895 .seq_start = cgroup_pidlist_start,
4896 .seq_next = cgroup_pidlist_next,
4897 .seq_stop = cgroup_pidlist_stop,
4898 .seq_show = cgroup_pidlist_show,
4899 .private = CGROUP_FILE_PROCS,
4900 .write = cgroup_procs_write,
4901 },
4902 {
4903 .name = "cgroup.clone_children",
4904 .read_u64 = cgroup_clone_children_read,
4905 .write_u64 = cgroup_clone_children_write,
4906 },
4907 {
4908 .name = "cgroup.sane_behavior",
4909 .flags = CFTYPE_ONLY_ON_ROOT,
4910 .seq_show = cgroup_sane_behavior_show,
4911 },
4912 {
4913 .name = "tasks",
4914 .seq_start = cgroup_pidlist_start,
4915 .seq_next = cgroup_pidlist_next,
4916 .seq_stop = cgroup_pidlist_stop,
4917 .seq_show = cgroup_pidlist_show,
4918 .private = CGROUP_FILE_TASKS,
4919 .write = cgroup_tasks_write,
4920 },
4921 {
4922 .name = "notify_on_release",
4923 .read_u64 = cgroup_read_notify_on_release,
4924 .write_u64 = cgroup_write_notify_on_release,
4925 },
4926 {
4927 .name = "release_agent",
4928 .flags = CFTYPE_ONLY_ON_ROOT,
4929 .seq_show = cgroup_release_agent_show,
4930 .write = cgroup_release_agent_write,
4931 .max_write_len = PATH_MAX - 1,
4932 },
4933 { }
4934};
4935
/*
 * css destruction is four-stage process.
 *
 * 1. Destruction starts.  Killing of the percpu_ref is initiated.
 *    Destruction implies invocation of ->css_offline().  After
 *    ->css_offline(), the css can no longer be used.
 *
 * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
 *    and thus css_tryget_online() is guaranteed to fail, the css can be
 *    offlined by invoking offline_css().  After offlining, the base ref is
 *    put.  Implemented in css_killed_work_fn().
 *
 * 3. When the percpu_ref reaches zero, the only possible remaining
 *    accessors are inside RCU read sections.  css_release() schedules the
 *    RCU callback.
 *
 * 4. After the grace period, the css can be freed.  Implemented in
 *    css_free_work_fn().
 *
 * It is actually hairier because both step 2 and 4 require process context
 * and thus involve punting to css->destroy_work adding two additional
 * steps to the already complex sequence.
 */
4958static void css_free_work_fn(struct work_struct *work)
4959{
4960 struct cgroup_subsys_state *css =
4961 container_of(work, struct cgroup_subsys_state, destroy_work);
4962 struct cgroup_subsys *ss = css->ss;
4963 struct cgroup *cgrp = css->cgroup;
4964
4965 percpu_ref_exit(&css->refcnt);
4966
4967 if (ss) {
4968
4969 struct cgroup_subsys_state *parent = css->parent;
4970 int id = css->id;
4971
4972 ss->css_free(css);
4973 cgroup_idr_remove(&ss->css_idr, id);
4974 cgroup_put(cgrp);
4975
4976 if (parent)
4977 css_put(parent);
4978 } else {
4979
4980 atomic_dec(&cgrp->root->nr_cgrps);
4981 cgroup_pidlist_destroy_all(cgrp);
4982 cancel_work_sync(&cgrp->release_agent_work);
4983
4984 if (cgroup_parent(cgrp)) {
4985
4986
4987
4988
4989
4990
4991 cgroup_put(cgroup_parent(cgrp));
4992 kernfs_put(cgrp->kn);
4993 kfree(cgrp);
4994 } else {
4995
4996
4997
4998
4999
5000 cgroup_destroy_root(cgrp->root);
5001 }
5002 }
5003}
5004
5005static void css_free_rcu_fn(struct rcu_head *rcu_head)
5006{
5007 struct cgroup_subsys_state *css =
5008 container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
5009
5010 INIT_WORK(&css->destroy_work, css_free_work_fn);
5011 queue_work(cgroup_destroy_wq, &css->destroy_work);
5012}
5013
5014static void css_release_work_fn(struct work_struct *work)
5015{
5016 struct cgroup_subsys_state *css =
5017 container_of(work, struct cgroup_subsys_state, destroy_work);
5018 struct cgroup_subsys *ss = css->ss;
5019 struct cgroup *cgrp = css->cgroup;
5020
5021 mutex_lock(&cgroup_mutex);
5022
5023 css->flags |= CSS_RELEASED;
5024 list_del_rcu(&css->sibling);
5025
5026 if (ss) {
5027
5028 cgroup_idr_replace(&ss->css_idr, NULL, css->id);
5029 if (ss->css_released)
5030 ss->css_released(css);
5031 } else {
5032
5033 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
5034 cgrp->id = -1;
5035
5036
5037
5038
5039
5040
5041
5042
5043 if (cgrp->kn)
5044 RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
5045 NULL);
5046 }
5047
5048 mutex_unlock(&cgroup_mutex);
5049
5050 call_rcu(&css->rcu_head, css_free_rcu_fn);
5051}
5052
5053static void css_release(struct percpu_ref *ref)
5054{
5055 struct cgroup_subsys_state *css =
5056 container_of(ref, struct cgroup_subsys_state, refcnt);
5057
5058 INIT_WORK(&css->destroy_work, css_release_work_fn);
5059 queue_work(cgroup_destroy_wq, &css->destroy_work);
5060}
5061
5062static void init_and_link_css(struct cgroup_subsys_state *css,
5063 struct cgroup_subsys *ss, struct cgroup *cgrp)
5064{
5065 lockdep_assert_held(&cgroup_mutex);
5066
5067 cgroup_get(cgrp);
5068
5069 memset(css, 0, sizeof(*css));
5070 css->cgroup = cgrp;
5071 css->ss = ss;
5072 css->id = -1;
5073 INIT_LIST_HEAD(&css->sibling);
5074 INIT_LIST_HEAD(&css->children);
5075 css->serial_nr = css_serial_nr_next++;
5076 atomic_set(&css->online_cnt, 0);
5077
5078 if (cgroup_parent(cgrp)) {
5079 css->parent = cgroup_css(cgroup_parent(cgrp), ss);
5080 css_get(css->parent);
5081 }
5082
5083 BUG_ON(cgroup_css(cgrp, ss));
5084}
5085
5086
5087static int online_css(struct cgroup_subsys_state *css)
5088{
5089 struct cgroup_subsys *ss = css->ss;
5090 int ret = 0;
5091
5092 lockdep_assert_held(&cgroup_mutex);
5093
5094 if (ss->css_online)
5095 ret = ss->css_online(css);
5096 if (!ret) {
5097 css->flags |= CSS_ONLINE;
5098 rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
5099
5100 atomic_inc(&css->online_cnt);
5101 if (css->parent)
5102 atomic_inc(&css->parent->online_cnt);
5103 }
5104 return ret;
5105}
5106
5107
5108static void offline_css(struct cgroup_subsys_state *css)
5109{
5110 struct cgroup_subsys *ss = css->ss;
5111
5112 lockdep_assert_held(&cgroup_mutex);
5113
5114 if (!(css->flags & CSS_ONLINE))
5115 return;
5116
5117 if (ss->css_reset)
5118 ss->css_reset(css);
5119
5120 if (ss->css_offline)
5121 ss->css_offline(css);
5122
5123 css->flags &= ~CSS_ONLINE;
5124 RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
5125
5126 wake_up_all(&css->cgroup->offline_waitq);
5127}
5128
/**
 * css_create - create a cgroup_subsys_state
 * @cgrp: the cgroup new css will be associated with
 * @ss: the subsys of new css
 *
 * Create a new css associated with @cgrp - @ss pair.  On success, the new
 * css is online and installed in @cgrp.  This function doesn't create the
 * interface files.  Returns the new css on success, ERR_PTR(-errno) on
 * failure.
 */
5138static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
5139 struct cgroup_subsys *ss)
5140{
5141 struct cgroup *parent = cgroup_parent(cgrp);
5142 struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
5143 struct cgroup_subsys_state *css;
5144 int err;
5145
5146 lockdep_assert_held(&cgroup_mutex);
5147
5148 css = ss->css_alloc(parent_css);
5149 if (IS_ERR(css))
5150 return css;
5151
5152 init_and_link_css(css, ss, cgrp);
5153
5154 err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
5155 if (err)
5156 goto err_free_css;
5157
5158 err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
5159 if (err < 0)
5160 goto err_free_css;
5161 css->id = err;
5162
5163
5164 list_add_tail_rcu(&css->sibling, &parent_css->children);
5165 cgroup_idr_replace(&ss->css_idr, css, css->id);
5166
5167 err = online_css(css);
5168 if (err)
5169 goto err_list_del;
5170
5171 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
5172 cgroup_parent(parent)) {
5173 pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
5174 current->comm, current->pid, ss->name);
5175 if (!strcmp(ss->name, "memory"))
5176 pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n");
5177 ss->warned_broken_hierarchy = true;
5178 }
5179
5180 return css;
5181
5182err_list_del:
5183 list_del_rcu(&css->sibling);
5184err_free_css:
5185 call_rcu(&css->rcu_head, css_free_rcu_fn);
5186 return ERR_PTR(err);
5187}
5188
5189static struct cgroup *cgroup_create(struct cgroup *parent)
5190{
5191 struct cgroup_root *root = parent->root;
5192 struct cgroup *cgrp, *tcgrp;
5193 int level = parent->level + 1;
5194 int ret;
5195
5196
5197 cgrp = kzalloc(sizeof(*cgrp) +
5198 sizeof(cgrp->ancestor_ids[0]) * (level + 1), GFP_KERNEL);
5199 if (!cgrp)
5200 return ERR_PTR(-ENOMEM);
5201
5202 ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
5203 if (ret)
5204 goto out_free_cgrp;
5205
5206
5207
5208
5209
5210 cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
5211 if (cgrp->id < 0) {
5212 ret = -ENOMEM;
5213 goto out_cancel_ref;
5214 }
5215
5216 init_cgroup_housekeeping(cgrp);
5217
5218 cgrp->self.parent = &parent->self;
5219 cgrp->root = root;
5220 cgrp->level = level;
5221
5222 for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp))
5223 cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
5224
5225 if (notify_on_release(parent))
5226 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
5227
5228 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
5229 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
5230
5231 cgrp->self.serial_nr = css_serial_nr_next++;
5232
5233
5234 list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
5235 atomic_inc(&root->nr_cgrps);
5236 cgroup_get(parent);
5237
5238
5239
5240
5241
5242 cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
5243
5244
5245
5246
5247
5248 if (!cgroup_on_dfl(cgrp))
5249 cgrp->subtree_control = cgroup_control(cgrp);
5250
5251 cgroup_propagate_control(cgrp);
5252
5253
5254 ret = cgroup_apply_control_enable(cgrp);
5255 if (ret)
5256 goto out_destroy;
5257
5258 return cgrp;
5259
5260out_cancel_ref:
5261 percpu_ref_exit(&cgrp->self.refcnt);
5262out_free_cgrp:
5263 kfree(cgrp);
5264 return ERR_PTR(ret);
5265out_destroy:
5266 cgroup_destroy_locked(cgrp);
5267 return ERR_PTR(ret);
5268}
5269
5270static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
5271 umode_t mode)
5272{
5273 struct cgroup *parent, *cgrp;
5274 struct kernfs_node *kn;
5275 int ret;
5276
5277
5278 if (strchr(name, '\n'))
5279 return -EINVAL;
5280
5281 parent = cgroup_kn_lock_live(parent_kn, false);
5282 if (!parent)
5283 return -ENODEV;
5284
5285 cgrp = cgroup_create(parent);
5286 if (IS_ERR(cgrp)) {
5287 ret = PTR_ERR(cgrp);
5288 goto out_unlock;
5289 }
5290
5291
5292 kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
5293 if (IS_ERR(kn)) {
5294 ret = PTR_ERR(kn);
5295 goto out_destroy;
5296 }
5297 cgrp->kn = kn;
5298
5299
5300
5301
5302
5303 kernfs_get(kn);
5304
5305 ret = cgroup_kn_set_ugid(kn);
5306 if (ret)
5307 goto out_destroy;
5308
5309 ret = css_populate_dir(&cgrp->self);
5310 if (ret)
5311 goto out_destroy;
5312
5313 ret = cgroup_apply_control_enable(cgrp);
5314 if (ret)
5315 goto out_destroy;
5316
5317
5318 kernfs_activate(kn);
5319
5320 ret = 0;
5321 goto out_unlock;
5322
5323out_destroy:
5324 cgroup_destroy_locked(cgrp);
5325out_unlock:
5326 cgroup_kn_unlock(parent_kn);
5327 return ret;
5328}
5329
5330
5331
5332
5333
5334
5335static void css_killed_work_fn(struct work_struct *work)
5336{
5337 struct cgroup_subsys_state *css =
5338 container_of(work, struct cgroup_subsys_state, destroy_work);
5339
5340 mutex_lock(&cgroup_mutex);
5341
5342 do {
5343 offline_css(css);
5344 css_put(css);
5345
5346 css = css->parent;
5347 } while (css && atomic_dec_and_test(&css->online_cnt));
5348
5349 mutex_unlock(&cgroup_mutex);
5350}
5351
5352
5353static void css_killed_ref_fn(struct percpu_ref *ref)
5354{
5355 struct cgroup_subsys_state *css =
5356 container_of(ref, struct cgroup_subsys_state, refcnt);
5357
5358 if (atomic_dec_and_test(&css->online_cnt)) {
5359 INIT_WORK(&css->destroy_work, css_killed_work_fn);
5360 queue_work(cgroup_destroy_wq, &css->destroy_work);
5361 }
5362}
5363
/**
 * kill_css - destroy a css
 * @css: css to destroy
 *
 * This function initiates destruction of @css by removing cgroup interface
 * files and putting its base reference.  ->css_offline() will be invoked
 * asynchronously once css_tryget_online() is guaranteed to fail and when
 * the reference count reaches zero, @css will be released.
 */
5373static void kill_css(struct cgroup_subsys_state *css)
5374{
5375 lockdep_assert_held(&cgroup_mutex);
5376
5377
5378
5379
5380
5381 css_clear_dir(css);
5382
5383
5384
5385
5386
5387 css_get(css);
5388
5389
5390
5391
5392
5393
5394
5395
5396
5397
5398
5399 percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
5400}
5401
/**
 * cgroup_destroy_locked - the first stage of cgroup destruction
 * @cgrp: cgroup to be destroyed
 *
 * css's make use of percpu refcnts whose killing latency shouldn't be
 * exposed to userland and are RCU protected.  Also, cgroup core needs to
 * guarantee that css_tryget_online() won't succeed by the time
 * ->css_offline() is invoked.  To satisfy all the requirements,
 * destruction is implemented in the following two steps.
 *
 * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
 *     userland visible parts and start killing the percpu refcnts of
 *     csses.  This leads to css_offline() of all csses.
 *
 * s2. When the percpu_ref reaches zero, the only possible remaining
 *     accessors are inside RCU read sections.  css_release() schedules the
 *     RCU callback which in turn puts the base refs.  Destruction is
 *     complete after all csses are released.
 *
 * This function implements s1.  After this step, @cgrp is gone as far as
 * the userland is concerned and a new cgroup with the same name may be
 * created.  As cgroup doesn't care about the names internally, this
 * doesn't cause any problem.
 */
5426static int cgroup_destroy_locked(struct cgroup *cgrp)
5427 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
5428{
5429 struct cgroup_subsys_state *css;
5430 struct cgrp_cset_link *link;
5431 int ssid;
5432
5433 lockdep_assert_held(&cgroup_mutex);
5434
5435
5436
5437
5438
5439 if (cgroup_is_populated(cgrp))
5440 return -EBUSY;
5441
5442
5443
5444
5445
5446
5447 if (css_has_online_children(&cgrp->self))
5448 return -EBUSY;
5449
5450
5451
5452
5453
5454
5455
5456 cgrp->self.flags &= ~CSS_ONLINE;
5457
5458 spin_lock_irq(&css_set_lock);
5459 list_for_each_entry(link, &cgrp->cset_links, cset_link)
5460 link->cset->dead = true;
5461 spin_unlock_irq(&css_set_lock);
5462
5463
5464 for_each_css(css, ssid, cgrp)
5465 kill_css(css);
5466
5467
5468
5469
5470
5471 kernfs_remove(cgrp->kn);
5472
5473 check_for_release(cgroup_parent(cgrp));
5474
5475
5476 percpu_ref_kill(&cgrp->self.refcnt);
5477
5478 return 0;
}
5480
5481static int cgroup_rmdir(struct kernfs_node *kn)
5482{
5483 struct cgroup *cgrp;
5484 int ret = 0;
5485
5486 cgrp = cgroup_kn_lock_live(kn, false);
5487 if (!cgrp)
5488 return 0;
5489
5490 ret = cgroup_destroy_locked(cgrp);
5491
5492 cgroup_kn_unlock(kn);
5493 return ret;
5494}
5495
5496static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
5497 .remount_fs = cgroup_remount,
5498 .show_options = cgroup_show_options,
5499 .mkdir = cgroup_mkdir,
5500 .rmdir = cgroup_rmdir,
5501 .rename = cgroup_rename,
5502 .show_path = cgroup_show_path,
5503};
5504
5505static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
5506{
5507 struct cgroup_subsys_state *css;
5508
5509 pr_debug("Initializing cgroup subsys %s\n", ss->name);
5510
5511 mutex_lock(&cgroup_mutex);
5512
5513 idr_init(&ss->css_idr);
5514 INIT_LIST_HEAD(&ss->cfts);
5515
5516
5517 ss->root = &cgrp_dfl_root;
5518 css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
5519
5520 BUG_ON(IS_ERR(css));
5521 init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
5522
5523
5524
5525
5526
5527 css->flags |= CSS_NO_REF;
5528
5529 if (early) {
5530
5531 css->id = 1;
5532 } else {
5533 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
5534 BUG_ON(css->id < 0);
5535 }
5536
5537
5538
5539
5540
5541 init_css_set.subsys[ss->id] = css;
5542
5543 have_fork_callback |= (bool)ss->fork << ss->id;
5544 have_exit_callback |= (bool)ss->exit << ss->id;
5545 have_free_callback |= (bool)ss->free << ss->id;
5546 have_canfork_callback |= (bool)ss->can_fork << ss->id;
5547
5548
5549
5550
5551 BUG_ON(!list_empty(&init_task.tasks));
5552
5553 BUG_ON(online_css(css));
5554
5555 mutex_unlock(&cgroup_mutex);
5556}
5557
/**
 * cgroup_init_early - cgroup initialization at system boot
 *
 * Initialize cgroups at system boot, and initialize any
 * subsystems that request early init.
 */
5564int __init cgroup_init_early(void)
5565{
5566 static struct cgroup_sb_opts __initdata opts;
5567 struct cgroup_subsys *ss;
5568 int i;
5569
5570 init_cgroup_root(&cgrp_dfl_root, &opts);
5571 cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
5572
5573 RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
5574
5575 for_each_subsys(ss, i) {
5576 WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
5577 "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
5578 i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
5579 ss->id, ss->name);
5580 WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
5581 "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);
5582
5583 ss->id = i;
5584 ss->name = cgroup_subsys_name[i];
5585 if (!ss->legacy_name)
5586 ss->legacy_name = cgroup_subsys_name[i];
5587
5588 if (ss->early_init)
5589 cgroup_init_subsys(ss, true);
5590 }
5591 return 0;
5592}
5593
5594static u16 cgroup_disable_mask __initdata;
5595
/**
 * cgroup_init - cgroup initialization
 *
 * Register cgroup filesystem and /proc file, and initialize
 * any subsystems that didn't request early init.
 */
int __init cgroup_init(void)
{
	struct cgroup_subsys *ss;
	int ssid;

	BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
	BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
	BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
	BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));

	get_user_ns(init_cgroup_ns.user_ns);

	mutex_lock(&cgroup_mutex);

	/*
	 * Add init_css_set to the hash table so that dfl_root can link to
	 * it during init.
	 */
	hash_add(css_set_table, &init_css_set.hlist,
		 css_set_hash(init_css_set.subsys));

	BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));

	mutex_unlock(&cgroup_mutex);

	for_each_subsys(ss, ssid) {
		if (ss->early_init) {
			struct cgroup_subsys_state *css =
				init_css_set.subsys[ss->id];

			css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
						   GFP_KERNEL);
			BUG_ON(css->id < 0);
		} else {
			cgroup_init_subsys(ss, false);
		}

		list_add_tail(&init_css_set.e_cset_node[ssid],
			      &cgrp_dfl_root.cgrp.e_csets[ssid]);

		/*
		 * Setting dfl_root subsys_mask needs to consider the
		 * disabled flag and cftype registration needs kmalloc,
		 * both of which aren't available during early_init.
		 */
		if (cgroup_disable_mask & (1 << ssid)) {
			static_branch_disable(cgroup_subsys_enabled_key[ssid]);
			printk(KERN_INFO "Disabling %s control group subsystem\n",
			       ss->name);
			continue;
		}

		if (cgroup_ssid_no_v1(ssid))
			printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
			       ss->name);

		cgrp_dfl_root.subsys_mask |= 1 << ss->id;

		if (ss->implicit_on_dfl)
			cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
		else if (!ss->dfl_cftypes)
			cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;

		if (ss->dfl_cftypes == ss->legacy_cftypes) {
			WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
		} else {
			WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
			WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
		}

		if (ss->bind)
			ss->bind(init_css_set.subsys[ssid]);
	}

	/* init_css_set.subsys[] has been updated, re-hash */
	hash_del(&init_css_set.hlist);
	hash_add(css_set_table, &init_css_set.hlist,
		 css_set_hash(init_css_set.subsys));

	WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
	WARN_ON(register_filesystem(&cgroup_fs_type));
	WARN_ON(register_filesystem(&cgroup2_fs_type));
	WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations));

	return 0;
}
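
/*
 * Illustrative userspace sketch (not part of this file): once
 * cgroup2_fs_type is registered above, the unified hierarchy can be
 * mounted via mount(2); the mount point path is an arbitrary example.
 *
 *	#include <sys/mount.h>
 *	#include <stdio.h>
 *
 *	if (mount("none", "/sys/fs/cgroup", "cgroup2", 0, NULL))
 *		perror("mount cgroup2");
 */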

static int __init cgroup_wq_init(void)
{
	/*
	 * There isn't much point in executing destruction path in
	 * parallel.  Good chunk of destruction path is blocked behind
	 * cgroup_mutex anyway, so a dedicated workqueue with @max_active
	 * of 1 is enough.  This can't be allocated from cgroup_init()
	 * because workqueues aren't available that early in boot; hence
	 * the separate core_initcall() below.
	 */
	cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
	BUG_ON(!cgroup_destroy_wq);

	/*
	 * Used to destroy pidlists and separate to serve as flush domain.
	 * Cap @max_active to 1 too.
	 */
	cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
						    0, 1);
	BUG_ON(!cgroup_pidlist_destroy_wq);

	return 0;
}
core_initcall(cgroup_wq_init);

/*
 * proc_cgroup_show()
 *  - Print task's cgroup paths into seq_file, one line for each hierarchy
 *  - Used for /proc/<pid>/cgroup.
 */
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
		     struct pid *pid, struct task_struct *tsk)
{
	char *buf, *path;
	int retval;
	struct cgroup_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	for_each_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int ssid, count = 0;

		if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
			continue;

		seq_printf(m, "%d:", root->hierarchy_id);
		if (root != &cgrp_dfl_root)
			for_each_subsys(ss, ssid)
				if (root->subsys_mask & (1 << ssid))
					seq_printf(m, "%s%s", count++ ? "," : "",
						   ss->legacy_name);
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		seq_putc(m, ':');

		cgrp = task_cgroup_from_root(tsk, root);

		/*
		 * On traditional hierarchies, all zombie tasks show up as
		 * belonging to the root cgroup.  On the default hierarchy,
		 * while a zombie doesn't show up in "cgroup.procs" and
		 * thus can't be migrated, its /proc/PID/cgroup keeps
		 * reporting the cgroup it belonged to before exiting.  If
		 * the cgroup is removed before the zombie is reaped,
		 * " (deleted)" is appended to the path.
		 */
		if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
			path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
						     current->nsproxy->cgroup_ns);
			if (!path) {
				retval = -ENAMETOOLONG;
				goto out_unlock;
			}
		} else {
			path = "/";
		}

		seq_puts(m, path);

		if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
			seq_puts(m, " (deleted)\n");
		else
			seq_putc(m, '\n');
	}

	retval = 0;
out_unlock:
	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);
	kfree(buf);
out:
	return retval;
}
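
/*
 * Example of the resulting /proc/<pid>/cgroup format, as produced by the
 * seq_printf() calls above ("hierarchy-id:controller-list:path"); the
 * specific hierarchy ids, controllers and paths are illustrative.
 *
 *	4:memory:/user.slice
 *	2:cpu,cpuacct:/user.slice/jobs
 *	1:name=systemd:/user.slice/user-1000.slice
 *	0::/user.slice
 *
 * The default hierarchy prints hierarchy id 0 and an empty controller
 * list, hence the "0::" prefix on its line.
 */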

/* Display information about each subsystem and each hierarchy */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
	struct cgroup_subsys *ss;
	int i;

	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
	/*
	 * ideally we don't want subsystems moving around while we do this.
	 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
	 * subsys/hierarchy state.
	 */
	mutex_lock(&cgroup_mutex);

	for_each_subsys(ss, i)
		seq_printf(m, "%s\t%d\t%d\t%d\n",
			   ss->legacy_name, ss->root->hierarchy_id,
			   atomic_read(&ss->root->nr_cgrps),
			   cgroup_ssid_enabled(i));

	mutex_unlock(&cgroup_mutex);
	return 0;
}

static int cgroupstats_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_cgroupstats_show, NULL);
}

static const struct file_operations proc_cgroupstats_operations = {
	.open = cgroupstats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
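
/*
 * Example /proc/cgroups output in the tab-separated format emitted above;
 * the hierarchy ids and cgroup counts shown are illustrative.
 *
 *	#subsys_name	hierarchy	num_cgroups	enabled
 *	cpuset		3		1		1
 *	memory		2		56		1
 */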

/**
 * cgroup_fork - initialize cgroup related fields during copy_process()
 * @child: pointer to task_struct of forking parent process.
 *
 * A task is associated with the init_css_set until cgroup_post_fork()
 * attaches it to the parent's css_set.  Empty cg_list indicates that
 * @child isn't holding reference to its css_set.
 */
void cgroup_fork(struct task_struct *child)
{
	RCU_INIT_POINTER(child->cgroups, &init_css_set);
	INIT_LIST_HEAD(&child->cg_list);
}

/**
 * cgroup_can_fork - called on a new task before the process is exposed
 * @child: the task in question.
 *
 * This calls the subsystem can_fork() callbacks. If the can_fork() callback
 * returns an error, the fork aborts with that error code. This allows for
 * a cgroup subsystem to conditionally allow or deny new forks.
 */
int cgroup_can_fork(struct task_struct *child)
{
	struct cgroup_subsys *ss;
	int i, j, ret;

	do_each_subsys_mask(ss, i, have_canfork_callback) {
		ret = ss->can_fork(child);
		if (ret)
			goto out_revert;
	} while_each_subsys_mask();

	return 0;

out_revert:
	for_each_subsys(ss, j) {
		if (j >= i)
			break;
		if (ss->cancel_fork)
			ss->cancel_fork(child);
	}

	return ret;
}
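
/*
 * Illustrative sketch (not part of this file): a minimal can_fork /
 * cancel_fork pair following the protocol above.  A subsystem might
 * charge a per-cgroup counter in can_fork() and uncharge it in
 * cancel_fork() if the fork later fails.  The "foo" names and helpers
 * are hypothetical.
 *
 *	static int foo_can_fork(struct task_struct *task)
 *	{
 *		if (!foo_try_charge(task_css(task, foo_cgrp_id)))
 *			return -EAGAIN;
 *		return 0;
 *	}
 *
 *	static void foo_cancel_fork(struct task_struct *task)
 *	{
 *		foo_uncharge(task_css(task, foo_cgrp_id));
 *	}
 */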

/**
 * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
 * @child: the task in question
 *
 * This calls the cancel_fork() callbacks if a fork failed *after*
 * cgroup_can_fork() succeeded.
 */
void cgroup_cancel_fork(struct task_struct *child)
{
	struct cgroup_subsys *ss;
	int i;

	for_each_subsys(ss, i)
		if (ss->cancel_fork)
			ss->cancel_fork(child);
}

/**
 * cgroup_post_fork - called on a new task after adding it to the task list
 * @child: the task in question
 *
 * Adds the task to the list running through its css_set if necessary and
 * calls the subsystem fork() callbacks.  Has to be after the task is
 * visible on the task list in case we race with the first call to
 * cgroup_task_iter_start() - to guarantee that the new task ends up on its
 * list.
 */
void cgroup_post_fork(struct task_struct *child)
{
	struct cgroup_subsys *ss;
	int i;

	/*
	 * This may race against cgroup_enable_task_cg_lists().  As that
	 * function sets use_task_css_set_links before grabbing
	 * tasklist_lock and we just went through tasklist_lock to add
	 * @child, it's guaranteed that either we see the set
	 * use_task_css_set_links or cgroup_enable_task_cg_lists() sees
	 * @child during its iteration.
	 *
	 * If we won the race, @child is associated with %current's
	 * css_set.  Grabbing css_set_lock guarantees both that the
	 * association is stable, and, on completion of the parent's
	 * migration, @child is visible in the source of migration or
	 * already in the destination cgroup.
	 */
	if (use_task_css_set_links) {
		struct css_set *cset;

		spin_lock_irq(&css_set_lock);
		cset = task_css_set(current);
		if (list_empty(&child->cg_list)) {
			get_css_set(cset);
			css_set_move_task(child, NULL, cset, false);
		}
		spin_unlock_irq(&css_set_lock);
	}

	/*
	 * Call ss->fork().  This must happen after @child is linked on
	 * css_set; otherwise, @child might change state between ->fork()
	 * and addition to css_set.
	 */
	do_each_subsys_mask(ss, i, have_fork_callback) {
		ss->fork(child);
	} while_each_subsys_mask();
}

/**
 * cgroup_exit - detach cgroup from exiting task
 * @tsk: pointer to task_struct of exiting process
 *
 * Description: Detach cgroup from @tsk and release it.
 *
 * Note that cgroups marked notify_on_release force every task in
 * them to take the global cgroup_mutex mutex when exiting.
 * This could impact scaling on very large systems.  Be reluctant to
 * use notify_on_release cgroups where very high task exit scaling
 * is required on large systems.
 *
 * @tsk is unlinked from its css_set here but keeps pointing to it;
 * the reference is dropped later in cgroup_free().  A task which was
 * never linked on its css_set doesn't hold a reference, so one is
 * taken here to keep the put in cgroup_free() balanced.
 */
void cgroup_exit(struct task_struct *tsk)
{
	struct cgroup_subsys *ss;
	struct css_set *cset;
	int i;

	/*
	 * Unlink @tsk from its css_set.  As migration path can't race
	 * with us, we can check css_set and cg_list without synchronization.
	 */
	cset = task_css_set(tsk);

	if (!list_empty(&tsk->cg_list)) {
		spin_lock_irq(&css_set_lock);
		css_set_move_task(tsk, cset, NULL, false);
		spin_unlock_irq(&css_set_lock);
	} else {
		get_css_set(cset);
	}

	/* see cgroup_post_fork() for details */
	do_each_subsys_mask(ss, i, have_exit_callback) {
		ss->exit(tsk);
	} while_each_subsys_mask();
}

void cgroup_free(struct task_struct *task)
{
	struct css_set *cset = task_css_set(task);
	struct cgroup_subsys *ss;
	int ssid;

	do_each_subsys_mask(ss, ssid, have_free_callback) {
		ss->free(task);
	} while_each_subsys_mask();

	put_css_set(cset);
}

static void check_for_release(struct cgroup *cgrp)
{
	if (notify_on_release(cgrp) && !cgroup_is_populated(cgrp) &&
	    !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
		schedule_work(&cgrp->release_agent_work);
}

/*
 * Notify userspace when a cgroup is released, by running the
 * configured release agent with the name of the cgroup (path
 * relative to the root of cgroup file system) as the argument.
 *
 * Most likely, this user command will try to rmdir this cgroup.
 *
 * This races with the possibility that some other task will be
 * attached to this cgroup before it is removed, or that some other
 * user task will 'mkdir' a child cgroup of this cgroup.  That's ok.
 * The presumed 'rmdir' will fail quietly if this cgroup is no longer
 * unused, and this cgroup will be reprieved from its death sentence,
 * to continue to serve a useful existence.  Next time it's released,
 * we will get notified again, if it still has 'notify_on_release' set.
 *
 * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
 * means only wait until the task is successfully execve()'d.  The
 * separate release agent task is forked by call_usermodehelper(),
 * then control in this thread returns here, without waiting for the
 * release agent task.  We don't bother to wait because the caller of
 * this routine has no use for the exit status of the release agent
 * task, so no sense holding our caller up for that.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	struct cgroup *cgrp =
		container_of(work, struct cgroup, release_agent_work);
	char *pathbuf = NULL, *agentbuf = NULL, *path;
	char *argv[3], *envp[3];

	mutex_lock(&cgroup_mutex);

	pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
	agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
	if (!pathbuf || !agentbuf)
		goto out;

	spin_lock_irq(&css_set_lock);
	path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
	spin_unlock_irq(&css_set_lock);
	if (!path)
		goto out;

	argv[0] = agentbuf;
	argv[1] = path;
	argv[2] = NULL;

	/* minimal command environment */
	envp[0] = "HOME=/";
	envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
	envp[2] = NULL;

	mutex_unlock(&cgroup_mutex);
	call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
	goto out_free;
out:
	mutex_unlock(&cgroup_mutex);
out_free:
	kfree(agentbuf);
	kfree(pathbuf);
}

static int __init cgroup_disable(char *str)
{
	struct cgroup_subsys *ss;
	char *token;
	int i;

	while ((token = strsep(&str, ",")) != NULL) {
		if (!*token)
			continue;

		for_each_subsys(ss, i) {
			if (strcmp(token, ss->name) &&
			    strcmp(token, ss->legacy_name))
				continue;
			cgroup_disable_mask |= 1 << i;
		}
	}
	return 1;
}
__setup("cgroup_disable=", cgroup_disable);
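
/*
 * As the strsep() loop above shows, the parameter is a comma-separated
 * list of subsystem names matched against ss->name or ss->legacy_name,
 * e.g. on the kernel command line (subsystem names illustrative):
 *
 *	cgroup_disable=memory,blkio
 */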

static int __init cgroup_no_v1(char *str)
{
	struct cgroup_subsys *ss;
	char *token;
	int i;

	while ((token = strsep(&str, ",")) != NULL) {
		if (!*token)
			continue;

		if (!strcmp(token, "all")) {
			cgroup_no_v1_mask = U16_MAX;
			break;
		}

		for_each_subsys(ss, i) {
			if (strcmp(token, ss->name) &&
			    strcmp(token, ss->legacy_name))
				continue;

			cgroup_no_v1_mask |= 1 << i;
		}
	}
	return 1;
}
__setup("cgroup_no_v1=", cgroup_no_v1);
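
/*
 * Same syntax as cgroup_disable= above, with the special token "all"
 * handled before the per-subsystem match, e.g. (names illustrative):
 *
 *	cgroup_no_v1=cpu,cpuacct
 *	cgroup_no_v1=all
 */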

/**
 * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
 * @dentry: directory dentry of interest
 * @ss: subsystem of interest
 *
 * If @dentry is a directory for a cgroup which has @ss enabled on it, try
 * to get the corresponding css and return it.  If such css doesn't exist
 * or can't be pinned, an ERR_PTR value is returned.
 */
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
						       struct cgroup_subsys *ss)
{
	struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
	struct file_system_type *s_type = dentry->d_sb->s_type;
	struct cgroup_subsys_state *css = NULL;
	struct cgroup *cgrp;

	/* is @dentry a cgroup dir? */
	if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
	    !kn || kernfs_type(kn) != KERNFS_DIR)
		return ERR_PTR(-EBADF);

	rcu_read_lock();

	/*
	 * This path doesn't originate from kernfs and @kn could already
	 * have been or be removed at any point.  @kn->priv is RCU
	 * protected for this access.  See css_release_work_fn() for details.
	 */
	cgrp = rcu_dereference(kn->priv);
	if (cgrp)
		css = cgroup_css(cgrp, ss);

	if (!css || !css_tryget_online(css))
		css = ERR_PTR(-ENOENT);

	rcu_read_unlock();
	return css;
}
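
/*
 * Illustrative usage sketch (not from this file): a successfully returned
 * css is pinned and must be released with css_put() when done.  Using
 * memory_cgrp_subsys here assumes CONFIG_MEMCG.
 *
 *	struct cgroup_subsys_state *css;
 *
 *	css = css_tryget_online_from_dir(dentry, &memory_cgrp_subsys);
 *	if (!IS_ERR(css)) {
 *		... use css ...
 *		css_put(css);
 *	}
 */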

/**
 * css_from_id - lookup css by id
 * @id: the cgroup id
 * @ss: cgroup subsys to be looked into
 *
 * Returns the css if there's valid one with @id, otherwise returns NULL.
 * Should be called under rcu_read_lock().
 */
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return id > 0 ? idr_find(&ss->css_idr, id) : NULL;
}
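
/*
 * Illustrative usage sketch: per the WARN_ON_ONCE() above, lookups must
 * happen under RCU, and the result is only usable past the read-side
 * critical section if it is pinned.  memory_cgrp_subsys assumes
 * CONFIG_MEMCG.
 *
 *	rcu_read_lock();
 *	css = css_from_id(id, &memory_cgrp_subsys);
 *	if (css && !css_tryget_online(css))
 *		css = NULL;
 *	rcu_read_unlock();
 */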

/**
 * cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path
 * @path: path on the default hierarchy
 *
 * Find the cgroup at @path on the default hierarchy, increment its
 * refcount and return it.  Returns ERR_PTR(-ENOENT) if @path doesn't
 * exist and ERR_PTR(-ENOTDIR) if @path points to a non-directory.
 */
struct cgroup *cgroup_get_from_path(const char *path)
{
	struct kernfs_node *kn;
	struct cgroup *cgrp;

	mutex_lock(&cgroup_mutex);

	kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
	if (kn) {
		if (kernfs_type(kn) == KERNFS_DIR) {
			cgrp = kn->priv;
			cgroup_get(cgrp);
		} else {
			cgrp = ERR_PTR(-ENOTDIR);
		}
		kernfs_put(kn);
	} else {
		cgrp = ERR_PTR(-ENOENT);
	}

	mutex_unlock(&cgroup_mutex);
	return cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_get_from_path);
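
/*
 * Illustrative usage sketch: the returned cgroup holds a reference that
 * the caller must drop with cgroup_put().  The path is relative to the
 * default hierarchy root; "/mygroup" is a hypothetical example.
 *
 *	struct cgroup *cgrp;
 *
 *	cgrp = cgroup_get_from_path("/mygroup");
 *	if (!IS_ERR(cgrp)) {
 *		... use cgrp ...
 *		cgroup_put(cgrp);
 *	}
 */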

/*
 * sock->sk_cgrp_data handling.  For more info, see sock_cgroup_data
 * definition in cgroup-defs.h.
 */
#ifdef CONFIG_SOCK_CGROUP_DATA

#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)

DEFINE_SPINLOCK(cgroup_sk_update_lock);
static bool cgroup_sk_alloc_disabled __read_mostly;

void cgroup_sk_alloc_disable(void)
{
	if (cgroup_sk_alloc_disabled)
		return;
	pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
	cgroup_sk_alloc_disabled = true;
}

#else

#define cgroup_sk_alloc_disabled	false

#endif

void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
{
	if (cgroup_sk_alloc_disabled)
		return;

	rcu_read_lock();

	/* keep retrying until the task's default cgroup can be pinned */
	while (true) {
		struct css_set *cset;

		cset = task_css_set(current);
		if (likely(cgroup_tryget(cset->dfl_cgrp))) {
			skcd->val = (unsigned long)cset->dfl_cgrp;
			break;
		}
		cpu_relax();
	}

	rcu_read_unlock();
}

void cgroup_sk_free(struct sock_cgroup_data *skcd)
{
	cgroup_put(sock_cgroup_ptr(skcd));
}

#endif	/* CONFIG_SOCK_CGROUP_DATA */

/* cgroup namespaces */

static struct cgroup_namespace *alloc_cgroup_ns(void)
{
	struct cgroup_namespace *new_ns;
	int ret;

	new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL);
	if (!new_ns)
		return ERR_PTR(-ENOMEM);
	ret = ns_alloc_inum(&new_ns->ns);
	if (ret) {
		kfree(new_ns);
		return ERR_PTR(ret);
	}
	atomic_set(&new_ns->count, 1);
	new_ns->ns.ops = &cgroupns_operations;
	return new_ns;
}

void free_cgroup_ns(struct cgroup_namespace *ns)
{
	put_css_set(ns->root_cset);
	put_user_ns(ns->user_ns);
	ns_free_inum(&ns->ns);
	kfree(ns);
}
EXPORT_SYMBOL(free_cgroup_ns);

struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
					struct user_namespace *user_ns,
					struct cgroup_namespace *old_ns)
{
	struct cgroup_namespace *new_ns;
	struct css_set *cset;

	BUG_ON(!old_ns);

	if (!(flags & CLONE_NEWCGROUP)) {
		get_cgroup_ns(old_ns);
		return old_ns;
	}

	/* Allow only sysadmin to create cgroup namespace. */
	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	cset = task_css_set(current);
	get_css_set(cset);

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);

	new_ns = alloc_cgroup_ns();
	if (IS_ERR(new_ns)) {
		put_css_set(cset);
		return new_ns;
	}

	new_ns->user_ns = get_user_ns(user_ns);
	new_ns->root_cset = cset;

	return new_ns;
}
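
/*
 * Illustrative userspace sketch (not part of this file): a process with
 * CAP_SYS_ADMIN can reach copy_cgroup_ns() via unshare(2), making its
 * current cgroup the root of its namespace's view of the hierarchy.
 *
 *	#include <sched.h>
 *	#include <stdio.h>
 *
 *	if (unshare(CLONE_NEWCGROUP))
 *		perror("unshare(CLONE_NEWCGROUP)");
 */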

static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
{
	return container_of(ns, struct cgroup_namespace, ns);
}

static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);

	if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
	    !ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	/* Don't need to do anything if we are attaching to our own cgroupns. */
	if (cgroup_ns == nsproxy->cgroup_ns)
		return 0;

	get_cgroup_ns(cgroup_ns);
	put_cgroup_ns(nsproxy->cgroup_ns);
	nsproxy->cgroup_ns = cgroup_ns;

	return 0;
}

static struct ns_common *cgroupns_get(struct task_struct *task)
{
	struct cgroup_namespace *ns = NULL;
	struct nsproxy *nsproxy;

	task_lock(task);
	nsproxy = task->nsproxy;
	if (nsproxy) {
		ns = nsproxy->cgroup_ns;
		get_cgroup_ns(ns);
	}
	task_unlock(task);

	return ns ? &ns->ns : NULL;
}

static void cgroupns_put(struct ns_common *ns)
{
	put_cgroup_ns(to_cg_ns(ns));
}

const struct proc_ns_operations cgroupns_operations = {
	.name		= "cgroup",
	.type		= CLONE_NEWCGROUP,
	.get		= cgroupns_get,
	.put		= cgroupns_put,
	.install	= cgroupns_install,
};

static __init int cgroup_namespaces_init(void)
{
	return 0;
}
subsys_initcall(cgroup_namespaces_init);

#ifdef CONFIG_CGROUP_DEBUG
static struct cgroup_subsys_state *
debug_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);

	if (!css)
		return ERR_PTR(-ENOMEM);

	return css;
}

static void debug_css_free(struct cgroup_subsys_state *css)
{
	kfree(css);
}

static u64 debug_taskcount_read(struct cgroup_subsys_state *css,
				struct cftype *cft)
{
	return cgroup_task_count(css->cgroup);
}

static u64 current_css_set_read(struct cgroup_subsys_state *css,
				struct cftype *cft)
{
	return (u64)(unsigned long)current->cgroups;
}

static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
					 struct cftype *cft)
{
	u64 count;

	rcu_read_lock();
	count = atomic_read(&task_css_set(current)->refcount);
	rcu_read_unlock();
	return count;
}

static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
{
	struct cgrp_cset_link *link;
	struct css_set *cset;
	char *name_buf;

	name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
	if (!name_buf)
		return -ENOMEM;

	spin_lock_irq(&css_set_lock);
	rcu_read_lock();
	cset = rcu_dereference(current->cgroups);
	list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
		struct cgroup *c = link->cgrp;

		cgroup_name(c, name_buf, NAME_MAX + 1);
		seq_printf(seq, "Root %d group %s\n",
			   c->root->hierarchy_id, name_buf);
	}
	rcu_read_unlock();
	spin_unlock_irq(&css_set_lock);
	kfree(name_buf);
	return 0;
}

#define MAX_TASKS_SHOWN_PER_CSS 25
static int cgroup_css_links_read(struct seq_file *seq, void *v)
{
	struct cgroup_subsys_state *css = seq_css(seq);
	struct cgrp_cset_link *link;

	spin_lock_irq(&css_set_lock);
	list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
		struct css_set *cset = link->cset;
		struct task_struct *task;
		int count = 0;

		seq_printf(seq, "css_set %p\n", cset);

		list_for_each_entry(task, &cset->tasks, cg_list) {
			if (count++ > MAX_TASKS_SHOWN_PER_CSS)
				goto overflow;
			seq_printf(seq, "  task %d\n", task_pid_vnr(task));
		}

		list_for_each_entry(task, &cset->mg_tasks, cg_list) {
			if (count++ > MAX_TASKS_SHOWN_PER_CSS)
				goto overflow;
			seq_printf(seq, "  task %d\n", task_pid_vnr(task));
		}
		continue;
	overflow:
		seq_puts(seq, "  ...\n");
	}
	spin_unlock_irq(&css_set_lock);
	return 0;
}

static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return (!cgroup_is_populated(css->cgroup) &&
		!css_has_online_children(&css->cgroup->self));
}

static struct cftype debug_files[] = {
	{
		.name = "taskcount",
		.read_u64 = debug_taskcount_read,
	},

	{
		.name = "current_css_set",
		.read_u64 = current_css_set_read,
	},

	{
		.name = "current_css_set_refcount",
		.read_u64 = current_css_set_refcount_read,
	},

	{
		.name = "current_css_set_cg_links",
		.seq_show = current_css_set_cg_links_read,
	},

	{
		.name = "cgroup_css_links",
		.seq_show = cgroup_css_links_read,
	},

	{
		.name = "releasable",
		.read_u64 = releasable_read,
	},

	{ }	/* terminate */
};

struct cgroup_subsys debug_cgrp_subsys = {
	.css_alloc = debug_css_alloc,
	.css_free = debug_css_free,
	.legacy_cftypes = debug_files,
};
#endif	/* CONFIG_CGROUP_DEBUG */
6537