#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "cgroup-internal.h"

#include <linux/cred.h>
#include <linux/errno.h>
#include <linux/init_task.h>
#include <linux/kernel.h>
#include <linux/magic.h>
#include <linux/mutex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/percpu-rwsem.h>
#include <linux/string.h>
#include <linux/hashtable.h>
#include <linux/idr.h>
#include <linux/kthread.h>
#include <linux/atomic.h>
#include <linux/cpuset.h>
#include <linux/proc_ns.h>
#include <linux/nsproxy.h>
#include <linux/file.h>
#include <net/sock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cgroup.h>

#define CGROUP_FILE_NAME_MAX	(MAX_CGROUP_TYPE_NAMELEN +	\
				 MAX_CFTYPE_NAME + 2)
/*
 * cgroup_mutex is the master lock.  Any modification to cgroup or its
 * hierarchy must be performed while holding it.
 *
 * css_set_lock protects task->cgroups pointer, the list of css_set
 * objects, and the chain of tasks off each css_set.
 *
 * Both locks are exported when CONFIG_PROVE_RCU is set so that accessors
 * in cgroup.h can use them for lockdep annotations.
 */
DEFINE_MUTEX(cgroup_mutex);
DEFINE_SPINLOCK(css_set_lock);

#ifdef CONFIG_PROVE_RCU
EXPORT_SYMBOL_GPL(cgroup_mutex);
EXPORT_SYMBOL_GPL(css_set_lock);
#endif

/*
 * Protects cgroup_idr and css_idr so that IDs can be released without
 * grabbing cgroup_mutex.
 */
static DEFINE_SPINLOCK(cgroup_idr_lock);

/*
 * Protects cgroup_file->kn for !self csses.  It synchronizes notifications
 * against file removal/re-creation across css hiding.
 */
static DEFINE_SPINLOCK(cgroup_file_kn_lock);

struct percpu_rw_semaphore cgroup_threadgroup_rwsem;

#define cgroup_assert_mutex_or_rcu_locked()				\
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
			   !lockdep_is_held(&cgroup_mutex),		\
			   "cgroup_mutex or RCU read lock required");
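/*
 * Illustrative sketch (not from the original source): a read-side helper
 * would use the assertion above to document its locking contract, e.g.:
 *
 *	static struct cgroup *peek_parent(struct cgroup *cgrp)
 *	{
 *		cgroup_assert_mutex_or_rcu_locked();
 *		return cgroup_parent(cgrp);
 *	}
 *
 * The check expands to a lockdep warning and therefore costs nothing on
 * builds without CONFIG_PROVE_RCU.
 */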
/*
 * cgroup destruction makes heavy use of work items and there can be a lot
 * of concurrent destructions.  Use a separate workqueue so that cgroup
 * destruction work items don't end up filling up max_active of system_wq
 * which may lead to deadlock.
 */
static struct workqueue_struct *cgroup_destroy_wq;

/* generate an array of cgroup subsystem pointers */
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
struct cgroup_subsys *cgroup_subsys[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/* array of cgroup subsystem names */
#define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
static const char *cgroup_subsys_name[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/* generate static keys for cgroup_subsys_enabled() and cgroup_subsys_on_dfl() */
#define SUBSYS(_x)							\
	DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key);	\
	DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key);		\
	EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key);		\
	EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
#include <linux/cgroup_subsys.h>
#undef SUBSYS

#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
static struct static_key_true *cgroup_subsys_enabled_key[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS
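/*
 * Illustrative note (not from the original source): the SUBSYS() blocks
 * above are the classic "x-macro" pattern - cgroup_subsys.h is just a list
 * of SUBSYS(name) invocations, and each inclusion expands that list with a
 * different definition.  A minimal standalone sketch of the technique:
 *
 *	#define COLORS(X) X(red) X(green) X(blue)
 *	#define X(c) c ## _id,
 *	enum color { COLORS(X) };
 *	#undef X
 *	#define X(c) #c,
 *	static const char *color_name[] = { COLORS(X) };
 *	#undef X
 *
 * One list drives several parallel tables, so the arrays can never drift
 * out of sync with the enum.
 */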
/*
 * The default hierarchy, reserved for the subsystems that are otherwise
 * unattached - it never has more than a single cgroup, and all tasks are
 * part of that cgroup.
 */
struct cgroup_root cgrp_dfl_root;
EXPORT_SYMBOL_GPL(cgrp_dfl_root);

/*
 * The default hierarchy always exists but is hidden until mounted for the
 * first time.  This is for backward compatibility.
 */
static bool cgrp_dfl_visible;

/* some controllers are not supported on the default hierarchy */
static u16 cgrp_dfl_inhibit_ss_mask;

/* some controllers are implicitly enabled on the default hierarchy */
static u16 cgrp_dfl_implicit_ss_mask;

/* some controllers can be threaded on the default hierarchy */
static u16 cgrp_dfl_threaded_ss_mask;

/* the list of hierarchy roots */
LIST_HEAD(cgroup_roots);
static int cgroup_root_count;

/* hierarchy ID allocation and mapping, protected by cgroup_mutex */
static DEFINE_IDR(cgroup_hierarchy_idr);

/*
 * Assign a monotonically increasing serial number to csses.  It guarantees
 * cgroups with bigger numbers are newer than those with smaller numbers.
 * Also, as csses are always appended to the parent's ->children list, it
 * guarantees that sibling csses are always sorted in the ascending serial
 * number order on the list.  Protected by cgroup_mutex.
 */
static u64 css_serial_nr_next = 1;

/*
 * These bitmasks identify subsystems with specific features to avoid
 * calling into the subsystem core unnecessarily on fork/exit/free.
 */
static u16 have_fork_callback __read_mostly;
static u16 have_exit_callback __read_mostly;
static u16 have_free_callback __read_mostly;
static u16 have_canfork_callback __read_mostly;

/* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = {
	.count		= REFCOUNT_INIT(2),
	.user_ns	= &init_user_ns,
	.ns.ops		= &cgroupns_operations,
	.ns.inum	= PROC_CGROUP_INIT_INO,
	.root_cset	= &init_css_set,
};
static struct file_system_type cgroup2_fs_type;
static struct cftype cgroup_base_files[];

static int cgroup_apply_control(struct cgroup *cgrp);
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
static void css_task_iter_advance(struct css_task_iter *it);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
					      struct cgroup_subsys *ss);
static void css_release(struct percpu_ref *ref);
static void kill_css(struct cgroup_subsys_state *css);
static int cgroup_addrm_files(struct cgroup_subsys_state *css,
			      struct cgroup *cgrp, struct cftype cfts[],
			      bool is_add);

/**
 * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
 * @ssid: subsys ID of interest
 *
 * Returns %true if the subsystem identified by @ssid is enabled, i.e. its
 * static key hasn't been switched off (e.g. by the "cgroup_disable=" boot
 * option).
 */
bool cgroup_ssid_enabled(int ssid)
{
	if (CGROUP_SUBSYS_COUNT == 0)
		return false;

	return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
}
/**
 * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
 * @cgrp: the cgroup of interest
 *
 * The default hierarchy is the v2 interface of cgroup.  This function can
 * be used to test whether a cgroup is on the default hierarchy for cases
 * where a subsystem should behave differently depending on the interface
 * version - e.g. v2 removes the "tasks" file and release-agent
 * notifications, disallows rename(2) and remount, and requires controllers
 * to behave fully hierarchically.
 */
bool cgroup_on_dfl(const struct cgroup *cgrp)
{
	return cgrp->root == &cgrp_dfl_root;
}

/* IDR wrappers which synchronize using cgroup_idr_lock */
static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
			    gfp_t gfp_mask)
{
	int ret;

	idr_preload(gfp_mask);
	spin_lock_bh(&cgroup_idr_lock);
	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
	spin_unlock_bh(&cgroup_idr_lock);
	idr_preload_end();
	return ret;
}

static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
{
	void *ret;

	spin_lock_bh(&cgroup_idr_lock);
	ret = idr_replace(idr, ptr, id);
	spin_unlock_bh(&cgroup_idr_lock);
	return ret;
}

static void cgroup_idr_remove(struct idr *idr, int id)
{
	spin_lock_bh(&cgroup_idr_lock);
	idr_remove(idr, id);
	spin_unlock_bh(&cgroup_idr_lock);
}

static bool cgroup_has_tasks(struct cgroup *cgrp)
{
	return cgrp->nr_populated_csets;
}

bool cgroup_is_threaded(struct cgroup *cgrp)
{
	return cgrp->dom_cgrp != cgrp;
}
/* can @cgrp host both domain and threaded children? */
static bool cgroup_is_mixable(struct cgroup *cgrp)
{
	/*
	 * Root isn't under domain level resource control exempting it from
	 * the no-internal-process constraint, so it can serve as a thread
	 * root and a parent of resource domains at the same time.
	 */
	return !cgroup_parent(cgrp);
}

/* can @cgrp become a thread root? should always be true for a thread root */
static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
{
	/* mixables don't care */
	if (cgroup_is_mixable(cgrp))
		return true;

	/* domain roots can't be nested under threaded */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* can only have either domain or threaded children */
	if (cgrp->nr_populated_domain_children)
		return false;

	/* and no domain controllers can be enabled */
	if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
		return false;

	return true;
}

/* is @cgrp root of a threaded subtree? */
bool cgroup_is_thread_root(struct cgroup *cgrp)
{
	/* thread root should be a domain */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* a domain w/ threaded children is a thread root */
	if (cgrp->nr_threaded_children)
		return true;

	/*
	 * A domain which has tasks and explicit threaded controllers
	 * enabled is a thread root too.
	 */
	if (cgroup_has_tasks(cgrp) &&
	    (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask))
		return true;

	return false;
}

/* a domain which isn't connected to the root w/o breakage can't be used */
static bool cgroup_is_valid_domain(struct cgroup *cgrp)
{
	/* the cgroup itself can be a thread root */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* but the ancestors can't be unless mixable */
	while ((cgrp = cgroup_parent(cgrp))) {
		if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp))
			return false;
		if (cgroup_is_threaded(cgrp))
			return false;
	}

	return true;
}
/* subsystems visibly enabled on a cgroup */
static u16 cgroup_control(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	u16 root_ss_mask = cgrp->root->subsys_mask;

	if (parent) {
		u16 ss_mask = parent->subtree_control;

		/* threaded cgroups can only have threaded controllers */
		if (cgroup_is_threaded(cgrp))
			ss_mask &= cgrp_dfl_threaded_ss_mask;
		return ss_mask;
	}

	if (cgroup_on_dfl(cgrp))
		root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
				  cgrp_dfl_implicit_ss_mask);
	return root_ss_mask;
}

/* subsystems enabled on a cgroup, including implicit ones */
static u16 cgroup_ss_mask(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);

	if (parent) {
		u16 ss_mask = parent->subtree_ss_mask;

		/* threaded cgroups can only have threaded controllers */
		if (cgroup_is_threaded(cgrp))
			ss_mask &= cgrp_dfl_threaded_ss_mask;
		return ss_mask;
	}

	return cgrp->root->subsys_mask;
}
/**
 * cgroup_css - obtain a cgroup's css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
 *
 * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
 * function must be called either under cgroup_mutex or rcu_read_lock() and
 * the caller is responsible for pinning the returned css if it wants to
 * keep accessing it outside the said locks.  This function may return
 * %NULL if @cgrp doesn't have @ss enabled.
 */
static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
					      struct cgroup_subsys *ss)
{
	if (ss)
		return rcu_dereference_check(cgrp->subsys[ss->id],
					lockdep_is_held(&cgroup_mutex));
	else
		return &cgrp->self;
}
/**
 * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
 *
 * Similar to cgroup_css() but returns the effective css, which is defined
 * as the matching css of the nearest ancestor including self which has @ss
 * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
 * function is guaranteed to return non-NULL css.
 */
static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
						struct cgroup_subsys *ss)
{
	lockdep_assert_held(&cgroup_mutex);

	if (!ss)
		return &cgrp->self;

	/*
	 * This function is used while updating css associations and thus
	 * can't test the csses directly.  Test ss_mask.
	 */
	while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
		cgrp = cgroup_parent(cgrp);
		if (!cgrp)
			return NULL;
	}

	return cgroup_css(cgrp, ss);
}
/**
 * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get the effective css of @cgrp for @ss.  If @ss is not mounted
 * on the hierarchy @cgrp is on, the root css is returned, so this function
 * always returns a valid css.  The returned css must be put using
 * css_put().
 */
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
					     struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();

	do {
		css = cgroup_css(cgrp, ss);

		if (css && css_tryget_online(css))
			goto out_unlock;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);

	css = init_css_set.subsys[ss->id];
	css_get(css);
out_unlock:
	rcu_read_unlock();
	return css;
}

static void cgroup_get_live(struct cgroup *cgrp)
{
	WARN_ON_ONCE(cgroup_is_dead(cgrp));
	css_get(&cgrp->self);
}
struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
{
	struct cgroup *cgrp = of->kn->parent->priv;
	struct cftype *cft = of_cft(of);

	/*
	 * This is open and unprotected implementation of cgroup_css().
	 * seq_css() is only called from a kernfs file operation which has
	 * an active reference on the file.  Because all the subsystem
	 * files are drained before a css is disassociated with a cgroup,
	 * the matching css from the cgroup's subsys table is guaranteed to
	 * be and stay valid until the enclosing operation ends.
	 */
	if (cft->ss)
		return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
	else
		return &cgrp->self;
}
EXPORT_SYMBOL_GPL(of_css);
/**
 * for_each_css - iterate all css's of a cgroup
 * @css: the iteration cursor
 * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
 * @cgrp: the target cgroup to iterate css's of
 *
 * Should be called under cgroup_mutex.
 */
#define for_each_css(css, ssid, cgrp)					\
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
		if (!((css) = rcu_dereference_check(			\
				(cgrp)->subsys[(ssid)],			\
				lockdep_is_held(&cgroup_mutex)))) { }	\
		else
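/*
 * Illustrative sketch (not from the original source): a typical walk over
 * every css attached to a cgroup looks like
 *
 *	struct cgroup_subsys_state *css;
 *	int ssid;
 *
 *	for_each_css(css, ssid, cgrp)
 *		pr_info("subsys %d is attached\n", ssid);
 *
 * The trailing "else" makes the loop body bind to the if(), so slots
 * without a css on @cgrp are skipped silently.
 */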
/**
 * for_each_e_css - iterate all effective css's of a cgroup
 * @css: the iteration cursor
 * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
 * @cgrp: the target cgroup to iterate css's of
 *
 * Should be called under cgroup_mutex.
 */
#define for_each_e_css(css, ssid, cgrp)					\
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
		if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
			;						\
		else

/**
 * do_each_subsys_mask - filter for_each_subsys with a bitmask
 * @ss: the iteration cursor
 * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
 * @ss_mask: the bitmask
 *
 * The block will only run for cases where the ssid-th bit (1 << ssid) of
 * @ss_mask is set.
 */
#define do_each_subsys_mask(ss, ssid, ss_mask) do {			\
	unsigned long __ss_mask = (ss_mask);				\
	if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */	\
		(ssid) = 0;						\
		break;							\
	}								\
	for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) {	\
		(ss) = cgroup_subsys[ssid];				\
		{

#define while_each_subsys_mask()					\
		}							\
	}								\
} while (false)
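/*
 * Illustrative sketch (not from the original source): the pair above forms
 * an open-coded loop so the body can carry its own declarations and
 * control flow, e.g. (enabled_mask is a hypothetical u16 here):
 *
 *	struct cgroup_subsys *ss;
 *	int ssid;
 *
 *	do_each_subsys_mask(ss, ssid, enabled_mask) {
 *		if (ss->css_online)
 *			pr_info("%s has an online callback\n", ss->name);
 *	} while_each_subsys_mask();
 *
 * Only subsystems whose bit is set in the mask are visited.
 */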
/* iterate over child cgrps, lock should be held throughout iteration */
#define cgroup_for_each_live_child(child, cgrp)				\
	list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       cgroup_is_dead(child); }))			\
			;						\
		else

/* walk live descendants in pre order */
#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)		\
	css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))	\
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       (dsct) = (d_css)->cgroup;			\
		       cgroup_is_dead(dsct); }))			\
			;						\
		else

/* walk live descendants in postorder */
#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp)	\
	css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       (dsct) = (d_css)->cgroup;			\
		       cgroup_is_dead(dsct); }))			\
			;						\
		else
/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted.  It contains a pointer to the root state
 * for each subsystem and also anchors the list of css_sets.
 */
struct css_set init_css_set = {
	.refcount		= REFCOUNT_INIT(1),
	.dom_cset		= &init_css_set,
	.tasks			= LIST_HEAD_INIT(init_css_set.tasks),
	.mg_tasks		= LIST_HEAD_INIT(init_css_set.mg_tasks),
	.task_iters		= LIST_HEAD_INIT(init_css_set.task_iters),
	.threaded_csets		= LIST_HEAD_INIT(init_css_set.threaded_csets),
	.cgrp_links		= LIST_HEAD_INIT(init_css_set.cgrp_links),
	.mg_preload_node	= LIST_HEAD_INIT(init_css_set.mg_preload_node),
	.mg_node		= LIST_HEAD_INIT(init_css_set.mg_node),
};

static int css_set_count	= 1;	/* 1 for init_css_set */

static bool css_set_threaded(struct css_set *cset)
{
	return cset->dom_cset != cset;
}
/**
 * css_set_populated - does a css_set contain any tasks?
 * @cset: target css_set
 *
 * css_set_populated() should be the same as !!cset->nr_tasks at steady
 * state.  However, it can be called while a task is being added to or
 * removed from the linked list before nr_tasks is properly updated, so it
 * can't be used reliably for those purposes.
 */
static bool css_set_populated(struct css_set *cset)
{
	lockdep_assert_held(&css_set_lock);

	return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks);
}
/**
 * cgroup_update_populated - update the populated count of a cgroup
 * @cgrp: the target cgroup
 * @populated: inc or dec populated count
 *
 * One of the css_sets associated with @cgrp is either getting its first
 * task or losing the last.  Update @cgrp->nr_populated_* accordingly.  The
 * count is propagated towards root so that a given cgroup's
 * nr_populated_children is zero iff none of its descendants contain any
 * tasks.
 *
 * @cgrp's interface file "cgroup.populated" is zero if both
 * @cgrp->nr_populated_csets and @cgrp->nr_populated_children are zero and
 * 1 otherwise.  When the sum changes from or to zero, userland is notified
 * that the content of the interface file has changed.
 */
static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
{
	struct cgroup *child = NULL;
	int adj = populated ? 1 : -1;

	lockdep_assert_held(&css_set_lock);

	do {
		bool was_populated = cgroup_is_populated(cgrp);

		if (!child) {
			cgrp->nr_populated_csets += adj;
		} else {
			if (cgroup_is_threaded(child))
				cgrp->nr_populated_threaded_children += adj;
			else
				cgrp->nr_populated_domain_children += adj;
		}

		if (was_populated == cgroup_is_populated(cgrp))
			break;

		cgroup1_check_for_release(cgrp);
		cgroup_file_notify(&cgrp->events_file);

		child = cgrp;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);
}
/**
 * css_set_update_populated - update populated state of a css_set
 * @cset: target css_set
 * @populated: whether @cset is populated or depopulated
 *
 * @cset is either getting the first task or losing the last.  Update the
 * populated counters of all associated cgroups accordingly.
 */
static void css_set_update_populated(struct css_set *cset, bool populated)
{
	struct cgrp_cset_link *link;

	lockdep_assert_held(&css_set_lock);

	list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
		cgroup_update_populated(link->cgrp, populated);
}
/**
 * css_set_move_task - move a task from one css_set to another
 * @task: task being moved
 * @from_cset: css_set @task currently belongs to (may be NULL)
 * @to_cset: new css_set @task is being moved to (may be NULL)
 * @use_mg_tasks: move to @to_cset->mg_tasks instead of ->tasks
 *
 * Move @task from @from_cset to @to_cset.  If @task didn't belong to any
 * css_set, @from_cset can be NULL.  If @task is being disassociated
 * instead of moved, @to_cset can be NULL.
 *
 * This function automatically handles populated counter updates and
 * css_task_iter adjustments but the caller is responsible for managing
 * @from_cset and @to_cset's reference counts.
 */
static void css_set_move_task(struct task_struct *task,
			      struct css_set *from_cset, struct css_set *to_cset,
			      bool use_mg_tasks)
{
	lockdep_assert_held(&css_set_lock);

	if (to_cset && !css_set_populated(to_cset))
		css_set_update_populated(to_cset, true);

	if (from_cset) {
		struct css_task_iter *it, *pos;

		WARN_ON_ONCE(list_empty(&task->cg_list));

		/*
		 * @task is leaving, advance task iterators which are
		 * pointing to it so that they can resume at the next
		 * position.  Advancing an iterator might remove it from
		 * the list, use safe walk.  See css_task_iter_advance*()
		 * for details.
		 */
		list_for_each_entry_safe(it, pos, &from_cset->task_iters,
					 iters_node)
			if (it->task_pos == &task->cg_list)
				css_task_iter_advance(it);

		list_del_init(&task->cg_list);
		if (!css_set_populated(from_cset))
			css_set_update_populated(from_cset, false);
	} else {
		WARN_ON_ONCE(!list_empty(&task->cg_list));
	}

	if (to_cset) {
		/*
		 * We are synchronized through cgroup_threadgroup_rwsem
		 * against PF_EXITING setting such that we can't race
		 * against cgroup_exit() changing the css_set to
		 * init_css_set and dropping the old one.
		 */
		WARN_ON_ONCE(task->flags & PF_EXITING);

		rcu_assign_pointer(task->cgroups, to_cset);
		list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
							     &to_cset->tasks);
	}
}
/*
 * hash table for cgroup groups.  This improves the performance of finding
 * an existing css_set.
 */
#define CSS_SET_HASH_BITS	7
static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);

static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
{
	unsigned long key = 0UL;
	struct cgroup_subsys *ss;
	int i;

	for_each_subsys(ss, i)
		key += (unsigned long)css[i];
	key = (key >> 16) ^ key;

	return key;
}
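/*
 * Illustrative note (not from the original source): the hash simply sums
 * the css pointers and folds the upper bits back in with (key >> 16) ^ key.
 * Slab-allocated objects are aligned, so the low bits of the pointers are
 * mostly zero; the fold mixes higher, better-distributed bits into the
 * bucket index consumed by hash_add()/hash_for_each_possible().
 */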
void put_css_set_locked(struct css_set *cset)
{
	struct cgrp_cset_link *link, *tmp_link;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&css_set_lock);

	if (!refcount_dec_and_test(&cset->refcount))
		return;

	WARN_ON_ONCE(!list_empty(&cset->threaded_csets));

	/* This css_set is dead.  Unlink it and release cgroup and css refs */
	for_each_subsys(ss, ssid) {
		list_del(&cset->e_cset_node[ssid]);
		css_put(cset->subsys[ssid]);
	}
	hash_del(&cset->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		if (cgroup_parent(link->cgrp))
			cgroup_put(link->cgrp);
		kfree(link);
	}

	if (css_set_threaded(cset)) {
		list_del(&cset->threaded_csets_node);
		put_css_set_locked(cset->dom_cset);
	}

	kfree_rcu(cset, rcu_head);
}
/**
 * compare_css_sets - helper function for find_existing_css_set().
 * @cset: candidate css_set being tested
 * @old_cset: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if "cset" matches "old_cset" except for the hierarchy
 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
 */
static bool compare_css_sets(struct css_set *cset,
			     struct css_set *old_cset,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct cgroup *new_dfl_cgrp;
	struct list_head *l1, *l2;

	/*
	 * On the default hierarchy, there can be csets which are
	 * associated with the same set of cgroups but different csses.
	 * Let's first ensure that csses match.
	 */
	if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
		return false;

	/* @cset's domain should match the default cgroup's */
	if (cgroup_on_dfl(new_cgrp))
		new_dfl_cgrp = new_cgrp;
	else
		new_dfl_cgrp = old_cset->dfl_cgrp;

	if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp)
		return false;

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies.  As different cgroups may
	 * share the same effective css, this comparison is always
	 * necessary.
	 */
	l1 = &cset->cgrp_links;
	l2 = &old_cset->cgrp_links;
	while (1) {
		struct cgrp_cset_link *link1, *link2;
		struct cgroup *cgrp1, *cgrp2;

		l1 = l1->next;
		l2 = l2->next;
		/* See if we reached the end - both lists are equal length. */
		if (l1 == &cset->cgrp_links) {
			BUG_ON(l2 != &old_cset->cgrp_links);
			break;
		} else {
			BUG_ON(l2 == &old_cset->cgrp_links);
		}
		/* Locate the cgroups associated with these links. */
		link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
		link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
		cgrp1 = link1->cgrp;
		cgrp2 = link2->cgrp;
		/* Hierarchies should be linked in the same order. */
		BUG_ON(cgrp1->root != cgrp2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cgrp1->root == new_cgrp->root) {
			if (cgrp1 != new_cgrp)
				return false;
		} else {
			if (cgrp1 != cgrp2)
				return false;
		}
	}
	return true;
}
/**
 * find_existing_css_set - init css array and find the matching css_set
 * @old_cset: the css_set that we're using before the cgroup transition
 * @cgrp: the cgroup that we're moving into
 * @template: out param for the new set of csses, should be clear on entry
 */
static struct css_set *find_existing_css_set(struct css_set *old_cset,
					struct cgroup *cgrp,
					struct cgroup_subsys_state *template[])
{
	struct cgroup_root *root = cgrp->root;
	struct cgroup_subsys *ss;
	struct css_set *cset;
	unsigned long key;
	int i;

	/*
	 * Build the set of subsystem state objects that we want to see in the
	 * new css_set.  While subsystems can change globally, the entries here
	 * won't change, so no need for locking.
	 */
	for_each_subsys(ss, i) {
		if (root->subsys_mask & (1UL << i)) {
			/*
			 * @ss is in this hierarchy, so we want the
			 * effective css from @cgrp.
			 */
			template[i] = cgroup_e_css(cgrp, ss);
		} else {
			/*
			 * @ss is not in this hierarchy, so we don't want
			 * to change the css.
			 */
			template[i] = old_cset->subsys[i];
		}
	}

	key = css_set_hash(template);
	hash_for_each_possible(css_set_table, cset, hlist, key) {
		if (!compare_css_sets(cset, old_cset, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cset;
	}

	/* No existing cgroup group matched */
	return NULL;
}
static void free_cgrp_cset_links(struct list_head *links_to_free)
{
	struct cgrp_cset_link *link, *tmp_link;

	list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
		list_del(&link->cset_link);
		kfree(link);
	}
}

/**
 * allocate_cgrp_cset_links - allocate cgrp_cset_links
 * @count: the number of links to allocate
 * @tmp_links: list_head the allocated links are put on
 *
 * Allocate @count cgrp_cset_link structures and chain them on @tmp_links
 * through ->cset_link.  Returns 0 on success or -errno.
 */
static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
{
	struct cgrp_cset_link *link;
	int i;

	INIT_LIST_HEAD(tmp_links);

	for (i = 0; i < count; i++) {
		link = kzalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			free_cgrp_cset_links(tmp_links);
			return -ENOMEM;
		}
		list_add(&link->cset_link, tmp_links);
	}
	return 0;
}
/**
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
 * @cset: the css_set to be linked
 * @cgrp: the destination cgroup
 */
static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
			 struct cgroup *cgrp)
{
	struct cgrp_cset_link *link;

	BUG_ON(list_empty(tmp_links));

	if (cgroup_on_dfl(cgrp))
		cset->dfl_cgrp = cgrp;

	link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
	link->cset = cset;
	link->cgrp = cgrp;

	/*
	 * Always add links to the tail of the lists so that the lists are
	 * in chronological order.
	 */
	list_move_tail(&link->cset_link, &cgrp->cset_links);
	list_add_tail(&link->cgrp_link, &cset->cgrp_links);

	if (cgroup_parent(cgrp))
		cgroup_get_live(cgrp);
}
/**
 * find_css_set - return a new css_set with one cgroup updated
 * @old_cset: the baseline css_set
 * @cgrp: the cgroup to be updated
 *
 * Return a new css_set that's equivalent to @old_cset, but with @cgrp
 * substituted into the appropriate hierarchy.
 */
static struct css_set *find_css_set(struct css_set *old_cset,
				    struct cgroup *cgrp)
{
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
	struct css_set *cset;
	struct list_head tmp_links;
	struct cgrp_cset_link *link;
	struct cgroup_subsys *ss;
	unsigned long key;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	/* First see if we already have a css_set matching the desired set */
	spin_lock_irq(&css_set_lock);
	cset = find_existing_css_set(old_cset, cgrp, template);
	if (cset)
		get_css_set(cset);
	spin_unlock_irq(&css_set_lock);

	if (cset)
		return cset;

	cset = kzalloc(sizeof(*cset), GFP_KERNEL);
	if (!cset)
		return NULL;

	/* Allocate all the cgrp_cset_link objects that we'll need */
	if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
		kfree(cset);
		return NULL;
	}

	refcount_set(&cset->refcount, 1);
	cset->dom_cset = cset;
	INIT_LIST_HEAD(&cset->tasks);
	INIT_LIST_HEAD(&cset->mg_tasks);
	INIT_LIST_HEAD(&cset->task_iters);
	INIT_LIST_HEAD(&cset->threaded_csets);
	INIT_HLIST_NODE(&cset->hlist);
	INIT_LIST_HEAD(&cset->cgrp_links);
	INIT_LIST_HEAD(&cset->mg_preload_node);
	INIT_LIST_HEAD(&cset->mg_node);

	/*
	 * Copy the set of subsystem state objects generated in
	 * find_existing_css_set()
	 */
	memcpy(cset->subsys, template, sizeof(cset->subsys));

	spin_lock_irq(&css_set_lock);
	/* Add reference counts and links from the new css_set. */
	list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
		struct cgroup *c = link->cgrp;

		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_links, cset, c);
	}

	BUG_ON(!list_empty(&tmp_links));

	css_set_count++;

	/* Add @cset to the hash table */
	key = css_set_hash(cset->subsys);
	hash_add(css_set_table, &cset->hlist, key);

	for_each_subsys(ss, ssid) {
		struct cgroup_subsys_state *css = cset->subsys[ssid];

		list_add_tail(&cset->e_cset_node[ssid],
			      &css->cgroup->e_csets[ssid]);
		css_get(css);
	}

	spin_unlock_irq(&css_set_lock);

	/*
	 * If @cset should be threaded, look up the matching dom_cset and
	 * link them up.  We first fully initialize @cset then look for the
	 * dom_cset.  It's safe to race with the drain path as migrations
	 * are always performed under cgroup_mutex.
	 */
	if (cgroup_is_threaded(cset->dfl_cgrp)) {
		struct css_set *dcset;

		dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp);
		if (!dcset) {
			put_css_set(cset);
			return NULL;
		}

		spin_lock_irq(&css_set_lock);
		cset->dom_cset = dcset;
		list_add_tail(&cset->threaded_csets_node,
			      &dcset->threaded_csets);
		spin_unlock_irq(&css_set_lock);
	}

	return cset;
}
struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
{
	struct cgroup *root_cgrp = kf_root->kn->priv;

	return root_cgrp->root;
}

static int cgroup_init_root_id(struct cgroup_root *root)
{
	int id;

	lockdep_assert_held(&cgroup_mutex);

	id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
	if (id < 0)
		return id;

	root->hierarchy_id = id;
	return 0;
}

static void cgroup_exit_root_id(struct cgroup_root *root)
{
	lockdep_assert_held(&cgroup_mutex);

	idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
}

void cgroup_free_root(struct cgroup_root *root)
{
	if (root) {
		idr_destroy(&root->cgroup_idr);
		kfree(root);
	}
}
static void cgroup_destroy_root(struct cgroup_root *root)
{
	struct cgroup *cgrp = &root->cgrp;
	struct cgrp_cset_link *link, *tmp_link;

	trace_cgroup_destroy_root(root);

	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

	BUG_ON(atomic_read(&root->nr_cgrps));
	BUG_ON(!list_empty(&cgrp->self.children));

	/* Rebind all subsystems back to the default hierarchy */
	WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));

	/*
	 * Release all the links from cset_links to this hierarchy's
	 * root cgroup
	 */
	spin_lock_irq(&css_set_lock);

	list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		kfree(link);
	}

	spin_unlock_irq(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		cgroup_root_count--;
	}

	cgroup_exit_root_id(root);

	mutex_unlock(&cgroup_mutex);

	kernfs_destroy_root(root->kf_root);
	cgroup_free_root(root);
}
/*
 * look up cgroup associated with current task's cgroup namespace on the
 * specified hierarchy
 */
static struct cgroup *
current_cgns_cgroup_from_root(struct cgroup_root *root)
{
	struct cgroup *res = NULL;
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	rcu_read_lock();

	cset = current->nsproxy->cgroup_ns->root_cset;
	if (cset == &init_css_set) {
		res = &root->cgrp;
	} else {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	rcu_read_unlock();

	BUG_ON(!res);
	return res;
}

/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
					    struct cgroup_root *root)
{
	struct cgroup *res = NULL;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&css_set_lock);

	if (cset == &init_css_set) {
		res = &root->cgrp;
	} else if (root == &cgrp_dfl_root) {
		res = cset->dfl_cgrp;
	} else {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}

	BUG_ON(!res);
	return res;
}

/*
 * Return the cgroup for "task" from the given hierarchy.  Must be
 * called with cgroup_mutex and css_set_lock held.
 */
struct cgroup *task_cgroup_from_root(struct task_struct *task,
				     struct cgroup_root *root)
{
	/*
	 * No need to lock the task - since we hold cgroup_mutex the
	 * task can't change groups, so the only thing that can happen
	 * is that it exits and its css is set back to init_css_set.
	 */
	return cset_cgroup_from_root(task_css_set(task), root);
}
/*
 * A task must hold cgroup_mutex to modify cgroups.
 *
 * A cgroup can only be deleted if both its 'count' of using tasks is zero
 * and its list of 'children' cgroups is empty.  Since all tasks in the
 * system use _some_ cgroup, and since there is always at least one task in
 * the system (init, pid == 1), the root cgroup always has either children
 * cgroups and/or using tasks, so it can never be removed.
 *
 * One locking exception: RCU guards the update of a task's cgroup pointer
 * by cgroup_attach_task().
 */
static struct kernfs_syscall_ops cgroup_kf_syscall_ops;

static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
			      char *buf)
{
	struct cgroup_subsys *ss = cft->ss;

	if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
	    !(cgrp->root->flags & CGRP_ROOT_NOPREFIX))
		snprintf(buf, CGROUP_FILE_NAME_MAX, "%s.%s",
			 cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
			 cft->name);
	else
		strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
	return buf;
}
/**
 * cgroup_file_mode - deduce file mode of a control file
 * @cft: the control file in question
 *
 * S_IRUGO for read, S_IWUSR for write.
 */
static umode_t cgroup_file_mode(const struct cftype *cft)
{
	umode_t mode = 0;

	if (cft->read_u64 || cft->read_s64 || cft->seq_show)
		mode |= S_IRUGO;

	if (cft->write_u64 || cft->write_s64 || cft->write) {
		if (cft->flags & CFTYPE_WORLD_WRITABLE)
			mode |= S_IWUGO;
		else
			mode |= S_IWUSR;
	}

	return mode;
}
/**
 * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask
 * @subtree_control: the new subtree_control mask to consider
 * @this_ss_mask: available subsystems
 *
 * On the default hierarchy, a subsystem may request other subsystems to be
 * enabled together through its ->depends_on mask.  In such cases, more
 * subsystems than specified in "cgroup.subtree_control" may be enabled.
 *
 * This function calculates which subsystems need to be enabled if
 * @subtree_control is to be applied while restricted to @this_ss_mask.
 */
static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
{
	u16 cur_ss_mask = subtree_control;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	cur_ss_mask |= cgrp_dfl_implicit_ss_mask;

	while (true) {
		u16 new_ss_mask = cur_ss_mask;

		do_each_subsys_mask(ss, ssid, cur_ss_mask) {
			new_ss_mask |= ss->depends_on;
		} while_each_subsys_mask();

		/*
		 * Mask out subsystems which aren't available.  This can
		 * happen only if some depended-upon subsystems were bound
		 * to non-default hierarchies.
		 */
		new_ss_mask &= this_ss_mask;

		if (new_ss_mask == cur_ss_mask)
			break;
		cur_ss_mask = new_ss_mask;
	}

	return cur_ss_mask;
}
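/*
 * Illustrative note (not from the original source): the loop above is a
 * fixed-point computation over the ->depends_on masks.  If controller A
 * depends on B and B depends on C, starting from {A} the mask grows
 * {A} -> {A,B} -> {A,B,C} and the loop stops on the first pass that adds
 * nothing new.  Each pass can only set bits and there are at most
 * CGROUP_SUBSYS_COUNT of them, so termination is guaranteed.
 */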
/**
 * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
 * @kn: the kernfs_node being serviced
 *
 * This helper undoes cgroup_kn_lock_live() and should be invoked before
 * the method finishes if locking succeeded.  Note that once this function
 * returns, the cgroup returned by cgroup_kn_lock_live() may become
 * inaccessible any time.  If the caller intends to continue to access the
 * cgroup, it should pin it before invoking this function.
 */
void cgroup_kn_unlock(struct kernfs_node *kn)
{
	struct cgroup *cgrp;

	if (kernfs_type(kn) == KERNFS_DIR)
		cgrp = kn->priv;
	else
		cgrp = kn->parent->priv;

	mutex_unlock(&cgroup_mutex);

	kernfs_unbreak_active_protection(kn);
	cgroup_put(cgrp);
}

/**
 * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
 * @kn: the kernfs_node being serviced
 * @drain_offline: perform offline draining on the cgroup
 *
 * This helper is to be used by a cgroup kernfs method currently servicing
 * @kn.  It breaks the active protection, performs cgroup locking and
 * verifies that the associated cgroup is alive.  Returns the cgroup if
 * alive; otherwise, %NULL.  A successful return should be undone by a
 * matching cgroup_kn_unlock() invocation.  If @drain_offline is %true, the
 * cgroup is drained of offlining csses before return.
 *
 * Any cgroup kernfs method implementation which requires locking the
 * associated cgroup should use this helper.  It avoids nesting cgroup
 * locking under kernfs active protection and allows all kernfs operations
 * including self-removal.
 */
struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
{
	struct cgroup *cgrp;

	if (kernfs_type(kn) == KERNFS_DIR)
		cgrp = kn->priv;
	else
		cgrp = kn->parent->priv;

	/*
	 * We're gonna grab cgroup_mutex which nests outside kernfs
	 * active_ref.  cgroup liveliness check alone provides enough
	 * protection against removal.  Ensure @cgrp stays accessible and
	 * break the active_ref protection.
	 */
	if (!cgroup_tryget(cgrp))
		return NULL;
	kernfs_break_active_protection(kn);

	if (drain_offline)
		cgroup_lock_and_drain_offline(cgrp);
	else
		mutex_lock(&cgroup_mutex);

	if (!cgroup_is_dead(cgrp))
		return cgrp;

	cgroup_kn_unlock(kn);
	return NULL;
}
static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
{
	char name[CGROUP_FILE_NAME_MAX];

	lockdep_assert_held(&cgroup_mutex);

	if (cft->file_offset) {
		struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
		struct cgroup_file *cfile = (void *)css + cft->file_offset;

		spin_lock_irq(&cgroup_file_kn_lock);
		cfile->kn = NULL;
		spin_unlock_irq(&cgroup_file_kn_lock);
	}

	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
}

/**
 * css_clear_dir - remove subsys files in a cgroup directory
 * @css: target css
 */
static void css_clear_dir(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cftype *cfts;

	if (!(css->flags & CSS_VISIBLE))
		return;

	css->flags &= ~CSS_VISIBLE;

	list_for_each_entry(cfts, &css->ss->cfts, node)
		cgroup_addrm_files(css, cgrp, cfts, false);
}
/**
 * css_populate_dir - create subsys files in a cgroup directory
 * @css: target css
 *
 * On failure, no file is created.
 */
static int css_populate_dir(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cftype *cfts, *failed_cfts;
	int ret;

	if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
		return 0;

	if (!css->ss) {
		if (cgroup_on_dfl(cgrp))
			cfts = cgroup_base_files;
		else
			cfts = cgroup1_base_files;

		return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
	}

	list_for_each_entry(cfts, &css->ss->cfts, node) {
		ret = cgroup_addrm_files(css, cgrp, cfts, true);
		if (ret < 0) {
			failed_cfts = cfts;
			goto err;
		}
	}

	css->flags |= CSS_VISIBLE;

	return 0;
err:
	list_for_each_entry(cfts, &css->ss->cfts, node) {
		if (cfts == failed_cfts)
			break;
		cgroup_addrm_files(css, cgrp, cfts, false);
	}
	return ret;
}
int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
{
	struct cgroup *dcgrp = &dst_root->cgrp;
	struct cgroup_subsys *ss;
	int ssid, i, ret;

	lockdep_assert_held(&cgroup_mutex);

	do_each_subsys_mask(ss, ssid, ss_mask) {
		/*
		 * If @ss has non-root csses attached to it, can't move.
		 * If @ss is an implicit controller, it is exempt from this
		 * rule and can be stolen.
		 */
		if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
		    !ss->implicit_on_dfl)
			return -EBUSY;

		/* can't move between two non-dummy roots either */
		if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
			return -EBUSY;
	} while_each_subsys_mask();

	do_each_subsys_mask(ss, ssid, ss_mask) {
		struct cgroup_root *src_root = ss->root;
		struct cgroup *scgrp = &src_root->cgrp;
		struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
		struct css_set *cset;

		WARN_ON(!css || cgroup_css(dcgrp, ss));

		/* disable from the source */
		src_root->subsys_mask &= ~(1 << ssid);
		WARN_ON(cgroup_apply_control(scgrp));
		cgroup_finalize_control(scgrp, 0);

		/* rebind */
		RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
		rcu_assign_pointer(dcgrp->subsys[ssid], css);
		ss->root = dst_root;
		css->cgroup = dcgrp;

		spin_lock_irq(&css_set_lock);
		hash_for_each(css_set_table, i, cset, hlist)
			list_move_tail(&cset->e_cset_node[ss->id],
				       &dcgrp->e_csets[ss->id]);
		spin_unlock_irq(&css_set_lock);

		/* default hierarchy doesn't enable controllers by default */
		dst_root->subsys_mask |= 1 << ssid;
		if (dst_root == &cgrp_dfl_root) {
			static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
		} else {
			dcgrp->subtree_control |= 1 << ssid;
			static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
		}

		ret = cgroup_apply_control(dcgrp);
		if (ret)
			pr_warn("partial failure to rebind %s controller (err=%d)\n",
				ss->name, ret);

		if (ss->bind)
			ss->bind(css);
	} while_each_subsys_mask();

	kernfs_activate(dcgrp->kn);
	return 0;
}
int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
		     struct kernfs_root *kf_root)
{
	int len = 0;
	char *buf = NULL;
	struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
	struct cgroup *ns_cgroup;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	spin_lock_irq(&css_set_lock);
	ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
	len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
	spin_unlock_irq(&css_set_lock);

	if (len >= PATH_MAX)
		len = -ERANGE;
	else if (len > 0) {
		seq_escape(sf, buf, " \t\n\\");
		len = 0;
	}
	kfree(buf);
	return len;
}

static int parse_cgroup_root_flags(char *data, unsigned int *root_flags)
{
	char *token;

	*root_flags = 0;

	if (!data)
		return 0;

	while ((token = strsep(&data, ",")) != NULL) {
		if (!strcmp(token, "nsdelegate")) {
			*root_flags |= CGRP_ROOT_NS_DELEGATE;
			continue;
		}

		pr_err("cgroup2: unknown option \"%s\"\n", token);
		return -EINVAL;
	}

	return 0;
}

static void apply_cgroup_root_flags(unsigned int root_flags)
{
	if (current->nsproxy->cgroup_ns == &init_cgroup_ns) {
		if (root_flags & CGRP_ROOT_NS_DELEGATE)
			cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
		else
			cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
	}
}

static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
{
	if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
		seq_puts(seq, ",nsdelegate");
	return 0;
}

static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
{
	unsigned int root_flags;
	int ret;

	ret = parse_cgroup_root_flags(data, &root_flags);
	if (ret)
		return ret;

	apply_cgroup_root_flags(root_flags);
	return 0;
}
/*
 * To reduce the fork() overhead for systems that are not actually using
 * their cgroups capability, we don't maintain the lists running through
 * each css_set to its tasks until we see the list actually used - in other
 * words after the first mount.
 */
static bool use_task_css_set_links __read_mostly;

static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;

	spin_lock_irq(&css_set_lock);

	if (use_task_css_set_links)
		goto out_unlock;

	use_task_css_set_links = true;

	/*
	 * We need tasklist_lock because RCU is not safe against
	 * while_each_thread().  Besides, a forking task that has passed
	 * cgroup_post_fork() without seeing use_task_css_set_links = 1
	 * is not guaranteed to have its child immediately visible in the
	 * tasklist if we walk through it with RCU.
	 */
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		WARN_ON_ONCE(!list_empty(&p->cg_list) ||
			     task_css_set(p) != &init_css_set);

		/*
		 * We should check if the process is exiting, otherwise
		 * it will race with cgroup_exit() in that the list
		 * entry won't be deleted though the process has exited.
		 * Do it while holding siglock so that we don't end up
		 * racing against cgroup_exit().
		 *
		 * Interrupts were already disabled while acquiring
		 * the css_set_lock, so we do not need to disable them
		 * again when acquiring the sighand->siglock here.
		 */
		spin_lock(&p->sighand->siglock);
		if (!(p->flags & PF_EXITING)) {
			struct css_set *cset = task_css_set(p);

			if (!css_set_populated(cset))
				css_set_update_populated(cset, true);
			list_add_tail(&p->cg_list, &cset->tasks);
			get_css_set(cset);
			cset->nr_tasks++;
		}
		spin_unlock(&p->sighand->siglock);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
out_unlock:
	spin_unlock_irq(&css_set_lock);
}
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	int ssid;

	INIT_LIST_HEAD(&cgrp->self.sibling);
	INIT_LIST_HEAD(&cgrp->self.children);
	INIT_LIST_HEAD(&cgrp->cset_links);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	cgrp->self.cgroup = cgrp;
	cgrp->self.flags |= CSS_ONLINE;
	cgrp->dom_cgrp = cgrp;
	cgrp->max_descendants = INT_MAX;
	cgrp->max_depth = INT_MAX;

	for_each_subsys(ss, ssid)
		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);

	init_waitqueue_head(&cgrp->offline_waitq);
	INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
}

void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
{
	struct cgroup *cgrp = &root->cgrp;

	INIT_LIST_HEAD(&root->root_list);
	atomic_set(&root->nr_cgrps, 1);
	cgrp->root = root;
	init_cgroup_housekeeping(cgrp);
	idr_init(&root->cgroup_idr);

	root->flags = opts->flags;
	if (opts->release_agent)
		strcpy(root->release_agent_path, opts->release_agent);
	if (opts->name)
		strcpy(root->name, opts->name);
	if (opts->cpuset_clone_children)
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
}
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
{
	LIST_HEAD(tmp_links);
	struct cgroup *root_cgrp = &root->cgrp;
	struct kernfs_syscall_ops *kf_sops;
	struct css_set *cset;
	int i, ret;

	lockdep_assert_held(&cgroup_mutex);

	ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
	if (ret < 0)
		goto out;
	root_cgrp->id = ret;
	root_cgrp->ancestor_ids[0] = ret;

	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
			      ref_flags, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We're accessing css_set_count without locking css_set_lock here,
	 * but that's OK - it can only be increased by someone holding
	 * cgroup_lock, and that's us.  Later rebinding may disable
	 * controllers on the default hierarchy and thus create new csets,
	 * which can't be more than the existing ones.  Allocate 2x.
	 */
	ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
	if (ret)
		goto cancel_ref;

	ret = cgroup_init_root_id(root);
	if (ret)
		goto cancel_ref;

	kf_sops = root == &cgrp_dfl_root ?
		&cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops;

	root->kf_root = kernfs_create_root(kf_sops,
					   KERNFS_ROOT_CREATE_DEACTIVATED |
					   KERNFS_ROOT_SUPPORT_EXPORTOP,
					   root_cgrp);
	if (IS_ERR(root->kf_root)) {
		ret = PTR_ERR(root->kf_root);
		goto exit_root_id;
	}
	root_cgrp->kn = root->kf_root->kn;

	ret = css_populate_dir(&root_cgrp->self);
	if (ret)
		goto destroy_root;

	ret = rebind_subsystems(root, ss_mask);
	if (ret)
		goto destroy_root;

	trace_cgroup_setup_root(root);

	/*
	 * There must be no failure case after here, since rebinding takes
	 * care of subsystems' refcounts, which are explicitly dropped in
	 * the failure exit path.
	 */
	list_add(&root->root_list, &cgroup_roots);
	cgroup_root_count++;

	/*
	 * Link the root cgroup in this hierarchy into all the css_set
	 * objects.
	 */
	spin_lock_irq(&css_set_lock);
	hash_for_each(css_set_table, i, cset, hlist) {
		link_css_set(&tmp_links, cset, root_cgrp);
		if (css_set_populated(cset))
			cgroup_update_populated(root_cgrp, true);
	}
	spin_unlock_irq(&css_set_lock);

	BUG_ON(!list_empty(&root_cgrp->self.children));
	BUG_ON(atomic_read(&root->nr_cgrps) != 1);

	kernfs_activate(root_cgrp->kn);
	ret = 0;
	goto out;

destroy_root:
	kernfs_destroy_root(root->kf_root);
	root->kf_root = NULL;
exit_root_id:
	cgroup_exit_root_id(root);
cancel_ref:
	percpu_ref_exit(&root_cgrp->self.refcnt);
out:
	free_cgrp_cset_links(&tmp_links);
	return ret;
}
struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
			       struct cgroup_root *root, unsigned long magic,
			       struct cgroup_namespace *ns)
{
	struct dentry *dentry;
	bool new_sb;

	dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb);

	/*
	 * In non-init cgroup namespace, instead of root cgroup's dentry,
	 * we return the dentry corresponding to the cgroupns->root_cgrp.
	 */
	if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
		struct dentry *nsdentry;
		struct cgroup *cgrp;

		mutex_lock(&cgroup_mutex);
		spin_lock_irq(&css_set_lock);

		cgrp = cset_cgroup_from_root(ns->root_cset, root);

		spin_unlock_irq(&css_set_lock);
		mutex_unlock(&cgroup_mutex);

		nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
		dput(dentry);
		dentry = nsdentry;
	}

	if (IS_ERR(dentry) || !new_sb)
		cgroup_put(&root->cgrp);

	return dentry;
}
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
				   int flags, const char *unused_dev_name,
				   void *data)
{
	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
	struct dentry *dentry;
	int ret;

	get_cgroup_ns(ns);

	/* Check if the caller has permission to mount. */
	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
		put_cgroup_ns(ns);
		return ERR_PTR(-EPERM);
	}

	/*
	 * The first time anyone tries to mount a cgroup, enable the list
	 * linking each css_set to its tasks and fix up all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	if (fs_type == &cgroup2_fs_type) {
		unsigned int root_flags;

		ret = parse_cgroup_root_flags(data, &root_flags);
		if (ret) {
			put_cgroup_ns(ns);
			return ERR_PTR(ret);
		}

		cgrp_dfl_visible = true;
		cgroup_get_live(&cgrp_dfl_root.cgrp);

		dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
					 CGROUP2_SUPER_MAGIC, ns);
		if (!IS_ERR(dentry))
			apply_cgroup_root_flags(root_flags);
	} else {
		dentry = cgroup1_mount(&cgroup_fs_type, flags, data,
				       CGROUP_SUPER_MAGIC, ns);
	}

	put_cgroup_ns(ns);
	return dentry;
}
static void cgroup_kill_sb(struct super_block *sb)
{
	struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
	struct cgroup_root *root = cgroup_root_from_kf(kf_root);

	/*
	 * If @root doesn't have any children, start killing it.
	 * This prevents new mounts by disabling percpu_ref_tryget_live().
	 *
	 * And don't kill the default root.
	 */
	if (!list_empty(&root->cgrp.self.children) ||
	    root == &cgrp_dfl_root)
		cgroup_put(&root->cgrp);
	else
		percpu_ref_kill(&root->cgrp.self.refcnt);

	kernfs_kill_sb(sb);
}

struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
	.fs_flags = FS_USERNS_MOUNT,
};

static struct file_system_type cgroup2_fs_type = {
	.name = "cgroup2",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
	.fs_flags = FS_USERNS_MOUNT,
};
int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
			  struct cgroup_namespace *ns)
{
	struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);

	return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
}

int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
		   struct cgroup_namespace *ns)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(cgroup_path_ns);
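/*
 * Illustrative sketch (not from the original source): a caller that wants
 * a cgroup's path relative to the init cgroup namespace could do
 *
 *	char *buf = kmalloc(PATH_MAX, GFP_KERNEL);
 *	int ret;
 *
 *	if (buf) {
 *		ret = cgroup_path_ns(cgrp, buf, PATH_MAX, &init_cgroup_ns);
 *		if (ret >= 0 && ret < PATH_MAX)
 *			pr_info("cgroup at %s\n", buf);
 *		kfree(buf);
 *	}
 *
 * The helper takes cgroup_mutex and css_set_lock internally, so it must
 * not be called from contexts already holding either lock.
 */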
/**
 * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
 * @task: target task
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
 * Determine @task's cgroup on the first (the one with the lowest non-zero
 * hierarchy_id) cgroup hierarchy and copy its path into @buf.  This
 * function grabs cgroup_mutex and shouldn't be used inside locks used by
 * cgroup controller callbacks.
 *
 * Return value is the same as kernfs_path().
 */
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
	struct cgroup_root *root;
	struct cgroup *cgrp;
	int hierarchy_id = 1;
	int ret;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);

	if (root) {
		cgrp = task_cgroup_from_root(task, root);
		ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
	} else {
		/* if no hierarchy exists, everyone is in "/" */
		ret = strlcpy(buf, "/", buflen);
	}

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);
/**
 * cgroup_migrate_add_task - add a migration target task to a migration context
 * @task: target task
 * @mgctx: target migration context
 *
 * Add @task, which is a migration target, to @mgctx->tset.  This function
 * becomes noop if @task doesn't need to be migrated.  @task's css_set
 * should have been added as a migration source and @task->cg_list will be
 * moved from the css_set's tasks list to mg_tasks one.
 */
static void cgroup_migrate_add_task(struct task_struct *task,
				    struct cgroup_mgctx *mgctx)
{
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	/* @task either already exited or can't exit until the end */
	if (task->flags & PF_EXITING)
		return;

	/* leave @task alone if post_fork() hasn't linked it yet */
	if (list_empty(&task->cg_list))
		return;

	cset = task_css_set(task);
	if (!cset->mg_src_cgrp)
		return;

	mgctx->tset.nr_tasks++;

	list_move_tail(&task->cg_list, &cset->mg_tasks);
	if (list_empty(&cset->mg_node))
		list_add_tail(&cset->mg_node,
			      &mgctx->tset.src_csets);
	if (list_empty(&cset->mg_dst_cset->mg_node))
		list_add_tail(&cset->mg_dst_cset->mg_node,
			      &mgctx->tset.dst_csets);
}
/**
 * cgroup_taskset_first - reset taskset and return the first task
 * @tset: taskset of interest
 * @dst_cssp: output variable for the destination css
 *
 * @tset iteration is initialized and the first task is returned.
 */
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
					 struct cgroup_subsys_state **dst_cssp)
{
	tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
	tset->cur_task = NULL;

	return cgroup_taskset_next(tset, dst_cssp);
}

/**
 * cgroup_taskset_next - iterate to the next task in taskset
 * @tset: taskset of interest
 * @dst_cssp: output variable for the destination css
 *
 * Return the next task in @tset.  Iteration must have been initialized
 * with cgroup_taskset_first().
 */
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
					struct cgroup_subsys_state **dst_cssp)
{
	struct css_set *cset = tset->cur_cset;
	struct task_struct *task = tset->cur_task;

	while (&cset->mg_node != tset->csets) {
		if (!task)
			task = list_first_entry(&cset->mg_tasks,
						struct task_struct, cg_list);
		else
			task = list_next_entry(task, cg_list);

		if (&task->cg_list != &cset->mg_tasks) {
			tset->cur_cset = cset;
			tset->cur_task = task;

			/*
			 * This function may be called both before and
			 * after cgroup_migrate_execute().  The two cases
			 * can be distinguished by looking at whether @cset
			 * has its ->mg_dst_cset set.
			 */
			if (cset->mg_dst_cset)
				*dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
			else
				*dst_cssp = cset->subsys[tset->ssid];

			return task;
		}

		cset = list_next_entry(cset, mg_node);
		task = NULL;
	}

	return NULL;
}
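/*
 * Illustrative sketch (not from the original source): a controller's
 * ->attach() callback typically walks the taskset like this:
 *
 *	static void my_attach(struct cgroup_taskset *tset)
 *	{
 *		struct task_struct *task;
 *		struct cgroup_subsys_state *css;
 *
 *		cgroup_taskset_for_each(task, css, tset)
 *			pr_info("moving %d\n", task_pid_nr(task));
 *	}
 *
 * cgroup_taskset_for_each() is a macro wrapping the first/next pair above.
 */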
/**
 * cgroup_migrate_execute - migrate a taskset
 * @mgctx: migration context
 *
 * Migrate tasks in @mgctx as setup by migration preparation functions.
 * This function fails iff one of the ->can_attach callbacks fails and
 * guarantees that either all or none of the tasks in @mgctx are migrated.
 * @mgctx is consumed regardless of success or failure.
 */
static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
{
	struct cgroup_taskset *tset = &mgctx->tset;
	struct cgroup_subsys *ss;
	struct task_struct *task, *tmp_task;
	struct css_set *cset, *tmp_cset;
	int ssid, failed_ssid, ret;

	/* check that we can legitimately attach to the cgroup */
	if (tset->nr_tasks) {
		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
			if (ss->can_attach) {
				tset->ssid = ssid;
				ret = ss->can_attach(tset);
				if (ret) {
					failed_ssid = ssid;
					goto out_cancel_attach;
				}
			}
		} while_each_subsys_mask();
	}

	/*
	 * Now that we're guaranteed success, proceed to move all tasks to
	 * the new cgroup.  There are no failure cases after here, so this
	 * is the commit point.
	 */
	spin_lock_irq(&css_set_lock);
	list_for_each_entry(cset, &tset->src_csets, mg_node) {
		list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
			struct css_set *from_cset = task_css_set(task);
			struct css_set *to_cset = cset->mg_dst_cset;

			get_css_set(to_cset);
			to_cset->nr_tasks++;
			css_set_move_task(task, from_cset, to_cset, true);
			put_css_set_locked(from_cset);
			from_cset->nr_tasks--;
		}
	}
	spin_unlock_irq(&css_set_lock);

	/*
	 * Migration is committed, all target tasks are now on dst_csets.
	 * Nothing is sensitive to fork() after this point.  Notify
	 * controllers that migration is complete.
	 */
	tset->csets = &tset->dst_csets;

	if (tset->nr_tasks) {
		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
			if (ss->attach) {
				tset->ssid = ssid;
				ss->attach(tset);
			}
		} while_each_subsys_mask();
	}

	ret = 0;
	goto out_release_tset;

out_cancel_attach:
	if (tset->nr_tasks) {
		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
			if (ssid == failed_ssid)
				break;
			if (ss->cancel_attach) {
				tset->ssid = ssid;
				ss->cancel_attach(tset);
			}
		} while_each_subsys_mask();
	}
out_release_tset:
	spin_lock_irq(&css_set_lock);
	list_splice_init(&tset->dst_csets, &tset->src_csets);
	list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
		list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
		list_del_init(&cset->mg_node);
	}
	spin_unlock_irq(&css_set_lock);

	/*
	 * Re-initialize the cgroup_taskset structure in case it is reused
	 * again in another cgroup_migrate_add_task()/cgroup_migrate_execute()
	 * sequence.
	 */
	tset->nr_tasks = 0;
	tset->csets = &tset->src_csets;
	return ret;
}
/**
 * cgroup_migrate_vet_dst - verify whether a cgroup can be migration destination
 * @dst_cgrp: destination cgroup to test
 *
 * On the default hierarchy, except for the mixable, (possible) thread root
 * and threaded cgroups, subtree_control must be zero for migration
 * destination cgroups with tasks so that child cgroups don't compete
 * against tasks.
 */
int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
{
	/* v1 doesn't have any restriction */
	if (!cgroup_on_dfl(dst_cgrp))
		return 0;

	/* verify @dst_cgrp can host resources */
	if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
		return -EOPNOTSUPP;

	/* mixables don't care */
	if (cgroup_is_mixable(dst_cgrp))
		return 0;

	/*
	 * If @dst_cgrp is already or can become a thread root or is
	 * threaded, it doesn't matter.
	 */
	if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp))
		return 0;

	/* apply no-internal-process constraint */
	if (dst_cgrp->subtree_control)
		return -EBUSY;

	return 0;
}
/**
 * cgroup_migrate_finish - cleanup after attach
 * @mgctx: migration context
 *
 * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst().  See
 * those functions for details.
 */
void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
{
	LIST_HEAD(preloaded);
	struct css_set *cset, *tmp_cset;

	lockdep_assert_held(&cgroup_mutex);

	spin_lock_irq(&css_set_lock);

	list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
	list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);

	list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
		cset->mg_src_cgrp = NULL;
		cset->mg_dst_cgrp = NULL;
		cset->mg_dst_cset = NULL;
		list_del_init(&cset->mg_preload_node);
		put_css_set_locked(cset);
	}

	spin_unlock_irq(&css_set_lock);
}
/**
 * cgroup_migrate_add_src - add a migration source css_set
 * @src_cset: the source css_set to add
 * @dst_cgrp: the destination cgroup
 * @mgctx: migration context
 *
 * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp.  Pin
 * @src_cset and add it to @mgctx->preloaded_src_csets, which should later
 * be cleaned up by cgroup_migrate_finish().
 *
 * This function may be called without holding cgroup_threadgroup_rwsem
 * even if the target is a process.  Threads may be created and destroyed
 * but as long as cgroup_mutex is not dropped, no new css_set can be put
 * into play and the preloaded css_sets are guaranteed to cover all
 * migrations.
 */
void cgroup_migrate_add_src(struct css_set *src_cset,
			    struct cgroup *dst_cgrp,
			    struct cgroup_mgctx *mgctx)
{
	struct cgroup *src_cgrp;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&css_set_lock);

	/*
	 * If ->dead, @src_set is associated with one or more dead cgroups
	 * and doesn't contain any migratable tasks.  Ignore it early so
	 * that the rest of migration path doesn't get confused by it.
	 */
	if (src_cset->dead)
		return;

	src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);

	if (!list_empty(&src_cset->mg_preload_node))
		return;

	WARN_ON(src_cset->mg_src_cgrp);
	WARN_ON(src_cset->mg_dst_cgrp);
	WARN_ON(!list_empty(&src_cset->mg_tasks));
	WARN_ON(!list_empty(&src_cset->mg_node));

	src_cset->mg_src_cgrp = src_cgrp;
	src_cset->mg_dst_cgrp = dst_cgrp;
	get_css_set(src_cset);
	list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
}
/**
 * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
 * @mgctx: migration context
 *
 * Tasks are about to be moved and all the source css_sets have been
 * preloaded to @mgctx->preloaded_src_csets.  This function looks up and
 * pins all destination css_sets, links each to its source, and appends
 * them to @mgctx->preloaded_dst_csets.
 *
 * This function must be called after cgroup_migrate_add_src() has been
 * called on each migration source css_set.  After migration is performed
 * using cgroup_migrate(), cgroup_migrate_finish() must be called on
 * @mgctx.
 */
int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
{
	struct css_set *src_cset, *tmp_cset;

	lockdep_assert_held(&cgroup_mutex);

	/* look up the dst cset for each src cset and link it to src */
	list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
				 mg_preload_node) {
		struct css_set *dst_cset;
		struct cgroup_subsys *ss;
		int ssid;

		dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
		if (!dst_cset)
			goto err;

		WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);

		/*
		 * If src cset equals dst, it's noop.  Drop the src.
		 * cgroup_migrate() will skip the cset too.  Note that we
		 * can't handle src == dst as some nodes are used by both.
		 */
		if (src_cset == dst_cset) {
			src_cset->mg_src_cgrp = NULL;
			src_cset->mg_dst_cgrp = NULL;
			list_del_init(&src_cset->mg_preload_node);
			put_css_set(src_cset);
			put_css_set(dst_cset);
			continue;
		}

		src_cset->mg_dst_cset = dst_cset;

		if (list_empty(&dst_cset->mg_preload_node))
			list_add_tail(&dst_cset->mg_preload_node,
				      &mgctx->preloaded_dst_csets);
		else
			put_css_set(dst_cset);

		for_each_subsys(ss, ssid)
			if (src_cset->subsys[ssid] != dst_cset->subsys[ssid])
				mgctx->ss_mask |= 1 << ssid;
	}

	return 0;
err:
	cgroup_migrate_finish(mgctx);
	return -ENOMEM;
}
/**
 * cgroup_migrate - migrate a process or task to a cgroup
 * @leader: the leader of the process or the task to migrate
 * @threadgroup: whether @leader points to the whole process or a single task
 * @mgctx: migration context
 *
 * Migrate a process or task denoted by @leader.  If migrating a process,
 * the caller must be holding cgroup_threadgroup_rwsem.  The caller is also
 * responsible for invoking cgroup_migrate_add_src() and
 * cgroup_migrate_prepare_dst() on the targets before invoking this
 * function and following up with cgroup_migrate_finish().
 *
 * As long as a controller's ->can_attach() doesn't fail, this function is
 * guaranteed to succeed.  This means that, excluding ->can_attach()
 * failure, when migrating multiple targets, the success or failure can be
 * decided for all targets by invoking cgroup_migrate_prepare_dst() before
 * actually starting migrating.
 */
int cgroup_migrate(struct task_struct *leader, bool threadgroup,
		   struct cgroup_mgctx *mgctx)
{
	struct task_struct *task;

	/*
	 * Prevent freeing of tasks while we take a snapshot.  Tasks that are
	 * already PF_EXITING could be freed from underneath us unless we
	 * take an rcu_read_lock.
	 */
	spin_lock_irq(&css_set_lock);
	rcu_read_lock();
	task = leader;
	do {
		cgroup_migrate_add_task(task, mgctx);
		if (!threadgroup)
			break;
	} while_each_thread(leader, task);
	rcu_read_unlock();
	spin_unlock_irq(&css_set_lock);

	return cgroup_migrate_execute(mgctx);
}
/**
 * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
 * @dst_cgrp: the cgroup to attach to
 * @leader: the task or the leader of the threadgroup to be attached
 * @threadgroup: attach the whole threadgroup?
 *
 * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
 */
int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
		       bool threadgroup)
{
	DEFINE_CGROUP_MGCTX(mgctx);
	struct task_struct *task;
	int ret;

	ret = cgroup_migrate_vet_dst(dst_cgrp);
	if (ret)
		return ret;

	/* look up all src csets */
	spin_lock_irq(&css_set_lock);
	rcu_read_lock();
	task = leader;
	do {
		cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
		if (!threadgroup)
			break;
	} while_each_thread(leader, task);
	rcu_read_unlock();
	spin_unlock_irq(&css_set_lock);

	/* prepare dst csets and commit */
	ret = cgroup_migrate_prepare_dst(&mgctx);
	if (!ret)
		ret = cgroup_migrate(leader, threadgroup, &mgctx);

	cgroup_migrate_finish(&mgctx);

	if (!ret)
		trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);

	return ret;
}
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
	__acquires(&cgroup_threadgroup_rwsem)
{
	struct task_struct *tsk;
	pid_t pid;

	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
		return ERR_PTR(-EINVAL);

	percpu_down_write(&cgroup_threadgroup_rwsem);

	rcu_read_lock();
	if (pid) {
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
			tsk = ERR_PTR(-ESRCH);
			goto out_unlock_threadgroup;
		}
	} else {
		tsk = current;
	}

	if (threadgroup)
		tsk = tsk->group_leader;

	/*
	 * kthreads may acquire PF_NO_SETAFFINITY during initialization.
	 * If userland migrates such a kthread to a non-root cgroup, it can
	 * become trapped in a cpuset, or RT kthread may be born in a
	 * cgroup with no rt_runtime allocated.  Just say no.
	 */
	if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
		tsk = ERR_PTR(-EINVAL);
		goto out_unlock_threadgroup;
	}

	get_task_struct(tsk);
	goto out_unlock_rcu;

out_unlock_threadgroup:
	percpu_up_write(&cgroup_threadgroup_rwsem);
out_unlock_rcu:
	rcu_read_unlock();
	return tsk;
}

void cgroup_procs_write_finish(struct task_struct *task)
	__releases(&cgroup_threadgroup_rwsem)
{
	struct cgroup_subsys *ss;
	int ssid;

	/* release reference from cgroup_procs_write_start() */
	put_task_struct(task);

	percpu_up_write(&cgroup_threadgroup_rwsem);
	for_each_subsys(ss, ssid)
		if (ss->post_attach)
			ss->post_attach();
}
static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
{
	struct cgroup_subsys *ss;
	bool printed = false;
	int ssid;

	do_each_subsys_mask(ss, ssid, ss_mask) {
		if (printed)
			seq_putc(seq, ' ');
		seq_printf(seq, "%s", ss->name);
		printed = true;
	} while_each_subsys_mask();
	if (printed)
		seq_putc(seq, '\n');
}

/* show controllers which are enabled from the parent */
static int cgroup_controllers_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;

	cgroup_print_ss_mask(seq, cgroup_control(cgrp));
	return 0;
}

/* show controllers which are enabled for a given cgroup's children */
static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;

	cgroup_print_ss_mask(seq, cgrp->subtree_control);
	return 0;
}
/**
 * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
 * @cgrp: root of the subtree to update csses for
 *
 * @cgrp's control masks have changed and its subtree's css associations
 * need to be updated accordingly.  This function looks up all css_sets
 * which are attached to the subtree, creates the matching updated css_sets
 * and migrates the tasks to the new ones.
 */
static int cgroup_update_dfl_csses(struct cgroup *cgrp)
{
	DEFINE_CGROUP_MGCTX(mgctx);
	struct cgroup_subsys_state *d_css;
	struct cgroup *dsct;
	struct css_set *src_cset;
	int ret;

	lockdep_assert_held(&cgroup_mutex);

	percpu_down_write(&cgroup_threadgroup_rwsem);

	/* look up all csses currently attached to @cgrp's subtree */
	spin_lock_irq(&css_set_lock);
	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &dsct->cset_links, cset_link)
			cgroup_migrate_add_src(link->cset, dsct, &mgctx);
	}
	spin_unlock_irq(&css_set_lock);

	/* NULL dst indicates self on default hierarchy */
	ret = cgroup_migrate_prepare_dst(&mgctx);
	if (ret)
		goto out_finish;

	spin_lock_irq(&css_set_lock);
	list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
		struct task_struct *task, *ntask;

		/* all tasks in src_csets need to be migrated */
		list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
			cgroup_migrate_add_task(task, &mgctx);
	}
	spin_unlock_irq(&css_set_lock);

	ret = cgroup_migrate_execute(&mgctx);
out_finish:
	cgroup_migrate_finish(&mgctx);
	percpu_up_write(&cgroup_threadgroup_rwsem);
	return ret;
}
/**
 * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
 * @cgrp: root of the target subtree
 *
 * Because css offlining is asynchronous, userland may try to re-enable a
 * controller while the previous css is still around.  This function grabs
 * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
 */
void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
	__acquires(&cgroup_mutex)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;
	struct cgroup_subsys *ss;
	int ssid;

restart:
	mutex_lock(&cgroup_mutex);

	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
		for_each_subsys(ss, ssid) {
			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
			DEFINE_WAIT(wait);

			if (!css || !percpu_ref_is_dying(&css->refcnt))
				continue;

			cgroup_get_live(dsct);
			prepare_to_wait(&dsct->offline_waitq, &wait,
					TASK_UNINTERRUPTIBLE);

			mutex_unlock(&cgroup_mutex);
			schedule();
			finish_wait(&dsct->offline_waitq, &wait);

			cgroup_put(dsct);
			goto restart;
		}
	}
}
/**
 * cgroup_save_control - save control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * Save ->subtree_control and ->subtree_ss_mask to the respective old_
 * prefixed fields for @cgrp's subtree including @cgrp itself.
 */
static void cgroup_save_control(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;

	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		dsct->old_subtree_control = dsct->subtree_control;
		dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
	}
}

/**
 * cgroup_propagate_control - refresh control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * For @cgrp and its subtree, ensure ->subtree_ss_mask matches
 * ->subtree_control and propagate controller availability through the
 * subtree so that descendants don't have unavailable controllers enabled.
 */
static void cgroup_propagate_control(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;

	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		dsct->subtree_control &= cgroup_control(dsct);
		dsct->subtree_ss_mask =
			cgroup_calc_subtree_ss_mask(dsct->subtree_control,
						    cgroup_ss_mask(dsct));
	}
}

/**
 * cgroup_restore_control - restore control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * Restore ->subtree_control and ->subtree_ss_mask from the respective old_
 * prefixed fields for @cgrp's subtree including @cgrp itself.
 */
static void cgroup_restore_control(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;

	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
		dsct->subtree_control = dsct->old_subtree_control;
		dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
	}
}

static bool css_visible(struct cgroup_subsys_state *css)
{
	struct cgroup_subsys *ss = css->ss;
	struct cgroup *cgrp = css->cgroup;

	if (cgroup_control(cgrp) & (1 << ss->id))
		return true;
	if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
		return false;
	return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
}
/**
 * cgroup_apply_control_enable - enable or show csses according to control
 * @cgrp: root of the target subtree
 *
 * Walk @cgrp's subtree and create new csses or make the existing ones
 * visible.  A css is created invisible if it's being implicitly enabled
 * through dependency.  An invisible css is made visible when the userland
 * explicitly enables it.
 *
 * Returns 0 on success, -errno on failure.  On failure, csses which have
 * been processed already aren't cleaned up.  The caller is responsible for
 * cleaning up with cgroup_apply_control_disable().
 */
static int cgroup_apply_control_enable(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;
	struct cgroup_subsys *ss;
	int ssid, ret;

	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		for_each_subsys(ss, ssid) {
			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);

			WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));

			if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
				continue;

			if (!css) {
				css = css_create(dsct, ss);
				if (IS_ERR(css))
					return PTR_ERR(css);
			}

			if (css_visible(css)) {
				ret = css_populate_dir(css);
				if (ret)
					return ret;
			}
		}
	}

	return 0;
}
2896
/**
 * cgroup_apply_control_disable - kill or hide csses according to the masks
 * @cgrp: root of the target subtree
 *
 * Walk @cgrp's subtree and kill and hide csses so that they match
 * cgroup_ss_mask() and the visibility rules of css_visible().
 *
 * A css is hidden when the userland requests it to be disabled while other
 * subsystems are still depending on it.  The css must not actively control
 * resources and be in the vanilla state if it's made visible again later.
 * Controllers which may be depended upon should provide ->css_reset() for
 * this purpose.
 */
2910static void cgroup_apply_control_disable(struct cgroup *cgrp)
2911{
2912 struct cgroup *dsct;
2913 struct cgroup_subsys_state *d_css;
2914 struct cgroup_subsys *ss;
2915 int ssid;
2916
2917 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
2918 for_each_subsys(ss, ssid) {
2919 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
2920
2921 WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
2922
2923 if (!css)
2924 continue;
2925
2926 if (css->parent &&
2927 !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
2928 kill_css(css);
2929 } else if (!css_visible(css)) {
2930 css_clear_dir(css);
2931 if (ss->css_reset)
2932 ss->css_reset(css);
2933 }
2934 }
2935 }
2936}
2937
/**
 * cgroup_apply_control - apply control mask updates to the subtree
 * @cgrp: root of the target subtree
 *
 * subsystems can be enabled and disabled in a subtree using the following
 * steps.
 *
 * 1. Call cgroup_save_control() to stash the current state.
 * 2. Update ->subtree_control masks in the subtree as desired.
 * 3. Call cgroup_apply_control() to apply the changes.
 * 4. Optionally perform other related operations.
 * 5. Call cgroup_finalize_control() to finish up.
 *
 * This function implements step 3 and propagates the mask changes
 * throughout @cgrp's subtree, updates csses accordingly and performs
 * process migrations.
 */
2955static int cgroup_apply_control(struct cgroup *cgrp)
2956{
2957 int ret;
2958
2959 cgroup_propagate_control(cgrp);
2960
2961 ret = cgroup_apply_control_enable(cgrp);
2962 if (ret)
2963 return ret;
2964
	/*
	 * At this point, cgroup_e_css() results reflect the new csses
	 * making the following cgroup_update_dfl_csses() properly update
	 * css associations of all tasks in the subtree.
	 */
2970 ret = cgroup_update_dfl_csses(cgrp);
2971 if (ret)
2972 return ret;
2973
2974 return 0;
2975}
2976
/**
 * cgroup_finalize_control - finalize control mask update
 * @cgrp: root of the target subtree
 * @ret: the result of the update
 *
 * Finalize the control mask update.  See cgroup_apply_control() for more
 * info.
 */
2984static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
2985{
2986 if (ret) {
2987 cgroup_restore_control(cgrp);
2988 cgroup_propagate_control(cgrp);
2989 }
2990
2991 cgroup_apply_control_disable(cgrp);
2992}
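
/*
 * Editorial sketch (not in the original source): the helpers above are
 * designed to be used as a bracket pair around a control-mask update.
 * "new_mask" below is a hypothetical value; cgroup_subtree_control_write()
 * later in this file follows exactly this shape:
 *
 *	cgroup_save_control(cgrp);
 *	cgrp->subtree_control = new_mask;
 *	ret = cgroup_apply_control(cgrp);
 *	cgroup_finalize_control(cgrp, ret);
 *
 * On failure, cgroup_finalize_control() restores the saved masks and
 * re-propagates them before disabling whatever shouldn't stay enabled.
 */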
2993
2994static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable)
2995{
2996 u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask;
2997
	/* if nothing is getting enabled, nothing to worry about */
2999 if (!enable)
3000 return 0;
3001
	/* can @cgrp host any resources? */
3003 if (!cgroup_is_valid_domain(cgrp->dom_cgrp))
3004 return -EOPNOTSUPP;
3005
	/* mixables don't care */
3007 if (cgroup_is_mixable(cgrp))
3008 return 0;
3009
3010 if (domain_enable) {
		/* can't enable domain controllers inside a thread subtree */
3012 if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp))
3013 return -EOPNOTSUPP;
3014 } else {
		/*
		 * Threaded controllers can handle internal competitions
		 * and are always allowed inside a (prospective) thread
		 * subtree.
		 */
3020 if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp))
3021 return 0;
3022 }
3023
	/*
	 * Controllers can't be enabled for a cgroup with tasks to avoid
	 * child cgroups competing against tasks.
	 */
3028 if (cgroup_has_tasks(cgrp))
3029 return -EBUSY;
3030
3031 return 0;
3032}
3033
/* change the enabled child controllers for a cgroup in the default hierarchy */
3035static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
3036 char *buf, size_t nbytes,
3037 loff_t off)
3038{
3039 u16 enable = 0, disable = 0;
3040 struct cgroup *cgrp, *child;
3041 struct cgroup_subsys *ss;
3042 char *tok;
3043 int ssid, ret;
3044
	/*
	 * Parse input - space separated list of subsystem names prefixed
	 * with either + or -.
	 */
3049 buf = strstrip(buf);
3050 while ((tok = strsep(&buf, " "))) {
3051 if (tok[0] == '\0')
3052 continue;
3053 do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
3054 if (!cgroup_ssid_enabled(ssid) ||
3055 strcmp(tok + 1, ss->name))
3056 continue;
3057
3058 if (*tok == '+') {
3059 enable |= 1 << ssid;
3060 disable &= ~(1 << ssid);
3061 } else if (*tok == '-') {
3062 disable |= 1 << ssid;
3063 enable &= ~(1 << ssid);
3064 } else {
3065 return -EINVAL;
3066 }
3067 break;
3068 } while_each_subsys_mask();
3069 if (ssid == CGROUP_SUBSYS_COUNT)
3070 return -EINVAL;
3071 }
3072
3073 cgrp = cgroup_kn_lock_live(of->kn, true);
3074 if (!cgrp)
3075 return -ENODEV;
3076
3077 for_each_subsys(ss, ssid) {
3078 if (enable & (1 << ssid)) {
3079 if (cgrp->subtree_control & (1 << ssid)) {
3080 enable &= ~(1 << ssid);
3081 continue;
3082 }
			/* unavailable or not enabled on the parent? */
3084 if (!(cgroup_control(cgrp) & (1 << ssid))) {
3085 ret = -ENOENT;
3086 goto out_unlock;
3087 }
3088 } else if (disable & (1 << ssid)) {
3089 if (!(cgrp->subtree_control & (1 << ssid))) {
3090 disable &= ~(1 << ssid);
3091 continue;
3092 }
3093
			/* a child has it enabled? */
3095 cgroup_for_each_live_child(child, cgrp) {
3096 if (child->subtree_control & (1 << ssid)) {
3097 ret = -EBUSY;
3098 goto out_unlock;
3099 }
3100 }
3101 }
3102 }
3103
3104 if (!enable && !disable) {
3105 ret = 0;
3106 goto out_unlock;
3107 }
3108
3109 ret = cgroup_vet_subtree_control_enable(cgrp, enable);
3110 if (ret)
3111 goto out_unlock;
3112
	/* save and update control masks and prepare csses */
3114 cgroup_save_control(cgrp);
3115
3116 cgrp->subtree_control |= enable;
3117 cgrp->subtree_control &= ~disable;
3118
3119 ret = cgroup_apply_control(cgrp);
3120 cgroup_finalize_control(cgrp, ret);
3121 if (ret)
3122 goto out_unlock;
3123
3124 kernfs_activate(cgrp->kn);
3125out_unlock:
3126 cgroup_kn_unlock(of->kn);
3127 return ret ?: nbytes;
3128}
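
/*
 * Editorial usage sketch: cgroup.subtree_control takes space-separated
 * controller names, each prefixed with '+' (enable) or '-' (disable).
 * Assuming a cgroup2 mount at /sys/fs/cgroup (a common but not mandated
 * mount point):
 *
 *	# echo "+memory -pids" > /sys/fs/cgroup/parent/cgroup.subtree_control
 *
 * enables memory and disables pids distribution to parent's children.
 */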
3129
/**
 * cgroup_enable_threaded - make @cgrp threaded
 * @cgrp: the target cgroup
 *
 * Called when "threaded" is written to the cgroup.type interface file and
 * tries to make @cgrp threaded and join the parent's resource domain.
 * This function is never called on the root cgroup as cgroup.type doesn't
 * exist on it.
 */
3139static int cgroup_enable_threaded(struct cgroup *cgrp)
3140{
3141 struct cgroup *parent = cgroup_parent(cgrp);
3142 struct cgroup *dom_cgrp = parent->dom_cgrp;
3143 int ret;
3144
3145 lockdep_assert_held(&cgroup_mutex);
3146
	/* noop if already threaded */
3148 if (cgroup_is_threaded(cgrp))
3149 return 0;
3150
	/* we're joining the parent's domain, ensure its validity */
3152 if (!cgroup_is_valid_domain(dom_cgrp) ||
3153 !cgroup_can_be_thread_root(dom_cgrp))
3154 return -EOPNOTSUPP;
3155
	/*
	 * The following shouldn't cause actual migrations and should
	 * always succeed.
	 */
3160 cgroup_save_control(cgrp);
3161
3162 cgrp->dom_cgrp = dom_cgrp;
3163 ret = cgroup_apply_control(cgrp);
3164 if (!ret)
3165 parent->nr_threaded_children++;
3166 else
3167 cgrp->dom_cgrp = cgrp;
3168
3169 cgroup_finalize_control(cgrp, ret);
3170 return ret;
3171}
3172
3173static int cgroup_type_show(struct seq_file *seq, void *v)
3174{
3175 struct cgroup *cgrp = seq_css(seq)->cgroup;
3176
3177 if (cgroup_is_threaded(cgrp))
3178 seq_puts(seq, "threaded\n");
3179 else if (!cgroup_is_valid_domain(cgrp))
3180 seq_puts(seq, "domain invalid\n");
3181 else if (cgroup_is_thread_root(cgrp))
3182 seq_puts(seq, "domain threaded\n");
3183 else
3184 seq_puts(seq, "domain\n");
3185
3186 return 0;
3187}
3188
3189static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
3190 size_t nbytes, loff_t off)
3191{
3192 struct cgroup *cgrp;
3193 int ret;
3194
	/* only switching to threaded mode is allowed */
3196 if (strcmp(strstrip(buf), "threaded"))
3197 return -EINVAL;
3198
3199 cgrp = cgroup_kn_lock_live(of->kn, false);
3200 if (!cgrp)
3201 return -ENOENT;
3202
	/* threaded can only be enabled */
3204 ret = cgroup_enable_threaded(cgrp);
3205
3206 cgroup_kn_unlock(of->kn);
3207 return ret ?: nbytes;
3208}
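
/*
 * Usage sketch: the only accepted write is the literal string "threaded",
 * so a cgroup is switched with, e.g. (the path is illustrative):
 *
 *	# echo threaded > /sys/fs/cgroup/parent/child/cgroup.type
 *
 * There is no write that switches a cgroup back to a domain.
 */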
3209
3210static int cgroup_max_descendants_show(struct seq_file *seq, void *v)
3211{
3212 struct cgroup *cgrp = seq_css(seq)->cgroup;
3213 int descendants = READ_ONCE(cgrp->max_descendants);
3214
3215 if (descendants == INT_MAX)
3216 seq_puts(seq, "max\n");
3217 else
3218 seq_printf(seq, "%d\n", descendants);
3219
3220 return 0;
3221}
3222
3223static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of,
3224 char *buf, size_t nbytes, loff_t off)
3225{
3226 struct cgroup *cgrp;
3227 int descendants;
3228 ssize_t ret;
3229
3230 buf = strstrip(buf);
3231 if (!strcmp(buf, "max")) {
3232 descendants = INT_MAX;
3233 } else {
3234 ret = kstrtoint(buf, 0, &descendants);
3235 if (ret)
3236 return ret;
3237 }
3238
3239 if (descendants < 0)
3240 return -ERANGE;
3241
3242 cgrp = cgroup_kn_lock_live(of->kn, false);
3243 if (!cgrp)
3244 return -ENOENT;
3245
3246 cgrp->max_descendants = descendants;
3247
3248 cgroup_kn_unlock(of->kn);
3249
3250 return nbytes;
3251}
3252
3253static int cgroup_max_depth_show(struct seq_file *seq, void *v)
3254{
3255 struct cgroup *cgrp = seq_css(seq)->cgroup;
3256 int depth = READ_ONCE(cgrp->max_depth);
3257
3258 if (depth == INT_MAX)
3259 seq_puts(seq, "max\n");
3260 else
3261 seq_printf(seq, "%d\n", depth);
3262
3263 return 0;
3264}
3265
3266static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
3267 char *buf, size_t nbytes, loff_t off)
3268{
3269 struct cgroup *cgrp;
3270 ssize_t ret;
3271 int depth;
3272
3273 buf = strstrip(buf);
3274 if (!strcmp(buf, "max")) {
3275 depth = INT_MAX;
3276 } else {
3277 ret = kstrtoint(buf, 0, &depth);
3278 if (ret)
3279 return ret;
3280 }
3281
3282 if (depth < 0)
3283 return -ERANGE;
3284
3285 cgrp = cgroup_kn_lock_live(of->kn, false);
3286 if (!cgrp)
3287 return -ENOENT;
3288
3289 cgrp->max_depth = depth;
3290
3291 cgroup_kn_unlock(of->kn);
3292
3293 return nbytes;
3294}
3295
3296static int cgroup_events_show(struct seq_file *seq, void *v)
3297{
3298 seq_printf(seq, "populated %d\n",
3299 cgroup_is_populated(seq_css(seq)->cgroup));
3300 return 0;
3301}
3302
3303static int cgroup_stat_show(struct seq_file *seq, void *v)
3304{
3305 struct cgroup *cgroup = seq_css(seq)->cgroup;
3306
3307 seq_printf(seq, "nr_descendants %d\n",
3308 cgroup->nr_descendants);
3309 seq_printf(seq, "nr_dying_descendants %d\n",
3310 cgroup->nr_dying_descendants);
3311
3312 return 0;
3313}
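
/*
 * Illustrative output of the two read-only files above (the values are
 * made up):
 *
 *	# cat cgroup.events
 *	populated 1
 *	# cat cgroup.stat
 *	nr_descendants 2
 *	nr_dying_descendants 0
 */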
3314
3315static int cgroup_file_open(struct kernfs_open_file *of)
3316{
3317 struct cftype *cft = of->kn->priv;
3318
3319 if (cft->open)
3320 return cft->open(of);
3321 return 0;
3322}
3323
3324static void cgroup_file_release(struct kernfs_open_file *of)
3325{
3326 struct cftype *cft = of->kn->priv;
3327
3328 if (cft->release)
3329 cft->release(of);
3330}
3331
3332static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
3333 size_t nbytes, loff_t off)
3334{
3335 struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
3336 struct cgroup *cgrp = of->kn->parent->priv;
3337 struct cftype *cft = of->kn->priv;
3338 struct cgroup_subsys_state *css;
3339 int ret;
3340
	/*
	 * If namespaces are delegation boundaries, disallow writes to
	 * files in an non-init namespace root from inside the namespace
	 * except for the files explicitly marked delegatable -
	 * cgroup.procs and cgroup.subtree_control.
	 */
3347 if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
3348 !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
3349 ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
3350 return -EPERM;
3351
3352 if (cft->write)
3353 return cft->write(of, buf, nbytes, off);
3354
	/*
	 * kernfs guarantees that a file isn't deleted with operations in
	 * flight, which means that the matching css is and stays alive and
	 * doesn't need to be pinned.  The RCU locking is not necessary
	 * either.  It's just for the convenience of using cgroup_css().
	 */
3361 rcu_read_lock();
3362 css = cgroup_css(cgrp, cft->ss);
3363 rcu_read_unlock();
3364
3365 if (cft->write_u64) {
3366 unsigned long long v;
3367 ret = kstrtoull(buf, 0, &v);
3368 if (!ret)
3369 ret = cft->write_u64(css, cft, v);
3370 } else if (cft->write_s64) {
3371 long long v;
3372 ret = kstrtoll(buf, 0, &v);
3373 if (!ret)
3374 ret = cft->write_s64(css, cft, v);
3375 } else {
3376 ret = -EINVAL;
3377 }
3378
3379 return ret ?: nbytes;
3380}
3381
3382static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
3383{
3384 return seq_cft(seq)->seq_start(seq, ppos);
3385}
3386
3387static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
3388{
3389 return seq_cft(seq)->seq_next(seq, v, ppos);
3390}
3391
3392static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
3393{
3394 if (seq_cft(seq)->seq_stop)
3395 seq_cft(seq)->seq_stop(seq, v);
3396}
3397
3398static int cgroup_seqfile_show(struct seq_file *m, void *arg)
3399{
3400 struct cftype *cft = seq_cft(m);
3401 struct cgroup_subsys_state *css = seq_css(m);
3402
3403 if (cft->seq_show)
3404 return cft->seq_show(m, arg);
3405
3406 if (cft->read_u64)
3407 seq_printf(m, "%llu\n", cft->read_u64(css, cft));
3408 else if (cft->read_s64)
3409 seq_printf(m, "%lld\n", cft->read_s64(css, cft));
3410 else
3411 return -EINVAL;
3412 return 0;
3413}
3414
3415static struct kernfs_ops cgroup_kf_single_ops = {
3416 .atomic_write_len = PAGE_SIZE,
3417 .open = cgroup_file_open,
3418 .release = cgroup_file_release,
3419 .write = cgroup_file_write,
3420 .seq_show = cgroup_seqfile_show,
3421};
3422
3423static struct kernfs_ops cgroup_kf_ops = {
3424 .atomic_write_len = PAGE_SIZE,
3425 .open = cgroup_file_open,
3426 .release = cgroup_file_release,
3427 .write = cgroup_file_write,
3428 .seq_start = cgroup_seqfile_start,
3429 .seq_next = cgroup_seqfile_next,
3430 .seq_stop = cgroup_seqfile_stop,
3431 .seq_show = cgroup_seqfile_show,
3432};
3433
/* set uid and gid of cgroup dirs and files to that of the creator */
3435static int cgroup_kn_set_ugid(struct kernfs_node *kn)
3436{
3437 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
3438 .ia_uid = current_fsuid(),
3439 .ia_gid = current_fsgid(), };
3440
3441 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
3442 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
3443 return 0;
3444
3445 return kernfs_setattr(kn, &iattr);
3446}
3447
3448static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
3449 struct cftype *cft)
3450{
3451 char name[CGROUP_FILE_NAME_MAX];
3452 struct kernfs_node *kn;
3453 struct lock_class_key *key = NULL;
3454 int ret;
3455
3456#ifdef CONFIG_DEBUG_LOCK_ALLOC
3457 key = &cft->lockdep_key;
3458#endif
3459 kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
3460 cgroup_file_mode(cft), 0, cft->kf_ops, cft,
3461 NULL, key);
3462 if (IS_ERR(kn))
3463 return PTR_ERR(kn);
3464
3465 ret = cgroup_kn_set_ugid(kn);
3466 if (ret) {
3467 kernfs_remove(kn);
3468 return ret;
3469 }
3470
3471 if (cft->file_offset) {
3472 struct cgroup_file *cfile = (void *)css + cft->file_offset;
3473
3474 spin_lock_irq(&cgroup_file_kn_lock);
3475 cfile->kn = kn;
3476 spin_unlock_irq(&cgroup_file_kn_lock);
3477 }
3478
3479 return 0;
3480}
3481
/**
 * cgroup_addrm_files - add or remove files to a cgroup directory
 * @css: the target css
 * @cgrp: the target cgroup (usually css->cgroup)
 * @cfts: array of cftypes to be added
 * @is_add: whether to add or remove
 *
 * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
 * For removals, this function never fails.
 */
3492static int cgroup_addrm_files(struct cgroup_subsys_state *css,
3493 struct cgroup *cgrp, struct cftype cfts[],
3494 bool is_add)
3495{
3496 struct cftype *cft, *cft_end = NULL;
3497 int ret = 0;
3498
3499 lockdep_assert_held(&cgroup_mutex);
3500
3501restart:
3502 for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
		/* does cft->flags tell us to skip this file on @cgrp? */
3504 if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
3505 continue;
3506 if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
3507 continue;
3508 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
3509 continue;
3510 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
3511 continue;
3512
3513 if (is_add) {
3514 ret = cgroup_add_file(css, cgrp, cft);
3515 if (ret) {
3516 pr_warn("%s: failed to add %s, err=%d\n",
3517 __func__, cft->name, ret);
3518 cft_end = cft;
3519 is_add = false;
3520 goto restart;
3521 }
3522 } else {
3523 cgroup_rm_file(cgrp, cft);
3524 }
3525 }
3526 return ret;
3527}
3528
3529static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
3530{
3531 struct cgroup_subsys *ss = cfts[0].ss;
3532 struct cgroup *root = &ss->root->cgrp;
3533 struct cgroup_subsys_state *css;
3534 int ret = 0;
3535
3536 lockdep_assert_held(&cgroup_mutex);
3537
	/* add/rm files for all cgroups created before */
3539 css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
3540 struct cgroup *cgrp = css->cgroup;
3541
3542 if (!(css->flags & CSS_VISIBLE))
3543 continue;
3544
3545 ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
3546 if (ret)
3547 break;
3548 }
3549
3550 if (is_add && !ret)
3551 kernfs_activate(root->kn);
3552 return ret;
3553}
3554
3555static void cgroup_exit_cftypes(struct cftype *cfts)
3556{
3557 struct cftype *cft;
3558
3559 for (cft = cfts; cft->name[0] != '\0'; cft++) {
		/* free copy for custom atomic_write_len, see cgroup_init_cftypes() */
3561 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
3562 kfree(cft->kf_ops);
3563 cft->kf_ops = NULL;
3564 cft->ss = NULL;
3565
		/* revert flags set by cgroup core while adding @cfts */
3567 cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
3568 }
3569}
3570
3571static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3572{
3573 struct cftype *cft;
3574
3575 for (cft = cfts; cft->name[0] != '\0'; cft++) {
3576 struct kernfs_ops *kf_ops;
3577
3578 WARN_ON(cft->ss || cft->kf_ops);
3579
3580 if (cft->seq_start)
3581 kf_ops = &cgroup_kf_ops;
3582 else
3583 kf_ops = &cgroup_kf_single_ops;
3584
		/*
		 * Ugh... if @cft wants a custom max_write_len, we need to
		 * make a copy of kf_ops to set its atomic_write_len.
		 */
3589 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
3590 kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
3591 if (!kf_ops) {
3592 cgroup_exit_cftypes(cfts);
3593 return -ENOMEM;
3594 }
3595 kf_ops->atomic_write_len = cft->max_write_len;
3596 }
3597
3598 cft->kf_ops = kf_ops;
3599 cft->ss = ss;
3600 }
3601
3602 return 0;
3603}
3604
3605static int cgroup_rm_cftypes_locked(struct cftype *cfts)
3606{
3607 lockdep_assert_held(&cgroup_mutex);
3608
3609 if (!cfts || !cfts[0].ss)
3610 return -ENOENT;
3611
3612 list_del(&cfts->node);
3613 cgroup_apply_cftypes(cfts, false);
3614 cgroup_exit_cftypes(cfts);
3615 return 0;
3616}
3617
/**
 * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Unregister @cfts.  Files described by @cfts are removed from all
 * existing cgroups and all future cgroups won't have them either.  This
 * function can be called anytime whether @cfts' subsys is attached or not.
 *
 * Returns 0 on successful unregistration, -ENOENT if @cfts is not
 * registered.
 */
3629int cgroup_rm_cftypes(struct cftype *cfts)
3630{
3631 int ret;
3632
3633 mutex_lock(&cgroup_mutex);
3634 ret = cgroup_rm_cftypes_locked(cfts);
3635 mutex_unlock(&cgroup_mutex);
3636 return ret;
3637}
3638
/**
 * cgroup_add_cftypes - add an array of cftypes to a subsystem
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Register @cfts to @ss.  Files described by @cfts are created for all
 * existing cgroups to which @ss is attached and all future cgroups will
 * have them too.  This function can be called anytime whether @ss is
 * attached or not.
 *
 * Returns 0 on successful registration, -errno on failure.  Note that this
 * function currently returns 0 as long as @cfts registration is successful
 * even if some file creation attempts on existing cgroups fail.
 */
3653static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3654{
3655 int ret;
3656
3657 if (!cgroup_ssid_enabled(ss->id))
3658 return 0;
3659
3660 if (!cfts || cfts[0].name[0] == '\0')
3661 return 0;
3662
3663 ret = cgroup_init_cftypes(ss, cfts);
3664 if (ret)
3665 return ret;
3666
3667 mutex_lock(&cgroup_mutex);
3668
3669 list_add_tail(&cfts->node, &ss->cfts);
3670 ret = cgroup_apply_cftypes(cfts, true);
3671 if (ret)
3672 cgroup_rm_cftypes_locked(cfts);
3673
3674 mutex_unlock(&cgroup_mutex);
3675 return ret;
3676}
3677
/**
 * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Similar to cgroup_add_cftypes() but the added files are only used for
 * the default hierarchy.
 */
3686int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3687{
3688 struct cftype *cft;
3689
3690 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
3691 cft->flags |= __CFTYPE_ONLY_ON_DFL;
3692 return cgroup_add_cftypes(ss, cfts);
3693}
3694
/**
 * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Similar to cgroup_add_cftypes() but the added files are only used for
 * the legacy hierarchies.
 */
3703int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3704{
3705 struct cftype *cft;
3706
3707 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
3708 cft->flags |= __CFTYPE_NOT_ON_DFL;
3709 return cgroup_add_cftypes(ss, cfts);
3710}
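
/*
 * Registration sketch (hypothetical controller "foo"; real controllers
 * follow the same pattern via their cgroup_subsys->dfl_cftypes and
 * ->legacy_cftypes pointers, which cgroup_init() feeds into the helpers
 * above):
 *
 *	static u64 foo_current_read(struct cgroup_subsys_state *css,
 *				    struct cftype *cft)
 *	{
 *		return 0;	// would report the controller's state
 *	}
 *
 *	static struct cftype foo_files[] = {
 *		{
 *			.name = "foo.current",
 *			.read_u64 = foo_current_read,
 *		},
 *		{ }	// zero-length name terminates the array
 *	};
 */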
3711
/**
 * cgroup_file_notify - generate a file modified event for a cgroup_file
 * @cfile: target cgroup_file
 *
 * @cfile must have been obtained by setting cftype->file_offset.
 */
3718void cgroup_file_notify(struct cgroup_file *cfile)
3719{
3720 unsigned long flags;
3721
3722 spin_lock_irqsave(&cgroup_file_kn_lock, flags);
3723 if (cfile->kn)
3724 kernfs_notify(cfile->kn);
3725 spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
3726}
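
/*
 * Usage sketch: a controller which declared a struct cgroup_file via
 * cftype->file_offset can kick poll/inotify waiters when its state
 * changes.  "ctrl" and "events_file" are hypothetical names:
 *
 *	cgroup_file_notify(&ctrl->events_file);
 */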
3727
/**
 * css_next_child - find the next child of a given css
 * @pos: the current position (%NULL to initiate traversal)
 * @parent: css whose children to walk
 *
 * This function returns the next child of @parent and should be called
 * under either cgroup_mutex or RCU read lock.  The only requirement is
 * that @parent and @pos are accessible.  The next sibling is guaranteed to
 * be returned regardless of their states.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 */
3745struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
3746 struct cgroup_subsys_state *parent)
3747{
3748 struct cgroup_subsys_state *next;
3749
3750 cgroup_assert_mutex_or_rcu_locked();
3751
	/*
	 * @pos could already have been unlinked from the sibling list.
	 * Once a cgroup is removed, its ->sibling.next is no longer
	 * updated when its next sibling changes.  CSS_RELEASED is set when
	 * @pos is taken off list, at which time its next pointer is valid,
	 * and, as releases are serialized, the one pointed to by the next
	 * pointer is guaranteed to not have started release yet.  This
	 * implies that if we observe !CSS_RELEASED on @pos in this RCU
	 * critical section, the one pointed to by its next pointer is
	 * guaranteed to not have finished its RCU grace period even if we
	 * have dropped rcu_read_lock() in-between iterations.
	 *
	 * If @pos has CSS_RELEASED set, its next pointer can't be
	 * dereferenced; however, as each css is given a monotonically
	 * increasing unique serial number and implicitly enqueued at the
	 * end of the children list, @pos's next sibling can be found by
	 * walking the children list from the beginning looking for the
	 * first css with a serial number larger than @pos's.  While this
	 * path can be slower, it happens iff iteration races removals and
	 * is rare.
	 */
3772 if (!pos) {
3773 next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
3774 } else if (likely(!(pos->flags & CSS_RELEASED))) {
3775 next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
3776 } else {
3777 list_for_each_entry_rcu(next, &parent->children, sibling)
3778 if (next->serial_nr > pos->serial_nr)
3779 break;
3780 }
3781
	/*
	 * @next, if not pointing to the head, can be dereferenced and is
	 * the next sibling.
	 */
3786 if (&next->sibling != &parent->children)
3787 return next;
3788 return NULL;
3789}
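
/*
 * Typical use is through the css_for_each_child() wrapper built on top
 * of css_next_child().  A sketch, with visit() standing in for whatever
 * per-child work the caller does:
 *
 *	struct cgroup_subsys_state *child;
 *
 *	rcu_read_lock();
 *	css_for_each_child(child, parent_css)
 *		visit(child);
 *	rcu_read_unlock();
 */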
3790
/**
 * css_next_descendant_pre - find the next descendant for pre-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * To be visited are the descendants of @root including @root itself in
 * pre-order.  Must be called under either cgroup_mutex or RCU read lock.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.  This function will return the correct next descendant as long
 * as both @pos and @root are accessible and @pos is a descendant of @root.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 */
3812struct cgroup_subsys_state *
3813css_next_descendant_pre(struct cgroup_subsys_state *pos,
3814 struct cgroup_subsys_state *root)
3815{
3816 struct cgroup_subsys_state *next;
3817
3818 cgroup_assert_mutex_or_rcu_locked();
3819
	/* if first iteration, visit @root */
3821 if (!pos)
3822 return root;
3823
	/* visit the first child if exists */
3825 next = css_next_child(NULL, pos);
3826 if (next)
3827 return next;
3828
	/* no child, visit my or the closest ancestor's next sibling */
3830 while (pos != root) {
3831 next = css_next_child(pos, pos->parent);
3832 if (next)
3833 return next;
3834 pos = pos->parent;
3835 }
3836
3837 return NULL;
3838}
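
/*
 * Sketch of a full subtree walk via the css_for_each_descendant_pre()
 * wrapper.  @root_css is visited first, then each subtree top-down, so
 * a parent's state is always settled before its children are seen:
 *
 *	struct cgroup_subsys_state *pos;
 *
 *	rcu_read_lock();
 *	css_for_each_descendant_pre(pos, root_css) {
 *		// pre-order: pos's parent was already visited
 *	}
 *	rcu_read_unlock();
 */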
3839
/**
 * css_rightmost_descendant - return the rightmost descendant of a css
 * @pos: css of interest
 *
 * Return the rightmost descendant of @pos.  If there's no descendant, @pos
 * is returned.  This can be used during pre-order traversal to skip
 * subtree of @pos.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.  This function will return the correct rightmost descendant as
 * long as @pos is accessible.
 */
3853struct cgroup_subsys_state *
3854css_rightmost_descendant(struct cgroup_subsys_state *pos)
3855{
3856 struct cgroup_subsys_state *last, *tmp;
3857
3858 cgroup_assert_mutex_or_rcu_locked();
3859
3860 do {
3861 last = pos;
		/* ->prev isn't RCU safe, walk ->next till the end */
3863 pos = NULL;
3864 css_for_each_child(tmp, last)
3865 pos = tmp;
3866 } while (pos);
3867
3868 return last;
3869}
3870
3871static struct cgroup_subsys_state *
3872css_leftmost_descendant(struct cgroup_subsys_state *pos)
3873{
3874 struct cgroup_subsys_state *last;
3875
3876 do {
3877 last = pos;
3878 pos = css_next_child(NULL, pos);
3879 } while (pos);
3880
3881 return last;
3882}
3883
/**
 * css_next_descendant_post - find the next descendant for post-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * To be visited are the descendants of @root including @root itself in
 * post-order.  Must be called under either cgroup_mutex or RCU read lock.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.  This function will return the correct next descendant as long
 * as both @pos and @root are accessible and @pos is a descendant of @root.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 */
3906struct cgroup_subsys_state *
3907css_next_descendant_post(struct cgroup_subsys_state *pos,
3908 struct cgroup_subsys_state *root)
3909{
3910 struct cgroup_subsys_state *next;
3911
3912 cgroup_assert_mutex_or_rcu_locked();
3913
	/* if first iteration, visit leftmost descendant which may be @root */
3915 if (!pos)
3916 return css_leftmost_descendant(root);
3917
	/* if we visited @root, we're done */
3919 if (pos == root)
3920 return NULL;
3921
	/* if there's an unvisited sibling, visit its leftmost descendant */
3923 next = css_next_child(pos, pos->parent);
3924 if (next)
3925 return css_leftmost_descendant(next);
3926
	/* no sibling left, visit parent */
3928 return pos->parent;
3929}
3930
/**
 * css_has_online_children - does a css have online children
 * @css: the target css
 *
 * Returns %true if @css has any online children; otherwise, %false.  This
 * function can be called from any context but the caller is responsible
 * for synchronizing against on/offlining as necessary.
 */
3939bool css_has_online_children(struct cgroup_subsys_state *css)
3940{
3941 struct cgroup_subsys_state *child;
3942 bool ret = false;
3943
3944 rcu_read_lock();
3945 css_for_each_child(child, css) {
3946 if (child->flags & CSS_ONLINE) {
3947 ret = true;
3948 break;
3949 }
3950 }
3951 rcu_read_unlock();
3952 return ret;
3953}
3954
3955static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
3956{
3957 struct list_head *l;
3958 struct cgrp_cset_link *link;
3959 struct css_set *cset;
3960
3961 lockdep_assert_held(&css_set_lock);
3962
	/* find the next threaded cset */
3964 if (it->tcset_pos) {
3965 l = it->tcset_pos->next;
3966
3967 if (l != it->tcset_head) {
3968 it->tcset_pos = l;
3969 return container_of(l, struct css_set,
3970 threaded_csets_node);
3971 }
3972
3973 it->tcset_pos = NULL;
3974 }
3975
	/* find the next cset */
3977 l = it->cset_pos;
3978 l = l->next;
3979 if (l == it->cset_head) {
3980 it->cset_pos = NULL;
3981 return NULL;
3982 }
3983
3984 if (it->ss) {
3985 cset = container_of(l, struct css_set, e_cset_node[it->ss->id]);
3986 } else {
3987 link = list_entry(l, struct cgrp_cset_link, cset_link);
3988 cset = link->cset;
3989 }
3990
3991 it->cset_pos = l;
3992
	/* initialize threaded css_set walking */
3994 if (it->flags & CSS_TASK_ITER_THREADED) {
3995 if (it->cur_dcset)
3996 put_css_set_locked(it->cur_dcset);
3997 it->cur_dcset = cset;
3998 get_css_set(cset);
3999
4000 it->tcset_head = &cset->threaded_csets;
4001 it->tcset_pos = &cset->threaded_csets;
4002 }
4003
4004 return cset;
4005}
4006
/**
 * css_task_iter_advance_css_set - advance a task iterator to the next css_set
 * @it: the iterator to advance
 *
 * Advance @it to the next css_set to walk.
 */
4013static void css_task_iter_advance_css_set(struct css_task_iter *it)
4014{
4015 struct css_set *cset;
4016
4017 lockdep_assert_held(&css_set_lock);
4018
	/* Advance to the next non-empty css_set */
4020 do {
4021 cset = css_task_iter_next_css_set(it);
4022 if (!cset) {
4023 it->task_pos = NULL;
4024 return;
4025 }
4026 } while (!css_set_populated(cset));
4027
4028 if (!list_empty(&cset->tasks))
4029 it->task_pos = cset->tasks.next;
4030 else
4031 it->task_pos = cset->mg_tasks.next;
4032
4033 it->tasks_head = &cset->tasks;
4034 it->mg_tasks_head = &cset->mg_tasks;
4035
	/*
	 * We don't keep css_sets locked across iteration steps and thus
	 * need to take steps to ensure that iteration can be resumed after
	 * the lock is re-acquired.  Iteration is performed at two levels -
	 * css_sets and tasks in them.
	 *
	 * Once created, a css_set never leaves its cgroup lists, so a
	 * pinned css_set is guaranteed to stay put and we can resume
	 * iteration afterwards.
	 *
	 * Tasks may leave their css_set while iteration is in progress and
	 * the iterated css_set may be removed afterwards.  Register @it for
	 * css_set removal notification by linking it on the css_set's
	 * ->task_iters list.
	 */
4051 if (it->cur_cset) {
4052 list_del(&it->iters_node);
4053 put_css_set_locked(it->cur_cset);
4054 }
4055 get_css_set(cset);
4056 it->cur_cset = cset;
4057 list_add(&it->iters_node, &cset->task_iters);
4058}
4059
4060static void css_task_iter_advance(struct css_task_iter *it)
4061{
4062 struct list_head *l = it->task_pos;
4063
4064 lockdep_assert_held(&css_set_lock);
4065 WARN_ON_ONCE(!l);
4066
4067repeat:
	/*
	 * Advance iterator to find next entry.  cset->tasks is consumed
	 * first and then ->mg_tasks.  After ->mg_tasks, we move onto the
	 * next cset.
	 */
4073 l = l->next;
4074
4075 if (l == it->tasks_head)
4076 l = it->mg_tasks_head->next;
4077
4078 if (l == it->mg_tasks_head)
4079 css_task_iter_advance_css_set(it);
4080 else
4081 it->task_pos = l;
4082
	/* if PROCS, skip over tasks which aren't group leaders */
4084 if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
4085 !thread_group_leader(list_entry(it->task_pos, struct task_struct,
4086 cg_list)))
4087 goto repeat;
4088}
4089
/**
 * css_task_iter_start - initiate task iteration
 * @css: the css to walk tasks of
 * @flags: CSS_TASK_ITER_* flags
 * @it: the task iterator to use
 *
 * Initiate iteration through the tasks of @css.  The caller can call
 * css_task_iter_next() to walk through the tasks until the function
 * returns NULL.  On completion of iteration, css_task_iter_end() must be
 * called.
 */
4101void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
4102 struct css_task_iter *it)
4103{
	/* no one should try to iterate before mounting cgroups */
4105 WARN_ON_ONCE(!use_task_css_set_links);
4106
4107 memset(it, 0, sizeof(*it));
4108
4109 spin_lock_irq(&css_set_lock);
4110
4111 it->ss = css->ss;
4112 it->flags = flags;
4113
4114 if (it->ss)
4115 it->cset_pos = &css->cgroup->e_csets[css->ss->id];
4116 else
4117 it->cset_pos = &css->cgroup->cset_links;
4118
4119 it->cset_head = it->cset_pos;
4120
4121 css_task_iter_advance_css_set(it);
4122
4123 spin_unlock_irq(&css_set_lock);
4124}
4125
/**
 * css_task_iter_next - return the next task for the iterator
 * @it: the task iterator being iterated
 *
 * The "next" function for task iteration.  @it should have been
 * initialized via css_task_iter_start().  Returns NULL when the iteration
 * reaches the end.
 */
4134struct task_struct *css_task_iter_next(struct css_task_iter *it)
4135{
4136 if (it->cur_task) {
4137 put_task_struct(it->cur_task);
4138 it->cur_task = NULL;
4139 }
4140
4141 spin_lock_irq(&css_set_lock);
4142
4143 if (it->task_pos) {
4144 it->cur_task = list_entry(it->task_pos, struct task_struct,
4145 cg_list);
4146 get_task_struct(it->cur_task);
4147 css_task_iter_advance(it);
4148 }
4149
4150 spin_unlock_irq(&css_set_lock);
4151
4152 return it->cur_task;
4153}
4154
/**
 * css_task_iter_end - finish task iteration
 * @it: the task iterator to finish
 *
 * Finish task iteration started by css_task_iter_start().
 */
4161void css_task_iter_end(struct css_task_iter *it)
4162{
4163 if (it->cur_cset) {
4164 spin_lock_irq(&css_set_lock);
4165 list_del(&it->iters_node);
4166 put_css_set_locked(it->cur_cset);
4167 spin_unlock_irq(&css_set_lock);
4168 }
4169
4170 if (it->cur_dcset)
4171 put_css_set(it->cur_dcset);
4172
4173 if (it->cur_task)
4174 put_task_struct(it->cur_task);
4175}
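
/*
 * The three iterator entry points above combine as follows (a sketch;
 * counting is just an example of per-task work):
 *
 *	struct css_task_iter it;
 *	struct task_struct *task;
 *	int nr = 0;
 *
 *	css_task_iter_start(css, 0, &it);
 *	while ((task = css_task_iter_next(&it)))
 *		nr++;	// @task stays pinned until the next call
 *	css_task_iter_end(&it);
 */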
4176
4177static void cgroup_procs_release(struct kernfs_open_file *of)
4178{
4179 if (of->priv) {
4180 css_task_iter_end(of->priv);
4181 kfree(of->priv);
4182 }
4183}
4184
4185static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
4186{
4187 struct kernfs_open_file *of = s->private;
4188 struct css_task_iter *it = of->priv;
4189
4190 return css_task_iter_next(it);
4191}
4192
4193static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
4194 unsigned int iter_flags)
4195{
4196 struct kernfs_open_file *of = s->private;
4197 struct cgroup *cgrp = seq_css(s)->cgroup;
4198 struct css_task_iter *it = of->priv;
4199
	/*
	 * When a seq_file is seeked, it's always traversed sequentially
	 * from position 0, so we can simply keep iterating on !0 *pos.
	 */
4204 if (!it) {
4205 if (WARN_ON_ONCE((*pos)++))
4206 return ERR_PTR(-EINVAL);
4207
4208 it = kzalloc(sizeof(*it), GFP_KERNEL);
4209 if (!it)
4210 return ERR_PTR(-ENOMEM);
4211 of->priv = it;
4212 css_task_iter_start(&cgrp->self, iter_flags, it);
4213 } else if (!(*pos)++) {
4214 css_task_iter_end(it);
4215 css_task_iter_start(&cgrp->self, iter_flags, it);
4216 }
4217
4218 return cgroup_procs_next(s, NULL, NULL);
4219}
4220
4221static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
4222{
4223 struct cgroup *cgrp = seq_css(s)->cgroup;
4224
	/*
	 * All processes of a threaded subtree belong to the domain cgroup
	 * of the subtree.  Only threads can be distributed across the
	 * subtree.  Reject reads on cgroup.procs in the subtree proper.
	 * They're always empty anyway.
	 */
4231 if (cgroup_is_threaded(cgrp))
4232 return ERR_PTR(-EOPNOTSUPP);
4233
4234 return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS |
4235 CSS_TASK_ITER_THREADED);
4236}
4237
4238static int cgroup_procs_show(struct seq_file *s, void *v)
4239{
4240 seq_printf(s, "%d\n", task_pid_vnr(v));
4241 return 0;
4242}
4243
4244static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
4245 struct cgroup *dst_cgrp,
4246 struct super_block *sb)
4247{
4248 struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
4249 struct cgroup *com_cgrp = src_cgrp;
4250 struct inode *inode;
4251 int ret;
4252
4253 lockdep_assert_held(&cgroup_mutex);
4254
	/* find the common ancestor */
4256 while (!cgroup_is_descendant(dst_cgrp, com_cgrp))
4257 com_cgrp = cgroup_parent(com_cgrp);
4258
	/* %current should be authorized to migrate to the common ancestor */
4260 inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn);
4261 if (!inode)
4262 return -ENOMEM;
4263
4264 ret = inode_permission(inode, MAY_WRITE);
4265 iput(inode);
4266 if (ret)
4267 return ret;
4268
	/*
	 * If namespaces are delegation boundaries, %current must be able
	 * to see both source and destination cgroups from its namespace.
	 */
4273 if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) &&
4274 (!cgroup_is_descendant(src_cgrp, ns->root_cset->dfl_cgrp) ||
4275 !cgroup_is_descendant(dst_cgrp, ns->root_cset->dfl_cgrp)))
4276 return -ENOENT;
4277
4278 return 0;
4279}
4280
4281static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
4282 char *buf, size_t nbytes, loff_t off)
4283{
4284 struct cgroup *src_cgrp, *dst_cgrp;
4285 struct task_struct *task;
4286 ssize_t ret;
4287
4288 dst_cgrp = cgroup_kn_lock_live(of->kn, false);
4289 if (!dst_cgrp)
4290 return -ENODEV;
4291
4292 task = cgroup_procs_write_start(buf, true);
4293 ret = PTR_ERR_OR_ZERO(task);
4294 if (ret)
4295 goto out_unlock;
4296
	/* find the source cgroup */
4298 spin_lock_irq(&css_set_lock);
4299 src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
4300 spin_unlock_irq(&css_set_lock);
4301
4302 ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
4303 of->file->f_path.dentry->d_sb);
4304 if (ret)
4305 goto out_finish;
4306
4307 ret = cgroup_attach_task(dst_cgrp, task, true);
4308
4309out_finish:
4310 cgroup_procs_write_finish(task);
4311out_unlock:
4312 cgroup_kn_unlock(of->kn);
4313
4314 return ret ?: nbytes;
4315}
4316
4317static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
4318{
4319 return __cgroup_procs_start(s, pos, 0);
4320}
4321
4322static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
4323 char *buf, size_t nbytes, loff_t off)
4324{
4325 struct cgroup *src_cgrp, *dst_cgrp;
4326 struct task_struct *task;
4327 ssize_t ret;
4328
4329 buf = strstrip(buf);
4330
4331 dst_cgrp = cgroup_kn_lock_live(of->kn, false);
4332 if (!dst_cgrp)
4333 return -ENODEV;
4334
4335 task = cgroup_procs_write_start(buf, false);
4336 ret = PTR_ERR_OR_ZERO(task);
4337 if (ret)
4338 goto out_unlock;
4339
	/* find the source cgroup */
4341 spin_lock_irq(&css_set_lock);
4342 src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
4343 spin_unlock_irq(&css_set_lock);
4344
	/* thread migrations follow the cgroup.procs delegation rule */
4346 ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
4347 of->file->f_path.dentry->d_sb);
4348 if (ret)
4349 goto out_finish;
4350
	/* and must be contained in the same domain */
4352 ret = -EOPNOTSUPP;
4353 if (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp)
4354 goto out_finish;
4355
4356 ret = cgroup_attach_task(dst_cgrp, task, false);
4357
4358out_finish:
4359 cgroup_procs_write_finish(task);
4360out_unlock:
4361 cgroup_kn_unlock(of->kn);
4362
4363 return ret ?: nbytes;
4364}
4365
/* cgroup core interface files for the default hierarchy */
4367static struct cftype cgroup_base_files[] = {
4368 {
4369 .name = "cgroup.type",
4370 .flags = CFTYPE_NOT_ON_ROOT,
4371 .seq_show = cgroup_type_show,
4372 .write = cgroup_type_write,
4373 },
4374 {
4375 .name = "cgroup.procs",
4376 .flags = CFTYPE_NS_DELEGATABLE,
4377 .file_offset = offsetof(struct cgroup, procs_file),
4378 .release = cgroup_procs_release,
4379 .seq_start = cgroup_procs_start,
4380 .seq_next = cgroup_procs_next,
4381 .seq_show = cgroup_procs_show,
4382 .write = cgroup_procs_write,
4383 },
4384 {
4385 .name = "cgroup.threads",
4386 .release = cgroup_procs_release,
4387 .seq_start = cgroup_threads_start,
4388 .seq_next = cgroup_procs_next,
4389 .seq_show = cgroup_procs_show,
4390 .write = cgroup_threads_write,
4391 },
4392 {
4393 .name = "cgroup.controllers",
4394 .seq_show = cgroup_controllers_show,
4395 },
4396 {
4397 .name = "cgroup.subtree_control",
4398 .flags = CFTYPE_NS_DELEGATABLE,
4399 .seq_show = cgroup_subtree_control_show,
4400 .write = cgroup_subtree_control_write,
4401 },
4402 {
4403 .name = "cgroup.events",
4404 .flags = CFTYPE_NOT_ON_ROOT,
4405 .file_offset = offsetof(struct cgroup, events_file),
4406 .seq_show = cgroup_events_show,
4407 },
4408 {
4409 .name = "cgroup.max.descendants",
4410 .seq_show = cgroup_max_descendants_show,
4411 .write = cgroup_max_descendants_write,
4412 },
4413 {
4414 .name = "cgroup.max.depth",
4415 .seq_show = cgroup_max_depth_show,
4416 .write = cgroup_max_depth_write,
4417 },
4418 {
4419 .name = "cgroup.stat",
4420 .seq_show = cgroup_stat_show,
4421 },
4422 { }
4423};
4424
/*
 * css destruction is four-stage process.
 *
 * 1. Destruction starts.  Killing of the percpu_ref is initiated.
 *    Destruction completes only after the percpu_ref is confirmed to be
 *    killed, which may involve an RCU grace period.
 *
 * 2. When the percpu_ref is confirmed to be seen as killed on all CPUs
 *    and thus css_tryget_online() can't succeed anymore, the css can be
 *    offlined by invoking offline_css().  After offlining, the base ref
 *    is put.  Implemented in css_killed_work_fn().
 *
 * 3. When the percpu_ref reaches zero, the only possible remaining
 *    accessors are inside RCU read sections.  css_release() schedules the
 *    RCU callback.
 *
 * 4. After the grace period, the css can be freed.  Implemented in
 *    css_free_work_fn().
 *
 * It is actually hairier because both step 2 and 4 require process context
 * and thus involve punting to css->destroy_work adding two additional
 * steps to the already complex sequence.
 */
4447static void css_free_work_fn(struct work_struct *work)
4448{
4449 struct cgroup_subsys_state *css =
4450 container_of(work, struct cgroup_subsys_state, destroy_work);
4451 struct cgroup_subsys *ss = css->ss;
4452 struct cgroup *cgrp = css->cgroup;
4453
4454 percpu_ref_exit(&css->refcnt);
4455
4456 if (ss) {
		/* css free path */
4458 struct cgroup_subsys_state *parent = css->parent;
4459 int id = css->id;
4460
4461 ss->css_free(css);
4462 cgroup_idr_remove(&ss->css_idr, id);
4463 cgroup_put(cgrp);
4464
4465 if (parent)
4466 css_put(parent);
4467 } else {
		/* cgroup free path */
4469 atomic_dec(&cgrp->root->nr_cgrps);
4470 cgroup1_pidlist_destroy_all(cgrp);
4471 cancel_work_sync(&cgrp->release_agent_work);
4472
4473 if (cgroup_parent(cgrp)) {
			/*
			 * We get a ref to the parent, and put the ref when
			 * this cgroup is being freed, so it's guaranteed
			 * that the parent won't be destroyed before its
			 * children.
			 */
4480 cgroup_put(cgroup_parent(cgrp));
4481 kernfs_put(cgrp->kn);
4482 kfree(cgrp);
4483 } else {
			/*
			 * This is root cgroup's refcnt reaching zero,
			 * which indicates that the root should be
			 * released.
			 */
4489 cgroup_destroy_root(cgrp->root);
4490 }
4491 }
4492}
4493
4494static void css_free_rcu_fn(struct rcu_head *rcu_head)
4495{
4496 struct cgroup_subsys_state *css =
4497 container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
4498
4499 INIT_WORK(&css->destroy_work, css_free_work_fn);
4500 queue_work(cgroup_destroy_wq, &css->destroy_work);
4501}
4502
4503static void css_release_work_fn(struct work_struct *work)
4504{
4505 struct cgroup_subsys_state *css =
4506 container_of(work, struct cgroup_subsys_state, destroy_work);
4507 struct cgroup_subsys *ss = css->ss;
4508 struct cgroup *cgrp = css->cgroup;
4509
4510 mutex_lock(&cgroup_mutex);
4511
4512 css->flags |= CSS_RELEASED;
4513 list_del_rcu(&css->sibling);
4514
4515 if (ss) {
		/* css release path */
4517 cgroup_idr_replace(&ss->css_idr, NULL, css->id);
4518 if (ss->css_released)
4519 ss->css_released(css);
4520 } else {
4521 struct cgroup *tcgrp;
4522
		/* cgroup release path */
4524 trace_cgroup_release(cgrp);
4525
4526 for (tcgrp = cgroup_parent(cgrp); tcgrp;
4527 tcgrp = cgroup_parent(tcgrp))
4528 tcgrp->nr_dying_descendants--;
4529
4530 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
4531 cgrp->id = -1;
4532
		/*
		 * There are two control paths which try to determine
		 * cgroup from dentry without going through kernfs -
		 * cgroupstats_build() and css_tryget_online_from_dir().
		 * Those are supported by RCU protecting clearing of
		 * cgrp->kn->priv backpointer.
		 */
4540 if (cgrp->kn)
4541 RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
4542 NULL);
4543
4544 cgroup_bpf_put(cgrp);
4545 }
4546
4547 mutex_unlock(&cgroup_mutex);
4548
4549 call_rcu(&css->rcu_head, css_free_rcu_fn);
4550}
4551
4552static void css_release(struct percpu_ref *ref)
4553{
4554 struct cgroup_subsys_state *css =
4555 container_of(ref, struct cgroup_subsys_state, refcnt);
4556
4557 INIT_WORK(&css->destroy_work, css_release_work_fn);
4558 queue_work(cgroup_destroy_wq, &css->destroy_work);
4559}
4560
4561static void init_and_link_css(struct cgroup_subsys_state *css,
4562 struct cgroup_subsys *ss, struct cgroup *cgrp)
4563{
4564 lockdep_assert_held(&cgroup_mutex);
4565
4566 cgroup_get_live(cgrp);
4567
4568 memset(css, 0, sizeof(*css));
4569 css->cgroup = cgrp;
4570 css->ss = ss;
4571 css->id = -1;
4572 INIT_LIST_HEAD(&css->sibling);
4573 INIT_LIST_HEAD(&css->children);
4574 css->serial_nr = css_serial_nr_next++;
4575 atomic_set(&css->online_cnt, 0);
4576
4577 if (cgroup_parent(cgrp)) {
4578 css->parent = cgroup_css(cgroup_parent(cgrp), ss);
4579 css_get(css->parent);
4580 }
4581
4582 BUG_ON(cgroup_css(cgrp, ss));
4583}
4584
/* invoke ->css_online() on a new css and mark it online if successful */
4586static int online_css(struct cgroup_subsys_state *css)
4587{
4588 struct cgroup_subsys *ss = css->ss;
4589 int ret = 0;
4590
4591 lockdep_assert_held(&cgroup_mutex);
4592
4593 if (ss->css_online)
4594 ret = ss->css_online(css);
4595 if (!ret) {
4596 css->flags |= CSS_ONLINE;
4597 rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
4598
4599 atomic_inc(&css->online_cnt);
4600 if (css->parent)
4601 atomic_inc(&css->parent->online_cnt);
4602 }
4603 return ret;
4604}
4605
/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
4607static void offline_css(struct cgroup_subsys_state *css)
4608{
4609 struct cgroup_subsys *ss = css->ss;
4610
4611 lockdep_assert_held(&cgroup_mutex);
4612
4613 if (!(css->flags & CSS_ONLINE))
4614 return;
4615
4616 if (ss->css_offline)
4617 ss->css_offline(css);
4618
4619 css->flags &= ~CSS_ONLINE;
4620 RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
4621
4622 wake_up_all(&css->cgroup->offline_waitq);
4623}
4624
/**
 * css_create - create a cgroup_subsys_state
 * @cgrp: the cgroup new css will be associated with
 * @ss: the subsys of new css
 *
 * Create a new css associated with @cgrp.  On success, the new css is
 * online and ready to use, and it keeps @cgrp and its parent css pinned.
 * Returns the new css on success and ERR_PTR(-errno) on failure.
 */
4634static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
4635 struct cgroup_subsys *ss)
4636{
4637 struct cgroup *parent = cgroup_parent(cgrp);
4638 struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
4639 struct cgroup_subsys_state *css;
4640 int err;
4641
4642 lockdep_assert_held(&cgroup_mutex);
4643
4644 css = ss->css_alloc(parent_css);
4645 if (!css)
4646 css = ERR_PTR(-ENOMEM);
4647 if (IS_ERR(css))
4648 return css;
4649
4650 init_and_link_css(css, ss, cgrp);
4651
4652 err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
4653 if (err)
4654 goto err_free_css;
4655
4656 err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
4657 if (err < 0)
4658 goto err_free_css;
4659 css->id = err;
4660
	/* @css is ready to be brought online now, make it visible */
4662 list_add_tail_rcu(&css->sibling, &parent_css->children);
4663 cgroup_idr_replace(&ss->css_idr, css, css->id);
4664
4665 err = online_css(css);
4666 if (err)
4667 goto err_list_del;
4668
4669 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
4670 cgroup_parent(parent)) {
4671 pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
4672 current->comm, current->pid, ss->name);
4673 if (!strcmp(ss->name, "memory"))
4674 pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n");
4675 ss->warned_broken_hierarchy = true;
4676 }
4677
4678 return css;
4679
4680err_list_del:
4681 list_del_rcu(&css->sibling);
4682err_free_css:
4683 call_rcu(&css->rcu_head, css_free_rcu_fn);
4684 return ERR_PTR(err);
4685}
4686
/*
 * The returned cgroup is fully initialized including its control mask, but
 * it isn't associated with its kernfs_node and doesn't have the control
 * mask applied.
 */
4692static struct cgroup *cgroup_create(struct cgroup *parent)
4693{
4694 struct cgroup_root *root = parent->root;
4695 struct cgroup *cgrp, *tcgrp;
4696 int level = parent->level + 1;
4697 int ret;
4698
	/* allocate the cgroup and its ID, 0 is reserved for the root */
4700 cgrp = kzalloc(sizeof(*cgrp) +
4701 sizeof(cgrp->ancestor_ids[0]) * (level + 1), GFP_KERNEL);
4702 if (!cgrp)
4703 return ERR_PTR(-ENOMEM);
4704
4705 ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
4706 if (ret)
4707 goto out_free_cgrp;
4708
	/*
	 * Temporarily set the pointer to NULL, so idr_find() won't return
	 * a half-baked cgroup.
	 */
4713 cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
4714 if (cgrp->id < 0) {
4715 ret = -ENOMEM;
4716 goto out_cancel_ref;
4717 }
4718
4719 init_cgroup_housekeeping(cgrp);
4720
4721 cgrp->self.parent = &parent->self;
4722 cgrp->root = root;
4723 cgrp->level = level;
4724
4725 for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
4726 cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
4727
4728 if (tcgrp != cgrp)
4729 tcgrp->nr_descendants++;
4730 }
4731
4732 if (notify_on_release(parent))
4733 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
4734
4735 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
4736 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
4737
4738 cgrp->self.serial_nr = css_serial_nr_next++;
4739
	/* allocation complete, commit to creation */
4741 list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
4742 atomic_inc(&root->nr_cgrps);
4743 cgroup_get_live(parent);
4744
	/*
	 * @cgrp is now fully operational.  If something fails after this
	 * point, it'll be released via the normal destruction path.
	 */
4749 cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
4750
	/*
	 * On the default hierarchy, a child doesn't automatically inherit
	 * subtree_control from the parent.  Each is configured manually.
	 */
4755 if (!cgroup_on_dfl(cgrp))
4756 cgrp->subtree_control = cgroup_control(cgrp);
4757
4758 if (parent)
4759 cgroup_bpf_inherit(cgrp, parent);
4760
4761 cgroup_propagate_control(cgrp);
4762
4763 return cgrp;
4764
4765out_cancel_ref:
4766 percpu_ref_exit(&cgrp->self.refcnt);
4767out_free_cgrp:
4768 kfree(cgrp);
4769 return ERR_PTR(ret);
4770}
4771
4772static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
4773{
4774 struct cgroup *cgroup;
4775 int ret = false;
4776 int level = 1;
4777
4778 lockdep_assert_held(&cgroup_mutex);
4779
4780 for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) {
4781 if (cgroup->nr_descendants >= cgroup->max_descendants)
4782 goto fail;
4783
4784 if (level > cgroup->max_depth)
4785 goto fail;
4786
4787 level++;
4788 }
4789
4790 ret = true;
4791fail:
4792 return ret;
4793}
4794
4795int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
4796{
4797 struct cgroup *parent, *cgrp;
4798 struct kernfs_node *kn;
4799 int ret;
4800
	/* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
4802 if (strchr(name, '\n'))
4803 return -EINVAL;
4804
4805 parent = cgroup_kn_lock_live(parent_kn, false);
4806 if (!parent)
4807 return -ENODEV;
4808
4809 if (!cgroup_check_hierarchy_limits(parent)) {
4810 ret = -EAGAIN;
4811 goto out_unlock;
4812 }
4813
4814 cgrp = cgroup_create(parent);
4815 if (IS_ERR(cgrp)) {
4816 ret = PTR_ERR(cgrp);
4817 goto out_unlock;
4818 }
4819
	/* create the directory */
4821 kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
4822 if (IS_ERR(kn)) {
4823 ret = PTR_ERR(kn);
4824 goto out_destroy;
4825 }
4826 cgrp->kn = kn;
4827
	/*
	 * This extra ref will be put when the cgroup is freed (see
	 * css_free_work_fn()) and guarantees that @cgrp->kn is always
	 * accessible.
	 */
4832 kernfs_get(kn);
4833
4834 ret = cgroup_kn_set_ugid(kn);
4835 if (ret)
4836 goto out_destroy;
4837
4838 ret = css_populate_dir(&cgrp->self);
4839 if (ret)
4840 goto out_destroy;
4841
4842 ret = cgroup_apply_control_enable(cgrp);
4843 if (ret)
4844 goto out_destroy;
4845
4846 trace_cgroup_mkdir(cgrp);
4847
	/* let's create and online css's */
4849 kernfs_activate(kn);
4850
4851 ret = 0;
4852 goto out_unlock;
4853
4854out_destroy:
4855 cgroup_destroy_locked(cgrp);
4856out_unlock:
4857 cgroup_kn_unlock(parent_kn);
4858 return ret;
4859}
4860
/*
 * This is called when the refcnt of a css is confirmed to be killed.
 * css_tryget_online() is now guaranteed to fail.  Tell the subsystem to
 * initiate destruction.
 */
4866static void css_killed_work_fn(struct work_struct *work)
4867{
4868 struct cgroup_subsys_state *css =
4869 container_of(work, struct cgroup_subsys_state, destroy_work);
4870
4871 mutex_lock(&cgroup_mutex);
4872
4873 do {
4874 offline_css(css);
4875 css_put(css);
4876
4877 css = css->parent;
4878 } while (css && atomic_dec_and_test(&css->online_cnt));
4879
4880 mutex_unlock(&cgroup_mutex);
4881}
4882
/* css kill confirmation processing requires process context, bounce */
4884static void css_killed_ref_fn(struct percpu_ref *ref)
4885{
4886 struct cgroup_subsys_state *css =
4887 container_of(ref, struct cgroup_subsys_state, refcnt);
4888
4889 if (atomic_dec_and_test(&css->online_cnt)) {
4890 INIT_WORK(&css->destroy_work, css_killed_work_fn);
4891 queue_work(cgroup_destroy_wq, &css->destroy_work);
4892 }
4893}
4894
/**
 * kill_css - destroy a css
 * @css: css to destroy
 *
 * This function initiates destruction of @css by removing cgroup interface
 * files and putting its base reference.  ->css_offline() will be invoked
 * asynchronously once css_tryget_online() is guaranteed to fail and when
 * the reference count reaches zero, @css will be released.
 */
4904static void kill_css(struct cgroup_subsys_state *css)
4905{
4906 lockdep_assert_held(&cgroup_mutex);
4907
4908 if (css->flags & CSS_DYING)
4909 return;
4910
4911 css->flags |= CSS_DYING;

	/*
	 * This must happen before css is disassociated with its cgroup.
	 * See seq_css() for details.
	 */
4917 css_clear_dir(css);

	/*
	 * Killing would put the base ref, but we need to keep it alive
	 * until after ->css_offline().
	 */
4923 css_get(css);

	/*
	 * cgroup core guarantees that, by the time ->css_offline() is
	 * invoked, no new css reference will be given out via
	 * css_tryget_online().  We can't simply call percpu_ref_kill() and
	 * proceed to offlining css's because percpu_ref_kill() doesn't
	 * guarantee that the ref is seen as killed on all CPUs on return.
	 *
	 * Use percpu_ref_kill_and_confirm() to get notifications as each
	 * css is confirmed to be seen as killed on all CPUs.
	 */
4935 percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
4936}
4937
/**
 * cgroup_destroy_locked - the first stage of cgroup destruction
 * @cgrp: cgroup to be destroyed
 *
 * css's make use of percpu refcnts whose killing latency shouldn't be
 * exposed to userland and are RCU protected.  Also, cgroup core needs to
 * guarantee that css_tryget_online() won't succeed by the time
 * ->css_offline() is invoked.  To satisfy all the requirements,
 * destruction is implemented in the following two steps.
 *
 * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
 *     userland visible parts and start killing the percpu refcnts of
 *     css's.  Set up so that the next stage will be kicked off once all
 *     the percpu refcnts are confirmed to be killed.
 *
 * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
 *     rest of destruction.  Once all cgroup references are gone, the
 *     cgroup is RCU-freed.
 *
 * This function implements s1.  After this step, @cgrp is gone as far as
 * the userland is concerned and a new cgroup with the same name may be
 * created.  As cgroup doesn't care about the names internally, this
 * doesn't cause any problem.
 */
4962static int cgroup_destroy_locked(struct cgroup *cgrp)
4963 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
4964{
4965 struct cgroup *tcgrp, *parent = cgroup_parent(cgrp);
4966 struct cgroup_subsys_state *css;
4967 struct cgrp_cset_link *link;
4968 int ssid;
4969
4970 lockdep_assert_held(&cgroup_mutex);
4971
	/*
	 * Only migration can raise populated from zero and we're already
	 * holding cgroup_mutex.
	 */
4976 if (cgroup_is_populated(cgrp))
4977 return -EBUSY;
4978
	/*
	 * Make sure there's no live children.  We can't test emptiness of
	 * ->self.children as dead children linger on it while being
	 * drained; otherwise, "rmdir parent/child parent" may fail.
	 */
4984 if (css_has_online_children(&cgrp->self))
4985 return -EBUSY;
4986
	/*
	 * Mark @cgrp and the associated csets dead.  The former prevents
	 * further task migration and child creation by disabling
	 * cgroup_lock_live_group().  The latter makes the csets ignored by
	 * the migration path.
	 */
4993 cgrp->self.flags &= ~CSS_ONLINE;
4994
4995 spin_lock_irq(&css_set_lock);
4996 list_for_each_entry(link, &cgrp->cset_links, cset_link)
4997 link->cset->dead = true;
4998 spin_unlock_irq(&css_set_lock);
4999
	/* initiate massacre of all css's */
5001 for_each_css(css, ssid, cgrp)
5002 kill_css(css);
5003
	/*
	 * Remove @cgrp directory along with the base files.  @cgrp has an
	 * extra ref on its kn.
	 */
5008 kernfs_remove(cgrp->kn);
5009
5010 if (parent && cgroup_is_threaded(cgrp))
5011 parent->nr_threaded_children--;
5012
5013 for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5014 tcgrp->nr_descendants--;
5015 tcgrp->nr_dying_descendants++;
5016 }
5017
5018 cgroup1_check_for_release(parent);
5019
	/* put the base reference */
5021 percpu_ref_kill(&cgrp->self.refcnt);
5022
5023 return 0;
}
5025
5026int cgroup_rmdir(struct kernfs_node *kn)
5027{
5028 struct cgroup *cgrp;
5029 int ret = 0;
5030
5031 cgrp = cgroup_kn_lock_live(kn, false);
5032 if (!cgrp)
5033 return 0;
5034
5035 ret = cgroup_destroy_locked(cgrp);
5036
5037 if (!ret)
5038 trace_cgroup_rmdir(cgrp);
5039
5040 cgroup_kn_unlock(kn);
5041 return ret;
5042}
5043
5044static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
5045 .show_options = cgroup_show_options,
5046 .remount_fs = cgroup_remount,
5047 .mkdir = cgroup_mkdir,
5048 .rmdir = cgroup_rmdir,
5049 .show_path = cgroup_show_path,
5050};
5051
5052static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
5053{
5054 struct cgroup_subsys_state *css;
5055
5056 pr_debug("Initializing cgroup subsys %s\n", ss->name);
5057
5058 mutex_lock(&cgroup_mutex);
5059
5060 idr_init(&ss->css_idr);
5061 INIT_LIST_HEAD(&ss->cfts);
5062
	/* Create the root cgroup state for this subsystem */
5064 ss->root = &cgrp_dfl_root;
5065 css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
	/* We don't handle early failures gracefully */
5067 BUG_ON(IS_ERR(css));
5068 init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
5069
	/*
	 * Root csses are never destroyed and we can't initialize
	 * percpu_ref during early init.  Disable refcnting.
	 */
5074 css->flags |= CSS_NO_REF;
5075
5076 if (early) {
		/* allocation can't be done safely during early init */
5078 css->id = 1;
5079 } else {
5080 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
5081 BUG_ON(css->id < 0);
5082 }
5083
	/* Update the init_css_set to contain a subsys
	 * pointer to this state - since the subsystem is
	 * newly registered, all tasks and hence the
	 * init_css_set is in the subsystem's root cgroup. */
5088 init_css_set.subsys[ss->id] = css;
5089
5090 have_fork_callback |= (bool)ss->fork << ss->id;
5091 have_exit_callback |= (bool)ss->exit << ss->id;
5092 have_free_callback |= (bool)ss->free << ss->id;
5093 have_canfork_callback |= (bool)ss->can_fork << ss->id;
5094
	/* At system boot, before all subsystems have been
	 * registered, no tasks have been forked, so we don't
	 * need to invoke fork callbacks here. */
5098 BUG_ON(!list_empty(&init_task.tasks));
5099
5100 BUG_ON(online_css(css));
5101
5102 mutex_unlock(&cgroup_mutex);
5103}
5104
/**
 * cgroup_init_early - cgroup initialization at system boot
 *
 * Initialize cgroups at system boot, and initialize any
 * subsystems that request early init.
 */
5111int __init cgroup_init_early(void)
5112{
5113 static struct cgroup_sb_opts __initdata opts;
5114 struct cgroup_subsys *ss;
5115 int i;
5116
5117 init_cgroup_root(&cgrp_dfl_root, &opts);
5118 cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
5119
5120 RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
5121
5122 for_each_subsys(ss, i) {
5123 WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
5124 "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
5125 i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
5126 ss->id, ss->name);
5127 WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
5128 "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);
5129
5130 ss->id = i;
5131 ss->name = cgroup_subsys_name[i];
5132 if (!ss->legacy_name)
5133 ss->legacy_name = cgroup_subsys_name[i];
5134
5135 if (ss->early_init)
5136 cgroup_init_subsys(ss, true);
5137 }
5138 return 0;
5139}
5140
5141static u16 cgroup_disable_mask __initdata;
5142
/**
 * cgroup_init - cgroup initialization
 *
 * Register cgroup filesystem and /proc file, and initialize
 * any subsystems that didn't request early init.
 */
5149int __init cgroup_init(void)
5150{
5151 struct cgroup_subsys *ss;
5152 int ssid;
5153
5154 BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
5155 BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
5156 BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
5157 BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
5158
	/*
	 * The latency of the synchronize_sched() is too high for cgroups,
	 * avoid it at the cost of forcing all readers into the slow path.
	 */
5163 rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
5164
5165 get_user_ns(init_cgroup_ns.user_ns);
5166
5167 mutex_lock(&cgroup_mutex);
5168
	/*
	 * Add init_css_set to the hash table so that dfl_root can link to
	 * it during init.
	 */
5173 hash_add(css_set_table, &init_css_set.hlist,
5174 css_set_hash(init_css_set.subsys));
5175
5176 BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0));
5177
5178 mutex_unlock(&cgroup_mutex);
5179
5180 for_each_subsys(ss, ssid) {
5181 if (ss->early_init) {
5182 struct cgroup_subsys_state *css =
5183 init_css_set.subsys[ss->id];
5184
5185 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
5186 GFP_KERNEL);
5187 BUG_ON(css->id < 0);
5188 } else {
5189 cgroup_init_subsys(ss, false);
5190 }
5191
5192 list_add_tail(&init_css_set.e_cset_node[ssid],
5193 &cgrp_dfl_root.cgrp.e_csets[ssid]);
5194
		/*
		 * Setting dfl_root subsys_mask needs to consider the
		 * disabled flag and cftype registration needs kmalloc,
		 * both of which aren't available during early_init.
		 */
5200 if (cgroup_disable_mask & (1 << ssid)) {
5201 static_branch_disable(cgroup_subsys_enabled_key[ssid]);
5202 printk(KERN_INFO "Disabling %s control group subsystem\n",
5203 ss->name);
5204 continue;
5205 }
5206
5207 if (cgroup1_ssid_disabled(ssid))
5208 printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
5209 ss->name);
5210
5211 cgrp_dfl_root.subsys_mask |= 1 << ss->id;
5212
		/* implicit controllers must be threaded too */
5214 WARN_ON(ss->implicit_on_dfl && !ss->threaded);
5215
5216 if (ss->implicit_on_dfl)
5217 cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
5218 else if (!ss->dfl_cftypes)
5219 cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;
5220
5221 if (ss->threaded)
5222 cgrp_dfl_threaded_ss_mask |= 1 << ss->id;
5223
5224 if (ss->dfl_cftypes == ss->legacy_cftypes) {
5225 WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
5226 } else {
5227 WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
5228 WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
5229 }
5230
5231 if (ss->bind)
5232 ss->bind(init_css_set.subsys[ssid]);
5233
5234 mutex_lock(&cgroup_mutex);
5235 css_populate_dir(init_css_set.subsys[ssid]);
5236 mutex_unlock(&cgroup_mutex);
5237 }
5238
	/* init_css_set.subsys[] has been updated, re-hash */
5240 hash_del(&init_css_set.hlist);
5241 hash_add(css_set_table, &init_css_set.hlist,
5242 css_set_hash(init_css_set.subsys));
5243
5244 WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
5245 WARN_ON(register_filesystem(&cgroup_fs_type));
5246 WARN_ON(register_filesystem(&cgroup2_fs_type));
5247 WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations));
5248
5249 return 0;
5250}
5251
5252static int __init cgroup_wq_init(void)
5253{
	/*
	 * There isn't much point in executing destruction path in
	 * parallel.  Good chunk is serialized with cgroup_mutex anyway.
	 * Use 1 for @max_active.
	 *
	 * We would prefer to do this in cgroup_init() above, but that
	 * is called before init_workqueues(): so leave this until after.
	 */
5262 cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
5263 BUG_ON(!cgroup_destroy_wq);
5264 return 0;
5265}
5266core_initcall(cgroup_wq_init);
5267
5268void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
5269 char *buf, size_t buflen)
5270{
5271 struct kernfs_node *kn;
5272
5273 kn = kernfs_get_node_by_id(cgrp_dfl_root.kf_root, id);
5274 if (!kn)
5275 return;
5276 kernfs_path(kn, buf, buflen);
5277 kernfs_put(kn);
5278}
5279
/*
 *  proc_cgroup_show()
 *  - Print task's cgroup paths into seq_file, one line for each hierarchy
 *  - Used for /proc/<pid>/cgroup.
 */
5285int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
5286 struct pid *pid, struct task_struct *tsk)
5287{
5288 char *buf;
5289 int retval;
5290 struct cgroup_root *root;
5291
5292 retval = -ENOMEM;
5293 buf = kmalloc(PATH_MAX, GFP_KERNEL);
5294 if (!buf)
5295 goto out;
5296
5297 mutex_lock(&cgroup_mutex);
5298 spin_lock_irq(&css_set_lock);
5299
5300 for_each_root(root) {
5301 struct cgroup_subsys *ss;
5302 struct cgroup *cgrp;
5303 int ssid, count = 0;
5304
5305 if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
5306 continue;
5307
5308 seq_printf(m, "%d:", root->hierarchy_id);
5309 if (root != &cgrp_dfl_root)
5310 for_each_subsys(ss, ssid)
5311 if (root->subsys_mask & (1 << ssid))
5312 seq_printf(m, "%s%s", count++ ? "," : "",
5313 ss->legacy_name);
5314 if (strlen(root->name))
5315 seq_printf(m, "%sname=%s", count ? "," : "",
5316 root->name);
5317 seq_putc(m, ':');
5318
5319 cgrp = task_cgroup_from_root(tsk, root);
		/*
		 * On traditional hierarchies, all zombie tasks show up as
		 * belonging to the root cgroup.  On the default hierarchy,
		 * while a zombie doesn't show up in "cgroup.procs" and
		 * thus can't be migrated, its /proc/PID/cgroup keeps
		 * reporting the cgroup it belonged to before exiting.  If
		 * the cgroup is removed before the zombie is reaped,
		 * " (deleted)" is appended to the path.
		 */
5330 if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
5331 retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
5332 current->nsproxy->cgroup_ns);
5333 if (retval >= PATH_MAX)
5334 retval = -ENAMETOOLONG;
5335 if (retval < 0)
5336 goto out_unlock;
5337
5338 seq_puts(m, buf);
5339 } else {
5340 seq_puts(m, "/");
5341 }
5342
5343 if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
5344 seq_puts(m, " (deleted)\n");
5345 else
5346 seq_putc(m, '\n');
5347 }
5348
5349 retval = 0;
5350out_unlock:
5351 spin_unlock_irq(&css_set_lock);
5352 mutex_unlock(&cgroup_mutex);
5353 kfree(buf);
5354out:
5355 return retval;
5356}
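
/*
 * Illustrative /proc/<pid>/cgroup output (hierarchy IDs and paths are
 * made up).  v1 hierarchies list their controllers; the default
 * hierarchy is always printed as ID 0 with an empty controller list:
 *
 *	3:pids:/init.scope
 *	1:name=systemd:/init.scope
 *	0::/init.scope
 */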
5357
/**
 * cgroup_fork - initialize cgroup related fields during copy_process()
 * @child: pointer to task_struct of forking parent process.
 *
 * A task is associated with the init_css_set until cgroup_post_fork()
 * attaches it to the parent's css_set.  Empty cg_list indicates that
 * @child isn't holding reference to its css_set.
 */
5366void cgroup_fork(struct task_struct *child)
5367{
5368 RCU_INIT_POINTER(child->cgroups, &init_css_set);
5369 INIT_LIST_HEAD(&child->cg_list);
5370}
5371
/**
 * cgroup_can_fork - called on a new task before the process is exposed
 * @child: the task in question.
 *
 * This calls the subsystem can_fork() callbacks. If the can_fork() callback
 * returns an error, the fork aborts with that error code. This allows for
 * a cgroup subsystem to conditionally allow or deny new forks.
 */
5380int cgroup_can_fork(struct task_struct *child)
5381{
5382 struct cgroup_subsys *ss;
5383 int i, j, ret;
5384
5385 do_each_subsys_mask(ss, i, have_canfork_callback) {
5386 ret = ss->can_fork(child);
5387 if (ret)
5388 goto out_revert;
5389 } while_each_subsys_mask();
5390
5391 return 0;
5392
5393out_revert:
5394 for_each_subsys(ss, j) {
5395 if (j >= i)
5396 break;
5397 if (ss->cancel_fork)
5398 ss->cancel_fork(child);
5399 }
5400
5401 return ret;
5402}
5403
/**
 * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
 * @child: the task in question
 *
 * This calls the cancel_fork() callbacks if a fork failed *after*
 * cgroup_can_fork() succeeded.
 */
5411void cgroup_cancel_fork(struct task_struct *child)
5412{
5413 struct cgroup_subsys *ss;
5414 int i;
5415
5416 for_each_subsys(ss, i)
5417 if (ss->cancel_fork)
5418 ss->cancel_fork(child);
5419}
5420
/**
 * cgroup_post_fork - called on a new task after adding it to the task list
 * @child: the task in question
 *
 * Adds the task to the list running through its css_set if necessary and
 * call the subsystem fork() callbacks.  Has to be after the task is
 * visible on the task list in case we race with the first call to
 * cgroup_task_iter_start() - to guarantee that the new task ends up on its
 * list.
 */
5431void cgroup_post_fork(struct task_struct *child)
5432{
5433 struct cgroup_subsys *ss;
5434 int i;
5435
	/*
	 * This may race against cgroup_enable_task_cg_lists().  As that
	 * function sets use_task_css_set_links before grabbing
	 * tasklist_lock and we just went through tasklist_lock to add
	 * @child, it's guaranteed that either we see the set
	 * use_task_css_set_links or cgroup_enable_task_cg_lists() sees
	 * @child during its iteration.
	 *
	 * If we won the race, @child is associated with %current's
	 * css_set.  Grabbing css_set_lock guarantees both that the
	 * association is stable, and, on completion of the parent's
	 * migration, @child is visible in the source of migration or
	 * already in the destination cgroup.  Either way @child never
	 * ends up unaccounted between the two.
	 */
5457 if (use_task_css_set_links) {
5458 struct css_set *cset;
5459
5460 spin_lock_irq(&css_set_lock);
5461 cset = task_css_set(current);
5462 if (list_empty(&child->cg_list)) {
5463 get_css_set(cset);
5464 cset->nr_tasks++;
5465 css_set_move_task(child, NULL, cset, false);
5466 }
5467 spin_unlock_irq(&css_set_lock);
5468 }
5469
	/*
	 * Call ss->fork().  This must happen after @child is linked on
	 * css_set; otherwise, @child might change state between ->fork()
	 * and addition to css_set.
	 */
5475 do_each_subsys_mask(ss, i, have_fork_callback) {
5476 ss->fork(child);
5477 } while_each_subsys_mask();
5478}
5479
/**
 * cgroup_exit - detach cgroup from exiting task
 * @tsk: pointer to task_struct of exiting process
 *
 * Description: Detach cgroup from @tsk.
 *
 * An exiting task is unlinked from its css_set here while the css_set
 * itself stays pinned until cgroup_free() so that the subsystem ->exit()
 * and ->free() callbacks can still reach it.  A task which never got onto
 * the cg_list (i.e. forked before use_task_css_set_links was enabled)
 * instead takes an extra css_set reference which cgroup_free() puts.
 */
5499void cgroup_exit(struct task_struct *tsk)
5500{
5501 struct cgroup_subsys *ss;
5502 struct css_set *cset;
5503 int i;
5504
	/*
	 * Unlink @tsk from its css_set.  As the migration path can't race
	 * with us, we can check css_set and cg_list without
	 * synchronization.
	 */
5509 cset = task_css_set(tsk);
5510
5511 if (!list_empty(&tsk->cg_list)) {
5512 spin_lock_irq(&css_set_lock);
5513 css_set_move_task(tsk, cset, NULL, false);
5514 cset->nr_tasks--;
5515 spin_unlock_irq(&css_set_lock);
5516 } else {
5517 get_css_set(cset);
5518 }
5519
	/* see cgroup_post_fork() for details */
5521 do_each_subsys_mask(ss, i, have_exit_callback) {
5522 ss->exit(tsk);
5523 } while_each_subsys_mask();
5524}
5525
5526void cgroup_free(struct task_struct *task)
5527{
5528 struct css_set *cset = task_css_set(task);
5529 struct cgroup_subsys *ss;
5530 int ssid;
5531
5532 do_each_subsys_mask(ss, ssid, have_free_callback) {
5533 ss->free(task);
5534 } while_each_subsys_mask();
5535
5536 put_css_set(cset);
5537}
5538
5539static int __init cgroup_disable(char *str)
5540{
5541 struct cgroup_subsys *ss;
5542 char *token;
5543 int i;
5544
5545 while ((token = strsep(&str, ",")) != NULL) {
5546 if (!*token)
5547 continue;
5548
5549 for_each_subsys(ss, i) {
5550 if (strcmp(token, ss->name) &&
5551 strcmp(token, ss->legacy_name))
5552 continue;
5553 cgroup_disable_mask |= 1 << i;
5554 }
5555 }
5556 return 1;
5557}
5558__setup("cgroup_disable=", cgroup_disable);
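
/*
 * Example: booting with "cgroup_disable=memory" (legacy names such as
 * "blkio" also match) sets the corresponding bit above, and cgroup_init()
 * later flips the subsystem's static key off and skips registering it
 * with the default hierarchy.
 */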
5559
/**
 * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
 * @dentry: directory dentry of interest
 * @ss: subsystem of interest
 *
 * If @dentry is a directory for a cgroup which has @ss enabled on it, try
 * to get the corresponding css and return it.  If such css doesn't exist
 * or can't be pinned, an ERR_PTR value is returned.
 */
5569struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
5570 struct cgroup_subsys *ss)
5571{
5572 struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
5573 struct file_system_type *s_type = dentry->d_sb->s_type;
5574 struct cgroup_subsys_state *css = NULL;
5575 struct cgroup *cgrp;
5576
	/* is @dentry a cgroup dir? */
5578 if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
5579 !kn || kernfs_type(kn) != KERNFS_DIR)
5580 return ERR_PTR(-EBADF);
5581
5582 rcu_read_lock();

	/*
	 * This path doesn't originate from kernfs and @kn could already
	 * have been or be removed at any point.  @kn->priv is RCU
	 * protected for this access.  See css_release_work_fn() for details.
	 */
5589 cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
5590 if (cgrp)
5591 css = cgroup_css(cgrp, ss);
5592
5593 if (!css || !css_tryget_online(css))
5594 css = ERR_PTR(-ENOENT);
5595
5596 rcu_read_unlock();
5597 return css;
5598}
5599
/**
 * css_from_id - lookup css by id
 * @id: the cgroup id
 * @ss: cgroup subsys to be looked into
 *
 * Returns the css if there's valid one with @id, otherwise returns NULL.
 * Should be called under rcu_read_lock().
 */
5608struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
5609{
5610 WARN_ON_ONCE(!rcu_read_lock_held());
5611 return idr_find(&ss->css_idr, id);
5612}
5613
/**
 * cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path
 * @path: path on the default hierarchy
 *
 * Find the cgroup at @path on the default hierarchy, increment its
 * reference count and return it.  Returns ERR_PTR(-ENOENT) if @path
 * doesn't exist and ERR_PTR(-ENOTDIR) if @path points to a non-directory.
 */
5623struct cgroup *cgroup_get_from_path(const char *path)
5624{
5625 struct kernfs_node *kn;
5626 struct cgroup *cgrp;
5627
5628 mutex_lock(&cgroup_mutex);
5629
5630 kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
5631 if (kn) {
5632 if (kernfs_type(kn) == KERNFS_DIR) {
5633 cgrp = kn->priv;
5634 cgroup_get_live(cgrp);
5635 } else {
5636 cgrp = ERR_PTR(-ENOTDIR);
5637 }
5638 kernfs_put(kn);
5639 } else {
5640 cgrp = ERR_PTR(-ENOENT);
5641 }
5642
5643 mutex_unlock(&cgroup_mutex);
5644 return cgrp;
5645}
5646EXPORT_SYMBOL_GPL(cgroup_get_from_path);
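
/*
 * Usage sketch ("/my/group" is a hypothetical path on the default
 * hierarchy):
 *
 *	struct cgroup *cgrp;
 *
 *	cgrp = cgroup_get_from_path("/my/group");
 *	if (IS_ERR(cgrp))
 *		return PTR_ERR(cgrp);
 *	// ... use cgrp ...
 *	cgroup_put(cgrp);
 */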
5647
/**
 * cgroup_get_from_fd - get a cgroup pointer from a fd
 * @fd: fd obtained by open(cgroup2_dir)
 *
 * Find the cgroup from a fd which should be obtained
 * by opening a cgroup directory.  Returns a pointer to the
 * cgroup on success.  ERR_PTR is returned if the cgroup
 * cannot be found.
 */
5657struct cgroup *cgroup_get_from_fd(int fd)
5658{
5659 struct cgroup_subsys_state *css;
5660 struct cgroup *cgrp;
5661 struct file *f;
5662
5663 f = fget_raw(fd);
5664 if (!f)
5665 return ERR_PTR(-EBADF);
5666
5667 css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
5668 fput(f);
5669 if (IS_ERR(css))
5670 return ERR_CAST(css);
5671
5672 cgrp = css->cgroup;
5673 if (!cgroup_on_dfl(cgrp)) {
5674 cgroup_put(cgrp);
5675 return ERR_PTR(-EBADF);
5676 }
5677
5678 return cgrp;
5679}
5680EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
5681
/*
 * sock->sk_cgrp_data handling.  For more info, see sock_cgroup_data
 * definition in cgroup-defs.h.
 */
5686#ifdef CONFIG_SOCK_CGROUP_DATA
5687
5688#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
5689
5690DEFINE_SPINLOCK(cgroup_sk_update_lock);
5691static bool cgroup_sk_alloc_disabled __read_mostly;
5692
5693void cgroup_sk_alloc_disable(void)
5694{
5695 if (cgroup_sk_alloc_disabled)
5696 return;
5697 pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
5698 cgroup_sk_alloc_disabled = true;
5699}
5700
5701#else
5702
5703#define cgroup_sk_alloc_disabled false
5704
5705#endif
5706
5707void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
5708{
5709 if (cgroup_sk_alloc_disabled)
5710 return;
5711
	/* Socket clone path */
5713 if (skcd->val) {
		/*
		 * We might be cloning a socket which is left in an empty
		 * cgroup and the cgroup might have already been rmdir'd.
		 * Don't use cgroup_get_live().
		 */
5719 cgroup_get(sock_cgroup_ptr(skcd));
5720 return;
5721 }
5722
5723 rcu_read_lock();
5724
5725 while (true) {
5726 struct css_set *cset;
5727
5728 cset = task_css_set(current);
5729 if (likely(cgroup_tryget(cset->dfl_cgrp))) {
5730 skcd->val = (unsigned long)cset->dfl_cgrp;
5731 break;
5732 }
5733 cpu_relax();
5734 }
5735
5736 rcu_read_unlock();
5737}
5738
5739void cgroup_sk_free(struct sock_cgroup_data *skcd)
5740{
5741 cgroup_put(sock_cgroup_ptr(skcd));
5742}
5743
5744#endif
5745
5746#ifdef CONFIG_CGROUP_BPF
5747int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
5748 enum bpf_attach_type type, bool overridable)
5749{
5750 struct cgroup *parent = cgroup_parent(cgrp);
5751 int ret;
5752
5753 mutex_lock(&cgroup_mutex);
5754 ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
5755 mutex_unlock(&cgroup_mutex);
5756 return ret;
5757}
5758#endif
5759