1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30
31#include "cgroup-internal.h"
32
33#include <linux/cred.h>
34#include <linux/errno.h>
35#include <linux/init_task.h>
36#include <linux/kernel.h>
37#include <linux/magic.h>
38#include <linux/mutex.h>
39#include <linux/mount.h>
40#include <linux/pagemap.h>
41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
43#include <linux/sched.h>
44#include <linux/sched/task.h>
45#include <linux/slab.h>
46#include <linux/spinlock.h>
47#include <linux/percpu-rwsem.h>
48#include <linux/string.h>
49#include <linux/hashtable.h>
50#include <linux/idr.h>
51#include <linux/kthread.h>
52#include <linux/atomic.h>
53#include <linux/cpuset.h>
54#include <linux/proc_ns.h>
55#include <linux/nsproxy.h>
56#include <linux/file.h>
57#include <net/sock.h>
58
59#define CREATE_TRACE_POINTS
60#include <trace/events/cgroup.h>
61
/*
 * Maximum length of a cgroup file name: optional subsystem-name prefix,
 * '.' separator, the cftype name and the terminating NUL.
 */
#define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN +	\
			      MAX_CFTYPE_NAME + 2)
64
65
66
67
68
69
70
71
72
73
74
/*
 * cgroup_mutex is the master lock.  Any modification to cgroup or its
 * hierarchy must be performed while holding it.
 *
 * css_set_lock protects task->cgroups pointer, the list of css_set
 * objects, and the chain of tasks off each css_set.
 *
 * Both locks are exported when CONFIG_PROVE_RCU is set so that accessors
 * in cgroup.h can use them for lockdep annotations.
 */
DEFINE_MUTEX(cgroup_mutex);
DEFINE_SPINLOCK(css_set_lock);

#ifdef CONFIG_PROVE_RCU
EXPORT_SYMBOL_GPL(cgroup_mutex);
EXPORT_SYMBOL_GPL(css_set_lock);
#endif

/*
 * Protects cgroup_idr and css_idr so that IDs can be released without
 * grabbing cgroup_mutex.  Taken with _bh as release may happen from
 * softirq context.
 */
static DEFINE_SPINLOCK(cgroup_idr_lock);

/*
 * Protects cgroup_file->kn so that cgroup_file_notify() can be serialized
 * against file removal/re-creation across css hiding.
 */
static DEFINE_SPINLOCK(cgroup_file_kn_lock);

/* rwsem used to synchronize threadgroup operations against migration */
struct percpu_rw_semaphore cgroup_threadgroup_rwsem;

/* assert that either cgroup_mutex or the RCU read lock is held */
#define cgroup_assert_mutex_or_rcu_locked()			\
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&		\
			 !lockdep_is_held(&cgroup_mutex),	\
			 "cgroup_mutex or RCU read lock required");
101
102
103
104
105
106
107
/*
 * cgroup destruction makes heavy use of work items and there can be a lot
 * of concurrent destructions.  A separate workqueue keeps destruction work
 * items from exhausting system_wq's max_active.
 */
static struct workqueue_struct *cgroup_destroy_wq;

/* generate an array of cgroup subsystem pointers */
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
struct cgroup_subsys *cgroup_subsys[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/* array of cgroup subsystem names, indexed by subsystem ID */
#define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
static const char *cgroup_subsys_name[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/*
 * Define and export per-subsystem static keys backing
 * cgroup_subsys_enabled() and cgroup_subsys_on_dfl() fast paths.
 */
#define SUBSYS(_x)							\
	DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key);		\
	DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key);		\
	EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key);		\
	EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
#include <linux/cgroup_subsys.h>
#undef SUBSYS

/* static keys indexed by subsystem ID: subsystem enabled at all */
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
static struct static_key_true *cgroup_subsys_enabled_key[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/* static keys indexed by subsystem ID: subsystem on the default hierarchy */
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/* per-cpu stats backing the default hierarchy's root cgroup */
static DEFINE_PER_CPU(struct cgroup_cpu_stat, cgrp_dfl_root_cpu_stat);
146
147
148
149
150
151
/*
 * The default hierarchy, reserved for the subsystems that are otherwise
 * unattached - it never has more than a single cgroup, and all tasks are
 * part of that cgroup.
 */
struct cgroup_root cgrp_dfl_root = { .cgrp.cpu_stat = &cgrp_dfl_root_cpu_stat };
EXPORT_SYMBOL_GPL(cgrp_dfl_root);

/*
 * The default hierarchy always exists but is hidden.  It is only made
 * visible once it has been explicitly mounted.
 */
static bool cgrp_dfl_visible;

/* some controllers are not supported on the default hierarchy */
static u16 cgrp_dfl_inhibit_ss_mask;

/* some controllers are implicitly enabled on the default hierarchy */
static u16 cgrp_dfl_implicit_ss_mask;

/* some controllers can be threaded on the default hierarchy */
static u16 cgrp_dfl_threaded_ss_mask;

/* the list of hierarchy roots */
LIST_HEAD(cgroup_roots);
static int cgroup_root_count;

/* hierarchy ID allocation and mapping, protected by cgroup_mutex */
static DEFINE_IDR(cgroup_hierarchy_idr);

/*
 * Assign a monotonically increasing serial number to csses.  It guarantees
 * cgroups with bigger numbers are newer than those with smaller numbers.
 * Also, as csses are always appended to the parent's ->children list, it
 * guarantees that sibling csses are always sorted in the ascending serial
 * number order on the list.  Protected by cgroup_mutex.
 */
static u64 css_serial_nr_next = 1;

/*
 * These bitmasks identify subsystems with specific features to avoid
 * having to do iterative checks repeatedly.
 */
static u16 have_fork_callback __read_mostly;
static u16 have_exit_callback __read_mostly;
static u16 have_free_callback __read_mostly;
static u16 have_canfork_callback __read_mostly;

/* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = {
	.count		= REFCOUNT_INIT(2),
	.user_ns	= &init_user_ns,
	.ns.ops		= &cgroupns_operations,
	.ns.inum	= PROC_CGROUP_INIT_INO,
	.root_cset	= &init_css_set,
};

static struct file_system_type cgroup2_fs_type;
static struct cftype cgroup_base_files[];

/* forward declarations for helpers defined later in this file */
static int cgroup_apply_control(struct cgroup *cgrp);
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
static void css_task_iter_advance(struct css_task_iter *it);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
					      struct cgroup_subsys *ss);
static void css_release(struct percpu_ref *ref);
static void kill_css(struct cgroup_subsys_state *css);
static int cgroup_addrm_files(struct cgroup_subsys_state *css,
			      struct cgroup *cgrp, struct cftype cfts[],
			      bool is_add);
218
219
220
221
222
223
224
225
226
227bool cgroup_ssid_enabled(int ssid)
228{
229 if (CGROUP_SUBSYS_COUNT == 0)
230 return false;
231
232 return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
233}
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288bool cgroup_on_dfl(const struct cgroup *cgrp)
289{
290 return cgrp->root == &cgrp_dfl_root;
291}
292
293
/*
 * ID allocation under cgroup_idr_lock.  Preload with the caller's gfp_mask
 * outside the lock, then allocate atomically under it with direct reclaim
 * stripped so the allocation cannot sleep while the bh-safe lock is held.
 */
static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
			    gfp_t gfp_mask)
{
	int ret;

	idr_preload(gfp_mask);
	spin_lock_bh(&cgroup_idr_lock);
	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
	spin_unlock_bh(&cgroup_idr_lock);
	idr_preload_end();
	return ret;
}
306
/* idr_replace() wrapper serialized by cgroup_idr_lock */
static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
{
	void *ret;

	spin_lock_bh(&cgroup_idr_lock);
	ret = idr_replace(idr, ptr, id);
	spin_unlock_bh(&cgroup_idr_lock);
	return ret;
}
316
/* idr_remove() wrapper serialized by cgroup_idr_lock */
static void cgroup_idr_remove(struct idr *idr, int id)
{
	spin_lock_bh(&cgroup_idr_lock);
	idr_remove(idr, id);
	spin_unlock_bh(&cgroup_idr_lock);
}
323
324static bool cgroup_has_tasks(struct cgroup *cgrp)
325{
326 return cgrp->nr_populated_csets;
327}
328
329bool cgroup_is_threaded(struct cgroup *cgrp)
330{
331 return cgrp->dom_cgrp != cgrp;
332}
333
334
335static bool cgroup_is_mixable(struct cgroup *cgrp)
336{
337
338
339
340
341
342 return !cgroup_parent(cgrp);
343}
344
345
/* can @cgrp become a thread root? should always be true for the root */
static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
{
	/* mixables don't care */
	if (cgroup_is_mixable(cgrp))
		return true;

	/* domain roots can't be nested under threaded */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* can only have either domain or threaded children */
	if (cgrp->nr_populated_domain_children)
		return false;

	/* and no domain controllers can be enabled */
	if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
		return false;

	return true;
}
366
367
/* is @cgrp root of a threaded subtree? */
bool cgroup_is_thread_root(struct cgroup *cgrp)
{
	/* thread root should be a domain */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* a domain w/ threaded children is a thread root */
	if (cgrp->nr_threaded_children)
		return true;

	/*
	 * A domain which has tasks and explicit threaded controllers
	 * enabled is a thread root.
	 */
	if (cgroup_has_tasks(cgrp) &&
	    (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask))
		return true;

	return false;
}
388
389
/* is @cgrp a valid domain - one whose subtree isn't threaded and which
 * isn't itself nested inside a threaded subtree? */
static bool cgroup_is_valid_domain(struct cgroup *cgrp)
{
	/* the cgroup itself can be a thread root */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* but the ancestors can't be unless mixable */
	while ((cgrp = cgroup_parent(cgrp))) {
		if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp))
			return false;
		if (cgroup_is_threaded(cgrp))
			return false;
	}

	return true;
}
406
407
/* subsystems visibly enabled on a cgroup */
static u16 cgroup_control(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	u16 root_ss_mask = cgrp->root->subsys_mask;

	if (parent) {
		u16 ss_mask = parent->subtree_control;

		/* threaded cgroups can only have threaded controllers */
		if (cgroup_is_threaded(cgrp))
			ss_mask &= cgrp_dfl_threaded_ss_mask;
		return ss_mask;
	}

	/* root cgroup: everything on the hierarchy except what's inhibited
	 * or implicit on the default hierarchy */
	if (cgroup_on_dfl(cgrp))
		root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
				  cgrp_dfl_implicit_ss_mask);
	return root_ss_mask;
}
427
428
/* subsystems enabled on a cgroup, including implicit and dependent ones */
static u16 cgroup_ss_mask(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);

	if (parent) {
		u16 ss_mask = parent->subtree_ss_mask;

		/* threaded cgroups can only have threaded controllers */
		if (cgroup_is_threaded(cgrp))
			ss_mask &= cgrp_dfl_threaded_ss_mask;
		return ss_mask;
	}

	return cgrp->root->subsys_mask;
}
444
445
446
447
448
449
450
451
452
453
454
455
/**
 * cgroup_css - obtain a cgroup's css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
 *
 * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
 * function must be called either under cgroup_mutex or rcu_read_lock()
 * and the caller is responsible for pinning the returned css if it wants
 * to keep accessing it outside the said locks.  This function may return
 * %NULL if @cgrp doesn't have @ss enabled.
 */
static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
					      struct cgroup_subsys *ss)
{
	if (ss)
		return rcu_dereference_check(cgrp->subsys[ss->id],
					     lockdep_is_held(&cgroup_mutex));
	else
		return &cgrp->self;
}
465
466
467
468
469
470
471
472
473
/**
 * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get @cgrp's css associated with @ss.  If the css doesn't exist
 * or is offline, %NULL is returned.  The caller must put the returned css
 * with css_put().
 */
static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
						     struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();
	css = cgroup_css(cgrp, ss);
	if (!css || !css_tryget_online(css))
		css = NULL;
	rcu_read_unlock();

	return css;
}
487
488
489
490
491
492
493
494
495
496
497
/**
 * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
 *
 * Similar to cgroup_css() but returns the effective css, which is defined
 * as the matching css of the nearest ancestor (self inclusive) which has
 * @ss enabled in its subtree ss mask.  This function guarantees that the
 * returned css is online and accessible while holding cgroup_mutex.
 */
static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
						struct cgroup_subsys *ss)
{
	lockdep_assert_held(&cgroup_mutex);

	if (!ss)
		return &cgrp->self;

	/*
	 * This function is used while updating css associations and thus
	 * can't test the csses directly.  Test ss_mask.
	 */
	while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
		cgrp = cgroup_parent(cgrp);
		if (!cgrp)
			return NULL;
	}

	return cgroup_css(cgrp, ss);
}
518
519
520
521
522
523
524
525
526
527
528
529
/**
 * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get the effective css of @cgrp for @ss.  The effective css is
 * looked up under RCU; if it can't be pinned online, walk up the ancestry
 * until one can be.  The returned css must be put with css_put().
 */
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
					     struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();

	do {
		css = cgroup_css(cgrp, ss);

		if (css && css_tryget_online(css))
			goto out_unlock;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);

	/* fall back to the subsystem's root css; getting it never fails */
	css = init_css_set.subsys[ss->id];
	css_get(css);
out_unlock:
	rcu_read_unlock();
	return css;
}
551
/* pin @cgrp which the caller knows (and asserts) isn't dead yet */
static void cgroup_get_live(struct cgroup *cgrp)
{
	WARN_ON_ONCE(cgroup_is_dead(cgrp));
	css_get(&cgrp->self);
}
557
/* resolve the css a kernfs open file belongs to */
struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
{
	struct cgroup *cgrp = of->kn->parent->priv;
	struct cftype *cft = of_cft(of);

	/*
	 * This is open and unprotected implementation of cgroup_css().
	 * seq_css() is only called from a kernfs file operation which takes
	 * an active reference on the file.  Because all the subsystem
	 * files are drained before a css is disassociated with a cgroup,
	 * the matching css from the cgroup's subsys table is guaranteed to
	 * be and stay valid until the enclosing operation is complete.
	 */
	if (cft->ss)
		return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
	else
		return &cgrp->self;
}
EXPORT_SYMBOL_GPL(of_css);
577
578
579
580
581
582
583
584
585
/**
 * for_each_css - iterate all css's of a cgroup
 * @css: the iteration cursor
 * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
 * @cgrp: the target cgroup to iterate css's of
 *
 * Should be called under cgroup_mutex.
 */
#define for_each_css(css, ssid, cgrp)					\
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
		if (!((css) = rcu_dereference_check(			\
				(cgrp)->subsys[(ssid)],			\
				lockdep_is_held(&cgroup_mutex)))) { }	\
		else

/**
 * for_each_e_css - iterate all effective css's of a cgroup
 * @css: the iteration cursor
 * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
 * @cgrp: the target cgroup to iterate css's of
 *
 * Should be called under cgroup_mutex.
 */
#define for_each_e_css(css, ssid, cgrp)					\
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
		if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
			;						\
		else

/**
 * do_each_subsys_mask - filter for_each_subsys with a bitmask
 * @ss: the iteration cursor
 * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
 * @ss_mask: the bitmask
 *
 * The block will only run for cases where the ssid-th bit (1 << ssid) of
 * @ss_mask is set.  Must be closed with while_each_subsys_mask().
 */
#define do_each_subsys_mask(ss, ssid, ss_mask) do {			\
	unsigned long __ss_mask = (ss_mask);				\
	if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */	\
		(ssid) = 0;						\
		break;							\
	}								\
	for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) {	\
		(ss) = cgroup_subsys[ssid];				\
		{

#define while_each_subsys_mask()					\
		}							\
	}								\
} while (false)

/* iterate over child cgrps, lock should be held throughout iteration */
#define cgroup_for_each_live_child(child, cgrp)				\
	list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       cgroup_is_dead(child); }))			\
			;						\
		else

/* walk live descendants in preorder */
#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)		\
	css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))	\
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       (dsct) = (d_css)->cgroup;			\
		       cgroup_is_dead(dsct); }))			\
			;						\
		else

/* walk live descendants in postorder */
#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp)		\
	css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL))	\
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       (dsct) = (d_css)->cgroup;			\
		       cgroup_is_dead(dsct); }))			\
			;						\
		else
656
657
658
659
660
661
662
663
/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted.  It contains a pointer to the root state
 * for each subsystem.  Also used to anchor the list of css_sets.  Not
 * reference-counted, to improve performance when child cgroups
 * haven't been created.
 */
struct css_set init_css_set = {
	.refcount		= REFCOUNT_INIT(1),
	.dom_cset		= &init_css_set,
	.tasks			= LIST_HEAD_INIT(init_css_set.tasks),
	.mg_tasks		= LIST_HEAD_INIT(init_css_set.mg_tasks),
	.task_iters		= LIST_HEAD_INIT(init_css_set.task_iters),
	.threaded_csets		= LIST_HEAD_INIT(init_css_set.threaded_csets),
	.cgrp_links		= LIST_HEAD_INIT(init_css_set.cgrp_links),
	.mg_preload_node	= LIST_HEAD_INIT(init_css_set.mg_preload_node),
	.mg_node		= LIST_HEAD_INIT(init_css_set.mg_node),

	/*
	 * The following field is re-initialized when this cset gets linked
	 * in cgroup_init().  However, let's initialize the field
	 * statically too so that the default cgroup can be accessed safely
	 * early during boot.
	 */
	.dfl_cgrp		= &cgrp_dfl_root.cgrp,
};

/* number of css_sets in the system, starting with init_css_set */
static int css_set_count	= 1;	/* 1 for init_css_set */
685
686static bool css_set_threaded(struct css_set *cset)
687{
688 return cset->dom_cset != cset;
689}
690
691
692
693
694
695
696
697
698
699
/**
 * css_set_populated - does a css_set contain any tasks?
 * @cset: target css_set
 *
 * css_set_populated() should be the same as !!cset->nr_tasks at steady
 * state.  However, css_set_populated() can be called while a task is being
 * added to or removed from the linked list before the nr_tasks is
 * properly updated.  Hence, we can't just look at ->nr_tasks here.
 */
static bool css_set_populated(struct css_set *cset)
{
	lockdep_assert_held(&css_set_lock);

	return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks);
}
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
/**
 * cgroup_update_populated - update the populated count of a cgroup
 * @cgrp: the target cgroup
 * @populated: inc or dec populated count
 *
 * One of the css_sets associated with @cgrp is either getting its first
 * task or losing the last.  Update @cgrp's populated counters (own csets
 * on the first level, threaded/domain children on the ancestors) and
 * propagate upwards as long as the populated state of a cgroup actually
 * changes, firing the "populated" notification on each such cgroup.
 */
static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
{
	struct cgroup *child = NULL;
	int adj = populated ? 1 : -1;

	lockdep_assert_held(&css_set_lock);

	do {
		bool was_populated = cgroup_is_populated(cgrp);

		if (!child) {
			/* first iteration: @cgrp's own csets changed */
			cgrp->nr_populated_csets += adj;
		} else {
			/* ancestors: a child subtree changed state */
			if (cgroup_is_threaded(child))
				cgrp->nr_populated_threaded_children += adj;
			else
				cgrp->nr_populated_domain_children += adj;
		}

		/* stop propagating once the visible state is unchanged */
		if (was_populated == cgroup_is_populated(cgrp))
			break;

		cgroup1_check_for_release(cgrp);
		cgroup_file_notify(&cgrp->events_file);

		child = cgrp;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);
}
753
754
755
756
757
758
759
760
761
/**
 * css_set_update_populated - update populated state of a css_set
 * @cset: target css_set
 * @populated: whether @cset is populated or depopulated
 *
 * @cset is either getting the first task or losing the last.  Update the
 * populated counters of all associated cgroups accordingly.
 */
static void css_set_update_populated(struct css_set *cset, bool populated)
{
	struct cgrp_cset_link *link;

	lockdep_assert_held(&css_set_lock);

	list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
		cgroup_update_populated(link->cgrp, populated);
}
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
/**
 * css_set_move_task - move a task from one css_set to another
 * @task: task being moved
 * @from_cset: css_set @task currently belongs to (may be NULL)
 * @to_cset: new css_set @task is being moved to (may be NULL)
 * @use_mg_tasks: move to @to_cset->mg_tasks instead of ->tasks
 *
 * Move @task from @from_cset to @to_cset.  If @task didn't belong to any
 * css_set, @from_cset can be NULL.  If @task is being disassociated
 * instead of moved, @to_cset can be NULL.
 *
 * This function automatically handles populated counter updates and
 * css_task_iter adjustments but the caller is responsible for managing
 * @from_cset and @to_cset's reference counts.
 */
static void css_set_move_task(struct task_struct *task,
			      struct css_set *from_cset, struct css_set *to_cset,
			      bool use_mg_tasks)
{
	lockdep_assert_held(&css_set_lock);

	if (to_cset && !css_set_populated(to_cset))
		css_set_update_populated(to_cset, true);

	if (from_cset) {
		struct css_task_iter *it, *pos;

		WARN_ON_ONCE(list_empty(&task->cg_list));

		/*
		 * @task is leaving, advance task iterators which are
		 * pointing to it so that they can resume at the next
		 * position.  Advancing an iterator might remove it from
		 * the list, use safe walk.  See css_task_iter_advance*()
		 * for details.
		 */
		list_for_each_entry_safe(it, pos, &from_cset->task_iters,
					 iters_node)
			if (it->task_pos == &task->cg_list)
				css_task_iter_advance(it);

		list_del_init(&task->cg_list);
		if (!css_set_populated(from_cset))
			css_set_update_populated(from_cset, false);
	} else {
		WARN_ON_ONCE(!list_empty(&task->cg_list));
	}

	if (to_cset) {
		/*
		 * We are synchronized through cgroup_threadgroup_rwsem
		 * against PF_EXITING setting such that we can't race
		 * against cgroup_exit() changing the css_set to
		 * init_css_set and dropping the old one.
		 */
		WARN_ON_ONCE(task->flags & PF_EXITING);

		rcu_assign_pointer(task->cgroups, to_cset);
		list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
							     &to_cset->tasks);
	}
}
834
835
836
837
838
839
/*
 * hash table for cgroup groups.  This improves the performance to find
 * an existing css_set.  This hash doesn't (currently) take into
 * account cgroups in empty hierarchies.
 */
#define CSS_SET_HASH_BITS	7
static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
842
843static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
844{
845 unsigned long key = 0UL;
846 struct cgroup_subsys *ss;
847 int i;
848
849 for_each_subsys(ss, i)
850 key += (unsigned long)css[i];
851 key = (key >> 16) ^ key;
852
853 return key;
854}
855
/*
 * Drop a reference on @cset and, if it hits zero, unlink it from the
 * css_set/cgroup data structures and free it.  Caller must hold
 * css_set_lock.
 */
void put_css_set_locked(struct css_set *cset)
{
	struct cgrp_cset_link *link, *tmp_link;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&css_set_lock);

	if (!refcount_dec_and_test(&cset->refcount))
		return;

	WARN_ON_ONCE(!list_empty(&cset->threaded_csets));

	/* This css_set is dead. unlink it and release cgroup and css refs */
	for_each_subsys(ss, ssid) {
		list_del(&cset->e_cset_node[ssid]);
		css_put(cset->subsys[ssid]);
	}
	hash_del(&cset->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		/* root cgroups aren't pinned by their links */
		if (cgroup_parent(link->cgrp))
			cgroup_put(link->cgrp);
		kfree(link);
	}

	/* a threaded cset holds a ref on its domain cset */
	if (css_set_threaded(cset)) {
		list_del(&cset->threaded_csets_node);
		put_css_set_locked(cset->dom_cset);
	}

	kfree_rcu(cset, rcu_head);
}
892
893
894
895
896
897
898
899
900
901
902
/**
 * compare_css_sets - helper function for find_existing_css_set().
 * @cset: candidate css_set being tested
 * @old_cset: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if "cset" matches "old_cset" except for the hierarchy
 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
 */
static bool compare_css_sets(struct css_set *cset,
			     struct css_set *old_cset,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct cgroup *new_dfl_cgrp;
	struct list_head *l1, *l2;

	/*
	 * On the default hierarchy, there can be csets which are
	 * associated with the same set of cgroups but different csses.
	 * Let's first ensure that csses match.
	 */
	if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
		return false;

	/* @cset's domain should match the default cgroup's */
	if (cgroup_on_dfl(new_cgrp))
		new_dfl_cgrp = new_cgrp;
	else
		new_dfl_cgrp = old_cset->dfl_cgrp;

	if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp)
		return false;

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies.  As different cgroups may
	 * share the same effective css, this comparison is always
	 * necessary.
	 */
	l1 = &cset->cgrp_links;
	l2 = &old_cset->cgrp_links;
	while (1) {
		struct cgrp_cset_link *link1, *link2;
		struct cgroup *cgrp1, *cgrp2;

		l1 = l1->next;
		l2 = l2->next;
		/* See if we reached the end - both lists are equal length. */
		if (l1 == &cset->cgrp_links) {
			BUG_ON(l2 != &old_cset->cgrp_links);
			break;
		} else {
			BUG_ON(l2 == &old_cset->cgrp_links);
		}
		/* Locate the cgroups associated with these links. */
		link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
		link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
		cgrp1 = link1->cgrp;
		cgrp2 = link2->cgrp;
		/* Hierarchies should be linked in the same order. */
		BUG_ON(cgrp1->root != cgrp2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cgrp1->root == new_cgrp->root) {
			if (cgrp1 != new_cgrp)
				return false;
		} else {
			if (cgrp1 != cgrp2)
				return false;
		}
	}
	return true;
}
975
976
977
978
979
980
981
/**
 * find_existing_css_set - init css array and find the matching css_set
 * @old_cset: the css_set that we're using before the cgroup transition
 * @cgrp: the cgroup that we're moving into
 * @template: out parameter for the new set of csses, should be clear on entry
 */
static struct css_set *find_existing_css_set(struct css_set *old_cset,
					struct cgroup *cgrp,
					struct cgroup_subsys_state *template[])
{
	struct cgroup_root *root = cgrp->root;
	struct cgroup_subsys *ss;
	struct css_set *cset;
	unsigned long key;
	int i;

	/*
	 * Build the set of subsystem state objects that we want to see in
	 * the new css_set.  while subsystems can change globally, the
	 * entries here won't change for the same subsystem.
	 */
	for_each_subsys(ss, i) {
		if (root->subsys_mask & (1UL << i)) {
			/*
			 * @ss is in this hierarchy, so we want the
			 * effective css from @cgrp.
			 */
			template[i] = cgroup_e_css(cgrp, ss);
		} else {
			/*
			 * @ss is not in this hierarchy, so we don't want
			 * to change the css.
			 */
			template[i] = old_cset->subsys[i];
		}
	}

	key = css_set_hash(template);
	hash_for_each_possible(css_set_table, cset, hlist, key) {
		if (!compare_css_sets(cset, old_cset, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cset;
	}

	/* No existing cgroup group matched */
	return NULL;
}
1025
1026static void free_cgrp_cset_links(struct list_head *links_to_free)
1027{
1028 struct cgrp_cset_link *link, *tmp_link;
1029
1030 list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
1031 list_del(&link->cset_link);
1032 kfree(link);
1033 }
1034}
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
1045{
1046 struct cgrp_cset_link *link;
1047 int i;
1048
1049 INIT_LIST_HEAD(tmp_links);
1050
1051 for (i = 0; i < count; i++) {
1052 link = kzalloc(sizeof(*link), GFP_KERNEL);
1053 if (!link) {
1054 free_cgrp_cset_links(tmp_links);
1055 return -ENOMEM;
1056 }
1057 list_add(&link->cset_link, tmp_links);
1058 }
1059 return 0;
1060}
1061
1062
1063
1064
1065
1066
1067
/**
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
 * @cset: the css_set to be linked
 * @cgrp: the destination cgroup
 */
static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
			 struct cgroup *cgrp)
{
	struct cgrp_cset_link *link;

	BUG_ON(list_empty(tmp_links));

	if (cgroup_on_dfl(cgrp))
		cset->dfl_cgrp = cgrp;

	link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
	link->cset = cset;
	link->cgrp = cgrp;

	/*
	 * Always add links to the tail of the lists so that the lists are
	 * in chronological order.
	 */
	list_move_tail(&link->cset_link, &cgrp->cset_links);
	list_add_tail(&link->cgrp_link, &cset->cgrp_links);

	/* root cgroups aren't pinned by their links */
	if (cgroup_parent(cgrp))
		cgroup_get_live(cgrp);
}
1092
1093
1094
1095
1096
1097
1098
1099
1100
/**
 * find_css_set - return a new css_set with one cgroup updated
 * @old_cset: the baseline css_set
 * @cgrp: the cgroup to be updated
 *
 * Return a new css_set that's equivalent to @old_cset, but with @cgrp
 * substituted into the appropriate hierarchy.  The returned css_set is
 * already referenced; NULL is returned on allocation failure.
 */
static struct css_set *find_css_set(struct css_set *old_cset,
				    struct cgroup *cgrp)
{
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
	struct css_set *cset;
	struct list_head tmp_links;
	struct cgrp_cset_link *link;
	struct cgroup_subsys *ss;
	unsigned long key;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	/* First see if we already have a cgroup group that matches
	 * the desired set */
	spin_lock_irq(&css_set_lock);
	cset = find_existing_css_set(old_cset, cgrp, template);
	if (cset)
		get_css_set(cset);
	spin_unlock_irq(&css_set_lock);

	if (cset)
		return cset;

	cset = kzalloc(sizeof(*cset), GFP_KERNEL);
	if (!cset)
		return NULL;

	/* Allocate all the cgrp_cset_link objects that we'll need */
	if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
		kfree(cset);
		return NULL;
	}

	refcount_set(&cset->refcount, 1);
	cset->dom_cset = cset;
	INIT_LIST_HEAD(&cset->tasks);
	INIT_LIST_HEAD(&cset->mg_tasks);
	INIT_LIST_HEAD(&cset->task_iters);
	INIT_LIST_HEAD(&cset->threaded_csets);
	INIT_HLIST_NODE(&cset->hlist);
	INIT_LIST_HEAD(&cset->cgrp_links);
	INIT_LIST_HEAD(&cset->mg_preload_node);
	INIT_LIST_HEAD(&cset->mg_node);

	/* Copy the set of subsystem state objects generated in
	 * find_existing_css_set() */
	memcpy(cset->subsys, template, sizeof(cset->subsys));

	spin_lock_irq(&css_set_lock);
	/* Add reference counts and links from the new css_set. */
	list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
		struct cgroup *c = link->cgrp;

		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_links, cset, c);
	}

	BUG_ON(!list_empty(&tmp_links));

	css_set_count++;

	/* Add @cset to the hash table */
	key = css_set_hash(cset->subsys);
	hash_add(css_set_table, &cset->hlist, key);

	for_each_subsys(ss, ssid) {
		struct cgroup_subsys_state *css = cset->subsys[ssid];

		list_add_tail(&cset->e_cset_node[ssid],
			      &css->cgroup->e_csets[ssid]);
		css_get(css);
	}

	spin_unlock_irq(&css_set_lock);

	/*
	 * If @cset should be threaded, look up the matching dom_cset and
	 * link them up.  We first fully initialize @cset then look for the
	 * dom_cset.  It's safe to race with the online path and the lookup
	 * recursion is bounded because a domain cset never needs another.
	 */
	if (cgroup_is_threaded(cset->dfl_cgrp)) {
		struct css_set *dcset;

		dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp);
		if (!dcset) {
			put_css_set(cset);
			return NULL;
		}

		spin_lock_irq(&css_set_lock);
		cset->dom_cset = dcset;
		list_add_tail(&cset->threaded_csets_node,
			      &dcset->threaded_csets);
		spin_unlock_irq(&css_set_lock);
	}

	return cset;
}
1202
1203struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
1204{
1205 struct cgroup *root_cgrp = kf_root->kn->priv;
1206
1207 return root_cgrp->root;
1208}
1209
/* allocate a hierarchy ID for @root; returns 0 or -errno */
static int cgroup_init_root_id(struct cgroup_root *root)
{
	int id;

	lockdep_assert_held(&cgroup_mutex);

	/* cyclic allocation so recently-released IDs aren't reused at once */
	id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
	if (id < 0)
		return id;

	root->hierarchy_id = id;
	return 0;
}
1223
/* release @root's hierarchy ID */
static void cgroup_exit_root_id(struct cgroup_root *root)
{
	lockdep_assert_held(&cgroup_mutex);

	idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
}
1230
1231void cgroup_free_root(struct cgroup_root *root)
1232{
1233 if (root) {
1234 idr_destroy(&root->cgroup_idr);
1235 kfree(root);
1236 }
1237}
1238
/*
 * Tear down @root: rebind its controllers to the default hierarchy,
 * release all cgrp_cset_links and the hierarchy ID, then destroy the
 * kernfs root and free @root.  Takes cgroup_mutex via
 * cgroup_lock_and_drain_offline() and releases it before returning.
 */
static void cgroup_destroy_root(struct cgroup_root *root)
{
	struct cgroup *cgrp = &root->cgrp;
	struct cgrp_cset_link *link, *tmp_link;

	trace_cgroup_destroy_root(root);

	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

	BUG_ON(atomic_read(&root->nr_cgrps));
	BUG_ON(!list_empty(&cgrp->self.children));

	/* Rebind all subsystems back to the default hierarchy */
	WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));

	/*
	 * Release all the links from cset_links to this hierarchy's
	 * root cgroup
	 */
	spin_lock_irq(&css_set_lock);

	list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		kfree(link);
	}

	spin_unlock_irq(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		cgroup_root_count--;
	}

	cgroup_exit_root_id(root);

	mutex_unlock(&cgroup_mutex);

	kernfs_destroy_root(root->kf_root);
	cgroup_free_root(root);
}
1280
1281
1282
1283
1284
/*
 * look up cgroup associated with current task's cgroup namespace on the
 * specified hierarchy
 */
static struct cgroup *
current_cgns_cgroup_from_root(struct cgroup_root *root)
{
	struct cgroup *res = NULL;
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	rcu_read_lock();

	cset = current->nsproxy->cgroup_ns->root_cset;
	if (cset == &init_css_set) {
		res = &root->cgrp;
	} else {
		struct cgrp_cset_link *link;

		/* find the link for @root among the cset's cgroups */
		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	rcu_read_unlock();

	/* every mounted hierarchy must have a cgroup for the ns root cset */
	BUG_ON(!res);
	return res;
}
1315
1316
/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
					    struct cgroup_root *root)
{
	struct cgroup *res = NULL;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&css_set_lock);

	if (cset == &init_css_set) {
		res = &root->cgrp;
	} else if (root == &cgrp_dfl_root) {
		/* the default cgroup is cached directly on the cset */
		res = cset->dfl_cgrp;
	} else {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}

	/* a cset is linked to exactly one cgroup on each mounted hierarchy */
	BUG_ON(!res);
	return res;
}
1345
1346
1347
1348
1349
/*
 * Return the cgroup for "task" from the given hierarchy.  Must be
 * called with cgroup_mutex and css_set_lock held.
 */
struct cgroup *task_cgroup_from_root(struct task_struct *task,
				     struct cgroup_root *root)
{
	/*
	 * No need to lock the task - since we hold cgroup_mutex the
	 * task can't change groups, so the only thing that can happen
	 * is that it exits and its css is set back to init_css_set.
	 */
	return cset_cgroup_from_root(task_css_set(task), root);
}
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
1388
1389static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
1390 char *buf)
1391{
1392 struct cgroup_subsys *ss = cft->ss;
1393
1394 if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
1395 !(cgrp->root->flags & CGRP_ROOT_NOPREFIX))
1396 snprintf(buf, CGROUP_FILE_NAME_MAX, "%s.%s",
1397 cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
1398 cft->name);
1399 else
1400 strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
1401 return buf;
1402}
1403
1404
1405
1406
1407
1408
1409
1410static umode_t cgroup_file_mode(const struct cftype *cft)
1411{
1412 umode_t mode = 0;
1413
1414 if (cft->read_u64 || cft->read_s64 || cft->seq_show)
1415 mode |= S_IRUGO;
1416
1417 if (cft->write_u64 || cft->write_s64 || cft->write) {
1418 if (cft->flags & CFTYPE_WORLD_WRITABLE)
1419 mode |= S_IWUGO;
1420 else
1421 mode |= S_IWUSR;
1422 }
1423
1424 return mode;
1425}
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
/**
 * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask
 * @subtree_control: the new subtree_control mask to consider
 * @this_ss_mask: available subsystems
 *
 * On the default hierarchy, a subsystem may request other subsystems to be
 * enabled together through its ->depends_on mask.  In such cases, more
 * subsystems than specified in "cgroup.subtree_control" may be enabled.
 *
 * This function calculates which subsystems need to be enabled if
 * @subtree_control is to be applied while restricted to @this_ss_mask.
 */
static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
{
	u16 cur_ss_mask = subtree_control;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	cur_ss_mask |= cgrp_dfl_implicit_ss_mask;

	/* iterate until the dependency closure stops growing */
	while (true) {
		u16 new_ss_mask = cur_ss_mask;

		do_each_subsys_mask(ss, ssid, cur_ss_mask) {
			new_ss_mask |= ss->depends_on;
		} while_each_subsys_mask();

		/*
		 * Mask out subsystems which aren't available.  This can
		 * happen only if some depended-upon subsystems were bound
		 * to non-default hierarchies.
		 */
		new_ss_mask &= this_ss_mask;

		if (new_ss_mask == cur_ss_mask)
			break;
		cur_ss_mask = new_ss_mask;
	}

	return cur_ss_mask;
}
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481void cgroup_kn_unlock(struct kernfs_node *kn)
1482{
1483 struct cgroup *cgrp;
1484
1485 if (kernfs_type(kn) == KERNFS_DIR)
1486 cgrp = kn->priv;
1487 else
1488 cgrp = kn->parent->priv;
1489
1490 mutex_unlock(&cgroup_mutex);
1491
1492 kernfs_unbreak_active_protection(kn);
1493 cgroup_put(cgrp);
1494}
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
/**
 * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
 * @kn: the kernfs_node being serviced
 * @drain_offline: perform offline draining on the cgroup
 *
 * A directory kernfs_node carries the cgroup in ->priv; a file's parent
 * does.  Pin the cgroup, break @kn's active protection to avoid deadlock
 * against removal, then grab cgroup_mutex (draining offline descendants
 * first if @drain_offline).  Returns the live cgroup with everything held,
 * or NULL - in which case everything has already been released and the
 * caller should return -ENODEV.  Pair with cgroup_kn_unlock().
 */
struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
{
	struct cgroup *cgrp;

	if (kernfs_type(kn) == KERNFS_DIR)
		cgrp = kn->priv;
	else
		cgrp = kn->parent->priv;

	/*
	 * We're gonna grab cgroup_mutex which nests outside kernfs
	 * active_ref.  cgroup liveliness check alone provides enough
	 * protection against removal.  Ensure @cgrp stays accessible and
	 * break the active_ref protection.
	 */
	if (!cgroup_tryget(cgrp))
		return NULL;
	kernfs_break_active_protection(kn);

	if (drain_offline)
		cgroup_lock_and_drain_offline(cgrp);
	else
		mutex_lock(&cgroup_mutex);

	if (!cgroup_is_dead(cgrp))
		return cgrp;

	cgroup_kn_unlock(kn);
	return NULL;
}
1543
/* remove @cft's file from @cgrp, clearing any cached kernfs node first */
static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
{
	char name[CGROUP_FILE_NAME_MAX];

	lockdep_assert_held(&cgroup_mutex);

	if (cft->file_offset) {
		struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
		struct cgroup_file *cfile = (void *)css + cft->file_offset;

		/* clear under the lock so cgroup_file_notify() can't race */
		spin_lock_irq(&cgroup_file_kn_lock);
		cfile->kn = NULL;
		spin_unlock_irq(&cgroup_file_kn_lock);
	}

	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
}
1561
1562
1563
1564
1565
/**
 * css_clear_dir - remove subsys files in a cgroup directory
 * @css: target css
 */
static void css_clear_dir(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cftype *cfts;

	if (!(css->flags & CSS_VISIBLE))
		return;

	css->flags &= ~CSS_VISIBLE;

	list_for_each_entry(cfts, &css->ss->cfts, node)
		cgroup_addrm_files(css, cgrp, cfts, false);
}
1579
1580
1581
1582
1583
1584
1585
/**
 * css_populate_dir - create subsys files in a cgroup directory
 * @css: target css
 *
 * On failure, no file is added.
 */
static int css_populate_dir(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cftype *cfts, *failed_cfts;
	int ret;

	if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
		return 0;

	if (!css->ss) {
		/* self css: add the cgroup core files for the hierarchy */
		if (cgroup_on_dfl(cgrp))
			cfts = cgroup_base_files;
		else
			cfts = cgroup1_base_files;

		return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
	}

	list_for_each_entry(cfts, &css->ss->cfts, node) {
		ret = cgroup_addrm_files(css, cgrp, cfts, true);
		if (ret < 0) {
			failed_cfts = cfts;
			goto err;
		}
	}

	css->flags |= CSS_VISIBLE;

	return 0;
err:
	/* roll back the cfts arrays added before the failing one */
	list_for_each_entry(cfts, &css->ss->cfts, node) {
		if (cfts == failed_cfts)
			break;
		cgroup_addrm_files(css, cgrp, cfts, false);
	}
	return ret;
}
1623
/*
 * Move the subsystems in @ss_mask from their current hierarchies to
 * @dst_root.  Caller must hold cgroup_mutex.  Returns 0 on success or
 * -EBUSY if a subsystem can't be moved.
 */
int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
{
	struct cgroup *dcgrp = &dst_root->cgrp;
	struct cgroup_subsys *ss;
	int ssid, i, ret;

	lockdep_assert_held(&cgroup_mutex);

	do_each_subsys_mask(ss, ssid, ss_mask) {
		/*
		 * If @ss has non-root csses attached to it, can't move.
		 * If @ss is an implicit controller, it is exempt from this
		 * rule and can be stolen.
		 */
		if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
		    !ss->implicit_on_dfl)
			return -EBUSY;

		/* can't move between two non-dummy roots either */
		if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
			return -EBUSY;
	} while_each_subsys_mask();

	do_each_subsys_mask(ss, ssid, ss_mask) {
		struct cgroup_root *src_root = ss->root;
		struct cgroup *scgrp = &src_root->cgrp;
		struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
		struct css_set *cset;

		WARN_ON(!css || cgroup_css(dcgrp, ss));

		/* disable from the source */
		src_root->subsys_mask &= ~(1 << ssid);
		WARN_ON(cgroup_apply_control(scgrp));
		cgroup_finalize_control(scgrp, 0);

		/* rebind */
		RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
		rcu_assign_pointer(dcgrp->subsys[ssid], css);
		ss->root = dst_root;
		css->cgroup = dcgrp;

		spin_lock_irq(&css_set_lock);
		hash_for_each(css_set_table, i, cset, hlist)
			list_move_tail(&cset->e_cset_node[ss->id],
				       &dcgrp->e_csets[ss->id]);
		spin_unlock_irq(&css_set_lock);

		/* default hierarchy doesn't enable controllers by default */
		dst_root->subsys_mask |= 1 << ssid;
		if (dst_root == &cgrp_dfl_root) {
			static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
		} else {
			dcgrp->subtree_control |= 1 << ssid;
			static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
		}

		ret = cgroup_apply_control(dcgrp);
		if (ret)
			pr_warn("partial failure to rebind %s controller (err=%d)\n",
				ss->name, ret);

		if (ss->bind)
			ss->bind(css);
	} while_each_subsys_mask();

	kernfs_activate(dcgrp->kn);
	return 0;
}
1693
/*
 * Show the mount path of @kf_node relative to the current task's cgroup
 * namespace root on the hierarchy (used for /proc/mounts et al).
 */
int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
		     struct kernfs_root *kf_root)
{
	int len = 0;
	char *buf = NULL;
	struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
	struct cgroup *ns_cgroup;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	spin_lock_irq(&css_set_lock);
	ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
	len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
	spin_unlock_irq(&css_set_lock);

	if (len >= PATH_MAX)
		len = -ERANGE;
	else if (len > 0) {
		/* escape characters that would break the mounts file format */
		seq_escape(sf, buf, " \t\n\\");
		len = 0;
	}
	kfree(buf);
	return len;
}
1720
1721static int parse_cgroup_root_flags(char *data, unsigned int *root_flags)
1722{
1723 char *token;
1724
1725 *root_flags = 0;
1726
1727 if (!data)
1728 return 0;
1729
1730 while ((token = strsep(&data, ",")) != NULL) {
1731 if (!strcmp(token, "nsdelegate")) {
1732 *root_flags |= CGRP_ROOT_NS_DELEGATE;
1733 continue;
1734 }
1735
1736 pr_err("cgroup2: unknown option \"%s\"\n", token);
1737 return -EINVAL;
1738 }
1739
1740 return 0;
1741}
1742
1743static void apply_cgroup_root_flags(unsigned int root_flags)
1744{
1745 if (current->nsproxy->cgroup_ns == &init_cgroup_ns) {
1746 if (root_flags & CGRP_ROOT_NS_DELEGATE)
1747 cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
1748 else
1749 cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
1750 }
1751}
1752
/* Show active cgroup2 mount options (currently only "nsdelegate"). */
static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
{
	if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
		seq_puts(seq, ",nsdelegate");
	return 0;
}
1759
/* Remount handler for cgroup2: re-parse the options and re-apply them. */
static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
{
	unsigned int root_flags;
	int ret;

	ret = parse_cgroup_root_flags(data, &root_flags);
	if (!ret)
		apply_cgroup_root_flags(root_flags);

	return ret;
}
1772
1773
1774
1775
1776
1777
1778
/*
 * Task->css_set linkage is maintained lazily: per-css_set task lists stay
 * empty until cgroup_enable_task_cg_lists() flips this flag.  Set once
 * (under css_set_lock) and never cleared.
 */
static bool use_task_css_set_links __read_mostly;
1780
/*
 * One-shot enabling of task->css_set list linkage: walk every thread in
 * the system and link it onto its css_set's tasks list.  Subsequent
 * calls are no-ops, guarded by use_task_css_set_links under
 * css_set_lock.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;

	spin_lock_irq(&css_set_lock);

	if (use_task_css_set_links)
		goto out_unlock;

	use_task_css_set_links = true;

	/*
	 * tasklist_lock excludes fork/exit from changing the thread list
	 * while we iterate it.
	 */
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		/* nothing should be linked yet - everyone is in init_css_set */
		WARN_ON_ONCE(!list_empty(&p->cg_list) ||
			     task_css_set(p) != &init_css_set);

		/*
		 * siglock synchronizes against cgroup_exit(): a task seen
		 * without PF_EXITING under siglock is guaranteed to go
		 * through cgroup_exit() later and unlink itself, so it's
		 * safe to add it to the list here.
		 */
		spin_lock(&p->sighand->siglock);
		if (!(p->flags & PF_EXITING)) {
			struct css_set *cset = task_css_set(p);

			if (!css_set_populated(cset))
				css_set_update_populated(cset, true);
			list_add_tail(&p->cg_list, &cset->tasks);
			get_css_set(cset);
			cset->nr_tasks++;
		}
		spin_unlock(&p->sighand->siglock);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
out_unlock:
	spin_unlock_irq(&css_set_lock);
}
1831
/* Initialize the fields of @cgrp common to every cgroup. */
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	int ssid;

	INIT_LIST_HEAD(&cgrp->self.sibling);
	INIT_LIST_HEAD(&cgrp->self.children);
	INIT_LIST_HEAD(&cgrp->cset_links);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	cgrp->self.cgroup = cgrp;
	cgrp->self.flags |= CSS_ONLINE;
	cgrp->dom_cgrp = cgrp;			/* its own domain by default */
	cgrp->max_descendants = INT_MAX;	/* unlimited by default */
	cgrp->max_depth = INT_MAX;

	for_each_subsys(ss, ssid)
		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);

	init_waitqueue_head(&cgrp->offline_waitq);
	INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
}
1854
/* Initialize @root (including its embedded root cgroup) from mount opts. */
void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
{
	struct cgroup *cgrp = &root->cgrp;

	INIT_LIST_HEAD(&root->root_list);
	atomic_set(&root->nr_cgrps, 1);	/* the root cgroup itself */
	cgrp->root = root;
	init_cgroup_housekeeping(cgrp);
	idr_init(&root->cgroup_idr);

	root->flags = opts->flags;
	if (opts->release_agent)
		strscpy(root->release_agent_path, opts->release_agent, PATH_MAX);
	if (opts->name)
		strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
	if (opts->cpuset_clone_children)
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
}
1873
/*
 * cgroup_setup_root - bring a newly initialized cgroup_root online
 * @root: root to set up
 * @ss_mask: controllers to bind to the new hierarchy
 * @ref_flags: percpu_ref init flags for the root css refcnt
 *
 * Allocates ids, creates the kernfs root, binds the requested
 * controllers and links every existing css_set to the root cgroup.
 * Returns 0 on success, -errno on failure.  Caller must hold
 * cgroup_mutex.
 */
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
{
	LIST_HEAD(tmp_links);
	struct cgroup *root_cgrp = &root->cgrp;
	struct kernfs_syscall_ops *kf_sops;
	struct css_set *cset;
	int i, ret;

	lockdep_assert_held(&cgroup_mutex);

	ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
	if (ret < 0)
		goto out;
	root_cgrp->id = ret;
	root_cgrp->ancestor_ids[0] = ret;

	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
			      ref_flags, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * Pre-allocate two cgrp_cset_links per existing css_set; they are
	 * consumed by link_css_set() below.  css_set_count is read without
	 * css_set_lock, which is fine: it can only change under
	 * cgroup_mutex, which we hold.
	 */
	ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
	if (ret)
		goto cancel_ref;

	ret = cgroup_init_root_id(root);
	if (ret)
		goto cancel_ref;

	/* v2 and v1 hierarchies use different kernfs syscall ops */
	kf_sops = root == &cgrp_dfl_root ?
		&cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops;

	root->kf_root = kernfs_create_root(kf_sops,
					   KERNFS_ROOT_CREATE_DEACTIVATED |
					   KERNFS_ROOT_SUPPORT_EXPORTOP,
					   root_cgrp);
	if (IS_ERR(root->kf_root)) {
		ret = PTR_ERR(root->kf_root);
		goto exit_root_id;
	}
	root_cgrp->kn = root->kf_root->kn;

	ret = css_populate_dir(&root_cgrp->self);
	if (ret)
		goto destroy_root;

	ret = rebind_subsystems(root, ss_mask);
	if (ret)
		goto destroy_root;

	ret = cgroup_bpf_inherit(root_cgrp);
	WARN_ON_ONCE(ret);

	trace_cgroup_setup_root(root);

	/*
	 * No failure is allowed after this point: rebinding already took
	 * effect and the root is about to become globally visible.
	 */
	list_add(&root->root_list, &cgroup_roots);
	cgroup_root_count++;

	/*
	 * Link the root cgroup in this hierarchy into all the existing
	 * css_sets, propagating the populated state.
	 */
	spin_lock_irq(&css_set_lock);
	hash_for_each(css_set_table, i, cset, hlist) {
		link_css_set(&tmp_links, cset, root_cgrp);
		if (css_set_populated(cset))
			cgroup_update_populated(root_cgrp, true);
	}
	spin_unlock_irq(&css_set_lock);

	BUG_ON(!list_empty(&root_cgrp->self.children));
	BUG_ON(atomic_read(&root->nr_cgrps) != 1);

	kernfs_activate(root_cgrp->kn);
	ret = 0;
	goto out;

destroy_root:
	kernfs_destroy_root(root->kf_root);
	root->kf_root = NULL;
exit_root_id:
	cgroup_exit_root_id(root);
cancel_ref:
	percpu_ref_exit(&root_cgrp->self.refcnt);
out:
	free_cgrp_cset_links(&tmp_links);
	return ret;
}
1974
1975struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
1976 struct cgroup_root *root, unsigned long magic,
1977 struct cgroup_namespace *ns)
1978{
1979 struct dentry *dentry;
1980 bool new_sb;
1981
1982 dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb);
1983
1984
1985
1986
1987
1988 if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
1989 struct dentry *nsdentry;
1990 struct cgroup *cgrp;
1991
1992 mutex_lock(&cgroup_mutex);
1993 spin_lock_irq(&css_set_lock);
1994
1995 cgrp = cset_cgroup_from_root(ns->root_cset, root);
1996
1997 spin_unlock_irq(&css_set_lock);
1998 mutex_unlock(&cgroup_mutex);
1999
2000 nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
2001 dput(dentry);
2002 dentry = nsdentry;
2003 }
2004
2005 if (IS_ERR(dentry) || !new_sb)
2006 cgroup_put(&root->cgrp);
2007
2008 return dentry;
2009}
2010
/*
 * Mount entry point shared by cgroup v1 and v2: dispatches on @fs_type.
 * Pins the caller's cgroup namespace, checks CAP_SYS_ADMIN in its user
 * namespace and delegates to cgroup_do_mount()/cgroup1_mount().
 */
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
			 int flags, const char *unused_dev_name,
			 void *data)
{
	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
	struct dentry *dentry;
	int ret;

	get_cgroup_ns(ns);

	/* check if the caller has permission to mount */
	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
		put_cgroup_ns(ns);
		return ERR_PTR(-EPERM);
	}

	/*
	 * The first time anyone tries to mount a cgroup, enable the list
	 * linking each css_set to its tasks and fix up all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	if (fs_type == &cgroup2_fs_type) {
		unsigned int root_flags;

		ret = parse_cgroup_root_flags(data, &root_flags);
		if (ret) {
			put_cgroup_ns(ns);
			return ERR_PTR(ret);
		}

		/* mark v2 in use and take a live ref for the mount */
		cgrp_dfl_visible = true;
		cgroup_get_live(&cgrp_dfl_root.cgrp);

		dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
					 CGROUP2_SUPER_MAGIC, ns);
		if (!IS_ERR(dentry))
			apply_cgroup_root_flags(root_flags);
	} else {
		dentry = cgroup1_mount(&cgroup_fs_type, flags, data,
				       CGROUP_SUPER_MAGIC, ns);
	}

	put_cgroup_ns(ns);
	return dentry;
}
2058
/* ->kill_sb callback: tear down a cgroup superblock. */
static void cgroup_kill_sb(struct super_block *sb)
{
	struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
	struct cgroup_root *root = cgroup_root_from_kf(kf_root);

	/*
	 * If @root still has children, just drop this mount's reference
	 * and keep the hierarchy alive.  The default root is never killed
	 * either.  Otherwise start killing the root's refcnt, which also
	 * prevents new mounts via percpu_ref_tryget_live() failing.
	 */
	if (!list_empty(&root->cgrp.self.children) ||
	    root == &cgrp_dfl_root)
		cgroup_put(&root->cgrp);
	else
		percpu_ref_kill(&root->cgrp.self.refcnt);

	kernfs_kill_sb(sb);
}
2079
/* cgroup v1 filesystem type */
struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
	.fs_flags = FS_USERNS_MOUNT,
};

/* cgroup v2 filesystem type */
static struct file_system_type cgroup2_fs_type = {
	.name = "cgroup2",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
	.fs_flags = FS_USERNS_MOUNT,
};
2093
/*
 * Format @cgrp's path relative to @ns's root cgroup into @buf.  Caller
 * must hold cgroup_mutex and css_set_lock (see cgroup_path_ns()).
 * Return value as per kernfs_path_from_node().
 */
int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
			  struct cgroup_namespace *ns)
{
	struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);

	return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
}
2101
/*
 * Locked wrapper around cgroup_path_ns_locked(): format @cgrp's path
 * relative to @ns's root into @buf while holding cgroup_mutex and
 * css_set_lock.
 */
int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
		   struct cgroup_namespace *ns)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(cgroup_path_ns);
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
/**
 * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
 * @task: target task
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
 * Determine @task's cgroup on the first (the one with the lowest non-zero
 * hierarchy id) cgroup hierarchy and copy its path into @buf.  This
 * function grabs cgroup_mutex and shouldn't be used from contexts which
 * can't sleep or which hold locks taken by controller callbacks.
 */
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
	struct cgroup_root *root;
	struct cgroup *cgrp;
	int hierarchy_id = 1;
	int ret;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);

	if (root) {
		cgrp = task_cgroup_from_root(task, root);
		ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
	} else {
		/* if no hierarchy exists, everyone is in "/" */
		ret = strlcpy(buf, "/", buflen);
	}

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
/**
 * cgroup_migrate_add_task - add a migration target task to a migration context
 * @task: target task
 * @mgctx: target migration context
 *
 * Add @task to @mgctx->tset.  Becomes a noop if @task doesn't need to be
 * migrated.  @task's css_set must already have been added as a migration
 * source; @task->cg_list is moved from the css_set's tasks list to its
 * mg_tasks list.
 */
static void cgroup_migrate_add_task(struct task_struct *task,
				    struct cgroup_mgctx *mgctx)
{
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	/* @task either already exited or can't exit until the end */
	if (task->flags & PF_EXITING)
		return;

	/* leave @task alone if it isn't linked onto a css_set yet */
	if (list_empty(&task->cg_list))
		return;

	/* not a migration source css_set - nothing to do */
	cset = task_css_set(task);
	if (!cset->mg_src_cgrp)
		return;

	mgctx->tset.nr_tasks++;

	list_move_tail(&task->cg_list, &cset->mg_tasks);
	if (list_empty(&cset->mg_node))
		list_add_tail(&cset->mg_node,
			      &mgctx->tset.src_csets);
	if (list_empty(&cset->mg_dst_cset->mg_node))
		list_add_tail(&cset->mg_dst_cset->mg_node,
			      &mgctx->tset.dst_csets);
}
2197
2198
2199
2200
2201
2202
2203
2204
/**
 * cgroup_taskset_first - reset taskset and return the first task
 * @tset: taskset of interest
 * @dst_cssp: output variable for the destination css
 *
 * @tset iteration is initialized and the first task is returned.
 */
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
					 struct cgroup_subsys_state **dst_cssp)
{
	tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
	tset->cur_task = NULL;

	return cgroup_taskset_next(tset, dst_cssp);
}
2213
2214
2215
2216
2217
2218
2219
2220
2221
/**
 * cgroup_taskset_next - iterate to the next task in taskset
 * @tset: taskset of interest
 * @dst_cssp: output variable for the destination css
 *
 * Return the next task in @tset.  Iteration must have been initialized
 * with cgroup_taskset_first().
 */
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
					struct cgroup_subsys_state **dst_cssp)
{
	struct css_set *cset = tset->cur_cset;
	struct task_struct *task = tset->cur_task;

	/* walk the mg_tasks list of each cset on tset->csets in turn */
	while (&cset->mg_node != tset->csets) {
		if (!task)
			task = list_first_entry(&cset->mg_tasks,
						struct task_struct, cg_list);
		else
			task = list_next_entry(task, cg_list);

		if (&task->cg_list != &cset->mg_tasks) {
			tset->cur_cset = cset;
			tset->cur_task = task;

			/*
			 * This function may be called both before and
			 * after cgroup_taskset_migrate().  The two cases
			 * can be distinguished by looking at whether
			 * @cset points to a source or a destination
			 * css_set.
			 */
			if (cset->mg_dst_cset)
				*dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
			else
				*dst_cssp = cset->subsys[tset->ssid];

			return task;
		}

		cset = list_next_entry(cset, mg_node);
		task = NULL;
	}

	return NULL;
}
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
2270{
2271 struct cgroup_taskset *tset = &mgctx->tset;
2272 struct cgroup_subsys *ss;
2273 struct task_struct *task, *tmp_task;
2274 struct css_set *cset, *tmp_cset;
2275 int ssid, failed_ssid, ret;
2276
2277
2278 if (tset->nr_tasks) {
2279 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2280 if (ss->can_attach) {
2281 tset->ssid = ssid;
2282 ret = ss->can_attach(tset);
2283 if (ret) {
2284 failed_ssid = ssid;
2285 goto out_cancel_attach;
2286 }
2287 }
2288 } while_each_subsys_mask();
2289 }
2290
2291
2292
2293
2294
2295
2296 spin_lock_irq(&css_set_lock);
2297 list_for_each_entry(cset, &tset->src_csets, mg_node) {
2298 list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
2299 struct css_set *from_cset = task_css_set(task);
2300 struct css_set *to_cset = cset->mg_dst_cset;
2301
2302 get_css_set(to_cset);
2303 to_cset->nr_tasks++;
2304 css_set_move_task(task, from_cset, to_cset, true);
2305 put_css_set_locked(from_cset);
2306 from_cset->nr_tasks--;
2307 }
2308 }
2309 spin_unlock_irq(&css_set_lock);
2310
2311
2312
2313
2314
2315
2316 tset->csets = &tset->dst_csets;
2317
2318 if (tset->nr_tasks) {
2319 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2320 if (ss->attach) {
2321 tset->ssid = ssid;
2322 ss->attach(tset);
2323 }
2324 } while_each_subsys_mask();
2325 }
2326
2327 ret = 0;
2328 goto out_release_tset;
2329
2330out_cancel_attach:
2331 if (tset->nr_tasks) {
2332 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2333 if (ssid == failed_ssid)
2334 break;
2335 if (ss->cancel_attach) {
2336 tset->ssid = ssid;
2337 ss->cancel_attach(tset);
2338 }
2339 } while_each_subsys_mask();
2340 }
2341out_release_tset:
2342 spin_lock_irq(&css_set_lock);
2343 list_splice_init(&tset->dst_csets, &tset->src_csets);
2344 list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
2345 list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
2346 list_del_init(&cset->mg_node);
2347 }
2348 spin_unlock_irq(&css_set_lock);
2349
2350
2351
2352
2353
2354
2355 tset->nr_tasks = 0;
2356 tset->csets = &tset->src_csets;
2357 return ret;
2358}
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
/**
 * cgroup_migrate_vet_dst - verify whether a cgroup can be migration destination
 * @dst_cgrp: destination cgroup to test
 *
 * On the default hierarchy, except for the mixable, (possible) thread
 * root and threaded cgroups, subtree_control must be zero for migration
 * destination cgroups with tasks so that child cgroups don't compete
 * against tasks.
 */
int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
{
	/* v1 doesn't have any restriction */
	if (!cgroup_on_dfl(dst_cgrp))
		return 0;

	/* verify @dst_cgrp can host resources */
	if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
		return -EOPNOTSUPP;

	/* mixables don't care */
	if (cgroup_is_mixable(dst_cgrp))
		return 0;

	/*
	 * If @dst_cgrp is already or can become a thread root or is
	 * threaded, it doesn't matter.
	 */
	if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp))
		return 0;

	/* apply no-internal-process constraint */
	if (dst_cgrp->subtree_control)
		return -EBUSY;

	return 0;
}
2396
2397
2398
2399
2400
2401
2402
2403
/**
 * cgroup_migrate_finish - cleanup after attach
 * @mgctx: migration context
 *
 * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst():
 * clear the migration markers on all preloaded css_sets and drop the
 * references those functions acquired.
 */
void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
{
	LIST_HEAD(preloaded);
	struct css_set *cset, *tmp_cset;

	lockdep_assert_held(&cgroup_mutex);

	spin_lock_irq(&css_set_lock);

	/* collect both src and dst preload lists for a single pass */
	list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
	list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);

	list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
		cset->mg_src_cgrp = NULL;
		cset->mg_dst_cgrp = NULL;
		cset->mg_dst_cset = NULL;
		list_del_init(&cset->mg_preload_node);
		put_css_set_locked(cset);
	}

	spin_unlock_irq(&css_set_lock);
}
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
/**
 * cgroup_migrate_add_src - add a migration source css_set
 * @src_cset: the source css_set to add
 * @dst_cgrp: the destination cgroup
 * @mgctx: migration context
 *
 * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp.
 * Pin @src_cset and mark it as a migration source, recording it on
 * @mgctx->preloaded_src_csets.  Must be paired with
 * cgroup_migrate_finish() which undoes the preloading.
 */
void cgroup_migrate_add_src(struct css_set *src_cset,
			    struct cgroup *dst_cgrp,
			    struct cgroup_mgctx *mgctx)
{
	struct cgroup *src_cgrp;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&css_set_lock);

	/*
	 * If ->dead, @src_cset is associated with one or more dying cgroups
	 * and doesn't contain any migratable tasks.  Ignore it early so
	 * that the rest of migration path doesn't get confused by it.
	 */
	if (src_cset->dead)
		return;

	src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);

	/* already added as a source? */
	if (!list_empty(&src_cset->mg_preload_node))
		return;

	WARN_ON(src_cset->mg_src_cgrp);
	WARN_ON(src_cset->mg_dst_cgrp);
	WARN_ON(!list_empty(&src_cset->mg_tasks));
	WARN_ON(!list_empty(&src_cset->mg_node));

	src_cset->mg_src_cgrp = src_cgrp;
	src_cset->mg_dst_cgrp = dst_cgrp;
	get_css_set(src_cset);
	list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
}
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
/**
 * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
 * @mgctx: migration context
 *
 * Tasks are about to be moved and all the source css_sets have been
 * preloaded to @mgctx->preloaded_src_csets.  This function looks up and
 * pins all destination css_sets, links each to its source, and fills
 * @mgctx->ss_mask with all the subsystems whose css changes.  Must be
 * cleaned up with cgroup_migrate_finish().
 *
 * Returns 0 on success or -ENOMEM if a destination css_set couldn't be
 * allocated.
 */
int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
{
	struct css_set *src_cset, *tmp_cset;

	lockdep_assert_held(&cgroup_mutex);

	/* look up the dst cset for each src cset and link it to src */
	list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
				 mg_preload_node) {
		struct css_set *dst_cset;
		struct cgroup_subsys *ss;
		int ssid;

		dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
		if (!dst_cset)
			goto err;

		WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);

		/*
		 * If src cset equals dst, it's noop.  Drop the src.
		 * cgroup_migrate() will skip the cset too.  Note that we
		 * can't handle src == dst as some nodes are used by both.
		 */
		if (src_cset == dst_cset) {
			src_cset->mg_src_cgrp = NULL;
			src_cset->mg_dst_cgrp = NULL;
			list_del_init(&src_cset->mg_preload_node);
			put_css_set(src_cset);
			put_css_set(dst_cset);
			continue;
		}

		src_cset->mg_dst_cset = dst_cset;

		/* only keep the ref if @dst_cset wasn't preloaded already */
		if (list_empty(&dst_cset->mg_preload_node))
			list_add_tail(&dst_cset->mg_preload_node,
				      &mgctx->preloaded_dst_csets);
		else
			put_css_set(dst_cset);

		/* record which controllers actually change csses */
		for_each_subsys(ss, ssid)
			if (src_cset->subsys[ssid] != dst_cset->subsys[ssid])
				mgctx->ss_mask |= 1 << ssid;
	}

	return 0;
err:
	cgroup_migrate_finish(mgctx);
	return -ENOMEM;
}
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
/**
 * cgroup_migrate - migrate a process or task to a cgroup
 * @leader: the leader of the process or the task to migrate
 * @threadgroup: whether @leader points to the whole process or a single task
 * @mgctx: migration context
 *
 * Migrate a process or task denoted by @leader.  If migrating a process,
 * the caller must be holding cgroup_threadgroup_rwsem.  The caller is
 * also responsible for invoking cgroup_migrate_add_src() and
 * cgroup_migrate_prepare_dst() on the targets before calling this
 * function and following up with cgroup_migrate_finish().
 */
int cgroup_migrate(struct task_struct *leader, bool threadgroup,
		   struct cgroup_mgctx *mgctx)
{
	struct task_struct *task;

	/*
	 * Prevent freeing of tasks while we take a snapshot.  Tasks that
	 * are already PF_EXITING are skipped by cgroup_migrate_add_task()
	 * and won't be visited.
	 */
	spin_lock_irq(&css_set_lock);
	rcu_read_lock();
	task = leader;
	do {
		cgroup_migrate_add_task(task, mgctx);
		if (!threadgroup)
			break;
	} while_each_thread(leader, task);
	rcu_read_unlock();
	spin_unlock_irq(&css_set_lock);

	return cgroup_migrate_execute(mgctx);
}
2583
2584
2585
2586
2587
2588
2589
2590
2591
/**
 * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
 * @dst_cgrp: the cgroup to attach to
 * @leader: the task or the leader of the threadgroup to be attached
 * @threadgroup: attach the whole threadgroup?
 *
 * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
 */
int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
		       bool threadgroup)
{
	DEFINE_CGROUP_MGCTX(mgctx);
	struct task_struct *task;
	int ret;

	ret = cgroup_migrate_vet_dst(dst_cgrp);
	if (ret)
		return ret;

	/* look up all src csets */
	spin_lock_irq(&css_set_lock);
	rcu_read_lock();
	task = leader;
	do {
		cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
		if (!threadgroup)
			break;
	} while_each_thread(leader, task);
	rcu_read_unlock();
	spin_unlock_irq(&css_set_lock);

	/* prepare dst csets and commit */
	ret = cgroup_migrate_prepare_dst(&mgctx);
	if (!ret)
		ret = cgroup_migrate(leader, threadgroup, &mgctx);

	cgroup_migrate_finish(&mgctx);

	if (!ret)
		trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);

	return ret;
}
2627
/*
 * Parse a pid from @buf and return the pinned target task with
 * cgroup_threadgroup_rwsem write-held, ready for migration.  On error
 * the rwsem is released and an ERR_PTR is returned.  Pair with
 * cgroup_procs_write_finish().
 */
struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
	__acquires(&cgroup_threadgroup_rwsem)
{
	struct task_struct *tsk;
	pid_t pid;

	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
		return ERR_PTR(-EINVAL);

	percpu_down_write(&cgroup_threadgroup_rwsem);

	rcu_read_lock();
	if (pid) {
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
			tsk = ERR_PTR(-ESRCH);
			goto out_unlock_threadgroup;
		}
	} else {
		/* pid 0 means the writing task itself */
		tsk = current;
	}

	if (threadgroup)
		tsk = tsk->group_leader;

	/*
	 * kthreads may acquire PF_NO_SETAFFINITY during initialization.
	 * If userland migrates such a kthread to a non-root cgroup, it can
	 * become trapped in a cpuset, or RT kthread may be born in a
	 * cgroup with no rt_runtime allocated.  Just say no.
	 */
	if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
		tsk = ERR_PTR(-EINVAL);
		goto out_unlock_threadgroup;
	}

	get_task_struct(tsk);
	goto out_unlock_rcu;

out_unlock_threadgroup:
	percpu_up_write(&cgroup_threadgroup_rwsem);
out_unlock_rcu:
	rcu_read_unlock();
	return tsk;
}
2673
/*
 * Finish a procs write started by cgroup_procs_write_start(): drop the
 * task reference, release cgroup_threadgroup_rwsem and run the
 * controllers' ->post_attach callbacks.
 */
void cgroup_procs_write_finish(struct task_struct *task)
	__releases(&cgroup_threadgroup_rwsem)
{
	struct cgroup_subsys *ss;
	int ssid;

	/* release reference from cgroup_procs_write_start() */
	put_task_struct(task);

	percpu_up_write(&cgroup_threadgroup_rwsem);
	for_each_subsys(ss, ssid)
		if (ss->post_attach)
			ss->post_attach();
}
2688
2689static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
2690{
2691 struct cgroup_subsys *ss;
2692 bool printed = false;
2693 int ssid;
2694
2695 do_each_subsys_mask(ss, ssid, ss_mask) {
2696 if (printed)
2697 seq_putc(seq, ' ');
2698 seq_printf(seq, "%s", ss->name);
2699 printed = true;
2700 } while_each_subsys_mask();
2701 if (printed)
2702 seq_putc(seq, '\n');
2703}
2704
2705
/* show controllers which are enabled from the parent */
static int cgroup_controllers_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;

	cgroup_print_ss_mask(seq, cgroup_control(cgrp));
	return 0;
}
2713
2714
/* show controllers which are enabled for a given cgroup's children */
static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
{
	struct cgroup *cgrp = seq_css(seq)->cgroup;

	cgroup_print_ss_mask(seq, cgrp->subtree_control);
	return 0;
}
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
/**
 * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
 * @cgrp: root of the target subtree
 *
 * @cgrp's control masks have changed and its subtree's css associations
 * need updating.  This function looks up all css_sets attached to the
 * subtree, creates the matching updated css_sets and migrates the tasks
 * to the new ones.
 */
static int cgroup_update_dfl_csses(struct cgroup *cgrp)
{
	DEFINE_CGROUP_MGCTX(mgctx);
	struct cgroup_subsys_state *d_css;
	struct cgroup *dsct;
	struct css_set *src_cset;
	int ret;

	lockdep_assert_held(&cgroup_mutex);

	percpu_down_write(&cgroup_threadgroup_rwsem);

	/* look up all csses currently attached to @cgrp's subtree */
	spin_lock_irq(&css_set_lock);
	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &dsct->cset_links, cset_link)
			cgroup_migrate_add_src(link->cset, dsct, &mgctx);
	}
	spin_unlock_irq(&css_set_lock);

	/* NULL dst indicates self on default hierarchy */
	ret = cgroup_migrate_prepare_dst(&mgctx);
	if (ret)
		goto out_finish;

	spin_lock_irq(&css_set_lock);
	list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
		struct task_struct *task, *ntask;

		/* all tasks in src_csets need to be migrated */
		list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
			cgroup_migrate_add_task(task, &mgctx);
	}
	spin_unlock_irq(&css_set_lock);

	ret = cgroup_migrate_execute(&mgctx);
out_finish:
	cgroup_migrate_finish(&mgctx);
	percpu_up_write(&cgroup_threadgroup_rwsem);
	return ret;
}
2775
2776
2777
2778
2779
2780
2781
2782
2783
/**
 * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
 * @cgrp: root of the target subtree
 *
 * Because css offlining is asynchronous, userland may try to re-enable a
 * controller while the previous css is still around.  This function
 * grabs cgroup_mutex and drains the previous css instances of @cgrp's
 * subtree, restarting the wait-and-relock loop each time it had to
 * sleep.
 */
void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
	__acquires(&cgroup_mutex)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;
	struct cgroup_subsys *ss;
	int ssid;

restart:
	mutex_lock(&cgroup_mutex);

	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
		for_each_subsys(ss, ssid) {
			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
			DEFINE_WAIT(wait);

			if (!css || !percpu_ref_is_dying(&css->refcnt))
				continue;

			/* pin @dsct so it doesn't go away while we sleep */
			cgroup_get_live(dsct);
			prepare_to_wait(&dsct->offline_waitq, &wait,
					TASK_UNINTERRUPTIBLE);

			/* drop the mutex before sleeping; retake and rescan */
			mutex_unlock(&cgroup_mutex);
			schedule();
			finish_wait(&dsct->offline_waitq, &wait);

			cgroup_put(dsct);
			goto restart;
		}
	}
}
2816
2817
2818
2819
2820
2821
2822
2823
/**
 * cgroup_save_control - save control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * Save ->subtree_control and ->subtree_ss_mask to the respective old_
 * prefixed fields for @cgrp's subtree including @cgrp itself, so that a
 * failed control operation can be rolled back by
 * cgroup_restore_control().
 */
static void cgroup_save_control(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;

	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		dsct->old_subtree_control = dsct->subtree_control;
		dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
	}
}
2834
2835
2836
2837
2838
2839
2840
2841
2842
/**
 * cgroup_propagate_control - refresh control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * For @cgrp and its subtree, ensure ->subtree_ss_mask matches
 * ->subtree_control and that only controllers available on the parent
 * stay enabled.
 */
static void cgroup_propagate_control(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;

	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		dsct->subtree_control &= cgroup_control(dsct);
		dsct->subtree_ss_mask =
			cgroup_calc_subtree_ss_mask(dsct->subtree_control,
						    cgroup_ss_mask(dsct));
	}
}
2855
2856
2857
2858
2859
2860
2861
2862
/**
 * cgroup_restore_control - restore control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * Restore ->subtree_control and ->subtree_ss_mask from the respective
 * old_ prefixed fields for @cgrp's subtree including @cgrp itself,
 * undoing a change saved with cgroup_save_control().
 */
static void cgroup_restore_control(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;

	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
		dsct->subtree_control = dsct->old_subtree_control;
		dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
	}
}
2873
2874static bool css_visible(struct cgroup_subsys_state *css)
2875{
2876 struct cgroup_subsys *ss = css->ss;
2877 struct cgroup *cgrp = css->cgroup;
2878
2879 if (cgroup_control(cgrp) & (1 << ss->id))
2880 return true;
2881 if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
2882 return false;
2883 return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
2884}
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
/**
 * cgroup_apply_control_enable - enable or show csses according to control
 * @cgrp: root of the target subtree
 *
 * Walk @cgrp's subtree and create new csses or make the existing ones
 * visible.  A css is created invisible if it's being implicitly enabled
 * through dependency.  An invisible css is made visible when the
 * userland explicitly enables it.
 *
 * Returns 0 on success, -errno on failure.  On failure, csses which
 * have been processed already aren't cleaned up.  The caller is
 * responsible for cleaning up with cgroup_apply_control_disable().
 */
static int cgroup_apply_control_enable(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;
	struct cgroup_subsys *ss;
	int ssid, ret;

	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		for_each_subsys(ss, ssid) {
			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);

			WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));

			if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
				continue;

			if (!css) {
				css = css_create(dsct, ss);
				if (IS_ERR(css))
					return PTR_ERR(css);
			}

			if (css_visible(css)) {
				ret = css_populate_dir(css);
				if (ret)
					return ret;
			}
		}
	}

	return 0;
}
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
/**
 * cgroup_apply_control_disable - kill or hide csses according to control
 * @cgrp: root of the target subtree
 *
 * Walk @cgrp's subtree and kill and hide csses so that they match
 * cgroup_ss_mask() and cgroup_visible_mask().
 *
 * A css is hidden when the userland requests it to be disabled while
 * other subsystems are still depending on it.  The css must not assume
 * that its interface files are either enabled or disabled while hidden.
 */
static void cgroup_apply_control_disable(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;
	struct cgroup_subsys *ss;
	int ssid;

	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
		for_each_subsys(ss, ssid) {
			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);

			WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));

			if (!css)
				continue;

			/* root csses (no parent) are never killed */
			if (css->parent &&
			    !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
				kill_css(css);
			} else if (!css_visible(css)) {
				css_clear_dir(css);
				if (ss->css_reset)
					ss->css_reset(css);
			}
		}
	}
}
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
/**
 * cgroup_apply_control - apply control mask updates to a subtree
 * @cgrp: root of the target subtree
 *
 * subsystems can be enabled and disabled in a subtree using the
 * following steps:
 *
 * 1. Call cgroup_save_control() to stash the current state.
 * 2. Update ->subtree_control masks in the subtree as desired.
 * 3. Call this function to apply the changes.
 * 4. Optionally perform other related operations.
 * 5. Call cgroup_finalize_control() to finish up.
 *
 * Returns 0 on success, -errno on failure.
 */
static int cgroup_apply_control(struct cgroup *cgrp)
{
	int ret;

	cgroup_propagate_control(cgrp);

	ret = cgroup_apply_control_enable(cgrp);
	if (ret)
		return ret;

	/*
	 * At this point, cgroup_e_css() results reflect the new csses
	 * making the following cgroup_update_dfl_csses() properly update
	 * css associations of all tasks in the subtree.
	 */
	return cgroup_update_dfl_csses(cgrp);
}
3011
3012
3013
3014
3015
3016
3017
3018
/**
 * cgroup_finalize_control - finalize control mask update
 * @cgrp: root of the target subtree
 * @ret: the result of the update
 *
 * Finalize the control mask update.  See cgroup_apply_control() for
 * more info.  On failure (@ret != 0) the saved masks are restored
 * before disabling/hiding the now-unwanted csses.
 */
static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
{
	if (ret) {
		cgroup_restore_control(cgrp);
		cgroup_propagate_control(cgrp);
	}

	cgroup_apply_control_disable(cgrp);
}
3028
/*
 * Verify that enabling the controllers in @enable in @cgrp's
 * subtree_control is allowed under the threaded-cgroup and
 * no-internal-process rules.  Returns 0 if OK, -errno otherwise.
 */
static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable)
{
	u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask;

	/* if nothing is getting enabled, nothing to worry about */
	if (!enable)
		return 0;

	/* can @cgrp host resources? */
	if (!cgroup_is_valid_domain(cgrp->dom_cgrp))
		return -EOPNOTSUPP;

	/* mixables don't care */
	if (cgroup_is_mixable(cgrp))
		return 0;

	if (domain_enable) {
		/* can't enable domain controllers inside a thread subtree */
		if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp))
			return -EOPNOTSUPP;
	} else {
		/*
		 * Threaded controllers can handle internal competitions
		 * and are always allowed inside a (prospective) thread
		 * subtree.
		 */
		if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp))
			return 0;
	}

	/*
	 * Controllers can't be enabled for a cgroup with tasks to avoid
	 * child cgroups competing against tasks.
	 */
	if (cgroup_has_tasks(cgrp))
		return -EBUSY;

	return 0;
}
3068
3069
/* change the enabled child controllers for a cgroup in the default hierarchy */
static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes,
					    loff_t off)
{
	u16 enable = 0, disable = 0;
	struct cgroup *cgrp, *child;
	struct cgroup_subsys *ss;
	char *tok;
	int ssid, ret;

	/*
	 * Parse input - space separated list of subsystem names prefixed
	 * with either + or -.
	 */
	buf = strstrip(buf);
	while ((tok = strsep(&buf, " "))) {
		if (tok[0] == '\0')
			continue;
		do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
			if (!cgroup_ssid_enabled(ssid) ||
			    strcmp(tok + 1, ss->name))
				continue;

			if (*tok == '+') {
				enable |= 1 << ssid;
				disable &= ~(1 << ssid);
			} else if (*tok == '-') {
				disable |= 1 << ssid;
				enable &= ~(1 << ssid);
			} else {
				return -EINVAL;
			}
			break;
		} while_each_subsys_mask();
		/* no controller matched the token */
		if (ssid == CGROUP_SUBSYS_COUNT)
			return -EINVAL;
	}

	cgrp = cgroup_kn_lock_live(of->kn, true);
	if (!cgrp)
		return -ENODEV;

	for_each_subsys(ss, ssid) {
		if (enable & (1 << ssid)) {
			/* already enabled - nothing to do */
			if (cgrp->subtree_control & (1 << ssid)) {
				enable &= ~(1 << ssid);
				continue;
			}

			/* unavailable on this cgroup? */
			if (!(cgroup_control(cgrp) & (1 << ssid))) {
				ret = -ENOENT;
				goto out_unlock;
			}
		} else if (disable & (1 << ssid)) {
			/* already disabled - nothing to do */
			if (!(cgrp->subtree_control & (1 << ssid))) {
				disable &= ~(1 << ssid);
				continue;
			}

			/* a child has it enabled? */
			cgroup_for_each_live_child(child, cgrp) {
				if (child->subtree_control & (1 << ssid)) {
					ret = -EBUSY;
					goto out_unlock;
				}
			}
		}
	}

	if (!enable && !disable) {
		ret = 0;
		goto out_unlock;
	}

	ret = cgroup_vet_subtree_control_enable(cgrp, enable);
	if (ret)
		goto out_unlock;

	/* save and update control masks and prepare csses */
	cgroup_save_control(cgrp);

	cgrp->subtree_control |= enable;
	cgrp->subtree_control &= ~disable;

	ret = cgroup_apply_control(cgrp);
	cgroup_finalize_control(cgrp, ret);
	if (ret)
		goto out_unlock;

	kernfs_activate(cgrp->kn);
out_unlock:
	cgroup_kn_unlock(of->kn);
	return ret ?: nbytes;
}
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
/**
 * cgroup_enable_threaded - make @cgrp threaded
 * @cgrp: the target cgroup
 *
 * Called when "threaded" is written to the cgroup.type interface file
 * and tries to make @cgrp threaded and join the parent's resource
 * domain.  This function is never called on the root cgroup as
 * cgroup.type doesn't exist on it.
 */
static int cgroup_enable_threaded(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	struct cgroup *dom_cgrp = parent->dom_cgrp;
	int ret;

	lockdep_assert_held(&cgroup_mutex);

	/* noop if already threaded */
	if (cgroup_is_threaded(cgrp))
		return 0;

	/*
	 * Allow enabling thread mode only on empty cgroups without
	 * domain-level resource consumption already configured, so that
	 * domain state isn't carried into the thread subtree.
	 */
	if (cgroup_is_populated(cgrp) ||
	    cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
		return -EOPNOTSUPP;

	/* we're joining the parent's domain, ensure its validity */
	if (!cgroup_is_valid_domain(dom_cgrp) ||
	    !cgroup_can_be_thread_root(dom_cgrp))
		return -EOPNOTSUPP;

	/*
	 * The following shouldn't cause actual migrations and should
	 * always succeed.
	 */
	cgroup_save_control(cgrp);

	cgrp->dom_cgrp = dom_cgrp;
	ret = cgroup_apply_control(cgrp);
	if (!ret)
		parent->nr_threaded_children++;
	else
		cgrp->dom_cgrp = cgrp;	/* roll back the domain change */

	cgroup_finalize_control(cgrp, ret);
	return ret;
}
3217
3218static int cgroup_type_show(struct seq_file *seq, void *v)
3219{
3220 struct cgroup *cgrp = seq_css(seq)->cgroup;
3221
3222 if (cgroup_is_threaded(cgrp))
3223 seq_puts(seq, "threaded\n");
3224 else if (!cgroup_is_valid_domain(cgrp))
3225 seq_puts(seq, "domain invalid\n");
3226 else if (cgroup_is_thread_root(cgrp))
3227 seq_puts(seq, "domain threaded\n");
3228 else
3229 seq_puts(seq, "domain\n");
3230
3231 return 0;
3232}
3233
/* Handle writes to cgroup.type; only "threaded" is accepted. */
static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
				 size_t nbytes, loff_t off)
{
	struct cgroup *cgrp;
	int ret;

	/* only switching to threaded mode is supported */
	if (strcmp(strstrip(buf), "threaded"))
		return -EINVAL;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENOENT;

	/* threaded can only be enabled */
	ret = cgroup_enable_threaded(cgrp);

	cgroup_kn_unlock(of->kn);
	return ret ?: nbytes;
}
3254
3255static int cgroup_max_descendants_show(struct seq_file *seq, void *v)
3256{
3257 struct cgroup *cgrp = seq_css(seq)->cgroup;
3258 int descendants = READ_ONCE(cgrp->max_descendants);
3259
3260 if (descendants == INT_MAX)
3261 seq_puts(seq, "max\n");
3262 else
3263 seq_printf(seq, "%d\n", descendants);
3264
3265 return 0;
3266}
3267
3268static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of,
3269 char *buf, size_t nbytes, loff_t off)
3270{
3271 struct cgroup *cgrp;
3272 int descendants;
3273 ssize_t ret;
3274
3275 buf = strstrip(buf);
3276 if (!strcmp(buf, "max")) {
3277 descendants = INT_MAX;
3278 } else {
3279 ret = kstrtoint(buf, 0, &descendants);
3280 if (ret)
3281 return ret;
3282 }
3283
3284 if (descendants < 0)
3285 return -ERANGE;
3286
3287 cgrp = cgroup_kn_lock_live(of->kn, false);
3288 if (!cgrp)
3289 return -ENOENT;
3290
3291 cgrp->max_descendants = descendants;
3292
3293 cgroup_kn_unlock(of->kn);
3294
3295 return nbytes;
3296}
3297
3298static int cgroup_max_depth_show(struct seq_file *seq, void *v)
3299{
3300 struct cgroup *cgrp = seq_css(seq)->cgroup;
3301 int depth = READ_ONCE(cgrp->max_depth);
3302
3303 if (depth == INT_MAX)
3304 seq_puts(seq, "max\n");
3305 else
3306 seq_printf(seq, "%d\n", depth);
3307
3308 return 0;
3309}
3310
3311static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
3312 char *buf, size_t nbytes, loff_t off)
3313{
3314 struct cgroup *cgrp;
3315 ssize_t ret;
3316 int depth;
3317
3318 buf = strstrip(buf);
3319 if (!strcmp(buf, "max")) {
3320 depth = INT_MAX;
3321 } else {
3322 ret = kstrtoint(buf, 0, &depth);
3323 if (ret)
3324 return ret;
3325 }
3326
3327 if (depth < 0)
3328 return -ERANGE;
3329
3330 cgrp = cgroup_kn_lock_live(of->kn, false);
3331 if (!cgrp)
3332 return -ENOENT;
3333
3334 cgrp->max_depth = depth;
3335
3336 cgroup_kn_unlock(of->kn);
3337
3338 return nbytes;
3339}
3340
3341static int cgroup_events_show(struct seq_file *seq, void *v)
3342{
3343 seq_printf(seq, "populated %d\n",
3344 cgroup_is_populated(seq_css(seq)->cgroup));
3345 return 0;
3346}
3347
3348static int cgroup_stat_show(struct seq_file *seq, void *v)
3349{
3350 struct cgroup *cgroup = seq_css(seq)->cgroup;
3351
3352 seq_printf(seq, "nr_descendants %d\n",
3353 cgroup->nr_descendants);
3354 seq_printf(seq, "nr_dying_descendants %d\n",
3355 cgroup->nr_dying_descendants);
3356
3357 return 0;
3358}
3359
3360static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
3361 struct cgroup *cgrp, int ssid)
3362{
3363 struct cgroup_subsys *ss = cgroup_subsys[ssid];
3364 struct cgroup_subsys_state *css;
3365 int ret;
3366
3367 if (!ss->css_extra_stat_show)
3368 return 0;
3369
3370 css = cgroup_tryget_css(cgrp, ss);
3371 if (!css)
3372 return 0;
3373
3374 ret = ss->css_extra_stat_show(seq, css);
3375 css_put(css);
3376 return ret;
3377}
3378
/* "cpu.stat" read handler - cputime stats plus cpu controller extras */
static int cpu_stat_show(struct seq_file *seq, void *v)
{
	struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
	int ret = 0;

	/* the aggregated cputime lines are always printed */
	cgroup_stat_show_cputime(seq);
#ifdef CONFIG_CGROUP_SCHED
	/* the cpu controller may append additional stats */
	ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
#endif
	return ret;
}
3390
3391static int cgroup_file_open(struct kernfs_open_file *of)
3392{
3393 struct cftype *cft = of->kn->priv;
3394
3395 if (cft->open)
3396 return cft->open(of);
3397 return 0;
3398}
3399
3400static void cgroup_file_release(struct kernfs_open_file *of)
3401{
3402 struct cftype *cft = of->kn->priv;
3403
3404 if (cft->release)
3405 cft->release(of);
3406}
3407
/*
 * cgroup_file_write - kernfs write callback for all cgroup control files
 * @of: kernfs open file
 * @buf: NUL-terminated input
 * @nbytes: number of bytes written
 * @off: offset (unused by cgroup files)
 *
 * Dispatches to the cftype's ->write() when set; otherwise parses @buf as
 * an integer for ->write_u64()/->write_s64().  Returns @nbytes on success
 * or a negative errno.
 */
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
				 size_t nbytes, loff_t off)
{
	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
	struct cgroup *cgrp = of->kn->parent->priv;
	struct cftype *cft = of->kn->priv;
	struct cgroup_subsys_state *css;
	int ret;

	/*
	 * With CGRP_ROOT_NS_DELEGATE, a cgroup namespace is a delegation
	 * boundary: deny writes to the namespace's own root cgroup from
	 * inside the namespace, except for files explicitly marked
	 * CFTYPE_NS_DELEGATABLE.
	 */
	if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
	    !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
	    ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
		return -EPERM;

	if (cft->write)
		return cft->write(of, buf, nbytes, off);

	/*
	 * kernfs guarantees that a file isn't deleted with operations in
	 * flight, so the matching css stays alive and doesn't need to be
	 * pinned.  The RCU locking is only for the convenience of using
	 * cgroup_css() here.
	 */
	rcu_read_lock();
	css = cgroup_css(cgrp, cft->ss);
	rcu_read_unlock();

	if (cft->write_u64) {
		unsigned long long v;
		ret = kstrtoull(buf, 0, &v);
		if (!ret)
			ret = cft->write_u64(css, cft, v);
	} else if (cft->write_s64) {
		long long v;
		ret = kstrtoll(buf, 0, &v);
		if (!ret)
			ret = cft->write_s64(css, cft, v);
	} else {
		ret = -EINVAL;
	}

	return ret ?: nbytes;
}
3457
3458static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
3459{
3460 return seq_cft(seq)->seq_start(seq, ppos);
3461}
3462
3463static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
3464{
3465 return seq_cft(seq)->seq_next(seq, v, ppos);
3466}
3467
3468static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
3469{
3470 if (seq_cft(seq)->seq_stop)
3471 seq_cft(seq)->seq_stop(seq, v);
3472}
3473
3474static int cgroup_seqfile_show(struct seq_file *m, void *arg)
3475{
3476 struct cftype *cft = seq_cft(m);
3477 struct cgroup_subsys_state *css = seq_css(m);
3478
3479 if (cft->seq_show)
3480 return cft->seq_show(m, arg);
3481
3482 if (cft->read_u64)
3483 seq_printf(m, "%llu\n", cft->read_u64(css, cft));
3484 else if (cft->read_s64)
3485 seq_printf(m, "%lld\n", cft->read_s64(css, cft));
3486 else
3487 return -EINVAL;
3488 return 0;
3489}
3490
/* kernfs ops for single-record files (no iterator; seq_show only) */
static struct kernfs_ops cgroup_kf_single_ops = {
	.atomic_write_len = PAGE_SIZE,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
	.write = cgroup_file_write,
	.seq_show = cgroup_seqfile_show,
};
3498
/* kernfs ops for multi-record files using the full seq_file iterator */
static struct kernfs_ops cgroup_kf_ops = {
	.atomic_write_len = PAGE_SIZE,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
	.write = cgroup_file_write,
	.seq_start = cgroup_seqfile_start,
	.seq_next = cgroup_seqfile_next,
	.seq_stop = cgroup_seqfile_stop,
	.seq_show = cgroup_seqfile_show,
};
3509
3510
3511static int cgroup_kn_set_ugid(struct kernfs_node *kn)
3512{
3513 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
3514 .ia_uid = current_fsuid(),
3515 .ia_gid = current_fsgid(), };
3516
3517 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
3518 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
3519 return 0;
3520
3521 return kernfs_setattr(kn, &iattr);
3522}
3523
/*
 * cgroup_add_file - create a kernfs file for @cft under @cgrp
 * @css: the css the file belongs to
 * @cgrp: the cgroup directory to create the file in
 * @cft: the file type
 *
 * Returns 0 on success, -errno on failure.  On success, if @cft has a
 * file_offset, the created kernfs_node is published in the css's
 * cgroup_file so cgroup_file_notify() can find it.
 */
static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
			   struct cftype *cft)
{
	char name[CGROUP_FILE_NAME_MAX];
	struct kernfs_node *kn;
	struct lock_class_key *key = NULL;
	int ret;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* each cftype gets its own lockdep class */
	key = &cft->lockdep_key;
#endif
	kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
				  cgroup_file_mode(cft), 0, cft->kf_ops, cft,
				  NULL, key);
	if (IS_ERR(kn))
		return PTR_ERR(kn);

	ret = cgroup_kn_set_ugid(kn);
	if (ret) {
		kernfs_remove(kn);
		return ret;
	}

	if (cft->file_offset) {
		struct cgroup_file *cfile = (void *)css + cft->file_offset;

		/* publish under cgroup_file_kn_lock for cgroup_file_notify() */
		spin_lock_irq(&cgroup_file_kn_lock);
		cfile->kn = kn;
		spin_unlock_irq(&cgroup_file_kn_lock);
	}

	return 0;
}
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
/*
 * cgroup_addrm_files - add or remove files for a cgroup
 * @css: the target css
 * @cgrp: the target cgroup (usually css->cgroup)
 * @cfts: array of cftypes terminated by an entry with an empty name
 * @is_add: whether to add or remove
 *
 * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
 * If an addition fails, this function undoes all the additions performed
 * so far (by restarting the loop in removal mode up to the failed entry)
 * and returns the error.  Must be called with cgroup_mutex held.
 */
static int cgroup_addrm_files(struct cgroup_subsys_state *css,
			      struct cgroup *cgrp, struct cftype cfts[],
			      bool is_add)
{
	struct cftype *cft, *cft_end = NULL;
	int ret = 0;

	lockdep_assert_held(&cgroup_mutex);

restart:
	for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
		/* does cft->flags tell us to skip this file on @cgrp? */
		if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
			continue;
		if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
			continue;
		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
			continue;
		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
			continue;

		if (is_add) {
			ret = cgroup_add_file(css, cgrp, cft);
			if (ret) {
				pr_warn("%s: failed to add %s, err=%d\n",
					__func__, cft->name, ret);
				/* roll back: remove everything added so far */
				cft_end = cft;
				is_add = false;
				goto restart;
			}
		} else {
			cgroup_rm_file(cgrp, cft);
		}
	}
	return ret;
}
3604
/*
 * cgroup_apply_cftypes - add/remove @cfts on all visible csses of the
 * owning subsystem's hierarchy.  Must be called with cgroup_mutex held.
 */
static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
{
	struct cgroup_subsys *ss = cfts[0].ss;
	struct cgroup *root = &ss->root->cgrp;
	struct cgroup_subsys_state *css;
	int ret = 0;

	lockdep_assert_held(&cgroup_mutex);

	/* add/rm files for all cgroups created before */
	css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
		struct cgroup *cgrp = css->cgroup;

		/* files are only created once a css has become visible */
		if (!(css->flags & CSS_VISIBLE))
			continue;

		ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
		if (ret)
			break;
	}

	/* make the newly created files visible to userland */
	if (is_add && !ret)
		kernfs_activate(root->kn);
	return ret;
}
3630
3631static void cgroup_exit_cftypes(struct cftype *cfts)
3632{
3633 struct cftype *cft;
3634
3635 for (cft = cfts; cft->name[0] != '\0'; cft++) {
3636
3637 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
3638 kfree(cft->kf_ops);
3639 cft->kf_ops = NULL;
3640 cft->ss = NULL;
3641
3642
3643 cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
3644 }
3645}
3646
/*
 * cgroup_init_cftypes - bind @cfts to @ss and pick their kernfs_ops.
 * A cft with a custom max_write_len gets a private copy of the ops with
 * atomic_write_len adjusted; cgroup_exit_cftypes() frees those copies.
 * Returns 0 on success, -ENOMEM on allocation failure (after unwinding).
 */
static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
	struct cftype *cft;

	for (cft = cfts; cft->name[0] != '\0'; cft++) {
		struct kernfs_ops *kf_ops;

		WARN_ON(cft->ss || cft->kf_ops);

		/* seq_start implies the full iterator op set */
		if (cft->seq_start)
			kf_ops = &cgroup_kf_ops;
		else
			kf_ops = &cgroup_kf_single_ops;

		/*
		 * Ugh... if @cft wants a custom max_write_len, we need a
		 * private copy of the ops as atomic_write_len lives in
		 * the shared kernfs_ops structure.
		 */
		if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
			kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
			if (!kf_ops) {
				cgroup_exit_cftypes(cfts);
				return -ENOMEM;
			}
			kf_ops->atomic_write_len = cft->max_write_len;
		}

		cft->kf_ops = kf_ops;
		cft->ss = ss;
	}

	return 0;
}
3680
3681static int cgroup_rm_cftypes_locked(struct cftype *cfts)
3682{
3683 lockdep_assert_held(&cgroup_mutex);
3684
3685 if (!cfts || !cfts[0].ss)
3686 return -ENOENT;
3687
3688 list_del(&cfts->node);
3689 cgroup_apply_cftypes(cfts, false);
3690 cgroup_exit_cftypes(cfts);
3691 return 0;
3692}
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705int cgroup_rm_cftypes(struct cftype *cfts)
3706{
3707 int ret;
3708
3709 mutex_lock(&cgroup_mutex);
3710 ret = cgroup_rm_cftypes_locked(cfts);
3711 mutex_unlock(&cgroup_mutex);
3712 return ret;
3713}
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
/*
 * cgroup_add_cftypes - add an array of cftypes to a subsystem
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Register @cfts to @ss.  Files described by @cfts are created for all
 * existing cgroups to which @ss is attached and all future cgroups will
 * have them too.  This function can be called anytime whether @ss is
 * attached or not.
 *
 * Returns 0 on successful registration, -errno on failure.  Note that
 * this function doesn't care whether @ss is attached.  @cfts is anchored
 * at ss->cfts.
 */
static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
	int ret;

	/* skip entirely if the subsystem is disabled on the kernel cmdline */
	if (!cgroup_ssid_enabled(ss->id))
		return 0;

	/* an empty array is a successful no-op */
	if (!cfts || cfts[0].name[0] == '\0')
		return 0;

	ret = cgroup_init_cftypes(ss, cfts);
	if (ret)
		return ret;

	mutex_lock(&cgroup_mutex);

	list_add_tail(&cfts->node, &ss->cfts);
	ret = cgroup_apply_cftypes(cfts, true);
	if (ret)
		/* roll back registration on failure */
		cgroup_rm_cftypes_locked(cfts);

	mutex_unlock(&cgroup_mutex);
	return ret;
}
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3763{
3764 struct cftype *cft;
3765
3766 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
3767 cft->flags |= __CFTYPE_ONLY_ON_DFL;
3768 return cgroup_add_cftypes(ss, cfts);
3769}
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3780{
3781 struct cftype *cft;
3782
3783 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
3784 cft->flags |= __CFTYPE_NOT_ON_DFL;
3785 return cgroup_add_cftypes(ss, cfts);
3786}
3787
3788
3789
3790
3791
3792
3793
3794void cgroup_file_notify(struct cgroup_file *cfile)
3795{
3796 unsigned long flags;
3797
3798 spin_lock_irqsave(&cgroup_file_kn_lock, flags);
3799 if (cfile->kn)
3800 kernfs_notify(cfile->kn);
3801 spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
3802}
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
/**
 * css_next_child - find the next child of a given css
 * @pos: the current position (%NULL to initiate traversal)
 * @parent: css whose children to walk
 *
 * Returns the next child of @parent.  Must be called under either
 * cgroup_mutex or RCU read lock.
 */
struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
					   struct cgroup_subsys_state *parent)
{
	struct cgroup_subsys_state *next;

	cgroup_assert_mutex_or_rcu_locked();

	/*
	 * @pos may have been removed from the sibling list.  Whether a
	 * removed css's ->sibling.next is still safe to follow is
	 * signalled by CSS_RELEASED: while not released, the next pointer
	 * is valid under RCU.  Once released, we fall back to scanning
	 * the children list for the first sibling with a higher
	 * serial_nr, which is monotonically increasing in creation
	 * order, so this resumes the walk at the right place.
	 */
	if (!pos) {
		/* start of iteration: first child of @parent */
		next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
	} else if (likely(!(pos->flags & CSS_RELEASED))) {
		/* common case: @pos is still linked; step to its sibling */
		next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
	} else {
		/* @pos was released; resume via serial numbers */
		list_for_each_entry_rcu(next, &parent->children, sibling)
			if (next->serial_nr > pos->serial_nr)
				break;
	}

	/*
	 * @next, if not pointing back at the list head, is a real child
	 * and can be returned; otherwise iteration is complete.
	 */
	if (&next->sibling != &parent->children)
		return next;
	return NULL;
}
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
/**
 * css_next_descendant_pre - find the next descendant for pre-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * Returns the next descendant of @root in pre-order (parent before
 * children), including @root itself on the first call.  Must be called
 * under either cgroup_mutex or RCU read lock.
 */
struct cgroup_subsys_state *
css_next_descendant_pre(struct cgroup_subsys_state *pos,
			struct cgroup_subsys_state *root)
{
	struct cgroup_subsys_state *next;

	cgroup_assert_mutex_or_rcu_locked();

	/* if first iteration, visit @root */
	if (!pos)
		return root;

	/* visit the first child if exists */
	next = css_next_child(NULL, pos);
	if (next)
		return next;

	/* no child, visit my or the closest ancestor's next sibling */
	while (pos != root) {
		next = css_next_child(pos, pos->parent);
		if (next)
			return next;
		pos = pos->parent;
	}

	return NULL;
}
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
/**
 * css_rightmost_descendant - return the rightmost descendant of a css
 * @pos: css of interest
 *
 * Return the rightmost descendant of @pos.  If there's no descendant,
 * @pos itself is returned.  Must be called under either cgroup_mutex or
 * RCU read lock.
 */
struct cgroup_subsys_state *
css_rightmost_descendant(struct cgroup_subsys_state *pos)
{
	struct cgroup_subsys_state *last, *tmp;

	cgroup_assert_mutex_or_rcu_locked();

	do {
		last = pos;
		/* ->prev isn't RCU safe, keep walking ->next till the end */
		pos = NULL;
		css_for_each_child(tmp, last)
			pos = tmp;
	} while (pos);

	return last;
}
3946
3947static struct cgroup_subsys_state *
3948css_leftmost_descendant(struct cgroup_subsys_state *pos)
3949{
3950 struct cgroup_subsys_state *last;
3951
3952 do {
3953 last = pos;
3954 pos = css_next_child(NULL, pos);
3955 } while (pos);
3956
3957 return last;
3958}
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
/**
 * css_next_descendant_post - find the next descendant for post-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * Returns the next descendant of @root in post-order (children before
 * parent); @root itself is visited last.  Must be called under either
 * cgroup_mutex or RCU read lock.
 */
struct cgroup_subsys_state *
css_next_descendant_post(struct cgroup_subsys_state *pos,
			 struct cgroup_subsys_state *root)
{
	struct cgroup_subsys_state *next;

	cgroup_assert_mutex_or_rcu_locked();

	/* if first iteration, visit leftmost descendant which may be @root */
	if (!pos)
		return css_leftmost_descendant(root);

	/* if we visited @root, we're done */
	if (pos == root)
		return NULL;

	/* if there's an unvisited sibling, visit its leftmost descendant */
	next = css_next_child(pos, pos->parent);
	if (next)
		return css_leftmost_descendant(next);

	/* no sibling left, visit parent */
	return pos->parent;
}
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015bool css_has_online_children(struct cgroup_subsys_state *css)
4016{
4017 struct cgroup_subsys_state *child;
4018 bool ret = false;
4019
4020 rcu_read_lock();
4021 css_for_each_child(child, css) {
4022 if (child->flags & CSS_ONLINE) {
4023 ret = true;
4024 break;
4025 }
4026 }
4027 rcu_read_unlock();
4028 return ret;
4029}
4030
/*
 * css_task_iter_next_css_set - advance the iterator to the next css_set
 * @it: the iterator to advance
 *
 * Returns the next css_set, or NULL when iteration is complete.  When
 * CSS_TASK_ITER_THREADED is set, each domain cset is followed by its
 * threaded csets before moving on.  Must be called with css_set_lock
 * held.
 */
static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
{
	struct list_head *l;
	struct cgrp_cset_link *link;
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	/* find the next threaded cset of the current domain cset */
	if (it->tcset_pos) {
		l = it->tcset_pos->next;

		if (l != it->tcset_head) {
			it->tcset_pos = l;
			return container_of(l, struct css_set,
					    threaded_csets_node);
		}

		/* exhausted the threaded csets; back to the main walk */
		it->tcset_pos = NULL;
	}

	/* find the next cset */
	l = it->cset_pos;
	l = l->next;
	if (l == it->cset_head) {
		it->cset_pos = NULL;
		return NULL;
	}

	if (it->ss) {
		/* per-subsystem walk uses the e_cset_node list */
		cset = container_of(l, struct css_set, e_cset_node[it->ss->id]);
	} else {
		/* cgroup->self walk goes through cgrp_cset_links */
		link = list_entry(l, struct cgrp_cset_link, cset_link);
		cset = link->cset;
	}

	it->cset_pos = l;

	/* initialize threaded cset walking for the new domain cset */
	if (it->flags & CSS_TASK_ITER_THREADED) {
		if (it->cur_dcset)
			put_css_set_locked(it->cur_dcset);
		/* pin the domain cset while walking its threaded children */
		it->cur_dcset = cset;
		get_css_set(cset);

		it->tcset_head = &cset->threaded_csets;
		it->tcset_pos = &cset->threaded_csets;
	}

	return cset;
}
4082
4083
4084
4085
4086
4087
4088
/*
 * css_task_iter_advance_css_set - advance a task iterator to the next
 * css_set
 * @it: the iterator to advance
 *
 * Advance @it to the next populated css_set and point task_pos at its
 * first task.  Must be called with css_set_lock held.
 */
static void css_task_iter_advance_css_set(struct css_task_iter *it)
{
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	/* advance to the next cset, skipping empty ones */
	do {
		cset = css_task_iter_next_css_set(it);
		if (!cset) {
			it->task_pos = NULL;
			return;
		}
	} while (!css_set_populated(cset));

	/* start from tasks, fall back to mg_tasks */
	if (!list_empty(&cset->tasks))
		it->task_pos = cset->tasks.next;
	else
		it->task_pos = cset->mg_tasks.next;

	it->tasks_head = &cset->tasks;
	it->mg_tasks_head = &cset->mg_tasks;

	/*
	 * We don't keep css_sets locked across iteration steps and thus
	 * can't hold onto the cset's task list directly: pin the cset
	 * and register @it on its task_iters list so that concurrent
	 * modifications (task migration, cset destruction) can find and
	 * fix up active iterators.  The previous cset, if any, is
	 * unregistered and unpinned first.
	 */
	if (it->cur_cset) {
		list_del(&it->iters_node);
		put_css_set_locked(it->cur_cset);
	}
	get_css_set(cset);
	it->cur_cset = cset;
	list_add(&it->iters_node, &cset->task_iters);
}
4135
/*
 * css_task_iter_advance - step @it to the next task, crossing cset and
 * tasks/mg_tasks list boundaries as needed.  With CSS_TASK_ITER_PROCS,
 * non-leader threads are skipped.  Must be called with css_set_lock held.
 */
static void css_task_iter_advance(struct css_task_iter *it)
{
	struct list_head *next;

	lockdep_assert_held(&css_set_lock);
repeat:
	/*
	 * Advance the iterator: first exhaust cset->tasks, then
	 * cset->mg_tasks, then move on to the next populated cset.
	 */
	next = it->task_pos->next;

	if (next == it->tasks_head)
		next = it->mg_tasks_head->next;

	if (next == it->mg_tasks_head)
		css_task_iter_advance_css_set(it);
	else
		it->task_pos = next;

	/* if PROCS, skip over tasks which aren't group leaders */
	if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
	    !thread_group_leader(list_entry(it->task_pos, struct task_struct,
					    cg_list)))
		goto repeat;
}
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
/**
 * css_task_iter_start - initiate task iteration
 * @css: the css to walk tasks of
 * @flags: CSS_TASK_ITER_* flags
 * @it: the task iterator to use
 *
 * Initiate iteration through the tasks of @css.  The caller can call
 * css_task_iter_next() to walk through the tasks until the function
 * returns NULL.  On completion of iteration, css_task_iter_end() must be
 * called.
 */
void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
			 struct css_task_iter *it)
{
	/* no one should try to iterate before mounting cgroups */
	WARN_ON_ONCE(!use_task_css_set_links);

	memset(it, 0, sizeof(*it));

	spin_lock_irq(&css_set_lock);

	it->ss = css->ss;
	it->flags = flags;

	/* pick the cset list appropriate for the walk kind */
	if (it->ss)
		it->cset_pos = &css->cgroup->e_csets[css->ss->id];
	else
		it->cset_pos = &css->cgroup->cset_links;

	it->cset_head = it->cset_pos;

	/* position task_pos at the first task */
	css_task_iter_advance_css_set(it);

	spin_unlock_irq(&css_set_lock);
}
4199
4200
4201
4202
4203
4204
4205
4206
4207
/**
 * css_task_iter_next - return the next task for the iterator
 * @it: the task iterator being iterated
 *
 * The "next" function for task iteration.  @it should have been
 * initialized via css_task_iter_start().  Returns NULL when the
 * iteration reaches the end.  The returned task is pinned and stays
 * valid until the next call (or css_task_iter_end()).
 */
struct task_struct *css_task_iter_next(struct css_task_iter *it)
{
	/* drop the reference on the task returned last time */
	if (it->cur_task) {
		put_task_struct(it->cur_task);
		it->cur_task = NULL;
	}

	spin_lock_irq(&css_set_lock);

	if (it->task_pos) {
		it->cur_task = list_entry(it->task_pos, struct task_struct,
					  cg_list);
		/* pin before dropping css_set_lock */
		get_task_struct(it->cur_task);
		css_task_iter_advance(it);
	}

	spin_unlock_irq(&css_set_lock);

	return it->cur_task;
}
4228
4229
4230
4231
4232
4233
4234
/**
 * css_task_iter_end - finish task iteration
 * @it: the task iterator to finish
 *
 * Finish task iteration started by css_task_iter_start(): unregister
 * the iterator and drop the references it still holds.
 */
void css_task_iter_end(struct css_task_iter *it)
{
	if (it->cur_cset) {
		spin_lock_irq(&css_set_lock);
		/* unregister from the cset's iterator list */
		list_del(&it->iters_node);
		put_css_set_locked(it->cur_cset);
		spin_unlock_irq(&css_set_lock);
	}

	if (it->cur_dcset)
		put_css_set(it->cur_dcset);

	if (it->cur_task)
		put_task_struct(it->cur_task);
}
4250
4251static void cgroup_procs_release(struct kernfs_open_file *of)
4252{
4253 if (of->priv) {
4254 css_task_iter_end(of->priv);
4255 kfree(of->priv);
4256 }
4257}
4258
4259static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
4260{
4261 struct kernfs_open_file *of = s->private;
4262 struct css_task_iter *it = of->priv;
4263
4264 return css_task_iter_next(it);
4265}
4266
/*
 * Common seq_file start callback for cgroup.procs and cgroup.threads.
 * @iter_flags selects the iteration mode (CSS_TASK_ITER_*).
 */
static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
				  unsigned int iter_flags)
{
	struct kernfs_open_file *of = s->private;
	struct cgroup *cgrp = seq_css(s)->cgroup;
	struct css_task_iter *it = of->priv;

	/*
	 * When a seq_file is seeked, its @pos is updated but the iterator
	 * doesn't track where it was.  If it's seeked back to position 0,
	 * restart the iteration from the beginning; any other seek with a
	 * missing iterator is unexpected and rejected.
	 */
	if (!it) {
		if (WARN_ON_ONCE((*pos)++))
			return ERR_PTR(-EINVAL);

		/* first read: allocate and start the iterator */
		it = kzalloc(sizeof(*it), GFP_KERNEL);
		if (!it)
			return ERR_PTR(-ENOMEM);
		of->priv = it;
		css_task_iter_start(&cgrp->self, iter_flags, it);
	} else if (!(*pos)++) {
		/* seeked back to 0: restart the walk */
		css_task_iter_end(it);
		css_task_iter_start(&cgrp->self, iter_flags, it);
	}

	return cgroup_procs_next(s, NULL, NULL);
}
4294
4295static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
4296{
4297 struct cgroup *cgrp = seq_css(s)->cgroup;
4298
4299
4300
4301
4302
4303
4304
4305 if (cgroup_is_threaded(cgrp))
4306 return ERR_PTR(-EOPNOTSUPP);
4307
4308 return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS |
4309 CSS_TASK_ITER_THREADED);
4310}
4311
/* print the namespace-local pid of the task at @v */
static int cgroup_procs_show(struct seq_file *s, void *v)
{
	struct task_struct *task = v;

	seq_printf(s, "%d\n", task_pid_vnr(task));
	return 0;
}
4317
/*
 * cgroup_procs_write_permission - check whether the caller may migrate
 * between @src_cgrp and @dst_cgrp
 * @src_cgrp: the cgroup the task currently belongs to
 * @dst_cgrp: the destination cgroup
 * @sb: super block of the cgroup filesystem being written through
 *
 * Delegation model: the writer must have write access to the
 * "cgroup.procs" file of the common ancestor of the two cgroups, and,
 * when namespace delegation is enabled, both cgroups must be inside the
 * writer's cgroup namespace.  Must be called with cgroup_mutex held.
 */
static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
					 struct cgroup *dst_cgrp,
					 struct super_block *sb)
{
	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
	struct cgroup *com_cgrp = src_cgrp;
	struct inode *inode;
	int ret;

	lockdep_assert_held(&cgroup_mutex);

	/* find the common ancestor of @src_cgrp and @dst_cgrp */
	while (!cgroup_is_descendant(dst_cgrp, com_cgrp))
		com_cgrp = cgroup_parent(com_cgrp);

	/* %current should be authorized to migrate to the common ancestor */
	inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn);
	if (!inode)
		return -ENOMEM;

	ret = inode_permission(inode, MAY_WRITE);
	iput(inode);
	if (ret)
		return ret;

	/*
	 * If namespaces are delegation boundaries, %current must be able
	 * to see both source and destination cgroups from its namespace.
	 */
	if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) &&
	    (!cgroup_is_descendant(src_cgrp, ns->root_cset->dfl_cgrp) ||
	     !cgroup_is_descendant(dst_cgrp, ns->root_cset->dfl_cgrp)))
		return -ENOENT;

	return 0;
}
4354
/*
 * "cgroup.procs" write handler - migrate the whole thread group of the
 * written pid into this cgroup.  Returns @nbytes on success or -errno.
 */
static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
				  char *buf, size_t nbytes, loff_t off)
{
	struct cgroup *src_cgrp, *dst_cgrp;
	struct task_struct *task;
	ssize_t ret;

	dst_cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!dst_cgrp)
		return -ENODEV;

	/* resolve the pid and pin the target task (threadgroup = true) */
	task = cgroup_procs_write_start(buf, true);
	ret = PTR_ERR_OR_ZERO(task);
	if (ret)
		goto out_unlock;

	/* find the source cgroup on the default hierarchy */
	spin_lock_irq(&css_set_lock);
	src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
	spin_unlock_irq(&css_set_lock);

	ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
					    of->file->f_path.dentry->d_sb);
	if (ret)
		goto out_finish;

	ret = cgroup_attach_task(dst_cgrp, task, true);

out_finish:
	cgroup_procs_write_finish(task);
out_unlock:
	cgroup_kn_unlock(of->kn);

	return ret ?: nbytes;
}
4390
4391static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
4392{
4393 return __cgroup_procs_start(s, pos, 0);
4394}
4395
/*
 * "cgroup.threads" write handler - migrate a single thread into this
 * cgroup.  Unlike cgroup.procs, both cgroups must be in the same
 * threaded domain.  Returns @nbytes on success or -errno.
 */
static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	struct cgroup *src_cgrp, *dst_cgrp;
	struct task_struct *task;
	ssize_t ret;

	buf = strstrip(buf);

	dst_cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!dst_cgrp)
		return -ENODEV;

	/* resolve the pid and pin the task (threadgroup = false) */
	task = cgroup_procs_write_start(buf, false);
	ret = PTR_ERR_OR_ZERO(task);
	if (ret)
		goto out_unlock;

	/* find the source cgroup on the default hierarchy */
	spin_lock_irq(&css_set_lock);
	src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
	spin_unlock_irq(&css_set_lock);

	/* thread migrations follow the cgroup.procs delegation rule */
	ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
					    of->file->f_path.dentry->d_sb);
	if (ret)
		goto out_finish;

	/* and must be contained in the same domain */
	ret = -EOPNOTSUPP;
	if (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp)
		goto out_finish;

	ret = cgroup_attach_task(dst_cgrp, task, false);

out_finish:
	cgroup_procs_write_finish(task);
out_unlock:
	cgroup_kn_unlock(of->kn);

	return ret ?: nbytes;
}
4439
4440
4441static struct cftype cgroup_base_files[] = {
4442 {
4443 .name = "cgroup.type",
4444 .flags = CFTYPE_NOT_ON_ROOT,
4445 .seq_show = cgroup_type_show,
4446 .write = cgroup_type_write,
4447 },
4448 {
4449 .name = "cgroup.procs",
4450 .flags = CFTYPE_NS_DELEGATABLE,
4451 .file_offset = offsetof(struct cgroup, procs_file),
4452 .release = cgroup_procs_release,
4453 .seq_start = cgroup_procs_start,
4454 .seq_next = cgroup_procs_next,
4455 .seq_show = cgroup_procs_show,
4456 .write = cgroup_procs_write,
4457 },
4458 {
4459 .name = "cgroup.threads",
4460 .flags = CFTYPE_NS_DELEGATABLE,
4461 .release = cgroup_procs_release,
4462 .seq_start = cgroup_threads_start,
4463 .seq_next = cgroup_procs_next,
4464 .seq_show = cgroup_procs_show,
4465 .write = cgroup_threads_write,
4466 },
4467 {
4468 .name = "cgroup.controllers",
4469 .seq_show = cgroup_controllers_show,
4470 },
4471 {
4472 .name = "cgroup.subtree_control",
4473 .flags = CFTYPE_NS_DELEGATABLE,
4474 .seq_show = cgroup_subtree_control_show,
4475 .write = cgroup_subtree_control_write,
4476 },
4477 {
4478 .name = "cgroup.events",
4479 .flags = CFTYPE_NOT_ON_ROOT,
4480 .file_offset = offsetof(struct cgroup, events_file),
4481 .seq_show = cgroup_events_show,
4482 },
4483 {
4484 .name = "cgroup.max.descendants",
4485 .seq_show = cgroup_max_descendants_show,
4486 .write = cgroup_max_descendants_write,
4487 },
4488 {
4489 .name = "cgroup.max.depth",
4490 .seq_show = cgroup_max_depth_show,
4491 .write = cgroup_max_depth_write,
4492 },
4493 {
4494 .name = "cgroup.stat",
4495 .seq_show = cgroup_stat_show,
4496 },
4497 {
4498 .name = "cpu.stat",
4499 .flags = CFTYPE_NOT_ON_ROOT,
4500 .seq_show = cpu_stat_show,
4501 },
4502 { }
4503};
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
/*
 * css_free_work_fn - final stage of css destruction, run from
 * cgroup_destroy_wq after the RCU grace period.  A css with ->ss set is
 * a subsystem css; otherwise it's a cgroup's self css and the cgroup
 * itself (or, for the root, the whole hierarchy) is freed.
 */
static void css_free_work_fn(struct work_struct *work)
{
	struct cgroup_subsys_state *css =
		container_of(work, struct cgroup_subsys_state, destroy_work);
	struct cgroup_subsys *ss = css->ss;
	struct cgroup *cgrp = css->cgroup;

	percpu_ref_exit(&css->refcnt);

	if (ss) {
		/* css free path: hand the css back to its subsystem */
		struct cgroup_subsys_state *parent = css->parent;
		int id = css->id;

		ss->css_free(css);
		cgroup_idr_remove(&ss->css_idr, id);
		cgroup_put(cgrp);

		if (parent)
			css_put(parent);
	} else {
		/* cgroup free path */
		atomic_dec(&cgrp->root->nr_cgrps);
		cgroup1_pidlist_destroy_all(cgrp);
		cancel_work_sync(&cgrp->release_agent_work);

		if (cgroup_parent(cgrp)) {
			/*
			 * A non-root cgroup is freed directly; drop the
			 * parent reference and the kernfs node pinned at
			 * creation time.
			 */
			cgroup_put(cgroup_parent(cgrp));
			kernfs_put(cgrp->kn);
			if (cgroup_on_dfl(cgrp))
				cgroup_stat_exit(cgrp);
			kfree(cgrp);
		} else {
			/*
			 * The root cgroup embeds no reference of its own;
			 * destroying it tears down the whole root.
			 */
			cgroup_destroy_root(cgrp->root);
		}
	}
}
4575
4576static void css_free_rcu_fn(struct rcu_head *rcu_head)
4577{
4578 struct cgroup_subsys_state *css =
4579 container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
4580
4581 INIT_WORK(&css->destroy_work, css_free_work_fn);
4582 queue_work(cgroup_destroy_wq, &css->destroy_work);
4583}
4584
/*
 * css_release_work_fn - run when the last reference to a css is dropped.
 * Unlinks the css from its sibling list, performs ss/cgroup specific
 * release work under cgroup_mutex and then schedules the RCU-delayed
 * free via css_free_rcu_fn().
 */
static void css_release_work_fn(struct work_struct *work)
{
	struct cgroup_subsys_state *css =
		container_of(work, struct cgroup_subsys_state, destroy_work);
	struct cgroup_subsys *ss = css->ss;
	struct cgroup *cgrp = css->cgroup;

	mutex_lock(&cgroup_mutex);

	/* mark released so css_next_child() falls back to serial_nr */
	css->flags |= CSS_RELEASED;
	list_del_rcu(&css->sibling);

	if (ss) {
		/* css release path */
		cgroup_idr_replace(&ss->css_idr, NULL, css->id);
		if (ss->css_released)
			ss->css_released(css);
	} else {
		struct cgroup *tcgrp;

		/* cgroup release path */
		trace_cgroup_release(cgrp);

		if (cgroup_on_dfl(cgrp))
			cgroup_stat_flush(cgrp);

		/* the cgroup is no longer counted as dying by ancestors */
		for (tcgrp = cgroup_parent(cgrp); tcgrp;
		     tcgrp = cgroup_parent(tcgrp))
			tcgrp->nr_dying_descendants--;

		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
		cgrp->id = -1;

		/*
		 * There are two control paths which try to determine
		 * cgroup from dentry without going through kernfs -
		 * cgroupstats_build() and css_tryget_online_from_dir().
		 * Those are supported by RCU protecting clearing of
		 * cgrp->kn->priv backpointer.
		 */
		if (cgrp->kn)
			RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
					 NULL);

		cgroup_bpf_put(cgrp);
	}

	mutex_unlock(&cgroup_mutex);

	call_rcu(&css->rcu_head, css_free_rcu_fn);
}
4636
4637static void css_release(struct percpu_ref *ref)
4638{
4639 struct cgroup_subsys_state *css =
4640 container_of(ref, struct cgroup_subsys_state, refcnt);
4641
4642 INIT_WORK(&css->destroy_work, css_release_work_fn);
4643 queue_work(cgroup_destroy_wq, &css->destroy_work);
4644}
4645
/*
 * init_and_link_css - initialize @css for @ss on @cgrp and link it to
 * its parent css.  Pins @cgrp and, for non-root cgroups, the parent
 * css.  Must be called with cgroup_mutex held.
 */
static void init_and_link_css(struct cgroup_subsys_state *css,
			      struct cgroup_subsys *ss, struct cgroup *cgrp)
{
	lockdep_assert_held(&cgroup_mutex);

	/* the css holds a reference on its cgroup */
	cgroup_get_live(cgrp);

	memset(css, 0, sizeof(*css));
	css->cgroup = cgrp;
	css->ss = ss;
	css->id = -1;
	INIT_LIST_HEAD(&css->sibling);
	INIT_LIST_HEAD(&css->children);
	/* monotonic creation order, used by css_next_child() */
	css->serial_nr = css_serial_nr_next++;
	atomic_set(&css->online_cnt, 0);

	if (cgroup_parent(cgrp)) {
		css->parent = cgroup_css(cgroup_parent(cgrp), ss);
		css_get(css->parent);
	}

	/* @cgrp must not already have a css for @ss */
	BUG_ON(cgroup_css(cgrp, ss));
}
4669
4670
/* invoke ->css_online() on a new css and mark it online if successful */
static int online_css(struct cgroup_subsys_state *css)
{
	struct cgroup_subsys *ss = css->ss;
	int ret = 0;

	lockdep_assert_held(&cgroup_mutex);

	if (ss->css_online)
		ret = ss->css_online(css);
	if (!ret) {
		css->flags |= CSS_ONLINE;
		/* publish the css in the cgroup's subsys array */
		rcu_assign_pointer(css->cgroup->subsys[ss->id], css);

		/* online_cnt keeps the parent online until children go offline */
		atomic_inc(&css->online_cnt);
		if (css->parent)
			atomic_inc(&css->parent->online_cnt);
	}
	return ret;
}
4690
4691
/* if css is online, invoke ->css_offline() on it and mark it offline */
static void offline_css(struct cgroup_subsys_state *css)
{
	struct cgroup_subsys *ss = css->ss;

	lockdep_assert_held(&cgroup_mutex);

	if (!(css->flags & CSS_ONLINE))
		return;

	if (ss->css_offline)
		ss->css_offline(css);

	css->flags &= ~CSS_ONLINE;
	/* unpublish from the cgroup's subsys array */
	RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);

	/* wake up anyone waiting for this css to go offline */
	wake_up_all(&css->cgroup->offline_waitq);
}
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
/**
 * css_create - create a cgroup_subsys_state
 * @cgrp: the cgroup new css will be associated with
 * @ss: the subsys of new css
 *
 * Create a new css associated with @cgrp - @ss pair.  On success, the
 * new css is online and ready to use.  Returns the new css on success
 * and ERR_PTR() on failure.  Must be called with cgroup_mutex held.
 */
static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
					      struct cgroup_subsys *ss)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
	struct cgroup_subsys_state *css;
	int err;

	lockdep_assert_held(&cgroup_mutex);

	css = ss->css_alloc(parent_css);
	if (!css)
		/* normalize a NULL return to -ENOMEM */
		css = ERR_PTR(-ENOMEM);
	if (IS_ERR(css))
		return css;

	init_and_link_css(css, ss, cgrp);

	err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
	if (err)
		goto err_free_css;

	/* reserve an id now; the css is published into the idr below */
	err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
	if (err < 0)
		goto err_free_css;
	css->id = err;

	/* @css is ready to be brought online now, make it visible */
	list_add_tail_rcu(&css->sibling, &parent_css->children);
	cgroup_idr_replace(&ss->css_idr, css, css->id);

	err = online_css(css);
	if (err)
		goto err_list_del;

	if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
	    cgroup_parent(parent)) {
		pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
			current->comm, current->pid, ss->name);
		if (!strcmp(ss->name, "memory"))
			pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n");
		ss->warned_broken_hierarchy = true;
	}

	return css;

err_list_del:
	list_del_rcu(&css->sibling);
err_free_css:
	/* free after an RCU grace period since the css was published */
	call_rcu(&css->rcu_head, css_free_rcu_fn);
	return ERR_PTR(err);
}
4771
4772
4773
4774
4775
4776
/*
 * cgroup_create - allocate and set up a new cgroup under @parent
 *
 * Creates the cgroup itself (without its kernfs directory or subsystem
 * css's - cgroup_mkdir() handles those), links it into the hierarchy and
 * makes it visible.  Returns the new cgroup on success, ERR_PTR on
 * failure.  Caller holds cgroup_mutex (via cgroup_kn_lock_live()).
 */
static struct cgroup *cgroup_create(struct cgroup *parent)
{
	struct cgroup_root *root = parent->root;
	struct cgroup *cgrp, *tcgrp;
	int level = parent->level + 1;
	int ret;

	/* allocate the cgroup plus its ancestor id array (one per level) */
	cgrp = kzalloc(sizeof(*cgrp) +
		       sizeof(cgrp->ancestor_ids[0]) * (level + 1), GFP_KERNEL);
	if (!cgrp)
		return ERR_PTR(-ENOMEM);

	ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
	if (ret)
		goto out_free_cgrp;

	/* stat bookkeeping is only maintained on the default hierarchy */
	if (cgroup_on_dfl(parent)) {
		ret = cgroup_stat_init(cgrp);
		if (ret)
			goto out_cancel_ref;
	}

	/*
	 * Temporarily set the pointer to NULL, so idr_find() won't return
	 * a half-baked cgroup; it's published via cgroup_idr_replace()
	 * once setup is complete.
	 */
	cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
	if (cgrp->id < 0) {
		ret = -ENOMEM;
		goto out_stat_exit;
	}

	init_cgroup_housekeeping(cgrp);

	cgrp->self.parent = &parent->self;
	cgrp->root = root;
	cgrp->level = level;
	ret = cgroup_bpf_inherit(cgrp);
	if (ret)
		goto out_idr_free;

	/* record ancestor ids and bump descendant counts up the chain */
	for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
		cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;

		if (tcgrp != cgrp)
			tcgrp->nr_descendants++;
	}

	/* v1 notify_on_release and cpuset clone_children are inherited */
	if (notify_on_release(parent))
		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);

	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);

	cgrp->self.serial_nr = css_serial_nr_next++;

	/* allocation complete, commit to creation */
	list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
	atomic_inc(&root->nr_cgrps);
	cgroup_get_live(parent);

	/*
	 * @cgrp is now fully operational.  If something fails after this
	 * point, it'll be released via the normal destruction path.
	 */
	cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);

	/*
	 * On the default hierarchy, a child has no controllers enabled
	 * until explicitly requested; v1 hierarchies propagate the
	 * parent's enabled set.
	 */
	if (!cgroup_on_dfl(cgrp))
		cgrp->subtree_control = cgroup_control(cgrp);

	cgroup_propagate_control(cgrp);

	return cgrp;

out_idr_free:
	cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
out_stat_exit:
	if (cgroup_on_dfl(parent))
		cgroup_stat_exit(cgrp);
out_cancel_ref:
	percpu_ref_exit(&cgrp->self.refcnt);
out_free_cgrp:
	kfree(cgrp);
	return ERR_PTR(ret);
}
4867
4868static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
4869{
4870 struct cgroup *cgroup;
4871 int ret = false;
4872 int level = 1;
4873
4874 lockdep_assert_held(&cgroup_mutex);
4875
4876 for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) {
4877 if (cgroup->nr_descendants >= cgroup->max_descendants)
4878 goto fail;
4879
4880 if (level > cgroup->max_depth)
4881 goto fail;
4882
4883 level++;
4884 }
4885
4886 ret = true;
4887fail:
4888 return ret;
4889}
4890
/*
 * cgroup_mkdir - kernfs mkdir callback for cgroup directories
 * @parent_kn: kernfs node of the parent cgroup's directory
 * @name: name of the new cgroup
 * @mode: file mode for the new directory
 *
 * Creates a child cgroup, its kernfs directory, base files and the
 * css's implied by the parent's subtree control.  Returns 0 on success,
 * -errno on failure.
 */
int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
{
	struct cgroup *parent, *cgrp;
	struct kernfs_node *kn;
	int ret;

	/* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
	if (strchr(name, '\n'))
		return -EINVAL;

	/* pins @parent and takes cgroup_mutex; fails if parent is dying */
	parent = cgroup_kn_lock_live(parent_kn, false);
	if (!parent)
		return -ENODEV;

	if (!cgroup_check_hierarchy_limits(parent)) {
		ret = -EAGAIN;
		goto out_unlock;
	}

	cgrp = cgroup_create(parent);
	if (IS_ERR(cgrp)) {
		ret = PTR_ERR(cgrp);
		goto out_unlock;
	}

	/* create the directory */
	kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
	if (IS_ERR(kn)) {
		ret = PTR_ERR(kn);
		goto out_destroy;
	}
	cgrp->kn = kn;

	/*
	 * Hold an extra reference on the kernfs node so that its priv
	 * (@cgrp) stays accessible after kernfs_remove()
	 * -- see the matching put in css_release_work_fn (outside this
	 * chunk); TODO confirm against the release path.
	 */
	kernfs_get(kn);

	ret = cgroup_kn_set_ugid(kn);
	if (ret)
		goto out_destroy;

	ret = css_populate_dir(&cgrp->self);
	if (ret)
		goto out_destroy;

	ret = cgroup_apply_control_enable(cgrp);
	if (ret)
		goto out_destroy;

	trace_cgroup_mkdir(cgrp);

	/* let's create and online css's */
	kernfs_activate(kn);

	ret = 0;
	goto out_unlock;

out_destroy:
	cgroup_destroy_locked(cgrp);
out_unlock:
	cgroup_kn_unlock(parent_kn);
	return ret;
}
4956
4957
4958
4959
4960
4961
/*
 * css destruction is a four-stage process; this is the offline step.
 * Invoked from cgroup_destroy_wq once a killed css's online_cnt drops
 * to zero (see css_killed_ref_fn).  Walks up the parent chain offlining
 * each css whose online_cnt this drop brought to zero.
 */
static void css_killed_work_fn(struct work_struct *work)
{
	struct cgroup_subsys_state *css =
		container_of(work, struct cgroup_subsys_state, destroy_work);

	mutex_lock(&cgroup_mutex);

	do {
		offline_css(css);
		/* drop the ref taken by kill_css() (or by the child's online) */
		css_put(css);
		/* @css can't go away while we're holding cgroup_mutex */
		css = css->parent;
	} while (css && atomic_dec_and_test(&css->online_cnt));

	mutex_unlock(&cgroup_mutex);
}
4978
4979
/*
 * css kill confirmation processing requires process context, bounce
 * through cgroup_destroy_wq.  Called by percpu_ref_kill_and_confirm()
 * once @ref is confirmed killed; schedules the offline work when no
 * online children are pinning this css.
 */
static void css_killed_ref_fn(struct percpu_ref *ref)
{
	struct cgroup_subsys_state *css =
		container_of(ref, struct cgroup_subsys_state, refcnt);

	if (atomic_dec_and_test(&css->online_cnt)) {
		INIT_WORK(&css->destroy_work, css_killed_work_fn);
		queue_work(cgroup_destroy_wq, &css->destroy_work);
	}
}
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
/**
 * kill_css - destroy a css
 * @css: css to destroy
 *
 * This function initiates destruction of @css by removing its cgroup
 * files and killing its reference count.  Offlining happens later, from
 * css_killed_work_fn(), once the percpu ref kill is confirmed.
 * Idempotent: a css already marked CSS_DYING is left alone.
 *
 * Must be called with cgroup_mutex held.
 */
static void kill_css(struct cgroup_subsys_state *css)
{
	lockdep_assert_held(&cgroup_mutex);

	if (css->flags & CSS_DYING)
		return;

	css->flags |= CSS_DYING;

	/*
	 * This must happen before css is disassociated with its cgroup so
	 * that userland can't read stale interface files after the css is
	 * gone.
	 */
	css_clear_dir(css);

	/*
	 * Killing would put the base ref, but we need to keep it alive
	 * until after ->css_offline().
	 */
	css_get(css);

	/*
	 * cgroup core guarantees that, by the time ->css_offline() is
	 * invoked, no new css reference will be given out via
	 * css_tryget_online().  Kill the percpu ref; css_killed_ref_fn()
	 * runs once the kill is confirmed and schedules the offline work.
	 */
	percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
}
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058static int cgroup_destroy_locked(struct cgroup *cgrp)
5059 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
5060{
5061 struct cgroup *tcgrp, *parent = cgroup_parent(cgrp);
5062 struct cgroup_subsys_state *css;
5063 struct cgrp_cset_link *link;
5064 int ssid;
5065
5066 lockdep_assert_held(&cgroup_mutex);
5067
5068
5069
5070
5071
5072 if (cgroup_is_populated(cgrp))
5073 return -EBUSY;
5074
5075
5076
5077
5078
5079
5080 if (css_has_online_children(&cgrp->self))
5081 return -EBUSY;
5082
5083
5084
5085
5086
5087
5088
5089 cgrp->self.flags &= ~CSS_ONLINE;
5090
5091 spin_lock_irq(&css_set_lock);
5092 list_for_each_entry(link, &cgrp->cset_links, cset_link)
5093 link->cset->dead = true;
5094 spin_unlock_irq(&css_set_lock);
5095
5096
5097 for_each_css(css, ssid, cgrp)
5098 kill_css(css);
5099
5100
5101
5102
5103
5104 kernfs_remove(cgrp->kn);
5105
5106 if (parent && cgroup_is_threaded(cgrp))
5107 parent->nr_threaded_children--;
5108
5109 for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5110 tcgrp->nr_descendants--;
5111 tcgrp->nr_dying_descendants++;
5112 }
5113
5114 cgroup1_check_for_release(parent);
5115
5116
5117 percpu_ref_kill(&cgrp->self.refcnt);
5118
5119 return 0;
5120};
5121
5122int cgroup_rmdir(struct kernfs_node *kn)
5123{
5124 struct cgroup *cgrp;
5125 int ret = 0;
5126
5127 cgrp = cgroup_kn_lock_live(kn, false);
5128 if (!cgrp)
5129 return 0;
5130
5131 ret = cgroup_destroy_locked(cgrp);
5132
5133 if (!ret)
5134 trace_cgroup_rmdir(cgrp);
5135
5136 cgroup_kn_unlock(kn);
5137 return ret;
5138}
5139
/* kernfs syscall callbacks for cgroup filesystems (both v1 and v2) */
static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
	.show_options		= cgroup_show_options,
	.remount_fs		= cgroup_remount,
	.mkdir			= cgroup_mkdir,
	.rmdir			= cgroup_rmdir,
	.show_path		= cgroup_show_path,
};
5147
/*
 * cgroup_init_subsys - boot-time initialization of one subsystem
 * @ss: the subsystem to initialize
 * @early: true when called from cgroup_init_early() before allocators
 *	   are up (id allocation is then deferred to cgroup_init())
 *
 * Allocates and onlines the subsystem's root css on the default
 * hierarchy and records which task-lifecycle callbacks it implements.
 * Any failure here is a boot-time bug, hence the BUG_ON()s.
 */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
{
	struct cgroup_subsys_state *css;

	pr_debug("Initializing cgroup subsys %s\n", ss->name);

	mutex_lock(&cgroup_mutex);

	idr_init(&ss->css_idr);
	INIT_LIST_HEAD(&ss->cfts);

	/* Create the root cgroup state for this subsystem */
	ss->root = &cgrp_dfl_root;
	css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
	/* We don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);

	/*
	 * Root csses are never destroyed and we can't initialize percpu
	 * refcnt early.  Mark it as norefcnt.
	 */
	css->flags |= CSS_NO_REF;

	if (early) {
		/* allocation can't be done safely during early init */
		css->id = 1;
	} else {
		css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
		BUG_ON(css->id < 0);
	}

	/*
	 * Update init_css_set.  init_css_set never goes away and all
	 * subsystems are inherently attached to it while booting.
	 */
	init_css_set.subsys[ss->id] = css;

	/* record which optional callbacks this subsystem implements */
	have_fork_callback |= (bool)ss->fork << ss->id;
	have_exit_callback |= (bool)ss->exit << ss->id;
	have_free_callback |= (bool)ss->free << ss->id;
	have_canfork_callback |= (bool)ss->can_fork << ss->id;

	/*
	 * At this point, before any task is forked, all tasks belong to
	 * init_css_set; no css_set relinking is needed.
	 */
	BUG_ON(!list_empty(&init_task.tasks));

	BUG_ON(online_css(css));

	mutex_unlock(&cgroup_mutex);
}
5200
5201
5202
5203
5204
5205
5206
/**
 * cgroup_init_early - cgroup initialization at system boot
 *
 * Initialize cgroups at system boot, and initialize any subsystems that
 * request early init.  Runs before allocators are usable; the rest of
 * initialization happens in cgroup_init().
 */
int __init cgroup_init_early(void)
{
	static struct cgroup_sb_opts __initdata opts;
	struct cgroup_subsys *ss;
	int i;

	init_cgroup_root(&cgrp_dfl_root, &opts);
	cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;

	RCU_INIT_POINTER(init_task.cgroups, &init_css_set);

	for_each_subsys(ss, i) {
		/* name and id are assigned here; both must be unset so far */
		WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
		     "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
		     i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
		     ss->id, ss->name);
		WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
		     "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);

		ss->id = i;
		ss->name = cgroup_subsys_name[i];
		if (!ss->legacy_name)
			ss->legacy_name = cgroup_subsys_name[i];

		if (ss->early_init)
			cgroup_init_subsys(ss, true);
	}
	return 0;
}
5236
/* mask of subsystems disabled via the "cgroup_disable=" boot parameter */
static u16 cgroup_disable_mask __initdata;
5238
5239
5240
5241
5242
5243
5244
/**
 * cgroup_init - cgroup initialization
 *
 * Register cgroup filesystems, create the /proc entry and finish the
 * setup that cgroup_init_early() could not do (late subsystem init,
 * id allocation for early-init subsystems, sysfs mount point).
 */
int __init cgroup_init(void)
{
	struct cgroup_subsys *ss;
	int ssid;

	/* subsystem masks throughout this file are u16 */
	BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
	BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
	BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
	BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));

	cgroup_stat_boot();

	/*
	 * The latency of the synchronize_sched() is too high for cgroups,
	 * avoid it at the cost of forcing all readers into the slow path.
	 */
	rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);

	get_user_ns(init_cgroup_ns.user_ns);

	mutex_lock(&cgroup_mutex);

	/*
	 * Add init_css_set to the hash table so that dfl_root can link to
	 * it during init.
	 */
	hash_add(css_set_table, &init_css_set.hlist,
		 css_set_hash(init_css_set.subsys));

	BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0));

	mutex_unlock(&cgroup_mutex);

	for_each_subsys(ss, ssid) {
		if (ss->early_init) {
			/* id allocation was deferred from cgroup_init_early() */
			struct cgroup_subsys_state *css =
				init_css_set.subsys[ss->id];

			css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
						   GFP_KERNEL);
			BUG_ON(css->id < 0);
		} else {
			cgroup_init_subsys(ss, false);
		}

		list_add_tail(&init_css_set.e_cset_node[ssid],
			      &cgrp_dfl_root.cgrp.e_csets[ssid]);

		/*
		 * Setting dfl_root subsys_mask needs to consider the
		 * disabled flag and cftype registration needs kmalloc,
		 * both of which aren't available during early_init.
		 */
		if (cgroup_disable_mask & (1 << ssid)) {
			static_branch_disable(cgroup_subsys_enabled_key[ssid]);
			printk(KERN_INFO "Disabling %s control group subsystem\n",
			       ss->name);
			continue;
		}

		if (cgroup1_ssid_disabled(ssid))
			printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
			       ss->name);

		cgrp_dfl_root.subsys_mask |= 1 << ss->id;

		/* implicit controllers must be threaded too */
		WARN_ON(ss->implicit_on_dfl && !ss->threaded);

		if (ss->implicit_on_dfl)
			cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
		else if (!ss->dfl_cftypes)
			cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;

		if (ss->threaded)
			cgrp_dfl_threaded_ss_mask |= 1 << ss->id;

		if (ss->dfl_cftypes == ss->legacy_cftypes) {
			WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
		} else {
			WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
			WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
		}

		if (ss->bind)
			ss->bind(init_css_set.subsys[ssid]);

		mutex_lock(&cgroup_mutex);
		css_populate_dir(init_css_set.subsys[ssid]);
		mutex_unlock(&cgroup_mutex);
	}

	/* init_css_set.subsys[] has been updated, re-hash */
	hash_del(&init_css_set.hlist);
	hash_add(css_set_table, &init_css_set.hlist,
		 css_set_hash(init_css_set.subsys));

	WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
	WARN_ON(register_filesystem(&cgroup_fs_type));
	WARN_ON(register_filesystem(&cgroup2_fs_type));
	WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations));

	return 0;
}
5349
static int __init cgroup_wq_init(void)
{
	/*
	 * Workqueue used to bounce css destruction work into process
	 * context (see css_killed_ref_fn()).  max_active is 1, so
	 * destruction work items are serialized
	 * -- NOTE(review): presumably relied upon for ordering of
	 * destruction work; confirm before changing.
	 */
	cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
	BUG_ON(!cgroup_destroy_wq);
	return 0;
}
core_initcall(cgroup_wq_init);
5365
5366void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
5367 char *buf, size_t buflen)
5368{
5369 struct kernfs_node *kn;
5370
5371 kn = kernfs_get_node_by_id(cgrp_dfl_root.kf_root, id);
5372 if (!kn)
5373 return;
5374 kernfs_path(kn, buf, buflen);
5375 kernfs_put(kn);
5376}
5377
5378
5379
5380
5381
5382
/*
 * proc_cgroup_show - show /proc/<pid>/cgroup for @tsk
 *
 * Emits one "hierarchy-id:controller-list:path" line per mounted
 * hierarchy.  The default hierarchy is only shown once it has been
 * mounted (cgrp_dfl_visible).
 */
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
		     struct pid *pid, struct task_struct *tsk)
{
	char *buf;
	int retval;
	struct cgroup_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	for_each_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int ssid, count = 0;

		if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
			continue;

		/* hierarchy id followed by comma-separated controllers */
		seq_printf(m, "%d:", root->hierarchy_id);
		if (root != &cgrp_dfl_root)
			for_each_subsys(ss, ssid)
				if (root->subsys_mask & (1 << ssid))
					seq_printf(m, "%s%s", count++ ? "," : "",
						   ss->legacy_name);
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		seq_putc(m, ':');

		cgrp = task_cgroup_from_root(tsk, root);

		/*
		 * On traditional hierarchies, all zombie tasks show up as
		 * belonging to the root cgroup.  On the default hierarchy,
		 * the path of the task's actual cgroup is shown even while
		 * exiting, with " (deleted)" appended once the cgroup is
		 * dead.
		 */
		if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
			retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
						current->nsproxy->cgroup_ns);
			if (retval >= PATH_MAX)
				retval = -ENAMETOOLONG;
			if (retval < 0)
				goto out_unlock;

			seq_puts(m, buf);
		} else {
			seq_puts(m, "/");
		}

		if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
			seq_puts(m, " (deleted)\n");
		else
			seq_putc(m, '\n');
	}

	retval = 0;
out_unlock:
	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);
	kfree(buf);
out:
	return retval;
}
5455
5456
5457
5458
5459
5460
5461
5462
5463
5464void cgroup_fork(struct task_struct *child)
5465{
5466 RCU_INIT_POINTER(child->cgroups, &init_css_set);
5467 INIT_LIST_HEAD(&child->cg_list);
5468}
5469
5470
5471
5472
5473
5474
5475
5476
5477
5478int cgroup_can_fork(struct task_struct *child)
5479{
5480 struct cgroup_subsys *ss;
5481 int i, j, ret;
5482
5483 do_each_subsys_mask(ss, i, have_canfork_callback) {
5484 ret = ss->can_fork(child);
5485 if (ret)
5486 goto out_revert;
5487 } while_each_subsys_mask();
5488
5489 return 0;
5490
5491out_revert:
5492 for_each_subsys(ss, j) {
5493 if (j >= i)
5494 break;
5495 if (ss->cancel_fork)
5496 ss->cancel_fork(child);
5497 }
5498
5499 return ret;
5500}
5501
5502
5503
5504
5505
5506
5507
5508
5509void cgroup_cancel_fork(struct task_struct *child)
5510{
5511 struct cgroup_subsys *ss;
5512 int i;
5513
5514 for_each_subsys(ss, i)
5515 if (ss->cancel_fork)
5516 ss->cancel_fork(child);
5517}
5518
5519
5520
5521
5522
5523
5524
5525
5526
5527
5528
/**
 * cgroup_post_fork - finalize cgroup setup for a newly forked task
 * @child: the task in question
 *
 * Attaches the new task to its parent's css_set (when css_set linking
 * is in use) and invokes each subsystem's ->fork() callback.
 */
void cgroup_post_fork(struct task_struct *child)
{
	struct cgroup_subsys *ss;
	int i;

	/*
	 * This may race against cgroup_enable_task_cg_lists().  As that
	 * function sets use_task_css_set_links before grabbing
	 * css_set_lock and we just went through css_set_lock-protected
	 * sections in copy_process()
	 * -- NOTE(review): the original ordering argument here was more
	 * detailed; the visible invariant is that the child is attached
	 * under css_set_lock and only if it isn't already on a cg_list.
	 */
	if (use_task_css_set_links) {
		struct css_set *cset;

		spin_lock_irq(&css_set_lock);
		/* child inherits the parent's (current's) css_set */
		cset = task_css_set(current);
		if (list_empty(&child->cg_list)) {
			get_css_set(cset);
			cset->nr_tasks++;
			css_set_move_task(child, NULL, cset, false);
		}
		spin_unlock_irq(&css_set_lock);
	}

	/*
	 * Call ss->fork().  This must happen after @child is linked on
	 * css_set; otherwise, @child might change state between ->fork()
	 * and addition to css_set.
	 */
	do_each_subsys_mask(ss, i, have_fork_callback) {
		ss->fork(child);
	} while_each_subsys_mask();
}
5577
5578
5579
5580
5581
5582
5583
5584
5585
5586
5587
5588
5589
5590
5591
5592
5593
5594
5595
5596
/**
 * cgroup_exit - detach cgroup state from an exiting task
 * @tsk: pointer to task_struct of exiting process
 *
 * Unlinks @tsk from its css_set's task list (keeping the css_set
 * reference for cgroup_free() to drop later) and invokes each
 * subsystem's ->exit() callback.
 */
void cgroup_exit(struct task_struct *tsk)
{
	struct cgroup_subsys *ss;
	struct css_set *cset;
	int i;

	/*
	 * Unlink from @tsk from its css_set.  As migration path can't
	 * race with us, we can check cg_list without grabbing
	 * css_set_lock.
	 */
	cset = task_css_set(tsk);

	if (!list_empty(&tsk->cg_list)) {
		spin_lock_irq(&css_set_lock);
		css_set_move_task(tsk, cset, NULL, false);
		cset->nr_tasks--;
		spin_unlock_irq(&css_set_lock);
	} else {
		/*
		 * Take an extra ref so the cset survives until
		 * cgroup_free() drops it.
		 */
		get_css_set(cset);
	}

	/* see cgroup_post_fork() for details */
	do_each_subsys_mask(ss, i, have_exit_callback) {
		ss->exit(tsk);
	} while_each_subsys_mask();
}
5623
/*
 * cgroup_free - final per-task cgroup cleanup, called when the
 * task_struct is released.  Runs each subsystem's ->free() callback and
 * drops the css_set reference held since cgroup_exit().
 */
void cgroup_free(struct task_struct *task)
{
	struct css_set *cset = task_css_set(task);
	struct cgroup_subsys *ss;
	int ssid;

	do_each_subsys_mask(ss, ssid, have_free_callback) {
		ss->free(task);
	} while_each_subsys_mask();

	put_css_set(cset);
}
5636
5637static int __init cgroup_disable(char *str)
5638{
5639 struct cgroup_subsys *ss;
5640 char *token;
5641 int i;
5642
5643 while ((token = strsep(&str, ",")) != NULL) {
5644 if (!*token)
5645 continue;
5646
5647 for_each_subsys(ss, i) {
5648 if (strcmp(token, ss->name) &&
5649 strcmp(token, ss->legacy_name))
5650 continue;
5651 cgroup_disable_mask |= 1 << i;
5652 }
5653 }
5654 return 1;
5655}
5656__setup("cgroup_disable=", cgroup_disable);
5657
5658
5659
5660
5661
5662
5663
5664
5665
5666
/**
 * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
 * @dentry: directory dentry of interest
 * @ss: subsystem of interest
 *
 * If @dentry is a directory on a cgroup filesystem, try to get and
 * return the css of @ss for the cgroup it represents.  Returns an
 * ERR_PTR on failure (-EBADF for a non-cgroup dentry, -ENOENT when the
 * cgroup or css is gone/offline).
 */
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
						       struct cgroup_subsys *ss)
{
	struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
	struct file_system_type *s_type = dentry->d_sb->s_type;
	struct cgroup_subsys_state *css = NULL;
	struct cgroup *cgrp;

	/* is this a cgroup directory? */
	if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
	    !kn || kernfs_type(kn) != KERNFS_DIR)
		return ERR_PTR(-EBADF);

	rcu_read_lock();

	/*
	 * kn->priv is the cgroup; it is reset to NULL when the kn is
	 * removed, so dereference it under RCU and handle NULL
	 * -- NOTE(review): the force-cast is needed because kn->priv is
	 * not declared __rcu.
	 */
	cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
	if (cgrp)
		css = cgroup_css(cgrp, ss);

	if (!css || !css_tryget_online(css))
		css = ERR_PTR(-ENOENT);

	rcu_read_unlock();
	return css;
}
5697
5698
5699
5700
5701
5702
5703
5704
5705
/**
 * css_from_id - lookup css by id
 * @id: the cgroup id
 * @ss: cgroup subsys to be looked into
 *
 * Returns the css if there's a valid one with @id, otherwise NULL.
 * Should be called under rcu_read_lock().
 */
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return idr_find(&ss->css_idr, id);
}
5711
5712
5713
5714
5715
5716
5717
5718
5719
5720
5721struct cgroup *cgroup_get_from_path(const char *path)
5722{
5723 struct kernfs_node *kn;
5724 struct cgroup *cgrp;
5725
5726 mutex_lock(&cgroup_mutex);
5727
5728 kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
5729 if (kn) {
5730 if (kernfs_type(kn) == KERNFS_DIR) {
5731 cgrp = kn->priv;
5732 cgroup_get_live(cgrp);
5733 } else {
5734 cgrp = ERR_PTR(-ENOTDIR);
5735 }
5736 kernfs_put(kn);
5737 } else {
5738 cgrp = ERR_PTR(-ENOENT);
5739 }
5740
5741 mutex_unlock(&cgroup_mutex);
5742 return cgrp;
5743}
5744EXPORT_SYMBOL_GPL(cgroup_get_from_path);
5745
5746
5747
5748
5749
5750
5751
5752
5753
5754
/**
 * cgroup_get_from_fd - get a cgroup pointer from a fd
 * @fd: fd obtained by open(cgroup2_dir)
 *
 * Find the cgroup from a fd which should be obtained by opening a
 * cgroup directory.  Returns a pointer to the cgroup on success;
 * ERR_PTR(-EBADF) if @fd is invalid or doesn't refer to a cgroup2
 * directory.  The returned cgroup carries a reference from
 * css_tryget_online_from_dir().
 */
struct cgroup *cgroup_get_from_fd(int fd)
{
	struct cgroup_subsys_state *css;
	struct cgroup *cgrp;
	struct file *f;

	f = fget_raw(fd);
	if (!f)
		return ERR_PTR(-EBADF);

	/* ss == NULL: resolve to the cgroup's self css */
	css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
	fput(f);
	if (IS_ERR(css))
		return ERR_CAST(css);

	cgrp = css->cgroup;
	/* only cgroup2 (default hierarchy) fds are accepted */
	if (!cgroup_on_dfl(cgrp)) {
		cgroup_put(cgrp);
		return ERR_PTR(-EBADF);
	}

	return cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
5779
5780
5781
5782
5783
5784#ifdef CONFIG_SOCK_CGROUP_DATA
5785
5786#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
5787
/* serializes updates to per-socket cgroup data from net_prio/net_cls */
DEFINE_SPINLOCK(cgroup_sk_update_lock);
/* once set, new sockets no longer get a cgroup2 pointer; never cleared */
static bool cgroup_sk_alloc_disabled __read_mostly;

/*
 * Disable cgroup2 socket matching; called when net_prio or net_cls
 * starts being used (they are mutually exclusive with it).  Logs once.
 */
void cgroup_sk_alloc_disable(void)
{
	if (cgroup_sk_alloc_disabled)
		return;
	pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
	cgroup_sk_alloc_disabled = true;
}
5798
5799#else
5800
5801#define cgroup_sk_alloc_disabled false
5802
5803#endif
5804
/*
 * cgroup_sk_alloc - associate a new socket with the current task's
 * default-hierarchy cgroup, taking a reference on it.  No-op when
 * cgroup2 socket matching has been disabled.
 */
void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
{
	if (cgroup_sk_alloc_disabled)
		return;

	/* Socket clone path */
	if (skcd->val) {
		/*
		 * skcd already points at a cgroup (presumably inherited
		 * from the parent socket); just take another reference
		 * -- NOTE(review): assumes the existing val is a valid
		 * cgroup pointer here; confirm against sk_clone path.
		 */
		cgroup_get(sock_cgroup_ptr(skcd));
		return;
	}

	rcu_read_lock();

	/* retry until a ref on the task's current dfl cgroup is obtained */
	while (true) {
		struct css_set *cset;

		cset = task_css_set(current);
		if (likely(cgroup_tryget(cset->dfl_cgrp))) {
			skcd->val = (unsigned long)cset->dfl_cgrp;
			break;
		}
		cpu_relax();
	}

	rcu_read_unlock();
}
5836
/* drop the cgroup reference taken by cgroup_sk_alloc() */
void cgroup_sk_free(struct sock_cgroup_data *skcd)
{
	cgroup_put(sock_cgroup_ptr(skcd));
}
5841
5842#endif
5843
5844#ifdef CONFIG_CGROUP_BPF
/* cgroup_mutex-holding wrapper around __cgroup_bpf_attach() */
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
		      enum bpf_attach_type type, u32 flags)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
	mutex_unlock(&cgroup_mutex);
	return ret;
}

/* cgroup_mutex-holding wrapper around __cgroup_bpf_detach() */
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
		      enum bpf_attach_type type, u32 flags)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	ret = __cgroup_bpf_detach(cgrp, prog, type, flags);
	mutex_unlock(&cgroup_mutex);
	return ret;
}

/* cgroup_mutex-holding wrapper around __cgroup_bpf_query() */
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
		     union bpf_attr __user *uattr)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	ret = __cgroup_bpf_query(cgrp, attr, uattr);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
5875#endif
5876
5877#ifdef CONFIG_SYSFS
5878static ssize_t show_delegatable_files(struct cftype *files, char *buf,
5879 ssize_t size, const char *prefix)
5880{
5881 struct cftype *cft;
5882 ssize_t ret = 0;
5883
5884 for (cft = files; cft && cft->name[0] != '\0'; cft++) {
5885 if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
5886 continue;
5887
5888 if (prefix)
5889 ret += snprintf(buf + ret, size - ret, "%s.", prefix);
5890
5891 ret += snprintf(buf + ret, size - ret, "%s\n", cft->name);
5892
5893 if (unlikely(ret >= size)) {
5894 WARN_ON(1);
5895 break;
5896 }
5897 }
5898
5899 return ret;
5900}
5901
/*
 * /sys/kernel/cgroup/delegate - lists all delegatable interface files:
 * the core cgroup files first, then each subsystem's, prefixed with the
 * subsystem name.
 */
static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr,
			     char *buf)
{
	struct cgroup_subsys *ss;
	int ssid;
	ssize_t ret = 0;

	ret = show_delegatable_files(cgroup_base_files, buf, PAGE_SIZE - ret,
				     NULL);

	for_each_subsys(ss, ssid)
		ret += show_delegatable_files(ss->dfl_cftypes, buf + ret,
					      PAGE_SIZE - ret,
					      cgroup_subsys_name[ssid]);

	return ret;
}
static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate);
5920
/* /sys/kernel/cgroup/features - supported optional cgroup2 features */
static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
			     char *buf)
{
	return snprintf(buf, PAGE_SIZE, "nsdelegate\n");
}
static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
5927
/* attributes exposed under /sys/kernel/cgroup/ */
static struct attribute *cgroup_sysfs_attrs[] = {
	&cgroup_delegate_attr.attr,
	&cgroup_features_attr.attr,
	NULL,
};

static const struct attribute_group cgroup_sysfs_attr_group = {
	.attrs = cgroup_sysfs_attrs,
	.name = "cgroup",
};
5938
/* register the /sys/kernel/cgroup attribute group at boot */
static int __init cgroup_sysfs_init(void)
{
	return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
}
subsys_initcall(cgroup_sysfs_init);
5944#endif
5945