1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30
31#include "cgroup-internal.h"
32
33#include <linux/cred.h>
34#include <linux/errno.h>
35#include <linux/init_task.h>
36#include <linux/kernel.h>
37#include <linux/magic.h>
38#include <linux/mutex.h>
39#include <linux/mount.h>
40#include <linux/pagemap.h>
41#include <linux/proc_fs.h>
42#include <linux/rcupdate.h>
43#include <linux/sched.h>
44#include <linux/sched/task.h>
45#include <linux/slab.h>
46#include <linux/spinlock.h>
47#include <linux/percpu-rwsem.h>
48#include <linux/string.h>
49#include <linux/hashtable.h>
50#include <linux/idr.h>
51#include <linux/kthread.h>
52#include <linux/atomic.h>
53#include <linux/cpuset.h>
54#include <linux/proc_ns.h>
55#include <linux/nsproxy.h>
56#include <linux/file.h>
57#include <linux/sched/cputime.h>
58#include <linux/psi.h>
59#include <net/sock.h>
60
61#define CREATE_TRACE_POINTS
62#include <trace/events/cgroup.h>
63
64#define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \
65 MAX_CFTYPE_NAME + 2)
66
67#define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
68
69
70
71
72
73
74
75
76
77
78
79DEFINE_MUTEX(cgroup_mutex);
80DEFINE_SPINLOCK(css_set_lock);
81
82#ifdef CONFIG_PROVE_RCU
83EXPORT_SYMBOL_GPL(cgroup_mutex);
84EXPORT_SYMBOL_GPL(css_set_lock);
85#endif
86
87DEFINE_SPINLOCK(trace_cgroup_path_lock);
88char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
89bool cgroup_debug __read_mostly;
90
91
92
93
94
95static DEFINE_SPINLOCK(cgroup_idr_lock);
96
97
98
99
100
101static DEFINE_SPINLOCK(cgroup_file_kn_lock);
102
103struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
104
105#define cgroup_assert_mutex_or_rcu_locked() \
106 RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
107 !lockdep_is_held(&cgroup_mutex), \
108 "cgroup_mutex or RCU read lock required");
109
110
111
112
113
114
115
116static struct workqueue_struct *cgroup_destroy_wq;
117
118
119#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
120struct cgroup_subsys *cgroup_subsys[] = {
121#include <linux/cgroup_subsys.h>
122};
123#undef SUBSYS
124
125
126#define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
127static const char *cgroup_subsys_name[] = {
128#include <linux/cgroup_subsys.h>
129};
130#undef SUBSYS
131
132
133#define SUBSYS(_x) \
134 DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key); \
135 DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key); \
136 EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key); \
137 EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
138#include <linux/cgroup_subsys.h>
139#undef SUBSYS
140
141#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
142static struct static_key_true *cgroup_subsys_enabled_key[] = {
143#include <linux/cgroup_subsys.h>
144};
145#undef SUBSYS
146
147#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
148static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
149#include <linux/cgroup_subsys.h>
150};
151#undef SUBSYS
152
153static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
154
155
156
157
158
159
160struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
161EXPORT_SYMBOL_GPL(cgrp_dfl_root);
162
163
164
165
166
167static bool cgrp_dfl_visible;
168
169
170static u16 cgrp_dfl_inhibit_ss_mask;
171
172
173static u16 cgrp_dfl_implicit_ss_mask;
174
175
176static u16 cgrp_dfl_threaded_ss_mask;
177
178
179LIST_HEAD(cgroup_roots);
180static int cgroup_root_count;
181
182
183static DEFINE_IDR(cgroup_hierarchy_idr);
184
185
186
187
188
189
190
191
192static u64 css_serial_nr_next = 1;
193
194
195
196
197
198static u16 have_fork_callback __read_mostly;
199static u16 have_exit_callback __read_mostly;
200static u16 have_free_callback __read_mostly;
201static u16 have_canfork_callback __read_mostly;
202
203
204struct cgroup_namespace init_cgroup_ns = {
205 .count = REFCOUNT_INIT(2),
206 .user_ns = &init_user_ns,
207 .ns.ops = &cgroupns_operations,
208 .ns.inum = PROC_CGROUP_INIT_INO,
209 .root_cset = &init_css_set,
210};
211
212static struct file_system_type cgroup2_fs_type;
213static struct cftype cgroup_base_files[];
214
215static int cgroup_apply_control(struct cgroup *cgrp);
216static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
217static void css_task_iter_advance(struct css_task_iter *it);
218static int cgroup_destroy_locked(struct cgroup *cgrp);
219static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
220 struct cgroup_subsys *ss);
221static void css_release(struct percpu_ref *ref);
222static void kill_css(struct cgroup_subsys_state *css);
223static int cgroup_addrm_files(struct cgroup_subsys_state *css,
224 struct cgroup *cgrp, struct cftype cfts[],
225 bool is_add);
226
227
228
229
230
231
232
233
234
235bool cgroup_ssid_enabled(int ssid)
236{
237 if (CGROUP_SUBSYS_COUNT == 0)
238 return false;
239
240 return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
241}
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296bool cgroup_on_dfl(const struct cgroup *cgrp)
297{
298 return cgrp->root == &cgrp_dfl_root;
299}
300
301
302static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
303 gfp_t gfp_mask)
304{
305 int ret;
306
307 idr_preload(gfp_mask);
308 spin_lock_bh(&cgroup_idr_lock);
309 ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
310 spin_unlock_bh(&cgroup_idr_lock);
311 idr_preload_end();
312 return ret;
313}
314
315static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
316{
317 void *ret;
318
319 spin_lock_bh(&cgroup_idr_lock);
320 ret = idr_replace(idr, ptr, id);
321 spin_unlock_bh(&cgroup_idr_lock);
322 return ret;
323}
324
325static void cgroup_idr_remove(struct idr *idr, int id)
326{
327 spin_lock_bh(&cgroup_idr_lock);
328 idr_remove(idr, id);
329 spin_unlock_bh(&cgroup_idr_lock);
330}
331
332static bool cgroup_has_tasks(struct cgroup *cgrp)
333{
334 return cgrp->nr_populated_csets;
335}
336
337bool cgroup_is_threaded(struct cgroup *cgrp)
338{
339 return cgrp->dom_cgrp != cgrp;
340}
341
342
343static bool cgroup_is_mixable(struct cgroup *cgrp)
344{
345
346
347
348
349
350 return !cgroup_parent(cgrp);
351}
352
353
354static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
355{
356
357 if (cgroup_is_mixable(cgrp))
358 return true;
359
360
361 if (cgroup_is_threaded(cgrp))
362 return false;
363
364
365 if (cgrp->nr_populated_domain_children)
366 return false;
367
368
369 if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
370 return false;
371
372 return true;
373}
374
375
376bool cgroup_is_thread_root(struct cgroup *cgrp)
377{
378
379 if (cgroup_is_threaded(cgrp))
380 return false;
381
382
383 if (cgrp->nr_threaded_children)
384 return true;
385
386
387
388
389
390 if (cgroup_has_tasks(cgrp) &&
391 (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask))
392 return true;
393
394 return false;
395}
396
397
398static bool cgroup_is_valid_domain(struct cgroup *cgrp)
399{
400
401 if (cgroup_is_threaded(cgrp))
402 return false;
403
404
405 while ((cgrp = cgroup_parent(cgrp))) {
406 if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp))
407 return false;
408 if (cgroup_is_threaded(cgrp))
409 return false;
410 }
411
412 return true;
413}
414
415
416static u16 cgroup_control(struct cgroup *cgrp)
417{
418 struct cgroup *parent = cgroup_parent(cgrp);
419 u16 root_ss_mask = cgrp->root->subsys_mask;
420
421 if (parent) {
422 u16 ss_mask = parent->subtree_control;
423
424
425 if (cgroup_is_threaded(cgrp))
426 ss_mask &= cgrp_dfl_threaded_ss_mask;
427 return ss_mask;
428 }
429
430 if (cgroup_on_dfl(cgrp))
431 root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
432 cgrp_dfl_implicit_ss_mask);
433 return root_ss_mask;
434}
435
436
437static u16 cgroup_ss_mask(struct cgroup *cgrp)
438{
439 struct cgroup *parent = cgroup_parent(cgrp);
440
441 if (parent) {
442 u16 ss_mask = parent->subtree_ss_mask;
443
444
445 if (cgroup_is_threaded(cgrp))
446 ss_mask &= cgrp_dfl_threaded_ss_mask;
447 return ss_mask;
448 }
449
450 return cgrp->root->subsys_mask;
451}
452
453
454
455
456
457
458
459
460
461
462
463
464static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
465 struct cgroup_subsys *ss)
466{
467 if (ss)
468 return rcu_dereference_check(cgrp->subsys[ss->id],
469 lockdep_is_held(&cgroup_mutex));
470 else
471 return &cgrp->self;
472}
473
474
475
476
477
478
479
480
481
482static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
483 struct cgroup_subsys *ss)
484{
485 struct cgroup_subsys_state *css;
486
487 rcu_read_lock();
488 css = cgroup_css(cgrp, ss);
489 if (!css || !css_tryget_online(css))
490 css = NULL;
491 rcu_read_unlock();
492
493 return css;
494}
495
496
497
498
499
500
501
502
503
504
505
506static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
507 struct cgroup_subsys *ss)
508{
509 lockdep_assert_held(&cgroup_mutex);
510
511 if (!ss)
512 return &cgrp->self;
513
514
515
516
517
518 while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
519 cgrp = cgroup_parent(cgrp);
520 if (!cgrp)
521 return NULL;
522 }
523
524 return cgroup_css(cgrp, ss);
525}
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
541 struct cgroup_subsys *ss)
542{
543 struct cgroup_subsys_state *css;
544
545 do {
546 css = cgroup_css(cgrp, ss);
547
548 if (css)
549 return css;
550 cgrp = cgroup_parent(cgrp);
551 } while (cgrp);
552
553 return init_css_set.subsys[ss->id];
554}
555
556
557
558
559
560
561
562
563
564
565
566
567struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
568 struct cgroup_subsys *ss)
569{
570 struct cgroup_subsys_state *css;
571
572 rcu_read_lock();
573
574 do {
575 css = cgroup_css(cgrp, ss);
576
577 if (css && css_tryget_online(css))
578 goto out_unlock;
579 cgrp = cgroup_parent(cgrp);
580 } while (cgrp);
581
582 css = init_css_set.subsys[ss->id];
583 css_get(css);
584out_unlock:
585 rcu_read_unlock();
586 return css;
587}
588
589static void cgroup_get_live(struct cgroup *cgrp)
590{
591 WARN_ON_ONCE(cgroup_is_dead(cgrp));
592 css_get(&cgrp->self);
593}
594
595
596
597
598
599
600int __cgroup_task_count(const struct cgroup *cgrp)
601{
602 int count = 0;
603 struct cgrp_cset_link *link;
604
605 lockdep_assert_held(&css_set_lock);
606
607 list_for_each_entry(link, &cgrp->cset_links, cset_link)
608 count += link->cset->nr_tasks;
609
610 return count;
611}
612
613
614
615
616
617int cgroup_task_count(const struct cgroup *cgrp)
618{
619 int count;
620
621 spin_lock_irq(&css_set_lock);
622 count = __cgroup_task_count(cgrp);
623 spin_unlock_irq(&css_set_lock);
624
625 return count;
626}
627
628struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
629{
630 struct cgroup *cgrp = of->kn->parent->priv;
631 struct cftype *cft = of_cft(of);
632
633
634
635
636
637
638
639
640
641 if (cft->ss)
642 return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
643 else
644 return &cgrp->self;
645}
646EXPORT_SYMBOL_GPL(of_css);
647
648
649
650
651
652
653
654
655
656#define for_each_css(css, ssid, cgrp) \
657 for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
658 if (!((css) = rcu_dereference_check( \
659 (cgrp)->subsys[(ssid)], \
660 lockdep_is_held(&cgroup_mutex)))) { } \
661 else
662
663
664
665
666
667
668
669
670
671#define for_each_e_css(css, ssid, cgrp) \
672 for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
673 if (!((css) = cgroup_e_css_by_mask(cgrp, \
674 cgroup_subsys[(ssid)]))) \
675 ; \
676 else
677
678
679
680
681
682
683
684
685
686
687#define do_each_subsys_mask(ss, ssid, ss_mask) do { \
688 unsigned long __ss_mask = (ss_mask); \
689 if (!CGROUP_SUBSYS_COUNT) { \
690 (ssid) = 0; \
691 break; \
692 } \
693 for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) { \
694 (ss) = cgroup_subsys[ssid]; \
695 {
696
697#define while_each_subsys_mask() \
698 } \
699 } \
700} while (false)
701
702
703#define cgroup_for_each_live_child(child, cgrp) \
704 list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
705 if (({ lockdep_assert_held(&cgroup_mutex); \
706 cgroup_is_dead(child); })) \
707 ; \
708 else
709
710
711#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \
712 css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \
713 if (({ lockdep_assert_held(&cgroup_mutex); \
714 (dsct) = (d_css)->cgroup; \
715 cgroup_is_dead(dsct); })) \
716 ; \
717 else
718
719
720#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) \
721 css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
722 if (({ lockdep_assert_held(&cgroup_mutex); \
723 (dsct) = (d_css)->cgroup; \
724 cgroup_is_dead(dsct); })) \
725 ; \
726 else
727
728
729
730
731
732
733
734
735struct css_set init_css_set = {
736 .refcount = REFCOUNT_INIT(1),
737 .dom_cset = &init_css_set,
738 .tasks = LIST_HEAD_INIT(init_css_set.tasks),
739 .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
740 .task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
741 .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
742 .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
743 .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node),
744 .mg_node = LIST_HEAD_INIT(init_css_set.mg_node),
745
746
747
748
749
750
751
752 .dfl_cgrp = &cgrp_dfl_root.cgrp,
753};
754
755static int css_set_count = 1;
756
757static bool css_set_threaded(struct css_set *cset)
758{
759 return cset->dom_cset != cset;
760}
761
762
763
764
765
766
767
768
769
770
771static bool css_set_populated(struct css_set *cset)
772{
773 lockdep_assert_held(&css_set_lock);
774
775 return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks);
776}
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
796{
797 struct cgroup *child = NULL;
798 int adj = populated ? 1 : -1;
799
800 lockdep_assert_held(&css_set_lock);
801
802 do {
803 bool was_populated = cgroup_is_populated(cgrp);
804
805 if (!child) {
806 cgrp->nr_populated_csets += adj;
807 } else {
808 if (cgroup_is_threaded(child))
809 cgrp->nr_populated_threaded_children += adj;
810 else
811 cgrp->nr_populated_domain_children += adj;
812 }
813
814 if (was_populated == cgroup_is_populated(cgrp))
815 break;
816
817 cgroup1_check_for_release(cgrp);
818 TRACE_CGROUP_PATH(notify_populated, cgrp,
819 cgroup_is_populated(cgrp));
820 cgroup_file_notify(&cgrp->events_file);
821
822 child = cgrp;
823 cgrp = cgroup_parent(cgrp);
824 } while (cgrp);
825}
826
827
828
829
830
831
832
833
834
835static void css_set_update_populated(struct css_set *cset, bool populated)
836{
837 struct cgrp_cset_link *link;
838
839 lockdep_assert_held(&css_set_lock);
840
841 list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
842 cgroup_update_populated(link->cgrp, populated);
843}
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860static void css_set_move_task(struct task_struct *task,
861 struct css_set *from_cset, struct css_set *to_cset,
862 bool use_mg_tasks)
863{
864 lockdep_assert_held(&css_set_lock);
865
866 if (to_cset && !css_set_populated(to_cset))
867 css_set_update_populated(to_cset, true);
868
869 if (from_cset) {
870 struct css_task_iter *it, *pos;
871
872 WARN_ON_ONCE(list_empty(&task->cg_list));
873
874
875
876
877
878
879
880
881 list_for_each_entry_safe(it, pos, &from_cset->task_iters,
882 iters_node)
883 if (it->task_pos == &task->cg_list)
884 css_task_iter_advance(it);
885
886 list_del_init(&task->cg_list);
887 if (!css_set_populated(from_cset))
888 css_set_update_populated(from_cset, false);
889 } else {
890 WARN_ON_ONCE(!list_empty(&task->cg_list));
891 }
892
893 if (to_cset) {
894
895
896
897
898
899
900 WARN_ON_ONCE(task->flags & PF_EXITING);
901
902 cgroup_move_task(task, to_cset);
903 list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
904 &to_cset->tasks);
905 }
906}
907
908
909
910
911
912
913#define CSS_SET_HASH_BITS 7
914static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
915
916static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
917{
918 unsigned long key = 0UL;
919 struct cgroup_subsys *ss;
920 int i;
921
922 for_each_subsys(ss, i)
923 key += (unsigned long)css[i];
924 key = (key >> 16) ^ key;
925
926 return key;
927}
928
929void put_css_set_locked(struct css_set *cset)
930{
931 struct cgrp_cset_link *link, *tmp_link;
932 struct cgroup_subsys *ss;
933 int ssid;
934
935 lockdep_assert_held(&css_set_lock);
936
937 if (!refcount_dec_and_test(&cset->refcount))
938 return;
939
940 WARN_ON_ONCE(!list_empty(&cset->threaded_csets));
941
942
943 for_each_subsys(ss, ssid) {
944 list_del(&cset->e_cset_node[ssid]);
945 css_put(cset->subsys[ssid]);
946 }
947 hash_del(&cset->hlist);
948 css_set_count--;
949
950 list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
951 list_del(&link->cset_link);
952 list_del(&link->cgrp_link);
953 if (cgroup_parent(link->cgrp))
954 cgroup_put(link->cgrp);
955 kfree(link);
956 }
957
958 if (css_set_threaded(cset)) {
959 list_del(&cset->threaded_csets_node);
960 put_css_set_locked(cset->dom_cset);
961 }
962
963 kfree_rcu(cset, rcu_head);
964}
965
966
967
968
969
970
971
972
973
974
975
976static bool compare_css_sets(struct css_set *cset,
977 struct css_set *old_cset,
978 struct cgroup *new_cgrp,
979 struct cgroup_subsys_state *template[])
980{
981 struct cgroup *new_dfl_cgrp;
982 struct list_head *l1, *l2;
983
984
985
986
987
988
989 if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
990 return false;
991
992
993
994 if (cgroup_on_dfl(new_cgrp))
995 new_dfl_cgrp = new_cgrp;
996 else
997 new_dfl_cgrp = old_cset->dfl_cgrp;
998
999 if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp)
1000 return false;
1001
1002
1003
1004
1005
1006
1007
1008 l1 = &cset->cgrp_links;
1009 l2 = &old_cset->cgrp_links;
1010 while (1) {
1011 struct cgrp_cset_link *link1, *link2;
1012 struct cgroup *cgrp1, *cgrp2;
1013
1014 l1 = l1->next;
1015 l2 = l2->next;
1016
1017 if (l1 == &cset->cgrp_links) {
1018 BUG_ON(l2 != &old_cset->cgrp_links);
1019 break;
1020 } else {
1021 BUG_ON(l2 == &old_cset->cgrp_links);
1022 }
1023
1024 link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
1025 link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
1026 cgrp1 = link1->cgrp;
1027 cgrp2 = link2->cgrp;
1028
1029 BUG_ON(cgrp1->root != cgrp2->root);
1030
1031
1032
1033
1034
1035
1036
1037
1038 if (cgrp1->root == new_cgrp->root) {
1039 if (cgrp1 != new_cgrp)
1040 return false;
1041 } else {
1042 if (cgrp1 != cgrp2)
1043 return false;
1044 }
1045 }
1046 return true;
1047}
1048
1049
1050
1051
1052
1053
1054
1055static struct css_set *find_existing_css_set(struct css_set *old_cset,
1056 struct cgroup *cgrp,
1057 struct cgroup_subsys_state *template[])
1058{
1059 struct cgroup_root *root = cgrp->root;
1060 struct cgroup_subsys *ss;
1061 struct css_set *cset;
1062 unsigned long key;
1063 int i;
1064
1065
1066
1067
1068
1069
1070 for_each_subsys(ss, i) {
1071 if (root->subsys_mask & (1UL << i)) {
1072
1073
1074
1075
1076 template[i] = cgroup_e_css_by_mask(cgrp, ss);
1077 } else {
1078
1079
1080
1081
1082 template[i] = old_cset->subsys[i];
1083 }
1084 }
1085
1086 key = css_set_hash(template);
1087 hash_for_each_possible(css_set_table, cset, hlist, key) {
1088 if (!compare_css_sets(cset, old_cset, cgrp, template))
1089 continue;
1090
1091
1092 return cset;
1093 }
1094
1095
1096 return NULL;
1097}
1098
1099static void free_cgrp_cset_links(struct list_head *links_to_free)
1100{
1101 struct cgrp_cset_link *link, *tmp_link;
1102
1103 list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
1104 list_del(&link->cset_link);
1105 kfree(link);
1106 }
1107}
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
1118{
1119 struct cgrp_cset_link *link;
1120 int i;
1121
1122 INIT_LIST_HEAD(tmp_links);
1123
1124 for (i = 0; i < count; i++) {
1125 link = kzalloc(sizeof(*link), GFP_KERNEL);
1126 if (!link) {
1127 free_cgrp_cset_links(tmp_links);
1128 return -ENOMEM;
1129 }
1130 list_add(&link->cset_link, tmp_links);
1131 }
1132 return 0;
1133}
1134
1135
1136
1137
1138
1139
1140
1141static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
1142 struct cgroup *cgrp)
1143{
1144 struct cgrp_cset_link *link;
1145
1146 BUG_ON(list_empty(tmp_links));
1147
1148 if (cgroup_on_dfl(cgrp))
1149 cset->dfl_cgrp = cgrp;
1150
1151 link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
1152 link->cset = cset;
1153 link->cgrp = cgrp;
1154
1155
1156
1157
1158
1159 list_move_tail(&link->cset_link, &cgrp->cset_links);
1160 list_add_tail(&link->cgrp_link, &cset->cgrp_links);
1161
1162 if (cgroup_parent(cgrp))
1163 cgroup_get_live(cgrp);
1164}
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174static struct css_set *find_css_set(struct css_set *old_cset,
1175 struct cgroup *cgrp)
1176{
1177 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
1178 struct css_set *cset;
1179 struct list_head tmp_links;
1180 struct cgrp_cset_link *link;
1181 struct cgroup_subsys *ss;
1182 unsigned long key;
1183 int ssid;
1184
1185 lockdep_assert_held(&cgroup_mutex);
1186
1187
1188
1189 spin_lock_irq(&css_set_lock);
1190 cset = find_existing_css_set(old_cset, cgrp, template);
1191 if (cset)
1192 get_css_set(cset);
1193 spin_unlock_irq(&css_set_lock);
1194
1195 if (cset)
1196 return cset;
1197
1198 cset = kzalloc(sizeof(*cset), GFP_KERNEL);
1199 if (!cset)
1200 return NULL;
1201
1202
1203 if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
1204 kfree(cset);
1205 return NULL;
1206 }
1207
1208 refcount_set(&cset->refcount, 1);
1209 cset->dom_cset = cset;
1210 INIT_LIST_HEAD(&cset->tasks);
1211 INIT_LIST_HEAD(&cset->mg_tasks);
1212 INIT_LIST_HEAD(&cset->task_iters);
1213 INIT_LIST_HEAD(&cset->threaded_csets);
1214 INIT_HLIST_NODE(&cset->hlist);
1215 INIT_LIST_HEAD(&cset->cgrp_links);
1216 INIT_LIST_HEAD(&cset->mg_preload_node);
1217 INIT_LIST_HEAD(&cset->mg_node);
1218
1219
1220
1221 memcpy(cset->subsys, template, sizeof(cset->subsys));
1222
1223 spin_lock_irq(&css_set_lock);
1224
1225 list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
1226 struct cgroup *c = link->cgrp;
1227
1228 if (c->root == cgrp->root)
1229 c = cgrp;
1230 link_css_set(&tmp_links, cset, c);
1231 }
1232
1233 BUG_ON(!list_empty(&tmp_links));
1234
1235 css_set_count++;
1236
1237
1238 key = css_set_hash(cset->subsys);
1239 hash_add(css_set_table, &cset->hlist, key);
1240
1241 for_each_subsys(ss, ssid) {
1242 struct cgroup_subsys_state *css = cset->subsys[ssid];
1243
1244 list_add_tail(&cset->e_cset_node[ssid],
1245 &css->cgroup->e_csets[ssid]);
1246 css_get(css);
1247 }
1248
1249 spin_unlock_irq(&css_set_lock);
1250
1251
1252
1253
1254
1255
1256
1257 if (cgroup_is_threaded(cset->dfl_cgrp)) {
1258 struct css_set *dcset;
1259
1260 dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp);
1261 if (!dcset) {
1262 put_css_set(cset);
1263 return NULL;
1264 }
1265
1266 spin_lock_irq(&css_set_lock);
1267 cset->dom_cset = dcset;
1268 list_add_tail(&cset->threaded_csets_node,
1269 &dcset->threaded_csets);
1270 spin_unlock_irq(&css_set_lock);
1271 }
1272
1273 return cset;
1274}
1275
1276struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
1277{
1278 struct cgroup *root_cgrp = kf_root->kn->priv;
1279
1280 return root_cgrp->root;
1281}
1282
1283static int cgroup_init_root_id(struct cgroup_root *root)
1284{
1285 int id;
1286
1287 lockdep_assert_held(&cgroup_mutex);
1288
1289 id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
1290 if (id < 0)
1291 return id;
1292
1293 root->hierarchy_id = id;
1294 return 0;
1295}
1296
1297static void cgroup_exit_root_id(struct cgroup_root *root)
1298{
1299 lockdep_assert_held(&cgroup_mutex);
1300
1301 idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
1302}
1303
1304void cgroup_free_root(struct cgroup_root *root)
1305{
1306 if (root) {
1307 idr_destroy(&root->cgroup_idr);
1308 kfree(root);
1309 }
1310}
1311
1312static void cgroup_destroy_root(struct cgroup_root *root)
1313{
1314 struct cgroup *cgrp = &root->cgrp;
1315 struct cgrp_cset_link *link, *tmp_link;
1316
1317 trace_cgroup_destroy_root(root);
1318
1319 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1320
1321 BUG_ON(atomic_read(&root->nr_cgrps));
1322 BUG_ON(!list_empty(&cgrp->self.children));
1323
1324
1325 WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));
1326
1327
1328
1329
1330
1331 spin_lock_irq(&css_set_lock);
1332
1333 list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
1334 list_del(&link->cset_link);
1335 list_del(&link->cgrp_link);
1336 kfree(link);
1337 }
1338
1339 spin_unlock_irq(&css_set_lock);
1340
1341 if (!list_empty(&root->root_list)) {
1342 list_del(&root->root_list);
1343 cgroup_root_count--;
1344 }
1345
1346 cgroup_exit_root_id(root);
1347
1348 mutex_unlock(&cgroup_mutex);
1349
1350 kernfs_destroy_root(root->kf_root);
1351 cgroup_free_root(root);
1352}
1353
1354
1355
1356
1357
1358static struct cgroup *
1359current_cgns_cgroup_from_root(struct cgroup_root *root)
1360{
1361 struct cgroup *res = NULL;
1362 struct css_set *cset;
1363
1364 lockdep_assert_held(&css_set_lock);
1365
1366 rcu_read_lock();
1367
1368 cset = current->nsproxy->cgroup_ns->root_cset;
1369 if (cset == &init_css_set) {
1370 res = &root->cgrp;
1371 } else {
1372 struct cgrp_cset_link *link;
1373
1374 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
1375 struct cgroup *c = link->cgrp;
1376
1377 if (c->root == root) {
1378 res = c;
1379 break;
1380 }
1381 }
1382 }
1383 rcu_read_unlock();
1384
1385 BUG_ON(!res);
1386 return res;
1387}
1388
1389
1390static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
1391 struct cgroup_root *root)
1392{
1393 struct cgroup *res = NULL;
1394
1395 lockdep_assert_held(&cgroup_mutex);
1396 lockdep_assert_held(&css_set_lock);
1397
1398 if (cset == &init_css_set) {
1399 res = &root->cgrp;
1400 } else if (root == &cgrp_dfl_root) {
1401 res = cset->dfl_cgrp;
1402 } else {
1403 struct cgrp_cset_link *link;
1404
1405 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
1406 struct cgroup *c = link->cgrp;
1407
1408 if (c->root == root) {
1409 res = c;
1410 break;
1411 }
1412 }
1413 }
1414
1415 BUG_ON(!res);
1416 return res;
1417}
1418
1419
1420
1421
1422
1423struct cgroup *task_cgroup_from_root(struct task_struct *task,
1424 struct cgroup_root *root)
1425{
1426
1427
1428
1429
1430
1431 return cset_cgroup_from_root(task_css_set(task), root);
1432}
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
1461
1462static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
1463 char *buf)
1464{
1465 struct cgroup_subsys *ss = cft->ss;
1466
1467 if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
1468 !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
1469 const char *dbg = (cft->flags & CFTYPE_DEBUG) ? ".__DEBUG__." : "";
1470
1471 snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s",
1472 dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
1473 cft->name);
1474 } else {
1475 strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
1476 }
1477 return buf;
1478}
1479
1480
1481
1482
1483
1484
1485
1486static umode_t cgroup_file_mode(const struct cftype *cft)
1487{
1488 umode_t mode = 0;
1489
1490 if (cft->read_u64 || cft->read_s64 || cft->seq_show)
1491 mode |= S_IRUGO;
1492
1493 if (cft->write_u64 || cft->write_s64 || cft->write) {
1494 if (cft->flags & CFTYPE_WORLD_WRITABLE)
1495 mode |= S_IWUGO;
1496 else
1497 mode |= S_IWUSR;
1498 }
1499
1500 return mode;
1501}
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
1516{
1517 u16 cur_ss_mask = subtree_control;
1518 struct cgroup_subsys *ss;
1519 int ssid;
1520
1521 lockdep_assert_held(&cgroup_mutex);
1522
1523 cur_ss_mask |= cgrp_dfl_implicit_ss_mask;
1524
1525 while (true) {
1526 u16 new_ss_mask = cur_ss_mask;
1527
1528 do_each_subsys_mask(ss, ssid, cur_ss_mask) {
1529 new_ss_mask |= ss->depends_on;
1530 } while_each_subsys_mask();
1531
1532
1533
1534
1535
1536
1537 new_ss_mask &= this_ss_mask;
1538
1539 if (new_ss_mask == cur_ss_mask)
1540 break;
1541 cur_ss_mask = new_ss_mask;
1542 }
1543
1544 return cur_ss_mask;
1545}
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557void cgroup_kn_unlock(struct kernfs_node *kn)
1558{
1559 struct cgroup *cgrp;
1560
1561 if (kernfs_type(kn) == KERNFS_DIR)
1562 cgrp = kn->priv;
1563 else
1564 cgrp = kn->parent->priv;
1565
1566 mutex_unlock(&cgroup_mutex);
1567
1568 kernfs_unbreak_active_protection(kn);
1569 cgroup_put(cgrp);
1570}
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
1590{
1591 struct cgroup *cgrp;
1592
1593 if (kernfs_type(kn) == KERNFS_DIR)
1594 cgrp = kn->priv;
1595 else
1596 cgrp = kn->parent->priv;
1597
1598
1599
1600
1601
1602
1603
1604 if (!cgroup_tryget(cgrp))
1605 return NULL;
1606 kernfs_break_active_protection(kn);
1607
1608 if (drain_offline)
1609 cgroup_lock_and_drain_offline(cgrp);
1610 else
1611 mutex_lock(&cgroup_mutex);
1612
1613 if (!cgroup_is_dead(cgrp))
1614 return cgrp;
1615
1616 cgroup_kn_unlock(kn);
1617 return NULL;
1618}
1619
1620static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
1621{
1622 char name[CGROUP_FILE_NAME_MAX];
1623
1624 lockdep_assert_held(&cgroup_mutex);
1625
1626 if (cft->file_offset) {
1627 struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
1628 struct cgroup_file *cfile = (void *)css + cft->file_offset;
1629
1630 spin_lock_irq(&cgroup_file_kn_lock);
1631 cfile->kn = NULL;
1632 spin_unlock_irq(&cgroup_file_kn_lock);
1633
1634 del_timer_sync(&cfile->notify_timer);
1635 }
1636
1637 kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
1638}
1639
1640
1641
1642
1643
1644static void css_clear_dir(struct cgroup_subsys_state *css)
1645{
1646 struct cgroup *cgrp = css->cgroup;
1647 struct cftype *cfts;
1648
1649 if (!(css->flags & CSS_VISIBLE))
1650 return;
1651
1652 css->flags &= ~CSS_VISIBLE;
1653
1654 if (!css->ss) {
1655 if (cgroup_on_dfl(cgrp))
1656 cfts = cgroup_base_files;
1657 else
1658 cfts = cgroup1_base_files;
1659
1660 cgroup_addrm_files(css, cgrp, cfts, false);
1661 } else {
1662 list_for_each_entry(cfts, &css->ss->cfts, node)
1663 cgroup_addrm_files(css, cgrp, cfts, false);
1664 }
1665}
1666
1667
1668
1669
1670
1671
1672
1673static int css_populate_dir(struct cgroup_subsys_state *css)
1674{
1675 struct cgroup *cgrp = css->cgroup;
1676 struct cftype *cfts, *failed_cfts;
1677 int ret;
1678
1679 if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
1680 return 0;
1681
1682 if (!css->ss) {
1683 if (cgroup_on_dfl(cgrp))
1684 cfts = cgroup_base_files;
1685 else
1686 cfts = cgroup1_base_files;
1687
1688 ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
1689 if (ret < 0)
1690 return ret;
1691 } else {
1692 list_for_each_entry(cfts, &css->ss->cfts, node) {
1693 ret = cgroup_addrm_files(css, cgrp, cfts, true);
1694 if (ret < 0) {
1695 failed_cfts = cfts;
1696 goto err;
1697 }
1698 }
1699 }
1700
1701 css->flags |= CSS_VISIBLE;
1702
1703 return 0;
1704err:
1705 list_for_each_entry(cfts, &css->ss->cfts, node) {
1706 if (cfts == failed_cfts)
1707 break;
1708 cgroup_addrm_files(css, cgrp, cfts, false);
1709 }
1710 return ret;
1711}
1712
1713int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
1714{
1715 struct cgroup *dcgrp = &dst_root->cgrp;
1716 struct cgroup_subsys *ss;
1717 int ssid, i, ret;
1718
1719 lockdep_assert_held(&cgroup_mutex);
1720
1721 do_each_subsys_mask(ss, ssid, ss_mask) {
1722
1723
1724
1725
1726
1727 if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
1728 !ss->implicit_on_dfl)
1729 return -EBUSY;
1730
1731
1732 if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
1733 return -EBUSY;
1734 } while_each_subsys_mask();
1735
1736 do_each_subsys_mask(ss, ssid, ss_mask) {
1737 struct cgroup_root *src_root = ss->root;
1738 struct cgroup *scgrp = &src_root->cgrp;
1739 struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
1740 struct css_set *cset;
1741
1742 WARN_ON(!css || cgroup_css(dcgrp, ss));
1743
1744
1745 src_root->subsys_mask &= ~(1 << ssid);
1746 WARN_ON(cgroup_apply_control(scgrp));
1747 cgroup_finalize_control(scgrp, 0);
1748
1749
1750 RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
1751 rcu_assign_pointer(dcgrp->subsys[ssid], css);
1752 ss->root = dst_root;
1753 css->cgroup = dcgrp;
1754
1755 spin_lock_irq(&css_set_lock);
1756 hash_for_each(css_set_table, i, cset, hlist)
1757 list_move_tail(&cset->e_cset_node[ss->id],
1758 &dcgrp->e_csets[ss->id]);
1759 spin_unlock_irq(&css_set_lock);
1760
1761
1762 dst_root->subsys_mask |= 1 << ssid;
1763 if (dst_root == &cgrp_dfl_root) {
1764 static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
1765 } else {
1766 dcgrp->subtree_control |= 1 << ssid;
1767 static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
1768 }
1769
1770 ret = cgroup_apply_control(dcgrp);
1771 if (ret)
1772 pr_warn("partial failure to rebind %s controller (err=%d)\n",
1773 ss->name, ret);
1774
1775 if (ss->bind)
1776 ss->bind(css);
1777 } while_each_subsys_mask();
1778
1779 kernfs_activate(dcgrp->kn);
1780 return 0;
1781}
1782
1783int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
1784 struct kernfs_root *kf_root)
1785{
1786 int len = 0;
1787 char *buf = NULL;
1788 struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
1789 struct cgroup *ns_cgroup;
1790
1791 buf = kmalloc(PATH_MAX, GFP_KERNEL);
1792 if (!buf)
1793 return -ENOMEM;
1794
1795 spin_lock_irq(&css_set_lock);
1796 ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
1797 len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
1798 spin_unlock_irq(&css_set_lock);
1799
1800 if (len >= PATH_MAX)
1801 len = -ERANGE;
1802 else if (len > 0) {
1803 seq_escape(sf, buf, " \t\n\\");
1804 len = 0;
1805 }
1806 kfree(buf);
1807 return len;
1808}
1809
1810static int parse_cgroup_root_flags(char *data, unsigned int *root_flags)
1811{
1812 char *token;
1813
1814 *root_flags = 0;
1815
1816 if (!data || *data == '\0')
1817 return 0;
1818
1819 while ((token = strsep(&data, ",")) != NULL) {
1820 if (!strcmp(token, "nsdelegate")) {
1821 *root_flags |= CGRP_ROOT_NS_DELEGATE;
1822 continue;
1823 }
1824
1825 pr_err("cgroup2: unknown option \"%s\"\n", token);
1826 return -EINVAL;
1827 }
1828
1829 return 0;
1830}
1831
1832static void apply_cgroup_root_flags(unsigned int root_flags)
1833{
1834 if (current->nsproxy->cgroup_ns == &init_cgroup_ns) {
1835 if (root_flags & CGRP_ROOT_NS_DELEGATE)
1836 cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
1837 else
1838 cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
1839 }
1840}
1841
1842static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
1843{
1844 if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
1845 seq_puts(seq, ",nsdelegate");
1846 return 0;
1847}
1848
1849static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
1850{
1851 unsigned int root_flags;
1852 int ret;
1853
1854 ret = parse_cgroup_root_flags(data, &root_flags);
1855 if (ret)
1856 return ret;
1857
1858 apply_cgroup_root_flags(root_flags);
1859 return 0;
1860}
1861
1862
1863
1864
1865
1866
1867
1868static bool use_task_css_set_links __read_mostly;
1869
1870static void cgroup_enable_task_cg_lists(void)
1871{
1872 struct task_struct *p, *g;
1873
1874
1875
1876
1877
1878
1879
1880
1881 read_lock(&tasklist_lock);
1882 spin_lock_irq(&css_set_lock);
1883
1884 if (use_task_css_set_links)
1885 goto out_unlock;
1886
1887 use_task_css_set_links = true;
1888
1889 do_each_thread(g, p) {
1890 WARN_ON_ONCE(!list_empty(&p->cg_list) ||
1891 task_css_set(p) != &init_css_set);
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904 spin_lock(&p->sighand->siglock);
1905 if (!(p->flags & PF_EXITING)) {
1906 struct css_set *cset = task_css_set(p);
1907
1908 if (!css_set_populated(cset))
1909 css_set_update_populated(cset, true);
1910 list_add_tail(&p->cg_list, &cset->tasks);
1911 get_css_set(cset);
1912 cset->nr_tasks++;
1913 }
1914 spin_unlock(&p->sighand->siglock);
1915 } while_each_thread(g, p);
1916out_unlock:
1917 spin_unlock_irq(&css_set_lock);
1918 read_unlock(&tasklist_lock);
1919}
1920
1921static void init_cgroup_housekeeping(struct cgroup *cgrp)
1922{
1923 struct cgroup_subsys *ss;
1924 int ssid;
1925
1926 INIT_LIST_HEAD(&cgrp->self.sibling);
1927 INIT_LIST_HEAD(&cgrp->self.children);
1928 INIT_LIST_HEAD(&cgrp->cset_links);
1929 INIT_LIST_HEAD(&cgrp->pidlists);
1930 mutex_init(&cgrp->pidlist_mutex);
1931 cgrp->self.cgroup = cgrp;
1932 cgrp->self.flags |= CSS_ONLINE;
1933 cgrp->dom_cgrp = cgrp;
1934 cgrp->max_descendants = INT_MAX;
1935 cgrp->max_depth = INT_MAX;
1936 INIT_LIST_HEAD(&cgrp->rstat_css_list);
1937 prev_cputime_init(&cgrp->prev_cputime);
1938
1939 for_each_subsys(ss, ssid)
1940 INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
1941
1942 init_waitqueue_head(&cgrp->offline_waitq);
1943 INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
1944}
1945
1946void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
1947{
1948 struct cgroup *cgrp = &root->cgrp;
1949
1950 INIT_LIST_HEAD(&root->root_list);
1951 atomic_set(&root->nr_cgrps, 1);
1952 cgrp->root = root;
1953 init_cgroup_housekeeping(cgrp);
1954 idr_init(&root->cgroup_idr);
1955
1956 root->flags = opts->flags;
1957 if (opts->release_agent)
1958 strscpy(root->release_agent_path, opts->release_agent, PATH_MAX);
1959 if (opts->name)
1960 strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
1961 if (opts->cpuset_clone_children)
1962 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
1963}
1964
1965int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
1966{
1967 LIST_HEAD(tmp_links);
1968 struct cgroup *root_cgrp = &root->cgrp;
1969 struct kernfs_syscall_ops *kf_sops;
1970 struct css_set *cset;
1971 int i, ret;
1972
1973 lockdep_assert_held(&cgroup_mutex);
1974
1975 ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
1976 if (ret < 0)
1977 goto out;
1978 root_cgrp->id = ret;
1979 root_cgrp->ancestor_ids[0] = ret;
1980
1981 ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
1982 0, GFP_KERNEL);
1983 if (ret)
1984 goto out;
1985
1986
1987
1988
1989
1990
1991
1992
1993 ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
1994 if (ret)
1995 goto cancel_ref;
1996
1997 ret = cgroup_init_root_id(root);
1998 if (ret)
1999 goto cancel_ref;
2000
2001 kf_sops = root == &cgrp_dfl_root ?
2002 &cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops;
2003
2004 root->kf_root = kernfs_create_root(kf_sops,
2005 KERNFS_ROOT_CREATE_DEACTIVATED |
2006 KERNFS_ROOT_SUPPORT_EXPORTOP,
2007 root_cgrp);
2008 if (IS_ERR(root->kf_root)) {
2009 ret = PTR_ERR(root->kf_root);
2010 goto exit_root_id;
2011 }
2012 root_cgrp->kn = root->kf_root->kn;
2013
2014 ret = css_populate_dir(&root_cgrp->self);
2015 if (ret)
2016 goto destroy_root;
2017
2018 ret = rebind_subsystems(root, ss_mask);
2019 if (ret)
2020 goto destroy_root;
2021
2022 ret = cgroup_bpf_inherit(root_cgrp);
2023 WARN_ON_ONCE(ret);
2024
2025 trace_cgroup_setup_root(root);
2026
2027
2028
2029
2030
2031
2032 list_add(&root->root_list, &cgroup_roots);
2033 cgroup_root_count++;
2034
2035
2036
2037
2038
2039 spin_lock_irq(&css_set_lock);
2040 hash_for_each(css_set_table, i, cset, hlist) {
2041 link_css_set(&tmp_links, cset, root_cgrp);
2042 if (css_set_populated(cset))
2043 cgroup_update_populated(root_cgrp, true);
2044 }
2045 spin_unlock_irq(&css_set_lock);
2046
2047 BUG_ON(!list_empty(&root_cgrp->self.children));
2048 BUG_ON(atomic_read(&root->nr_cgrps) != 1);
2049
2050 kernfs_activate(root_cgrp->kn);
2051 ret = 0;
2052 goto out;
2053
2054destroy_root:
2055 kernfs_destroy_root(root->kf_root);
2056 root->kf_root = NULL;
2057exit_root_id:
2058 cgroup_exit_root_id(root);
2059cancel_ref:
2060 percpu_ref_exit(&root_cgrp->self.refcnt);
2061out:
2062 free_cgrp_cset_links(&tmp_links);
2063 return ret;
2064}
2065
2066struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
2067 struct cgroup_root *root, unsigned long magic,
2068 struct cgroup_namespace *ns)
2069{
2070 struct dentry *dentry;
2071 bool new_sb = false;
2072
2073 dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb);
2074
2075
2076
2077
2078
2079 if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
2080 struct dentry *nsdentry;
2081 struct super_block *sb = dentry->d_sb;
2082 struct cgroup *cgrp;
2083
2084 mutex_lock(&cgroup_mutex);
2085 spin_lock_irq(&css_set_lock);
2086
2087 cgrp = cset_cgroup_from_root(ns->root_cset, root);
2088
2089 spin_unlock_irq(&css_set_lock);
2090 mutex_unlock(&cgroup_mutex);
2091
2092 nsdentry = kernfs_node_dentry(cgrp->kn, sb);
2093 dput(dentry);
2094 if (IS_ERR(nsdentry))
2095 deactivate_locked_super(sb);
2096 dentry = nsdentry;
2097 }
2098
2099 if (!new_sb)
2100 cgroup_put(&root->cgrp);
2101
2102 return dentry;
2103}
2104
2105static struct dentry *cgroup_mount(struct file_system_type *fs_type,
2106 int flags, const char *unused_dev_name,
2107 void *data)
2108{
2109 struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
2110 struct dentry *dentry;
2111 int ret;
2112
2113 get_cgroup_ns(ns);
2114
2115
2116 if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
2117 put_cgroup_ns(ns);
2118 return ERR_PTR(-EPERM);
2119 }
2120
2121
2122
2123
2124
2125 if (!use_task_css_set_links)
2126 cgroup_enable_task_cg_lists();
2127
2128 if (fs_type == &cgroup2_fs_type) {
2129 unsigned int root_flags;
2130
2131 ret = parse_cgroup_root_flags(data, &root_flags);
2132 if (ret) {
2133 put_cgroup_ns(ns);
2134 return ERR_PTR(ret);
2135 }
2136
2137 cgrp_dfl_visible = true;
2138 cgroup_get_live(&cgrp_dfl_root.cgrp);
2139
2140 dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root,
2141 CGROUP2_SUPER_MAGIC, ns);
2142 if (!IS_ERR(dentry))
2143 apply_cgroup_root_flags(root_flags);
2144 } else {
2145 dentry = cgroup1_mount(&cgroup_fs_type, flags, data,
2146 CGROUP_SUPER_MAGIC, ns);
2147 }
2148
2149 put_cgroup_ns(ns);
2150 return dentry;
2151}
2152
2153static void cgroup_kill_sb(struct super_block *sb)
2154{
2155 struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
2156 struct cgroup_root *root = cgroup_root_from_kf(kf_root);
2157
2158
2159
2160
2161
2162
2163
2164
2165 if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
2166 !percpu_ref_is_dying(&root->cgrp.self.refcnt))
2167 percpu_ref_kill(&root->cgrp.self.refcnt);
2168 cgroup_put(&root->cgrp);
2169 kernfs_kill_sb(sb);
2170}
2171
2172struct file_system_type cgroup_fs_type = {
2173 .name = "cgroup",
2174 .mount = cgroup_mount,
2175 .kill_sb = cgroup_kill_sb,
2176 .fs_flags = FS_USERNS_MOUNT,
2177};
2178
2179static struct file_system_type cgroup2_fs_type = {
2180 .name = "cgroup2",
2181 .mount = cgroup_mount,
2182 .kill_sb = cgroup_kill_sb,
2183 .fs_flags = FS_USERNS_MOUNT,
2184};
2185
2186int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
2187 struct cgroup_namespace *ns)
2188{
2189 struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
2190
2191 return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
2192}
2193
2194int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
2195 struct cgroup_namespace *ns)
2196{
2197 int ret;
2198
2199 mutex_lock(&cgroup_mutex);
2200 spin_lock_irq(&css_set_lock);
2201
2202 ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
2203
2204 spin_unlock_irq(&css_set_lock);
2205 mutex_unlock(&cgroup_mutex);
2206
2207 return ret;
2208}
2209EXPORT_SYMBOL_GPL(cgroup_path_ns);
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
2225{
2226 struct cgroup_root *root;
2227 struct cgroup *cgrp;
2228 int hierarchy_id = 1;
2229 int ret;
2230
2231 mutex_lock(&cgroup_mutex);
2232 spin_lock_irq(&css_set_lock);
2233
2234 root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
2235
2236 if (root) {
2237 cgrp = task_cgroup_from_root(task, root);
2238 ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
2239 } else {
2240
2241 ret = strlcpy(buf, "/", buflen);
2242 }
2243
2244 spin_unlock_irq(&css_set_lock);
2245 mutex_unlock(&cgroup_mutex);
2246 return ret;
2247}
2248EXPORT_SYMBOL_GPL(task_cgroup_path);
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260static void cgroup_migrate_add_task(struct task_struct *task,
2261 struct cgroup_mgctx *mgctx)
2262{
2263 struct css_set *cset;
2264
2265 lockdep_assert_held(&css_set_lock);
2266
2267
2268 if (task->flags & PF_EXITING)
2269 return;
2270
2271
2272 if (list_empty(&task->cg_list))
2273 return;
2274
2275 cset = task_css_set(task);
2276 if (!cset->mg_src_cgrp)
2277 return;
2278
2279 mgctx->tset.nr_tasks++;
2280
2281 list_move_tail(&task->cg_list, &cset->mg_tasks);
2282 if (list_empty(&cset->mg_node))
2283 list_add_tail(&cset->mg_node,
2284 &mgctx->tset.src_csets);
2285 if (list_empty(&cset->mg_dst_cset->mg_node))
2286 list_add_tail(&cset->mg_dst_cset->mg_node,
2287 &mgctx->tset.dst_csets);
2288}
2289
2290
2291
2292
2293
2294
2295
2296
2297struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
2298 struct cgroup_subsys_state **dst_cssp)
2299{
2300 tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
2301 tset->cur_task = NULL;
2302
2303 return cgroup_taskset_next(tset, dst_cssp);
2304}
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
2315 struct cgroup_subsys_state **dst_cssp)
2316{
2317 struct css_set *cset = tset->cur_cset;
2318 struct task_struct *task = tset->cur_task;
2319
2320 while (&cset->mg_node != tset->csets) {
2321 if (!task)
2322 task = list_first_entry(&cset->mg_tasks,
2323 struct task_struct, cg_list);
2324 else
2325 task = list_next_entry(task, cg_list);
2326
2327 if (&task->cg_list != &cset->mg_tasks) {
2328 tset->cur_cset = cset;
2329 tset->cur_task = task;
2330
2331
2332
2333
2334
2335
2336
2337 if (cset->mg_dst_cset)
2338 *dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
2339 else
2340 *dst_cssp = cset->subsys[tset->ssid];
2341
2342 return task;
2343 }
2344
2345 cset = list_next_entry(cset, mg_node);
2346 task = NULL;
2347 }
2348
2349 return NULL;
2350}
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
2362{
2363 struct cgroup_taskset *tset = &mgctx->tset;
2364 struct cgroup_subsys *ss;
2365 struct task_struct *task, *tmp_task;
2366 struct css_set *cset, *tmp_cset;
2367 int ssid, failed_ssid, ret;
2368
2369
2370 if (tset->nr_tasks) {
2371 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2372 if (ss->can_attach) {
2373 tset->ssid = ssid;
2374 ret = ss->can_attach(tset);
2375 if (ret) {
2376 failed_ssid = ssid;
2377 goto out_cancel_attach;
2378 }
2379 }
2380 } while_each_subsys_mask();
2381 }
2382
2383
2384
2385
2386
2387
2388 spin_lock_irq(&css_set_lock);
2389 list_for_each_entry(cset, &tset->src_csets, mg_node) {
2390 list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
2391 struct css_set *from_cset = task_css_set(task);
2392 struct css_set *to_cset = cset->mg_dst_cset;
2393
2394 get_css_set(to_cset);
2395 to_cset->nr_tasks++;
2396 css_set_move_task(task, from_cset, to_cset, true);
2397 from_cset->nr_tasks--;
2398
2399
2400
2401
2402 cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
2403 to_cset->dfl_cgrp);
2404 put_css_set_locked(from_cset);
2405
2406 }
2407 }
2408 spin_unlock_irq(&css_set_lock);
2409
2410
2411
2412
2413
2414
2415 tset->csets = &tset->dst_csets;
2416
2417 if (tset->nr_tasks) {
2418 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2419 if (ss->attach) {
2420 tset->ssid = ssid;
2421 ss->attach(tset);
2422 }
2423 } while_each_subsys_mask();
2424 }
2425
2426 ret = 0;
2427 goto out_release_tset;
2428
2429out_cancel_attach:
2430 if (tset->nr_tasks) {
2431 do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
2432 if (ssid == failed_ssid)
2433 break;
2434 if (ss->cancel_attach) {
2435 tset->ssid = ssid;
2436 ss->cancel_attach(tset);
2437 }
2438 } while_each_subsys_mask();
2439 }
2440out_release_tset:
2441 spin_lock_irq(&css_set_lock);
2442 list_splice_init(&tset->dst_csets, &tset->src_csets);
2443 list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
2444 list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
2445 list_del_init(&cset->mg_node);
2446 }
2447 spin_unlock_irq(&css_set_lock);
2448
2449
2450
2451
2452
2453
2454 tset->nr_tasks = 0;
2455 tset->csets = &tset->src_csets;
2456 return ret;
2457}
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
2469{
2470
2471 if (!cgroup_on_dfl(dst_cgrp))
2472 return 0;
2473
2474
2475 if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
2476 return -EOPNOTSUPP;
2477
2478
2479 if (cgroup_is_mixable(dst_cgrp))
2480 return 0;
2481
2482
2483
2484
2485
2486 if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp))
2487 return 0;
2488
2489
2490 if (dst_cgrp->subtree_control)
2491 return -EBUSY;
2492
2493 return 0;
2494}
2495
2496
2497
2498
2499
2500
2501
2502
2503void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
2504{
2505 LIST_HEAD(preloaded);
2506 struct css_set *cset, *tmp_cset;
2507
2508 lockdep_assert_held(&cgroup_mutex);
2509
2510 spin_lock_irq(&css_set_lock);
2511
2512 list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
2513 list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);
2514
2515 list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
2516 cset->mg_src_cgrp = NULL;
2517 cset->mg_dst_cgrp = NULL;
2518 cset->mg_dst_cset = NULL;
2519 list_del_init(&cset->mg_preload_node);
2520 put_css_set_locked(cset);
2521 }
2522
2523 spin_unlock_irq(&css_set_lock);
2524}
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542void cgroup_migrate_add_src(struct css_set *src_cset,
2543 struct cgroup *dst_cgrp,
2544 struct cgroup_mgctx *mgctx)
2545{
2546 struct cgroup *src_cgrp;
2547
2548 lockdep_assert_held(&cgroup_mutex);
2549 lockdep_assert_held(&css_set_lock);
2550
2551
2552
2553
2554
2555
2556 if (src_cset->dead)
2557 return;
2558
2559 src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
2560
2561 if (!list_empty(&src_cset->mg_preload_node))
2562 return;
2563
2564 WARN_ON(src_cset->mg_src_cgrp);
2565 WARN_ON(src_cset->mg_dst_cgrp);
2566 WARN_ON(!list_empty(&src_cset->mg_tasks));
2567 WARN_ON(!list_empty(&src_cset->mg_node));
2568
2569 src_cset->mg_src_cgrp = src_cgrp;
2570 src_cset->mg_dst_cgrp = dst_cgrp;
2571 get_css_set(src_cset);
2572 list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
2573}
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
2590{
2591 struct css_set *src_cset, *tmp_cset;
2592
2593 lockdep_assert_held(&cgroup_mutex);
2594
2595
2596 list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
2597 mg_preload_node) {
2598 struct css_set *dst_cset;
2599 struct cgroup_subsys *ss;
2600 int ssid;
2601
2602 dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
2603 if (!dst_cset)
2604 return -ENOMEM;
2605
2606 WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
2607
2608
2609
2610
2611
2612
2613 if (src_cset == dst_cset) {
2614 src_cset->mg_src_cgrp = NULL;
2615 src_cset->mg_dst_cgrp = NULL;
2616 list_del_init(&src_cset->mg_preload_node);
2617 put_css_set(src_cset);
2618 put_css_set(dst_cset);
2619 continue;
2620 }
2621
2622 src_cset->mg_dst_cset = dst_cset;
2623
2624 if (list_empty(&dst_cset->mg_preload_node))
2625 list_add_tail(&dst_cset->mg_preload_node,
2626 &mgctx->preloaded_dst_csets);
2627 else
2628 put_css_set(dst_cset);
2629
2630 for_each_subsys(ss, ssid)
2631 if (src_cset->subsys[ssid] != dst_cset->subsys[ssid])
2632 mgctx->ss_mask |= 1 << ssid;
2633 }
2634
2635 return 0;
2636}
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656int cgroup_migrate(struct task_struct *leader, bool threadgroup,
2657 struct cgroup_mgctx *mgctx)
2658{
2659 struct task_struct *task;
2660
2661
2662
2663
2664
2665
2666 spin_lock_irq(&css_set_lock);
2667 rcu_read_lock();
2668 task = leader;
2669 do {
2670 cgroup_migrate_add_task(task, mgctx);
2671 if (!threadgroup)
2672 break;
2673 } while_each_thread(leader, task);
2674 rcu_read_unlock();
2675 spin_unlock_irq(&css_set_lock);
2676
2677 return cgroup_migrate_execute(mgctx);
2678}
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
2689 bool threadgroup)
2690{
2691 DEFINE_CGROUP_MGCTX(mgctx);
2692 struct task_struct *task;
2693 int ret;
2694
2695 ret = cgroup_migrate_vet_dst(dst_cgrp);
2696 if (ret)
2697 return ret;
2698
2699
2700 spin_lock_irq(&css_set_lock);
2701 rcu_read_lock();
2702 task = leader;
2703 do {
2704 cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
2705 if (!threadgroup)
2706 break;
2707 } while_each_thread(leader, task);
2708 rcu_read_unlock();
2709 spin_unlock_irq(&css_set_lock);
2710
2711
2712 ret = cgroup_migrate_prepare_dst(&mgctx);
2713 if (!ret)
2714 ret = cgroup_migrate(leader, threadgroup, &mgctx);
2715
2716 cgroup_migrate_finish(&mgctx);
2717
2718 if (!ret)
2719 TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
2720
2721 return ret;
2722}
2723
2724struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
2725 __acquires(&cgroup_threadgroup_rwsem)
2726{
2727 struct task_struct *tsk;
2728 pid_t pid;
2729
2730 if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
2731 return ERR_PTR(-EINVAL);
2732
2733 percpu_down_write(&cgroup_threadgroup_rwsem);
2734
2735 rcu_read_lock();
2736 if (pid) {
2737 tsk = find_task_by_vpid(pid);
2738 if (!tsk) {
2739 tsk = ERR_PTR(-ESRCH);
2740 goto out_unlock_threadgroup;
2741 }
2742 } else {
2743 tsk = current;
2744 }
2745
2746 if (threadgroup)
2747 tsk = tsk->group_leader;
2748
2749
2750
2751
2752
2753
2754
2755 if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
2756 tsk = ERR_PTR(-EINVAL);
2757 goto out_unlock_threadgroup;
2758 }
2759
2760 get_task_struct(tsk);
2761 goto out_unlock_rcu;
2762
2763out_unlock_threadgroup:
2764 percpu_up_write(&cgroup_threadgroup_rwsem);
2765out_unlock_rcu:
2766 rcu_read_unlock();
2767 return tsk;
2768}
2769
2770void cgroup_procs_write_finish(struct task_struct *task)
2771 __releases(&cgroup_threadgroup_rwsem)
2772{
2773 struct cgroup_subsys *ss;
2774 int ssid;
2775
2776
2777 put_task_struct(task);
2778
2779 percpu_up_write(&cgroup_threadgroup_rwsem);
2780 for_each_subsys(ss, ssid)
2781 if (ss->post_attach)
2782 ss->post_attach();
2783}
2784
2785static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
2786{
2787 struct cgroup_subsys *ss;
2788 bool printed = false;
2789 int ssid;
2790
2791 do_each_subsys_mask(ss, ssid, ss_mask) {
2792 if (printed)
2793 seq_putc(seq, ' ');
2794 seq_printf(seq, "%s", ss->name);
2795 printed = true;
2796 } while_each_subsys_mask();
2797 if (printed)
2798 seq_putc(seq, '\n');
2799}
2800
2801
2802static int cgroup_controllers_show(struct seq_file *seq, void *v)
2803{
2804 struct cgroup *cgrp = seq_css(seq)->cgroup;
2805
2806 cgroup_print_ss_mask(seq, cgroup_control(cgrp));
2807 return 0;
2808}
2809
2810
2811static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
2812{
2813 struct cgroup *cgrp = seq_css(seq)->cgroup;
2814
2815 cgroup_print_ss_mask(seq, cgrp->subtree_control);
2816 return 0;
2817}
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828static int cgroup_update_dfl_csses(struct cgroup *cgrp)
2829{
2830 DEFINE_CGROUP_MGCTX(mgctx);
2831 struct cgroup_subsys_state *d_css;
2832 struct cgroup *dsct;
2833 struct css_set *src_cset;
2834 int ret;
2835
2836 lockdep_assert_held(&cgroup_mutex);
2837
2838 percpu_down_write(&cgroup_threadgroup_rwsem);
2839
2840
2841 spin_lock_irq(&css_set_lock);
2842 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
2843 struct cgrp_cset_link *link;
2844
2845 list_for_each_entry(link, &dsct->cset_links, cset_link)
2846 cgroup_migrate_add_src(link->cset, dsct, &mgctx);
2847 }
2848 spin_unlock_irq(&css_set_lock);
2849
2850
2851 ret = cgroup_migrate_prepare_dst(&mgctx);
2852 if (ret)
2853 goto out_finish;
2854
2855 spin_lock_irq(&css_set_lock);
2856 list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
2857 struct task_struct *task, *ntask;
2858
2859
2860 list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
2861 cgroup_migrate_add_task(task, &mgctx);
2862 }
2863 spin_unlock_irq(&css_set_lock);
2864
2865 ret = cgroup_migrate_execute(&mgctx);
2866out_finish:
2867 cgroup_migrate_finish(&mgctx);
2868 percpu_up_write(&cgroup_threadgroup_rwsem);
2869 return ret;
2870}
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
2881 __acquires(&cgroup_mutex)
2882{
2883 struct cgroup *dsct;
2884 struct cgroup_subsys_state *d_css;
2885 struct cgroup_subsys *ss;
2886 int ssid;
2887
2888restart:
2889 mutex_lock(&cgroup_mutex);
2890
2891 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
2892 for_each_subsys(ss, ssid) {
2893 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
2894 DEFINE_WAIT(wait);
2895
2896 if (!css || !percpu_ref_is_dying(&css->refcnt))
2897 continue;
2898
2899 cgroup_get_live(dsct);
2900 prepare_to_wait(&dsct->offline_waitq, &wait,
2901 TASK_UNINTERRUPTIBLE);
2902
2903 mutex_unlock(&cgroup_mutex);
2904 schedule();
2905 finish_wait(&dsct->offline_waitq, &wait);
2906
2907 cgroup_put(dsct);
2908 goto restart;
2909 }
2910 }
2911}
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921static void cgroup_save_control(struct cgroup *cgrp)
2922{
2923 struct cgroup *dsct;
2924 struct cgroup_subsys_state *d_css;
2925
2926 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
2927 dsct->old_subtree_control = dsct->subtree_control;
2928 dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
2929 dsct->old_dom_cgrp = dsct->dom_cgrp;
2930 }
2931}
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941static void cgroup_propagate_control(struct cgroup *cgrp)
2942{
2943 struct cgroup *dsct;
2944 struct cgroup_subsys_state *d_css;
2945
2946 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
2947 dsct->subtree_control &= cgroup_control(dsct);
2948 dsct->subtree_ss_mask =
2949 cgroup_calc_subtree_ss_mask(dsct->subtree_control,
2950 cgroup_ss_mask(dsct));
2951 }
2952}
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962static void cgroup_restore_control(struct cgroup *cgrp)
2963{
2964 struct cgroup *dsct;
2965 struct cgroup_subsys_state *d_css;
2966
2967 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
2968 dsct->subtree_control = dsct->old_subtree_control;
2969 dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
2970 dsct->dom_cgrp = dsct->old_dom_cgrp;
2971 }
2972}
2973
2974static bool css_visible(struct cgroup_subsys_state *css)
2975{
2976 struct cgroup_subsys *ss = css->ss;
2977 struct cgroup *cgrp = css->cgroup;
2978
2979 if (cgroup_control(cgrp) & (1 << ss->id))
2980 return true;
2981 if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
2982 return false;
2983 return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
2984}
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999static int cgroup_apply_control_enable(struct cgroup *cgrp)
3000{
3001 struct cgroup *dsct;
3002 struct cgroup_subsys_state *d_css;
3003 struct cgroup_subsys *ss;
3004 int ssid, ret;
3005
3006 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3007 for_each_subsys(ss, ssid) {
3008 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3009
3010 WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
3011
3012 if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
3013 continue;
3014
3015 if (!css) {
3016 css = css_create(dsct, ss);
3017 if (IS_ERR(css))
3018 return PTR_ERR(css);
3019 }
3020
3021 if (css_visible(css)) {
3022 ret = css_populate_dir(css);
3023 if (ret)
3024 return ret;
3025 }
3026 }
3027 }
3028
3029 return 0;
3030}
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045static void cgroup_apply_control_disable(struct cgroup *cgrp)
3046{
3047 struct cgroup *dsct;
3048 struct cgroup_subsys_state *d_css;
3049 struct cgroup_subsys *ss;
3050 int ssid;
3051
3052 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3053 for_each_subsys(ss, ssid) {
3054 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3055
3056 WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
3057
3058 if (!css)
3059 continue;
3060
3061 if (css->parent &&
3062 !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
3063 kill_css(css);
3064 } else if (!css_visible(css)) {
3065 css_clear_dir(css);
3066 if (ss->css_reset)
3067 ss->css_reset(css);
3068 }
3069 }
3070 }
3071}
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090static int cgroup_apply_control(struct cgroup *cgrp)
3091{
3092 int ret;
3093
3094 cgroup_propagate_control(cgrp);
3095
3096 ret = cgroup_apply_control_enable(cgrp);
3097 if (ret)
3098 return ret;
3099
3100
3101
3102
3103
3104
3105 ret = cgroup_update_dfl_csses(cgrp);
3106 if (ret)
3107 return ret;
3108
3109 return 0;
3110}
3111
3112
3113
3114
3115
3116
3117
3118
3119static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
3120{
3121 if (ret) {
3122 cgroup_restore_control(cgrp);
3123 cgroup_propagate_control(cgrp);
3124 }
3125
3126 cgroup_apply_control_disable(cgrp);
3127}
3128
3129static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable)
3130{
3131 u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask;
3132
3133
3134 if (!enable)
3135 return 0;
3136
3137
3138 if (!cgroup_is_valid_domain(cgrp->dom_cgrp))
3139 return -EOPNOTSUPP;
3140
3141
3142 if (cgroup_is_mixable(cgrp))
3143 return 0;
3144
3145 if (domain_enable) {
3146
3147 if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp))
3148 return -EOPNOTSUPP;
3149 } else {
3150
3151
3152
3153
3154
3155 if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp))
3156 return 0;
3157 }
3158
3159
3160
3161
3162
3163 if (cgroup_has_tasks(cgrp))
3164 return -EBUSY;
3165
3166 return 0;
3167}
3168
3169
3170static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
3171 char *buf, size_t nbytes,
3172 loff_t off)
3173{
3174 u16 enable = 0, disable = 0;
3175 struct cgroup *cgrp, *child;
3176 struct cgroup_subsys *ss;
3177 char *tok;
3178 int ssid, ret;
3179
3180
3181
3182
3183
3184 buf = strstrip(buf);
3185 while ((tok = strsep(&buf, " "))) {
3186 if (tok[0] == '\0')
3187 continue;
3188 do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
3189 if (!cgroup_ssid_enabled(ssid) ||
3190 strcmp(tok + 1, ss->name))
3191 continue;
3192
3193 if (*tok == '+') {
3194 enable |= 1 << ssid;
3195 disable &= ~(1 << ssid);
3196 } else if (*tok == '-') {
3197 disable |= 1 << ssid;
3198 enable &= ~(1 << ssid);
3199 } else {
3200 return -EINVAL;
3201 }
3202 break;
3203 } while_each_subsys_mask();
3204 if (ssid == CGROUP_SUBSYS_COUNT)
3205 return -EINVAL;
3206 }
3207
3208 cgrp = cgroup_kn_lock_live(of->kn, true);
3209 if (!cgrp)
3210 return -ENODEV;
3211
3212 for_each_subsys(ss, ssid) {
3213 if (enable & (1 << ssid)) {
3214 if (cgrp->subtree_control & (1 << ssid)) {
3215 enable &= ~(1 << ssid);
3216 continue;
3217 }
3218
3219 if (!(cgroup_control(cgrp) & (1 << ssid))) {
3220 ret = -ENOENT;
3221 goto out_unlock;
3222 }
3223 } else if (disable & (1 << ssid)) {
3224 if (!(cgrp->subtree_control & (1 << ssid))) {
3225 disable &= ~(1 << ssid);
3226 continue;
3227 }
3228
3229
3230 cgroup_for_each_live_child(child, cgrp) {
3231 if (child->subtree_control & (1 << ssid)) {
3232 ret = -EBUSY;
3233 goto out_unlock;
3234 }
3235 }
3236 }
3237 }
3238
3239 if (!enable && !disable) {
3240 ret = 0;
3241 goto out_unlock;
3242 }
3243
3244 ret = cgroup_vet_subtree_control_enable(cgrp, enable);
3245 if (ret)
3246 goto out_unlock;
3247
3248
3249 cgroup_save_control(cgrp);
3250
3251 cgrp->subtree_control |= enable;
3252 cgrp->subtree_control &= ~disable;
3253
3254 ret = cgroup_apply_control(cgrp);
3255 cgroup_finalize_control(cgrp, ret);
3256 if (ret)
3257 goto out_unlock;
3258
3259 kernfs_activate(cgrp->kn);
3260out_unlock:
3261 cgroup_kn_unlock(of->kn);
3262 return ret ?: nbytes;
3263}
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274static int cgroup_enable_threaded(struct cgroup *cgrp)
3275{
3276 struct cgroup *parent = cgroup_parent(cgrp);
3277 struct cgroup *dom_cgrp = parent->dom_cgrp;
3278 struct cgroup *dsct;
3279 struct cgroup_subsys_state *d_css;
3280 int ret;
3281
3282 lockdep_assert_held(&cgroup_mutex);
3283
3284
3285 if (cgroup_is_threaded(cgrp))
3286 return 0;
3287
3288
3289
3290
3291
3292
3293
3294 if (cgroup_is_populated(cgrp) ||
3295 cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
3296 return -EOPNOTSUPP;
3297
3298
3299 if (!cgroup_is_valid_domain(dom_cgrp) ||
3300 !cgroup_can_be_thread_root(dom_cgrp))
3301 return -EOPNOTSUPP;
3302
3303
3304
3305
3306
3307 cgroup_save_control(cgrp);
3308
3309 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)
3310 if (dsct == cgrp || cgroup_is_threaded(dsct))
3311 dsct->dom_cgrp = dom_cgrp;
3312
3313 ret = cgroup_apply_control(cgrp);
3314 if (!ret)
3315 parent->nr_threaded_children++;
3316
3317 cgroup_finalize_control(cgrp, ret);
3318 return ret;
3319}
3320
3321static int cgroup_type_show(struct seq_file *seq, void *v)
3322{
3323 struct cgroup *cgrp = seq_css(seq)->cgroup;
3324
3325 if (cgroup_is_threaded(cgrp))
3326 seq_puts(seq, "threaded\n");
3327 else if (!cgroup_is_valid_domain(cgrp))
3328 seq_puts(seq, "domain invalid\n");
3329 else if (cgroup_is_thread_root(cgrp))
3330 seq_puts(seq, "domain threaded\n");
3331 else
3332 seq_puts(seq, "domain\n");
3333
3334 return 0;
3335}
3336
3337static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
3338 size_t nbytes, loff_t off)
3339{
3340 struct cgroup *cgrp;
3341 int ret;
3342
3343
3344 if (strcmp(strstrip(buf), "threaded"))
3345 return -EINVAL;
3346
3347 cgrp = cgroup_kn_lock_live(of->kn, false);
3348 if (!cgrp)
3349 return -ENOENT;
3350
3351
3352 ret = cgroup_enable_threaded(cgrp);
3353
3354 cgroup_kn_unlock(of->kn);
3355 return ret ?: nbytes;
3356}
3357
3358static int cgroup_max_descendants_show(struct seq_file *seq, void *v)
3359{
3360 struct cgroup *cgrp = seq_css(seq)->cgroup;
3361 int descendants = READ_ONCE(cgrp->max_descendants);
3362
3363 if (descendants == INT_MAX)
3364 seq_puts(seq, "max\n");
3365 else
3366 seq_printf(seq, "%d\n", descendants);
3367
3368 return 0;
3369}
3370
3371static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of,
3372 char *buf, size_t nbytes, loff_t off)
3373{
3374 struct cgroup *cgrp;
3375 int descendants;
3376 ssize_t ret;
3377
3378 buf = strstrip(buf);
3379 if (!strcmp(buf, "max")) {
3380 descendants = INT_MAX;
3381 } else {
3382 ret = kstrtoint(buf, 0, &descendants);
3383 if (ret)
3384 return ret;
3385 }
3386
3387 if (descendants < 0)
3388 return -ERANGE;
3389
3390 cgrp = cgroup_kn_lock_live(of->kn, false);
3391 if (!cgrp)
3392 return -ENOENT;
3393
3394 cgrp->max_descendants = descendants;
3395
3396 cgroup_kn_unlock(of->kn);
3397
3398 return nbytes;
3399}
3400
3401static int cgroup_max_depth_show(struct seq_file *seq, void *v)
3402{
3403 struct cgroup *cgrp = seq_css(seq)->cgroup;
3404 int depth = READ_ONCE(cgrp->max_depth);
3405
3406 if (depth == INT_MAX)
3407 seq_puts(seq, "max\n");
3408 else
3409 seq_printf(seq, "%d\n", depth);
3410
3411 return 0;
3412}
3413
3414static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
3415 char *buf, size_t nbytes, loff_t off)
3416{
3417 struct cgroup *cgrp;
3418 ssize_t ret;
3419 int depth;
3420
3421 buf = strstrip(buf);
3422 if (!strcmp(buf, "max")) {
3423 depth = INT_MAX;
3424 } else {
3425 ret = kstrtoint(buf, 0, &depth);
3426 if (ret)
3427 return ret;
3428 }
3429
3430 if (depth < 0)
3431 return -ERANGE;
3432
3433 cgrp = cgroup_kn_lock_live(of->kn, false);
3434 if (!cgrp)
3435 return -ENOENT;
3436
3437 cgrp->max_depth = depth;
3438
3439 cgroup_kn_unlock(of->kn);
3440
3441 return nbytes;
3442}
3443
3444static int cgroup_events_show(struct seq_file *seq, void *v)
3445{
3446 struct cgroup *cgrp = seq_css(seq)->cgroup;
3447
3448 seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
3449 seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
3450
3451 return 0;
3452}
3453
3454static int cgroup_stat_show(struct seq_file *seq, void *v)
3455{
3456 struct cgroup *cgroup = seq_css(seq)->cgroup;
3457
3458 seq_printf(seq, "nr_descendants %d\n",
3459 cgroup->nr_descendants);
3460 seq_printf(seq, "nr_dying_descendants %d\n",
3461 cgroup->nr_dying_descendants);
3462
3463 return 0;
3464}
3465
3466static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
3467 struct cgroup *cgrp, int ssid)
3468{
3469 struct cgroup_subsys *ss = cgroup_subsys[ssid];
3470 struct cgroup_subsys_state *css;
3471 int ret;
3472
3473 if (!ss->css_extra_stat_show)
3474 return 0;
3475
3476 css = cgroup_tryget_css(cgrp, ss);
3477 if (!css)
3478 return 0;
3479
3480 ret = ss->css_extra_stat_show(seq, css);
3481 css_put(css);
3482 return ret;
3483}
3484
3485static int cpu_stat_show(struct seq_file *seq, void *v)
3486{
3487 struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
3488 int ret = 0;
3489
3490 cgroup_base_stat_cputime_show(seq);
3491#ifdef CONFIG_CGROUP_SCHED
3492 ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
3493#endif
3494 return ret;
3495}
3496
3497static int cgroup_freeze_show(struct seq_file *seq, void *v)
3498{
3499 struct cgroup *cgrp = seq_css(seq)->cgroup;
3500
3501 seq_printf(seq, "%d\n", cgrp->freezer.freeze);
3502
3503 return 0;
3504}
3505
3506static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
3507 char *buf, size_t nbytes, loff_t off)
3508{
3509 struct cgroup *cgrp;
3510 ssize_t ret;
3511 int freeze;
3512
3513 ret = kstrtoint(strstrip(buf), 0, &freeze);
3514 if (ret)
3515 return ret;
3516
3517 if (freeze < 0 || freeze > 1)
3518 return -ERANGE;
3519
3520 cgrp = cgroup_kn_lock_live(of->kn, false);
3521 if (!cgrp)
3522 return -ENOENT;
3523
3524 cgroup_freeze(cgrp, freeze);
3525
3526 cgroup_kn_unlock(of->kn);
3527
3528 return nbytes;
3529}
3530
3531#ifdef CONFIG_PSI
3532static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
3533{
3534 struct cgroup *cgroup = seq_css(seq)->cgroup;
3535 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3536
3537 return psi_show(seq, psi, PSI_IO);
3538}
3539static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
3540{
3541 struct cgroup *cgroup = seq_css(seq)->cgroup;
3542 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3543
3544 return psi_show(seq, psi, PSI_MEM);
3545}
3546static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
3547{
3548 struct cgroup *cgroup = seq_css(seq)->cgroup;
3549 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3550
3551 return psi_show(seq, psi, PSI_CPU);
3552}
3553
3554static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
3555 size_t nbytes, enum psi_res res)
3556{
3557 struct psi_trigger *new;
3558 struct cgroup *cgrp;
3559
3560 cgrp = cgroup_kn_lock_live(of->kn, false);
3561 if (!cgrp)
3562 return -ENODEV;
3563
3564 cgroup_get(cgrp);
3565 cgroup_kn_unlock(of->kn);
3566
3567 new = psi_trigger_create(&cgrp->psi, buf, nbytes, res);
3568 if (IS_ERR(new)) {
3569 cgroup_put(cgrp);
3570 return PTR_ERR(new);
3571 }
3572
3573 psi_trigger_replace(&of->priv, new);
3574
3575 cgroup_put(cgrp);
3576
3577 return nbytes;
3578}
3579
3580static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of,
3581 char *buf, size_t nbytes,
3582 loff_t off)
3583{
3584 return cgroup_pressure_write(of, buf, nbytes, PSI_IO);
3585}
3586
3587static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of,
3588 char *buf, size_t nbytes,
3589 loff_t off)
3590{
3591 return cgroup_pressure_write(of, buf, nbytes, PSI_MEM);
3592}
3593
3594static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
3595 char *buf, size_t nbytes,
3596 loff_t off)
3597{
3598 return cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
3599}
3600
3601static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
3602 poll_table *pt)
3603{
3604 return psi_trigger_poll(&of->priv, of->file, pt);
3605}
3606
3607static void cgroup_pressure_release(struct kernfs_open_file *of)
3608{
3609 psi_trigger_replace(&of->priv, NULL);
3610}
3611#endif
3612
3613static int cgroup_file_open(struct kernfs_open_file *of)
3614{
3615 struct cftype *cft = of->kn->priv;
3616
3617 if (cft->open)
3618 return cft->open(of);
3619 return 0;
3620}
3621
3622static void cgroup_file_release(struct kernfs_open_file *of)
3623{
3624 struct cftype *cft = of->kn->priv;
3625
3626 if (cft->release)
3627 cft->release(of);
3628}
3629
3630static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
3631 size_t nbytes, loff_t off)
3632{
3633 struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
3634 struct cgroup *cgrp = of->kn->parent->priv;
3635 struct cftype *cft = of->kn->priv;
3636 struct cgroup_subsys_state *css;
3637 int ret;
3638
3639
3640
3641
3642
3643
3644
3645 if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
3646 !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
3647 ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
3648 return -EPERM;
3649
3650 if (cft->write)
3651 return cft->write(of, buf, nbytes, off);
3652
3653
3654
3655
3656
3657
3658
3659 rcu_read_lock();
3660 css = cgroup_css(cgrp, cft->ss);
3661 rcu_read_unlock();
3662
3663 if (cft->write_u64) {
3664 unsigned long long v;
3665 ret = kstrtoull(buf, 0, &v);
3666 if (!ret)
3667 ret = cft->write_u64(css, cft, v);
3668 } else if (cft->write_s64) {
3669 long long v;
3670 ret = kstrtoll(buf, 0, &v);
3671 if (!ret)
3672 ret = cft->write_s64(css, cft, v);
3673 } else {
3674 ret = -EINVAL;
3675 }
3676
3677 return ret ?: nbytes;
3678}
3679
3680static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
3681{
3682 struct cftype *cft = of->kn->priv;
3683
3684 if (cft->poll)
3685 return cft->poll(of, pt);
3686
3687 return kernfs_generic_poll(of, pt);
3688}
3689
3690static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
3691{
3692 return seq_cft(seq)->seq_start(seq, ppos);
3693}
3694
3695static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
3696{
3697 return seq_cft(seq)->seq_next(seq, v, ppos);
3698}
3699
3700static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
3701{
3702 if (seq_cft(seq)->seq_stop)
3703 seq_cft(seq)->seq_stop(seq, v);
3704}
3705
3706static int cgroup_seqfile_show(struct seq_file *m, void *arg)
3707{
3708 struct cftype *cft = seq_cft(m);
3709 struct cgroup_subsys_state *css = seq_css(m);
3710
3711 if (cft->seq_show)
3712 return cft->seq_show(m, arg);
3713
3714 if (cft->read_u64)
3715 seq_printf(m, "%llu\n", cft->read_u64(css, cft));
3716 else if (cft->read_s64)
3717 seq_printf(m, "%lld\n", cft->read_s64(css, cft));
3718 else
3719 return -EINVAL;
3720 return 0;
3721}
3722
3723static struct kernfs_ops cgroup_kf_single_ops = {
3724 .atomic_write_len = PAGE_SIZE,
3725 .open = cgroup_file_open,
3726 .release = cgroup_file_release,
3727 .write = cgroup_file_write,
3728 .poll = cgroup_file_poll,
3729 .seq_show = cgroup_seqfile_show,
3730};
3731
3732static struct kernfs_ops cgroup_kf_ops = {
3733 .atomic_write_len = PAGE_SIZE,
3734 .open = cgroup_file_open,
3735 .release = cgroup_file_release,
3736 .write = cgroup_file_write,
3737 .poll = cgroup_file_poll,
3738 .seq_start = cgroup_seqfile_start,
3739 .seq_next = cgroup_seqfile_next,
3740 .seq_stop = cgroup_seqfile_stop,
3741 .seq_show = cgroup_seqfile_show,
3742};
3743
3744
3745static int cgroup_kn_set_ugid(struct kernfs_node *kn)
3746{
3747 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
3748 .ia_uid = current_fsuid(),
3749 .ia_gid = current_fsgid(), };
3750
3751 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
3752 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
3753 return 0;
3754
3755 return kernfs_setattr(kn, &iattr);
3756}
3757
3758static void cgroup_file_notify_timer(struct timer_list *timer)
3759{
3760 cgroup_file_notify(container_of(timer, struct cgroup_file,
3761 notify_timer));
3762}
3763
3764static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
3765 struct cftype *cft)
3766{
3767 char name[CGROUP_FILE_NAME_MAX];
3768 struct kernfs_node *kn;
3769 struct lock_class_key *key = NULL;
3770 int ret;
3771
3772#ifdef CONFIG_DEBUG_LOCK_ALLOC
3773 key = &cft->lockdep_key;
3774#endif
3775 kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
3776 cgroup_file_mode(cft),
3777 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
3778 0, cft->kf_ops, cft,
3779 NULL, key);
3780 if (IS_ERR(kn))
3781 return PTR_ERR(kn);
3782
3783 ret = cgroup_kn_set_ugid(kn);
3784 if (ret) {
3785 kernfs_remove(kn);
3786 return ret;
3787 }
3788
3789 if (cft->file_offset) {
3790 struct cgroup_file *cfile = (void *)css + cft->file_offset;
3791
3792 timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
3793
3794 spin_lock_irq(&cgroup_file_kn_lock);
3795 cfile->kn = kn;
3796 spin_unlock_irq(&cgroup_file_kn_lock);
3797 }
3798
3799 return 0;
3800}
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812static int cgroup_addrm_files(struct cgroup_subsys_state *css,
3813 struct cgroup *cgrp, struct cftype cfts[],
3814 bool is_add)
3815{
3816 struct cftype *cft, *cft_end = NULL;
3817 int ret = 0;
3818
3819 lockdep_assert_held(&cgroup_mutex);
3820
3821restart:
3822 for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
3823
3824 if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
3825 continue;
3826 if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
3827 continue;
3828 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
3829 continue;
3830 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
3831 continue;
3832 if ((cft->flags & CFTYPE_DEBUG) && !cgroup_debug)
3833 continue;
3834 if (is_add) {
3835 ret = cgroup_add_file(css, cgrp, cft);
3836 if (ret) {
3837 pr_warn("%s: failed to add %s, err=%d\n",
3838 __func__, cft->name, ret);
3839 cft_end = cft;
3840 is_add = false;
3841 goto restart;
3842 }
3843 } else {
3844 cgroup_rm_file(cgrp, cft);
3845 }
3846 }
3847 return ret;
3848}
3849
3850static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
3851{
3852 struct cgroup_subsys *ss = cfts[0].ss;
3853 struct cgroup *root = &ss->root->cgrp;
3854 struct cgroup_subsys_state *css;
3855 int ret = 0;
3856
3857 lockdep_assert_held(&cgroup_mutex);
3858
3859
3860 css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
3861 struct cgroup *cgrp = css->cgroup;
3862
3863 if (!(css->flags & CSS_VISIBLE))
3864 continue;
3865
3866 ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
3867 if (ret)
3868 break;
3869 }
3870
3871 if (is_add && !ret)
3872 kernfs_activate(root->kn);
3873 return ret;
3874}
3875
3876static void cgroup_exit_cftypes(struct cftype *cfts)
3877{
3878 struct cftype *cft;
3879
3880 for (cft = cfts; cft->name[0] != '\0'; cft++) {
3881
3882 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
3883 kfree(cft->kf_ops);
3884 cft->kf_ops = NULL;
3885 cft->ss = NULL;
3886
3887
3888 cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
3889 }
3890}
3891
3892static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3893{
3894 struct cftype *cft;
3895
3896 for (cft = cfts; cft->name[0] != '\0'; cft++) {
3897 struct kernfs_ops *kf_ops;
3898
3899 WARN_ON(cft->ss || cft->kf_ops);
3900
3901 if (cft->seq_start)
3902 kf_ops = &cgroup_kf_ops;
3903 else
3904 kf_ops = &cgroup_kf_single_ops;
3905
3906
3907
3908
3909
3910 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
3911 kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
3912 if (!kf_ops) {
3913 cgroup_exit_cftypes(cfts);
3914 return -ENOMEM;
3915 }
3916 kf_ops->atomic_write_len = cft->max_write_len;
3917 }
3918
3919 cft->kf_ops = kf_ops;
3920 cft->ss = ss;
3921 }
3922
3923 return 0;
3924}
3925
3926static int cgroup_rm_cftypes_locked(struct cftype *cfts)
3927{
3928 lockdep_assert_held(&cgroup_mutex);
3929
3930 if (!cfts || !cfts[0].ss)
3931 return -ENOENT;
3932
3933 list_del(&cfts->node);
3934 cgroup_apply_cftypes(cfts, false);
3935 cgroup_exit_cftypes(cfts);
3936 return 0;
3937}
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950int cgroup_rm_cftypes(struct cftype *cfts)
3951{
3952 int ret;
3953
3954 mutex_lock(&cgroup_mutex);
3955 ret = cgroup_rm_cftypes_locked(cfts);
3956 mutex_unlock(&cgroup_mutex);
3957 return ret;
3958}
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3975{
3976 int ret;
3977
3978 if (!cgroup_ssid_enabled(ss->id))
3979 return 0;
3980
3981 if (!cfts || cfts[0].name[0] == '\0')
3982 return 0;
3983
3984 ret = cgroup_init_cftypes(ss, cfts);
3985 if (ret)
3986 return ret;
3987
3988 mutex_lock(&cgroup_mutex);
3989
3990 list_add_tail(&cfts->node, &ss->cfts);
3991 ret = cgroup_apply_cftypes(cfts, true);
3992 if (ret)
3993 cgroup_rm_cftypes_locked(cfts);
3994
3995 mutex_unlock(&cgroup_mutex);
3996 return ret;
3997}
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4008{
4009 struct cftype *cft;
4010
4011 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
4012 cft->flags |= __CFTYPE_ONLY_ON_DFL;
4013 return cgroup_add_cftypes(ss, cfts);
4014}
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4025{
4026 struct cftype *cft;
4027
4028 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
4029 cft->flags |= __CFTYPE_NOT_ON_DFL;
4030 return cgroup_add_cftypes(ss, cfts);
4031}
4032
4033
4034
4035
4036
4037
4038
4039void cgroup_file_notify(struct cgroup_file *cfile)
4040{
4041 unsigned long flags;
4042
4043 spin_lock_irqsave(&cgroup_file_kn_lock, flags);
4044 if (cfile->kn) {
4045 unsigned long last = cfile->notified_at;
4046 unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
4047
4048 if (time_in_range(jiffies, last, next)) {
4049 timer_reduce(&cfile->notify_timer, next);
4050 } else {
4051 kernfs_notify(cfile->kn);
4052 cfile->notified_at = jiffies;
4053 }
4054 }
4055 spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
4056}
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
4076 struct cgroup_subsys_state *parent)
4077{
4078 struct cgroup_subsys_state *next;
4079
4080 cgroup_assert_mutex_or_rcu_locked();
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102 if (!pos) {
4103 next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
4104 } else if (likely(!(pos->flags & CSS_RELEASED))) {
4105 next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
4106 } else {
4107 list_for_each_entry_rcu(next, &parent->children, sibling)
4108 if (next->serial_nr > pos->serial_nr)
4109 break;
4110 }
4111
4112
4113
4114
4115
4116 if (&next->sibling != &parent->children)
4117 return next;
4118 return NULL;
4119}
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142struct cgroup_subsys_state *
4143css_next_descendant_pre(struct cgroup_subsys_state *pos,
4144 struct cgroup_subsys_state *root)
4145{
4146 struct cgroup_subsys_state *next;
4147
4148 cgroup_assert_mutex_or_rcu_locked();
4149
4150
4151 if (!pos)
4152 return root;
4153
4154
4155 next = css_next_child(NULL, pos);
4156 if (next)
4157 return next;
4158
4159
4160 while (pos != root) {
4161 next = css_next_child(pos, pos->parent);
4162 if (next)
4163 return next;
4164 pos = pos->parent;
4165 }
4166
4167 return NULL;
4168}
4169EXPORT_SYMBOL_GPL(css_next_descendant_pre);
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184struct cgroup_subsys_state *
4185css_rightmost_descendant(struct cgroup_subsys_state *pos)
4186{
4187 struct cgroup_subsys_state *last, *tmp;
4188
4189 cgroup_assert_mutex_or_rcu_locked();
4190
4191 do {
4192 last = pos;
4193
4194 pos = NULL;
4195 css_for_each_child(tmp, last)
4196 pos = tmp;
4197 } while (pos);
4198
4199 return last;
4200}
4201
4202static struct cgroup_subsys_state *
4203css_leftmost_descendant(struct cgroup_subsys_state *pos)
4204{
4205 struct cgroup_subsys_state *last;
4206
4207 do {
4208 last = pos;
4209 pos = css_next_child(NULL, pos);
4210 } while (pos);
4211
4212 return last;
4213}
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237struct cgroup_subsys_state *
4238css_next_descendant_post(struct cgroup_subsys_state *pos,
4239 struct cgroup_subsys_state *root)
4240{
4241 struct cgroup_subsys_state *next;
4242
4243 cgroup_assert_mutex_or_rcu_locked();
4244
4245
4246 if (!pos)
4247 return css_leftmost_descendant(root);
4248
4249
4250 if (pos == root)
4251 return NULL;
4252
4253
4254 next = css_next_child(pos, pos->parent);
4255 if (next)
4256 return css_leftmost_descendant(next);
4257
4258
4259 return pos->parent;
4260}
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270bool css_has_online_children(struct cgroup_subsys_state *css)
4271{
4272 struct cgroup_subsys_state *child;
4273 bool ret = false;
4274
4275 rcu_read_lock();
4276 css_for_each_child(child, css) {
4277 if (child->flags & CSS_ONLINE) {
4278 ret = true;
4279 break;
4280 }
4281 }
4282 rcu_read_unlock();
4283 return ret;
4284}
4285
4286static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
4287{
4288 struct list_head *l;
4289 struct cgrp_cset_link *link;
4290 struct css_set *cset;
4291
4292 lockdep_assert_held(&css_set_lock);
4293
4294
4295 if (it->tcset_pos) {
4296 l = it->tcset_pos->next;
4297
4298 if (l != it->tcset_head) {
4299 it->tcset_pos = l;
4300 return container_of(l, struct css_set,
4301 threaded_csets_node);
4302 }
4303
4304 it->tcset_pos = NULL;
4305 }
4306
4307
4308 l = it->cset_pos;
4309 l = l->next;
4310 if (l == it->cset_head) {
4311 it->cset_pos = NULL;
4312 return NULL;
4313 }
4314
4315 if (it->ss) {
4316 cset = container_of(l, struct css_set, e_cset_node[it->ss->id]);
4317 } else {
4318 link = list_entry(l, struct cgrp_cset_link, cset_link);
4319 cset = link->cset;
4320 }
4321
4322 it->cset_pos = l;
4323
4324
4325 if (it->flags & CSS_TASK_ITER_THREADED) {
4326 if (it->cur_dcset)
4327 put_css_set_locked(it->cur_dcset);
4328 it->cur_dcset = cset;
4329 get_css_set(cset);
4330
4331 it->tcset_head = &cset->threaded_csets;
4332 it->tcset_pos = &cset->threaded_csets;
4333 }
4334
4335 return cset;
4336}
4337
4338
4339
4340
4341
4342
4343
4344static void css_task_iter_advance_css_set(struct css_task_iter *it)
4345{
4346 struct css_set *cset;
4347
4348 lockdep_assert_held(&css_set_lock);
4349
4350
4351 do {
4352 cset = css_task_iter_next_css_set(it);
4353 if (!cset) {
4354 it->task_pos = NULL;
4355 return;
4356 }
4357 } while (!css_set_populated(cset));
4358
4359 if (!list_empty(&cset->tasks))
4360 it->task_pos = cset->tasks.next;
4361 else
4362 it->task_pos = cset->mg_tasks.next;
4363
4364 it->tasks_head = &cset->tasks;
4365 it->mg_tasks_head = &cset->mg_tasks;
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382 if (it->cur_cset) {
4383 list_del(&it->iters_node);
4384 put_css_set_locked(it->cur_cset);
4385 }
4386 get_css_set(cset);
4387 it->cur_cset = cset;
4388 list_add(&it->iters_node, &cset->task_iters);
4389}
4390
4391static void css_task_iter_advance(struct css_task_iter *it)
4392{
4393 struct list_head *next;
4394
4395 lockdep_assert_held(&css_set_lock);
4396repeat:
4397 if (it->task_pos) {
4398
4399
4400
4401
4402
4403 next = it->task_pos->next;
4404
4405 if (next == it->tasks_head)
4406 next = it->mg_tasks_head->next;
4407
4408 if (next == it->mg_tasks_head)
4409 css_task_iter_advance_css_set(it);
4410 else
4411 it->task_pos = next;
4412 } else {
4413
4414 css_task_iter_advance_css_set(it);
4415 }
4416
4417
4418 if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
4419 !thread_group_leader(list_entry(it->task_pos, struct task_struct,
4420 cg_list)))
4421 goto repeat;
4422}
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
4436 struct css_task_iter *it)
4437{
4438
4439 WARN_ON_ONCE(!use_task_css_set_links);
4440
4441 memset(it, 0, sizeof(*it));
4442
4443 spin_lock_irq(&css_set_lock);
4444
4445 it->ss = css->ss;
4446 it->flags = flags;
4447
4448 if (it->ss)
4449 it->cset_pos = &css->cgroup->e_csets[css->ss->id];
4450 else
4451 it->cset_pos = &css->cgroup->cset_links;
4452
4453 it->cset_head = it->cset_pos;
4454
4455 css_task_iter_advance(it);
4456
4457 spin_unlock_irq(&css_set_lock);
4458}
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468struct task_struct *css_task_iter_next(struct css_task_iter *it)
4469{
4470 if (it->cur_task) {
4471 put_task_struct(it->cur_task);
4472 it->cur_task = NULL;
4473 }
4474
4475 spin_lock_irq(&css_set_lock);
4476
4477 if (it->task_pos) {
4478 it->cur_task = list_entry(it->task_pos, struct task_struct,
4479 cg_list);
4480 get_task_struct(it->cur_task);
4481 css_task_iter_advance(it);
4482 }
4483
4484 spin_unlock_irq(&css_set_lock);
4485
4486 return it->cur_task;
4487}
4488
4489
4490
4491
4492
4493
4494
4495void css_task_iter_end(struct css_task_iter *it)
4496{
4497 if (it->cur_cset) {
4498 spin_lock_irq(&css_set_lock);
4499 list_del(&it->iters_node);
4500 put_css_set_locked(it->cur_cset);
4501 spin_unlock_irq(&css_set_lock);
4502 }
4503
4504 if (it->cur_dcset)
4505 put_css_set(it->cur_dcset);
4506
4507 if (it->cur_task)
4508 put_task_struct(it->cur_task);
4509}
4510
4511static void cgroup_procs_release(struct kernfs_open_file *of)
4512{
4513 if (of->priv) {
4514 css_task_iter_end(of->priv);
4515 kfree(of->priv);
4516 }
4517}
4518
4519static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
4520{
4521 struct kernfs_open_file *of = s->private;
4522 struct css_task_iter *it = of->priv;
4523
4524 return css_task_iter_next(it);
4525}
4526
4527static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
4528 unsigned int iter_flags)
4529{
4530 struct kernfs_open_file *of = s->private;
4531 struct cgroup *cgrp = seq_css(s)->cgroup;
4532 struct css_task_iter *it = of->priv;
4533
4534
4535
4536
4537
4538 if (!it) {
4539 if (WARN_ON_ONCE((*pos)++))
4540 return ERR_PTR(-EINVAL);
4541
4542 it = kzalloc(sizeof(*it), GFP_KERNEL);
4543 if (!it)
4544 return ERR_PTR(-ENOMEM);
4545 of->priv = it;
4546 css_task_iter_start(&cgrp->self, iter_flags, it);
4547 } else if (!(*pos)++) {
4548 css_task_iter_end(it);
4549 css_task_iter_start(&cgrp->self, iter_flags, it);
4550 }
4551
4552 return cgroup_procs_next(s, NULL, NULL);
4553}
4554
4555static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
4556{
4557 struct cgroup *cgrp = seq_css(s)->cgroup;
4558
4559
4560
4561
4562
4563
4564
4565 if (cgroup_is_threaded(cgrp))
4566 return ERR_PTR(-EOPNOTSUPP);
4567
4568 return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS |
4569 CSS_TASK_ITER_THREADED);
4570}
4571
4572static int cgroup_procs_show(struct seq_file *s, void *v)
4573{
4574 seq_printf(s, "%d\n", task_pid_vnr(v));
4575 return 0;
4576}
4577
4578static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
4579 struct cgroup *dst_cgrp,
4580 struct super_block *sb)
4581{
4582 struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
4583 struct cgroup *com_cgrp = src_cgrp;
4584 struct inode *inode;
4585 int ret;
4586
4587 lockdep_assert_held(&cgroup_mutex);
4588
4589
4590 while (!cgroup_is_descendant(dst_cgrp, com_cgrp))
4591 com_cgrp = cgroup_parent(com_cgrp);
4592
4593
4594 inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn);
4595 if (!inode)
4596 return -ENOMEM;
4597
4598 ret = inode_permission(inode, MAY_WRITE);
4599 iput(inode);
4600 if (ret)
4601 return ret;
4602
4603
4604
4605
4606
4607 if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) &&
4608 (!cgroup_is_descendant(src_cgrp, ns->root_cset->dfl_cgrp) ||
4609 !cgroup_is_descendant(dst_cgrp, ns->root_cset->dfl_cgrp)))
4610 return -ENOENT;
4611
4612 return 0;
4613}
4614
4615static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
4616 char *buf, size_t nbytes, loff_t off)
4617{
4618 struct cgroup *src_cgrp, *dst_cgrp;
4619 struct task_struct *task;
4620 ssize_t ret;
4621
4622 dst_cgrp = cgroup_kn_lock_live(of->kn, false);
4623 if (!dst_cgrp)
4624 return -ENODEV;
4625
4626 task = cgroup_procs_write_start(buf, true);
4627 ret = PTR_ERR_OR_ZERO(task);
4628 if (ret)
4629 goto out_unlock;
4630
4631
4632 spin_lock_irq(&css_set_lock);
4633 src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
4634 spin_unlock_irq(&css_set_lock);
4635
4636 ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
4637 of->file->f_path.dentry->d_sb);
4638 if (ret)
4639 goto out_finish;
4640
4641 ret = cgroup_attach_task(dst_cgrp, task, true);
4642
4643out_finish:
4644 cgroup_procs_write_finish(task);
4645out_unlock:
4646 cgroup_kn_unlock(of->kn);
4647
4648 return ret ?: nbytes;
4649}
4650
4651static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
4652{
4653 return __cgroup_procs_start(s, pos, 0);
4654}
4655
4656static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
4657 char *buf, size_t nbytes, loff_t off)
4658{
4659 struct cgroup *src_cgrp, *dst_cgrp;
4660 struct task_struct *task;
4661 ssize_t ret;
4662
4663 buf = strstrip(buf);
4664
4665 dst_cgrp = cgroup_kn_lock_live(of->kn, false);
4666 if (!dst_cgrp)
4667 return -ENODEV;
4668
4669 task = cgroup_procs_write_start(buf, false);
4670 ret = PTR_ERR_OR_ZERO(task);
4671 if (ret)
4672 goto out_unlock;
4673
4674
4675 spin_lock_irq(&css_set_lock);
4676 src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
4677 spin_unlock_irq(&css_set_lock);
4678
4679
4680 ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
4681 of->file->f_path.dentry->d_sb);
4682 if (ret)
4683 goto out_finish;
4684
4685
4686 ret = -EOPNOTSUPP;
4687 if (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp)
4688 goto out_finish;
4689
4690 ret = cgroup_attach_task(dst_cgrp, task, false);
4691
4692out_finish:
4693 cgroup_procs_write_finish(task);
4694out_unlock:
4695 cgroup_kn_unlock(of->kn);
4696
4697 return ret ?: nbytes;
4698}
4699
4700
4701static struct cftype cgroup_base_files[] = {
4702 {
4703 .name = "cgroup.type",
4704 .flags = CFTYPE_NOT_ON_ROOT,
4705 .seq_show = cgroup_type_show,
4706 .write = cgroup_type_write,
4707 },
4708 {
4709 .name = "cgroup.procs",
4710 .flags = CFTYPE_NS_DELEGATABLE,
4711 .file_offset = offsetof(struct cgroup, procs_file),
4712 .release = cgroup_procs_release,
4713 .seq_start = cgroup_procs_start,
4714 .seq_next = cgroup_procs_next,
4715 .seq_show = cgroup_procs_show,
4716 .write = cgroup_procs_write,
4717 },
4718 {
4719 .name = "cgroup.threads",
4720 .flags = CFTYPE_NS_DELEGATABLE,
4721 .release = cgroup_procs_release,
4722 .seq_start = cgroup_threads_start,
4723 .seq_next = cgroup_procs_next,
4724 .seq_show = cgroup_procs_show,
4725 .write = cgroup_threads_write,
4726 },
4727 {
4728 .name = "cgroup.controllers",
4729 .seq_show = cgroup_controllers_show,
4730 },
4731 {
4732 .name = "cgroup.subtree_control",
4733 .flags = CFTYPE_NS_DELEGATABLE,
4734 .seq_show = cgroup_subtree_control_show,
4735 .write = cgroup_subtree_control_write,
4736 },
4737 {
4738 .name = "cgroup.events",
4739 .flags = CFTYPE_NOT_ON_ROOT,
4740 .file_offset = offsetof(struct cgroup, events_file),
4741 .seq_show = cgroup_events_show,
4742 },
4743 {
4744 .name = "cgroup.max.descendants",
4745 .seq_show = cgroup_max_descendants_show,
4746 .write = cgroup_max_descendants_write,
4747 },
4748 {
4749 .name = "cgroup.max.depth",
4750 .seq_show = cgroup_max_depth_show,
4751 .write = cgroup_max_depth_write,
4752 },
4753 {
4754 .name = "cgroup.stat",
4755 .seq_show = cgroup_stat_show,
4756 },
4757 {
4758 .name = "cgroup.freeze",
4759 .flags = CFTYPE_NOT_ON_ROOT,
4760 .seq_show = cgroup_freeze_show,
4761 .write = cgroup_freeze_write,
4762 },
4763 {
4764 .name = "cpu.stat",
4765 .flags = CFTYPE_NOT_ON_ROOT,
4766 .seq_show = cpu_stat_show,
4767 },
4768#ifdef CONFIG_PSI
4769 {
4770 .name = "io.pressure",
4771 .seq_show = cgroup_io_pressure_show,
4772 .write = cgroup_io_pressure_write,
4773 .poll = cgroup_pressure_poll,
4774 .release = cgroup_pressure_release,
4775 },
4776 {
4777 .name = "memory.pressure",
4778 .seq_show = cgroup_memory_pressure_show,
4779 .write = cgroup_memory_pressure_write,
4780 .poll = cgroup_pressure_poll,
4781 .release = cgroup_pressure_release,
4782 },
4783 {
4784 .name = "cpu.pressure",
4785 .seq_show = cgroup_cpu_pressure_show,
4786 .write = cgroup_cpu_pressure_write,
4787 .poll = cgroup_pressure_poll,
4788 .release = cgroup_pressure_release,
4789 },
4790#endif
4791 { }
4792};
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816static void css_free_rwork_fn(struct work_struct *work)
4817{
4818 struct cgroup_subsys_state *css = container_of(to_rcu_work(work),
4819 struct cgroup_subsys_state, destroy_rwork);
4820 struct cgroup_subsys *ss = css->ss;
4821 struct cgroup *cgrp = css->cgroup;
4822
4823 percpu_ref_exit(&css->refcnt);
4824
4825 if (ss) {
4826
4827 struct cgroup_subsys_state *parent = css->parent;
4828 int id = css->id;
4829
4830 ss->css_free(css);
4831 cgroup_idr_remove(&ss->css_idr, id);
4832 cgroup_put(cgrp);
4833
4834 if (parent)
4835 css_put(parent);
4836 } else {
4837
4838 atomic_dec(&cgrp->root->nr_cgrps);
4839 cgroup1_pidlist_destroy_all(cgrp);
4840 cancel_work_sync(&cgrp->release_agent_work);
4841
4842 if (cgroup_parent(cgrp)) {
4843
4844
4845
4846
4847
4848
4849 cgroup_put(cgroup_parent(cgrp));
4850 kernfs_put(cgrp->kn);
4851 psi_cgroup_free(cgrp);
4852 if (cgroup_on_dfl(cgrp))
4853 cgroup_rstat_exit(cgrp);
4854 kfree(cgrp);
4855 } else {
4856
4857
4858
4859
4860
4861 cgroup_destroy_root(cgrp->root);
4862 }
4863 }
4864}
4865
4866static void css_release_work_fn(struct work_struct *work)
4867{
4868 struct cgroup_subsys_state *css =
4869 container_of(work, struct cgroup_subsys_state, destroy_work);
4870 struct cgroup_subsys *ss = css->ss;
4871 struct cgroup *cgrp = css->cgroup;
4872
4873 mutex_lock(&cgroup_mutex);
4874
4875 css->flags |= CSS_RELEASED;
4876 list_del_rcu(&css->sibling);
4877
4878 if (ss) {
4879
4880 if (!list_empty(&css->rstat_css_node)) {
4881 cgroup_rstat_flush(cgrp);
4882 list_del_rcu(&css->rstat_css_node);
4883 }
4884
4885 cgroup_idr_replace(&ss->css_idr, NULL, css->id);
4886 if (ss->css_released)
4887 ss->css_released(css);
4888 } else {
4889 struct cgroup *tcgrp;
4890
4891
4892 TRACE_CGROUP_PATH(release, cgrp);
4893
4894 if (cgroup_on_dfl(cgrp))
4895 cgroup_rstat_flush(cgrp);
4896
4897 spin_lock_irq(&css_set_lock);
4898 for (tcgrp = cgroup_parent(cgrp); tcgrp;
4899 tcgrp = cgroup_parent(tcgrp))
4900 tcgrp->nr_dying_descendants--;
4901 spin_unlock_irq(&css_set_lock);
4902
4903 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
4904 cgrp->id = -1;
4905
4906
4907
4908
4909
4910
4911
4912
4913 if (cgrp->kn)
4914 RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
4915 NULL);
4916 }
4917
4918 mutex_unlock(&cgroup_mutex);
4919
4920 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
4921 queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
4922}
4923
4924static void css_release(struct percpu_ref *ref)
4925{
4926 struct cgroup_subsys_state *css =
4927 container_of(ref, struct cgroup_subsys_state, refcnt);
4928
4929 INIT_WORK(&css->destroy_work, css_release_work_fn);
4930 queue_work(cgroup_destroy_wq, &css->destroy_work);
4931}
4932
4933static void init_and_link_css(struct cgroup_subsys_state *css,
4934 struct cgroup_subsys *ss, struct cgroup *cgrp)
4935{
4936 lockdep_assert_held(&cgroup_mutex);
4937
4938 cgroup_get_live(cgrp);
4939
4940 memset(css, 0, sizeof(*css));
4941 css->cgroup = cgrp;
4942 css->ss = ss;
4943 css->id = -1;
4944 INIT_LIST_HEAD(&css->sibling);
4945 INIT_LIST_HEAD(&css->children);
4946 INIT_LIST_HEAD(&css->rstat_css_node);
4947 css->serial_nr = css_serial_nr_next++;
4948 atomic_set(&css->online_cnt, 0);
4949
4950 if (cgroup_parent(cgrp)) {
4951 css->parent = cgroup_css(cgroup_parent(cgrp), ss);
4952 css_get(css->parent);
4953 }
4954
4955 if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush)
4956 list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
4957
4958 BUG_ON(cgroup_css(cgrp, ss));
4959}
4960
4961
4962static int online_css(struct cgroup_subsys_state *css)
4963{
4964 struct cgroup_subsys *ss = css->ss;
4965 int ret = 0;
4966
4967 lockdep_assert_held(&cgroup_mutex);
4968
4969 if (ss->css_online)
4970 ret = ss->css_online(css);
4971 if (!ret) {
4972 css->flags |= CSS_ONLINE;
4973 rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
4974
4975 atomic_inc(&css->online_cnt);
4976 if (css->parent)
4977 atomic_inc(&css->parent->online_cnt);
4978 }
4979 return ret;
4980}
4981
4982
4983static void offline_css(struct cgroup_subsys_state *css)
4984{
4985 struct cgroup_subsys *ss = css->ss;
4986
4987 lockdep_assert_held(&cgroup_mutex);
4988
4989 if (!(css->flags & CSS_ONLINE))
4990 return;
4991
4992 if (ss->css_offline)
4993 ss->css_offline(css);
4994
4995 css->flags &= ~CSS_ONLINE;
4996 RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
4997
4998 wake_up_all(&css->cgroup->offline_waitq);
4999}
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
5011 struct cgroup_subsys *ss)
5012{
5013 struct cgroup *parent = cgroup_parent(cgrp);
5014 struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
5015 struct cgroup_subsys_state *css;
5016 int err;
5017
5018 lockdep_assert_held(&cgroup_mutex);
5019
5020 css = ss->css_alloc(parent_css);
5021 if (!css)
5022 css = ERR_PTR(-ENOMEM);
5023 if (IS_ERR(css))
5024 return css;
5025
5026 init_and_link_css(css, ss, cgrp);
5027
5028 err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
5029 if (err)
5030 goto err_free_css;
5031
5032 err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
5033 if (err < 0)
5034 goto err_free_css;
5035 css->id = err;
5036
5037
5038 list_add_tail_rcu(&css->sibling, &parent_css->children);
5039 cgroup_idr_replace(&ss->css_idr, css, css->id);
5040
5041 err = online_css(css);
5042 if (err)
5043 goto err_list_del;
5044
5045 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
5046 cgroup_parent(parent)) {
5047 pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
5048 current->comm, current->pid, ss->name);
5049 if (!strcmp(ss->name, "memory"))
5050 pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n");
5051 ss->warned_broken_hierarchy = true;
5052 }
5053
5054 return css;
5055
5056err_list_del:
5057 list_del_rcu(&css->sibling);
5058err_free_css:
5059 list_del_rcu(&css->rstat_css_node);
5060 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
5061 queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
5062 return ERR_PTR(err);
5063}
5064
5065
5066
5067
5068
5069
5070static struct cgroup *cgroup_create(struct cgroup *parent)
5071{
5072 struct cgroup_root *root = parent->root;
5073 struct cgroup *cgrp, *tcgrp;
5074 int level = parent->level + 1;
5075 int ret;
5076
5077
5078 cgrp = kzalloc(struct_size(cgrp, ancestor_ids, (level + 1)),
5079 GFP_KERNEL);
5080 if (!cgrp)
5081 return ERR_PTR(-ENOMEM);
5082
5083 ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
5084 if (ret)
5085 goto out_free_cgrp;
5086
5087 if (cgroup_on_dfl(parent)) {
5088 ret = cgroup_rstat_init(cgrp);
5089 if (ret)
5090 goto out_cancel_ref;
5091 }
5092
5093
5094
5095
5096
5097 cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
5098 if (cgrp->id < 0) {
5099 ret = -ENOMEM;
5100 goto out_stat_exit;
5101 }
5102
5103 init_cgroup_housekeeping(cgrp);
5104
5105 cgrp->self.parent = &parent->self;
5106 cgrp->root = root;
5107 cgrp->level = level;
5108
5109 ret = psi_cgroup_alloc(cgrp);
5110 if (ret)
5111 goto out_idr_free;
5112
5113 ret = cgroup_bpf_inherit(cgrp);
5114 if (ret)
5115 goto out_psi_free;
5116
5117
5118
5119
5120
5121 cgrp->freezer.e_freeze = parent->freezer.e_freeze;
5122 if (cgrp->freezer.e_freeze)
5123 set_bit(CGRP_FROZEN, &cgrp->flags);
5124
5125 spin_lock_irq(&css_set_lock);
5126 for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5127 cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
5128
5129 if (tcgrp != cgrp) {
5130 tcgrp->nr_descendants++;
5131
5132
5133
5134
5135
5136
5137 if (cgrp->freezer.e_freeze)
5138 tcgrp->freezer.nr_frozen_descendants++;
5139 }
5140 }
5141 spin_unlock_irq(&css_set_lock);
5142
5143 if (notify_on_release(parent))
5144 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
5145
5146 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
5147 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
5148
5149 cgrp->self.serial_nr = css_serial_nr_next++;
5150
5151
5152 list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
5153 atomic_inc(&root->nr_cgrps);
5154 cgroup_get_live(parent);
5155
5156
5157
5158
5159
5160 cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
5161
5162
5163
5164
5165
5166 if (!cgroup_on_dfl(cgrp))
5167 cgrp->subtree_control = cgroup_control(cgrp);
5168
5169 cgroup_propagate_control(cgrp);
5170
5171 return cgrp;
5172
5173out_psi_free:
5174 psi_cgroup_free(cgrp);
5175out_idr_free:
5176 cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
5177out_stat_exit:
5178 if (cgroup_on_dfl(parent))
5179 cgroup_rstat_exit(cgrp);
5180out_cancel_ref:
5181 percpu_ref_exit(&cgrp->self.refcnt);
5182out_free_cgrp:
5183 kfree(cgrp);
5184 return ERR_PTR(ret);
5185}
5186
5187static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
5188{
5189 struct cgroup *cgroup;
5190 int ret = false;
5191 int level = 1;
5192
5193 lockdep_assert_held(&cgroup_mutex);
5194
5195 for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) {
5196 if (cgroup->nr_descendants >= cgroup->max_descendants)
5197 goto fail;
5198
5199 if (level > cgroup->max_depth)
5200 goto fail;
5201
5202 level++;
5203 }
5204
5205 ret = true;
5206fail:
5207 return ret;
5208}
5209
5210int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
5211{
5212 struct cgroup *parent, *cgrp;
5213 struct kernfs_node *kn;
5214 int ret;
5215
5216
5217 if (strchr(name, '\n'))
5218 return -EINVAL;
5219
5220 parent = cgroup_kn_lock_live(parent_kn, false);
5221 if (!parent)
5222 return -ENODEV;
5223
5224 if (!cgroup_check_hierarchy_limits(parent)) {
5225 ret = -EAGAIN;
5226 goto out_unlock;
5227 }
5228
5229 cgrp = cgroup_create(parent);
5230 if (IS_ERR(cgrp)) {
5231 ret = PTR_ERR(cgrp);
5232 goto out_unlock;
5233 }
5234
5235
5236 kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
5237 if (IS_ERR(kn)) {
5238 ret = PTR_ERR(kn);
5239 goto out_destroy;
5240 }
5241 cgrp->kn = kn;
5242
5243
5244
5245
5246
5247 kernfs_get(kn);
5248
5249 ret = cgroup_kn_set_ugid(kn);
5250 if (ret)
5251 goto out_destroy;
5252
5253 ret = css_populate_dir(&cgrp->self);
5254 if (ret)
5255 goto out_destroy;
5256
5257 ret = cgroup_apply_control_enable(cgrp);
5258 if (ret)
5259 goto out_destroy;
5260
5261 TRACE_CGROUP_PATH(mkdir, cgrp);
5262
5263
5264 kernfs_activate(kn);
5265
5266 ret = 0;
5267 goto out_unlock;
5268
5269out_destroy:
5270 cgroup_destroy_locked(cgrp);
5271out_unlock:
5272 cgroup_kn_unlock(parent_kn);
5273 return ret;
5274}
5275
5276
5277
5278
5279
5280
5281static void css_killed_work_fn(struct work_struct *work)
5282{
5283 struct cgroup_subsys_state *css =
5284 container_of(work, struct cgroup_subsys_state, destroy_work);
5285
5286 mutex_lock(&cgroup_mutex);
5287
5288 do {
5289 offline_css(css);
5290 css_put(css);
5291
5292 css = css->parent;
5293 } while (css && atomic_dec_and_test(&css->online_cnt));
5294
5295 mutex_unlock(&cgroup_mutex);
5296}
5297
5298
5299static void css_killed_ref_fn(struct percpu_ref *ref)
5300{
5301 struct cgroup_subsys_state *css =
5302 container_of(ref, struct cgroup_subsys_state, refcnt);
5303
5304 if (atomic_dec_and_test(&css->online_cnt)) {
5305 INIT_WORK(&css->destroy_work, css_killed_work_fn);
5306 queue_work(cgroup_destroy_wq, &css->destroy_work);
5307 }
5308}
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319static void kill_css(struct cgroup_subsys_state *css)
5320{
5321 lockdep_assert_held(&cgroup_mutex);
5322
5323 if (css->flags & CSS_DYING)
5324 return;
5325
5326 css->flags |= CSS_DYING;
5327
5328
5329
5330
5331
5332 css_clear_dir(css);
5333
5334
5335
5336
5337
5338 css_get(css);
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350 percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
5351}
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
5366
5367
5368
5369
5370
5371
5372
5373
5374
5375
5376
5377static int cgroup_destroy_locked(struct cgroup *cgrp)
5378 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
5379{
5380 struct cgroup *tcgrp, *parent = cgroup_parent(cgrp);
5381 struct cgroup_subsys_state *css;
5382 struct cgrp_cset_link *link;
5383 int ssid;
5384
5385 lockdep_assert_held(&cgroup_mutex);
5386
5387
5388
5389
5390
5391 if (cgroup_is_populated(cgrp))
5392 return -EBUSY;
5393
5394
5395
5396
5397
5398
5399 if (css_has_online_children(&cgrp->self))
5400 return -EBUSY;
5401
5402
5403
5404
5405
5406
5407
5408 cgrp->self.flags &= ~CSS_ONLINE;
5409
5410 spin_lock_irq(&css_set_lock);
5411 list_for_each_entry(link, &cgrp->cset_links, cset_link)
5412 link->cset->dead = true;
5413 spin_unlock_irq(&css_set_lock);
5414
5415
5416 for_each_css(css, ssid, cgrp)
5417 kill_css(css);
5418
5419
5420 css_clear_dir(&cgrp->self);
5421 kernfs_remove(cgrp->kn);
5422
5423 if (parent && cgroup_is_threaded(cgrp))
5424 parent->nr_threaded_children--;
5425
5426 spin_lock_irq(&css_set_lock);
5427 for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5428 tcgrp->nr_descendants--;
5429 tcgrp->nr_dying_descendants++;
5430
5431
5432
5433
5434 if (test_bit(CGRP_FROZEN, &cgrp->flags))
5435 tcgrp->freezer.nr_frozen_descendants--;
5436 }
5437 spin_unlock_irq(&css_set_lock);
5438
5439 cgroup1_check_for_release(parent);
5440
5441 cgroup_bpf_offline(cgrp);
5442
5443
5444 percpu_ref_kill(&cgrp->self.refcnt);
5445
5446 return 0;
5447};
5448
5449int cgroup_rmdir(struct kernfs_node *kn)
5450{
5451 struct cgroup *cgrp;
5452 int ret = 0;
5453
5454 cgrp = cgroup_kn_lock_live(kn, false);
5455 if (!cgrp)
5456 return 0;
5457
5458 ret = cgroup_destroy_locked(cgrp);
5459 if (!ret)
5460 TRACE_CGROUP_PATH(rmdir, cgrp);
5461
5462 cgroup_kn_unlock(kn);
5463 return ret;
5464}
5465
5466static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
5467 .show_options = cgroup_show_options,
5468 .remount_fs = cgroup_remount,
5469 .mkdir = cgroup_mkdir,
5470 .rmdir = cgroup_rmdir,
5471 .show_path = cgroup_show_path,
5472};
5473
5474static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
5475{
5476 struct cgroup_subsys_state *css;
5477
5478 pr_debug("Initializing cgroup subsys %s\n", ss->name);
5479
5480 mutex_lock(&cgroup_mutex);
5481
5482 idr_init(&ss->css_idr);
5483 INIT_LIST_HEAD(&ss->cfts);
5484
5485
5486 ss->root = &cgrp_dfl_root;
5487 css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
5488
5489 BUG_ON(IS_ERR(css));
5490 init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
5491
5492
5493
5494
5495
5496 css->flags |= CSS_NO_REF;
5497
5498 if (early) {
5499
5500 css->id = 1;
5501 } else {
5502 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
5503 BUG_ON(css->id < 0);
5504 }
5505
5506
5507
5508
5509
5510 init_css_set.subsys[ss->id] = css;
5511
5512 have_fork_callback |= (bool)ss->fork << ss->id;
5513 have_exit_callback |= (bool)ss->exit << ss->id;
5514 have_free_callback |= (bool)ss->free << ss->id;
5515 have_canfork_callback |= (bool)ss->can_fork << ss->id;
5516
5517
5518
5519
5520 BUG_ON(!list_empty(&init_task.tasks));
5521
5522 BUG_ON(online_css(css));
5523
5524 mutex_unlock(&cgroup_mutex);
5525}
5526
5527
5528
5529
5530
5531
5532
5533int __init cgroup_init_early(void)
5534{
5535 static struct cgroup_sb_opts __initdata opts;
5536 struct cgroup_subsys *ss;
5537 int i;
5538
5539 init_cgroup_root(&cgrp_dfl_root, &opts);
5540 cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
5541
5542 RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
5543
5544 for_each_subsys(ss, i) {
5545 WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
5546 "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
5547 i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
5548 ss->id, ss->name);
5549 WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
5550 "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);
5551
5552 ss->id = i;
5553 ss->name = cgroup_subsys_name[i];
5554 if (!ss->legacy_name)
5555 ss->legacy_name = cgroup_subsys_name[i];
5556
5557 if (ss->early_init)
5558 cgroup_init_subsys(ss, true);
5559 }
5560 return 0;
5561}
5562
5563static u16 cgroup_disable_mask __initdata;
5564
5565
5566
5567
5568
5569
5570
5571int __init cgroup_init(void)
5572{
5573 struct cgroup_subsys *ss;
5574 int ssid;
5575
5576 BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
5577 BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
5578 BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
5579 BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
5580
5581 cgroup_rstat_boot();
5582
5583
5584
5585
5586
5587 rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
5588
5589 get_user_ns(init_cgroup_ns.user_ns);
5590
5591 mutex_lock(&cgroup_mutex);
5592
5593
5594
5595
5596
5597 hash_add(css_set_table, &init_css_set.hlist,
5598 css_set_hash(init_css_set.subsys));
5599
5600 BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
5601
5602 mutex_unlock(&cgroup_mutex);
5603
5604 for_each_subsys(ss, ssid) {
5605 if (ss->early_init) {
5606 struct cgroup_subsys_state *css =
5607 init_css_set.subsys[ss->id];
5608
5609 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
5610 GFP_KERNEL);
5611 BUG_ON(css->id < 0);
5612 } else {
5613 cgroup_init_subsys(ss, false);
5614 }
5615
5616 list_add_tail(&init_css_set.e_cset_node[ssid],
5617 &cgrp_dfl_root.cgrp.e_csets[ssid]);
5618
5619
5620
5621
5622
5623
5624 if (cgroup_disable_mask & (1 << ssid)) {
5625 static_branch_disable(cgroup_subsys_enabled_key[ssid]);
5626 printk(KERN_INFO "Disabling %s control group subsystem\n",
5627 ss->name);
5628 continue;
5629 }
5630
5631 if (cgroup1_ssid_disabled(ssid))
5632 printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
5633 ss->name);
5634
5635 cgrp_dfl_root.subsys_mask |= 1 << ss->id;
5636
5637
5638 WARN_ON(ss->implicit_on_dfl && !ss->threaded);
5639
5640 if (ss->implicit_on_dfl)
5641 cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
5642 else if (!ss->dfl_cftypes)
5643 cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;
5644
5645 if (ss->threaded)
5646 cgrp_dfl_threaded_ss_mask |= 1 << ss->id;
5647
5648 if (ss->dfl_cftypes == ss->legacy_cftypes) {
5649 WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
5650 } else {
5651 WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
5652 WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
5653 }
5654
5655 if (ss->bind)
5656 ss->bind(init_css_set.subsys[ssid]);
5657
5658 mutex_lock(&cgroup_mutex);
5659 css_populate_dir(init_css_set.subsys[ssid]);
5660 mutex_unlock(&cgroup_mutex);
5661 }
5662
5663
5664 hash_del(&init_css_set.hlist);
5665 hash_add(css_set_table, &init_css_set.hlist,
5666 css_set_hash(init_css_set.subsys));
5667
5668 WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
5669 WARN_ON(register_filesystem(&cgroup_fs_type));
5670 WARN_ON(register_filesystem(&cgroup2_fs_type));
5671 WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));
5672
5673 return 0;
5674}
5675
5676static int __init cgroup_wq_init(void)
5677{
5678
5679
5680
5681
5682
5683
5684
5685
5686 cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
5687 BUG_ON(!cgroup_destroy_wq);
5688 return 0;
5689}
5690core_initcall(cgroup_wq_init);
5691
5692void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
5693 char *buf, size_t buflen)
5694{
5695 struct kernfs_node *kn;
5696
5697 kn = kernfs_get_node_by_id(cgrp_dfl_root.kf_root, id);
5698 if (!kn)
5699 return;
5700 kernfs_path(kn, buf, buflen);
5701 kernfs_put(kn);
5702}
5703
5704
5705
5706
5707
5708
5709int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
5710 struct pid *pid, struct task_struct *tsk)
5711{
5712 char *buf;
5713 int retval;
5714 struct cgroup_root *root;
5715
5716 retval = -ENOMEM;
5717 buf = kmalloc(PATH_MAX, GFP_KERNEL);
5718 if (!buf)
5719 goto out;
5720
5721 mutex_lock(&cgroup_mutex);
5722 spin_lock_irq(&css_set_lock);
5723
5724 for_each_root(root) {
5725 struct cgroup_subsys *ss;
5726 struct cgroup *cgrp;
5727 int ssid, count = 0;
5728
5729 if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
5730 continue;
5731
5732 seq_printf(m, "%d:", root->hierarchy_id);
5733 if (root != &cgrp_dfl_root)
5734 for_each_subsys(ss, ssid)
5735 if (root->subsys_mask & (1 << ssid))
5736 seq_printf(m, "%s%s", count++ ? "," : "",
5737 ss->legacy_name);
5738 if (strlen(root->name))
5739 seq_printf(m, "%sname=%s", count ? "," : "",
5740 root->name);
5741 seq_putc(m, ':');
5742
5743 cgrp = task_cgroup_from_root(tsk, root);
5744
5745
5746
5747
5748
5749
5750
5751
5752
5753
5754 if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
5755 retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
5756 current->nsproxy->cgroup_ns);
5757 if (retval >= PATH_MAX)
5758 retval = -ENAMETOOLONG;
5759 if (retval < 0)
5760 goto out_unlock;
5761
5762 seq_puts(m, buf);
5763 } else {
5764 seq_puts(m, "/");
5765 }
5766
5767 if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
5768 seq_puts(m, " (deleted)\n");
5769 else
5770 seq_putc(m, '\n');
5771 }
5772
5773 retval = 0;
5774out_unlock:
5775 spin_unlock_irq(&css_set_lock);
5776 mutex_unlock(&cgroup_mutex);
5777 kfree(buf);
5778out:
5779 return retval;
5780}
5781
5782
5783
5784
5785
5786
5787
5788
5789
5790void cgroup_fork(struct task_struct *child)
5791{
5792 RCU_INIT_POINTER(child->cgroups, &init_css_set);
5793 INIT_LIST_HEAD(&child->cg_list);
5794}
5795
5796
5797
5798
5799
5800
5801
5802
5803
5804int cgroup_can_fork(struct task_struct *child)
5805{
5806 struct cgroup_subsys *ss;
5807 int i, j, ret;
5808
5809 do_each_subsys_mask(ss, i, have_canfork_callback) {
5810 ret = ss->can_fork(child);
5811 if (ret)
5812 goto out_revert;
5813 } while_each_subsys_mask();
5814
5815 return 0;
5816
5817out_revert:
5818 for_each_subsys(ss, j) {
5819 if (j >= i)
5820 break;
5821 if (ss->cancel_fork)
5822 ss->cancel_fork(child);
5823 }
5824
5825 return ret;
5826}
5827
5828
5829
5830
5831
5832
5833
5834
5835void cgroup_cancel_fork(struct task_struct *child)
5836{
5837 struct cgroup_subsys *ss;
5838 int i;
5839
5840 for_each_subsys(ss, i)
5841 if (ss->cancel_fork)
5842 ss->cancel_fork(child);
5843}
5844
5845
5846
5847
5848
5849
5850
5851
5852
5853
5854
5855void cgroup_post_fork(struct task_struct *child)
5856{
5857 struct cgroup_subsys *ss;
5858 int i;
5859
5860
5861
5862
5863
5864
5865
5866
5867
5868
5869
5870
5871
5872
5873
5874
5875
5876
5877
5878
5879
5880
5881 if (use_task_css_set_links) {
5882 struct css_set *cset;
5883
5884 spin_lock_irq(&css_set_lock);
5885 cset = task_css_set(current);
5886 if (list_empty(&child->cg_list)) {
5887 get_css_set(cset);
5888 cset->nr_tasks++;
5889 css_set_move_task(child, NULL, cset, false);
5890 }
5891
5892
5893
5894
5895
5896
5897 if (unlikely(cgroup_task_freeze(child))) {
5898 spin_lock(&child->sighand->siglock);
5899 WARN_ON_ONCE(child->frozen);
5900 child->jobctl |= JOBCTL_TRAP_FREEZE;
5901 spin_unlock(&child->sighand->siglock);
5902
5903
5904
5905
5906
5907
5908
5909 }
5910
5911 spin_unlock_irq(&css_set_lock);
5912 }
5913
5914
5915
5916
5917
5918
5919 do_each_subsys_mask(ss, i, have_fork_callback) {
5920 ss->fork(child);
5921 } while_each_subsys_mask();
5922}
5923
5924
5925
5926
5927
5928
5929
5930
5931
5932
5933
5934
5935
5936
5937
5938
5939
5940
5941
5942
5943void cgroup_exit(struct task_struct *tsk)
5944{
5945 struct cgroup_subsys *ss;
5946 struct css_set *cset;
5947 int i;
5948
5949
5950
5951
5952
5953 cset = task_css_set(tsk);
5954
5955 if (!list_empty(&tsk->cg_list)) {
5956 spin_lock_irq(&css_set_lock);
5957 css_set_move_task(tsk, cset, NULL, false);
5958 cset->nr_tasks--;
5959
5960 WARN_ON_ONCE(cgroup_task_frozen(tsk));
5961 if (unlikely(cgroup_task_freeze(tsk)))
5962 cgroup_update_frozen(task_dfl_cgroup(tsk));
5963
5964 spin_unlock_irq(&css_set_lock);
5965 } else {
5966 get_css_set(cset);
5967 }
5968
5969
5970 do_each_subsys_mask(ss, i, have_exit_callback) {
5971 ss->exit(tsk);
5972 } while_each_subsys_mask();
5973}
5974
5975void cgroup_free(struct task_struct *task)
5976{
5977 struct css_set *cset = task_css_set(task);
5978 struct cgroup_subsys *ss;
5979 int ssid;
5980
5981 do_each_subsys_mask(ss, ssid, have_free_callback) {
5982 ss->free(task);
5983 } while_each_subsys_mask();
5984
5985 put_css_set(cset);
5986}
5987
5988static int __init cgroup_disable(char *str)
5989{
5990 struct cgroup_subsys *ss;
5991 char *token;
5992 int i;
5993
5994 while ((token = strsep(&str, ",")) != NULL) {
5995 if (!*token)
5996 continue;
5997
5998 for_each_subsys(ss, i) {
5999 if (strcmp(token, ss->name) &&
6000 strcmp(token, ss->legacy_name))
6001 continue;
6002 cgroup_disable_mask |= 1 << i;
6003 }
6004 }
6005 return 1;
6006}
6007__setup("cgroup_disable=", cgroup_disable);
6008
6009void __init __weak enable_debug_cgroup(void) { }
6010
6011static int __init enable_cgroup_debug(char *str)
6012{
6013 cgroup_debug = true;
6014 enable_debug_cgroup();
6015 return 1;
6016}
6017__setup("cgroup_debug", enable_cgroup_debug);
6018
6019
6020
6021
6022
6023
6024
6025
6026
6027
6028struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
6029 struct cgroup_subsys *ss)
6030{
6031 struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
6032 struct file_system_type *s_type = dentry->d_sb->s_type;
6033 struct cgroup_subsys_state *css = NULL;
6034 struct cgroup *cgrp;
6035
6036
6037 if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
6038 !kn || kernfs_type(kn) != KERNFS_DIR)
6039 return ERR_PTR(-EBADF);
6040
6041 rcu_read_lock();
6042
6043
6044
6045
6046
6047
6048 cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
6049 if (cgrp)
6050 css = cgroup_css(cgrp, ss);
6051
6052 if (!css || !css_tryget_online(css))
6053 css = ERR_PTR(-ENOENT);
6054
6055 rcu_read_unlock();
6056 return css;
6057}
6058
6059
6060
6061
6062
6063
6064
6065
6066
6067struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
6068{
6069 WARN_ON_ONCE(!rcu_read_lock_held());
6070 return idr_find(&ss->css_idr, id);
6071}
6072
6073
6074
6075
6076
6077
6078
6079
6080
6081
6082struct cgroup *cgroup_get_from_path(const char *path)
6083{
6084 struct kernfs_node *kn;
6085 struct cgroup *cgrp;
6086
6087 mutex_lock(&cgroup_mutex);
6088
6089 kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
6090 if (kn) {
6091 if (kernfs_type(kn) == KERNFS_DIR) {
6092 cgrp = kn->priv;
6093 cgroup_get_live(cgrp);
6094 } else {
6095 cgrp = ERR_PTR(-ENOTDIR);
6096 }
6097 kernfs_put(kn);
6098 } else {
6099 cgrp = ERR_PTR(-ENOENT);
6100 }
6101
6102 mutex_unlock(&cgroup_mutex);
6103 return cgrp;
6104}
6105EXPORT_SYMBOL_GPL(cgroup_get_from_path);
6106
6107
6108
6109
6110
6111
6112
6113
6114
6115
6116struct cgroup *cgroup_get_from_fd(int fd)
6117{
6118 struct cgroup_subsys_state *css;
6119 struct cgroup *cgrp;
6120 struct file *f;
6121
6122 f = fget_raw(fd);
6123 if (!f)
6124 return ERR_PTR(-EBADF);
6125
6126 css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
6127 fput(f);
6128 if (IS_ERR(css))
6129 return ERR_CAST(css);
6130
6131 cgrp = css->cgroup;
6132 if (!cgroup_on_dfl(cgrp)) {
6133 cgroup_put(cgrp);
6134 return ERR_PTR(-EBADF);
6135 }
6136
6137 return cgrp;
6138}
6139EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
6140
6141
6142
6143
6144
6145#ifdef CONFIG_SOCK_CGROUP_DATA
6146
6147#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
6148
6149DEFINE_SPINLOCK(cgroup_sk_update_lock);
6150static bool cgroup_sk_alloc_disabled __read_mostly;
6151
6152void cgroup_sk_alloc_disable(void)
6153{
6154 if (cgroup_sk_alloc_disabled)
6155 return;
6156 pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
6157 cgroup_sk_alloc_disabled = true;
6158}
6159
6160#else
6161
6162#define cgroup_sk_alloc_disabled false
6163
6164#endif
6165
6166void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
6167{
6168 if (cgroup_sk_alloc_disabled)
6169 return;
6170
6171
6172 if (skcd->val) {
6173
6174
6175
6176
6177
6178 cgroup_get(sock_cgroup_ptr(skcd));
6179 cgroup_bpf_get(sock_cgroup_ptr(skcd));
6180 return;
6181 }
6182
6183 rcu_read_lock();
6184
6185 while (true) {
6186 struct css_set *cset;
6187
6188 cset = task_css_set(current);
6189 if (likely(cgroup_tryget(cset->dfl_cgrp))) {
6190 skcd->val = (unsigned long)cset->dfl_cgrp;
6191 cgroup_bpf_get(cset->dfl_cgrp);
6192 break;
6193 }
6194 cpu_relax();
6195 }
6196
6197 rcu_read_unlock();
6198}
6199
6200void cgroup_sk_free(struct sock_cgroup_data *skcd)
6201{
6202 struct cgroup *cgrp = sock_cgroup_ptr(skcd);
6203
6204 cgroup_bpf_put(cgrp);
6205 cgroup_put(cgrp);
6206}
6207
6208#endif
6209
6210#ifdef CONFIG_CGROUP_BPF
6211int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
6212 enum bpf_attach_type type, u32 flags)
6213{
6214 int ret;
6215
6216 mutex_lock(&cgroup_mutex);
6217 ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
6218 mutex_unlock(&cgroup_mutex);
6219 return ret;
6220}
6221int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
6222 enum bpf_attach_type type, u32 flags)
6223{
6224 int ret;
6225
6226 mutex_lock(&cgroup_mutex);
6227 ret = __cgroup_bpf_detach(cgrp, prog, type);
6228 mutex_unlock(&cgroup_mutex);
6229 return ret;
6230}
6231int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
6232 union bpf_attr __user *uattr)
6233{
6234 int ret;
6235
6236 mutex_lock(&cgroup_mutex);
6237 ret = __cgroup_bpf_query(cgrp, attr, uattr);
6238 mutex_unlock(&cgroup_mutex);
6239 return ret;
6240}
6241#endif
6242
6243#ifdef CONFIG_SYSFS
6244static ssize_t show_delegatable_files(struct cftype *files, char *buf,
6245 ssize_t size, const char *prefix)
6246{
6247 struct cftype *cft;
6248 ssize_t ret = 0;
6249
6250 for (cft = files; cft && cft->name[0] != '\0'; cft++) {
6251 if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
6252 continue;
6253
6254 if (prefix)
6255 ret += snprintf(buf + ret, size - ret, "%s.", prefix);
6256
6257 ret += snprintf(buf + ret, size - ret, "%s\n", cft->name);
6258
6259 if (unlikely(ret >= size)) {
6260 WARN_ON(1);
6261 break;
6262 }
6263 }
6264
6265 return ret;
6266}
6267
6268static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr,
6269 char *buf)
6270{
6271 struct cgroup_subsys *ss;
6272 int ssid;
6273 ssize_t ret = 0;
6274
6275 ret = show_delegatable_files(cgroup_base_files, buf, PAGE_SIZE - ret,
6276 NULL);
6277
6278 for_each_subsys(ss, ssid)
6279 ret += show_delegatable_files(ss->dfl_cftypes, buf + ret,
6280 PAGE_SIZE - ret,
6281 cgroup_subsys_name[ssid]);
6282
6283 return ret;
6284}
6285static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate);
6286
6287static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
6288 char *buf)
6289{
6290 return snprintf(buf, PAGE_SIZE, "nsdelegate\n");
6291}
6292static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
6293
6294static struct attribute *cgroup_sysfs_attrs[] = {
6295 &cgroup_delegate_attr.attr,
6296 &cgroup_features_attr.attr,
6297 NULL,
6298};
6299
6300static const struct attribute_group cgroup_sysfs_attr_group = {
6301 .attrs = cgroup_sysfs_attrs,
6302 .name = "cgroup",
6303};
6304
6305static int __init cgroup_sysfs_init(void)
6306{
6307 return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
6308}
6309subsys_initcall(cgroup_sysfs_init);
6310#endif
6311