/*
 * kernel/cgroup/cgroup.c - core cgroup implementation shared by cgroup
 * v1 and v2: hierarchy and css_set management, mount handling, and task
 * migration.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "cgroup-internal.h"

#include <linux/cred.h>
#include <linux/errno.h>
#include <linux/init_task.h>
#include <linux/kernel.h>
#include <linux/magic.h>
#include <linux/mutex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/percpu-rwsem.h>
#include <linux/string.h>
#include <linux/hashtable.h>
#include <linux/idr.h>
#include <linux/kthread.h>
#include <linux/atomic.h>
#include <linux/cpuset.h>
#include <linux/proc_ns.h>
#include <linux/nsproxy.h>
#include <linux/file.h>
#include <linux/fs_parser.h>
#include <linux/sched/cputime.h>
#include <linux/psi.h>
#include <net/sock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cgroup.h>

#define CGROUP_FILE_NAME_MAX	(MAX_CGROUP_TYPE_NAMELEN +	\
				 MAX_CFTYPE_NAME + 2)

/* let's not notify more than 100 times per second */
#define CGROUP_FILE_NOTIFY_MIN_INTV	DIV_ROUND_UP(HZ, 100)

/*
 * cgroup_mutex is the master lock.  Any modification to cgroup or its
 * hierarchy must be performed while holding it.
 *
 * css_set_lock protects task->cgroups pointer, the list of css_set
 * objects, and the chain of tasks off each css_set.
 *
 * These locks are exported if CONFIG_PROVE_RCU so that accessors in
 * cgroup.h can use them for lockdep annotations.
 */
DEFINE_MUTEX(cgroup_mutex);
DEFINE_SPINLOCK(css_set_lock);

#ifdef CONFIG_PROVE_RCU
EXPORT_SYMBOL_GPL(cgroup_mutex);
EXPORT_SYMBOL_GPL(css_set_lock);
#endif

DEFINE_SPINLOCK(trace_cgroup_path_lock);
char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
bool cgroup_debug __read_mostly;

/*
 * Protects cgroup_idr and css_idr so that IDs can be released without
 * grabbing cgroup_mutex.
 */
static DEFINE_SPINLOCK(cgroup_idr_lock);

/*
 * Protects cgroup_file->kn for !self csses.  It synchronizes notifications
 * against file removal.
 */
static DEFINE_SPINLOCK(cgroup_file_kn_lock);

DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);

#define cgroup_assert_mutex_or_rcu_locked()				\
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
			   !lockdep_is_held(&cgroup_mutex),		\
			   "cgroup_mutex or RCU read lock required");

/*
 * cgroup destruction makes heavy use of work items and there can be a lot
 * of concurrent destructions.  Use a separate workqueue so that cgroup
 * destruction work items don't end up filling up max_active of system_wq
 * which may lead to deadlock.
 */
static struct workqueue_struct *cgroup_destroy_wq;

/* generate an array of cgroup subsystem pointers */
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
struct cgroup_subsys *cgroup_subsys[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/* array of cgroup subsystem names */
#define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
static const char *cgroup_subsys_name[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/* array of static keys for cgroup_subsys_enabled() and cgroup_subsys_on_dfl() */
#define SUBSYS(_x)							\
	DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key);		\
	DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key);		\
	EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key);		\
	EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
#include <linux/cgroup_subsys.h>
#undef SUBSYS

#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
static struct static_key_true *cgroup_subsys_enabled_key[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);

/*
 * The default hierarchy, reserved for the subsystems that are otherwise
 * unattached - it never has more than a single cgroup, and all tasks are
 * part of that cgroup.
 */
struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
EXPORT_SYMBOL_GPL(cgrp_dfl_root);

/*
 * The default hierarchy always exists but is hidden until mounted for the
 * first time.  This is for backward compatibility.
 */
static bool cgrp_dfl_visible;

/* some controllers are not supported on the default hierarchy */
static u16 cgrp_dfl_inhibit_ss_mask;

/* some controllers are implicitly enabled on the default hierarchy */
static u16 cgrp_dfl_implicit_ss_mask;

/* some controllers can be threaded on the default hierarchy */
static u16 cgrp_dfl_threaded_ss_mask;

/* The list of hierarchy roots */
LIST_HEAD(cgroup_roots);
static int cgroup_root_count;

/* hierarchy ID allocation and mapping, protected by cgroup_mutex */
static DEFINE_IDR(cgroup_hierarchy_idr);

/*
 * Assign a monotonically increasing serial number to csses.  It guarantees
 * cgroups with bigger numbers are newer than those with smaller numbers.
 * Also, as csses are always appended to the parent's ->children list, it
 * guarantees that sibling csses are always sorted in the ascending serial
 * number order on the list.  Protected by cgroup_mutex.
 */
static u64 css_serial_nr_next = 1;

/*
 * These bitmasks identify subsystems with specific features to avoid
 * calling back into each subsystem every time a feature-specific hook
 * needs to run.
 */
static u16 have_fork_callback __read_mostly;
static u16 have_exit_callback __read_mostly;
static u16 have_release_callback __read_mostly;
static u16 have_canfork_callback __read_mostly;

/* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = {
	.count		= REFCOUNT_INIT(2),
	.user_ns	= &init_user_ns,
	.ns.ops		= &cgroupns_operations,
	.ns.inum	= PROC_CGROUP_INIT_INO,
	.root_cset	= &init_css_set,
};

static struct file_system_type cgroup2_fs_type;
static struct cftype cgroup_base_files[];

static int cgroup_apply_control(struct cgroup *cgrp);
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
static void css_task_iter_skip(struct css_task_iter *it,
			       struct task_struct *task);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
					      struct cgroup_subsys *ss);
static void css_release(struct percpu_ref *ref);
static void kill_css(struct cgroup_subsys_state *css);
static int cgroup_addrm_files(struct cgroup_subsys_state *css,
			      struct cgroup *cgrp, struct cftype cfts[],
			      bool is_add);
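
/**
 * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
 * @ssid: subsys ID of interest
 *
 * cgroup_ssid_enabled() can only be called after cgroup_init_early().
 * The answer comes from the per-subsystem static key, so it stays valid
 * as controllers are bound and unbound at runtime.
 */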
bool cgroup_ssid_enabled(int ssid)
{
	if (CGROUP_SUBSYS_COUNT == 0)
		return false;

	return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
}
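
/**
 * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
 * @cgrp: the cgroup of interest
 *
 * The default hierarchy is the v2 interface of cgroup.  This function can
 * be used to test whether a cgroup is on the default hierarchy for cases
 * where a subsystem should behave differently depending on the interface
 * version.
 *
 * Among the behaviors that differ on the default hierarchy: controllers
 * are enabled explicitly through "cgroup.subtree_control", processes are
 * managed through "cgroup.procs", and cgroups with controllers enabled
 * for their children may not host processes themselves (the
 * no-internal-process constraint).
 */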
bool cgroup_on_dfl(const struct cgroup *cgrp)
{
	return cgrp->root == &cgrp_dfl_root;
}

/* IDR wrappers which synchronize using cgroup_idr_lock */
static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
			    gfp_t gfp_mask)
{
	int ret;

	idr_preload(gfp_mask);
	spin_lock_bh(&cgroup_idr_lock);
	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
	spin_unlock_bh(&cgroup_idr_lock);
	idr_preload_end();
	return ret;
}

static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
{
	void *ret;

	spin_lock_bh(&cgroup_idr_lock);
	ret = idr_replace(idr, ptr, id);
	spin_unlock_bh(&cgroup_idr_lock);
	return ret;
}

static void cgroup_idr_remove(struct idr *idr, int id)
{
	spin_lock_bh(&cgroup_idr_lock);
	idr_remove(idr, id);
	spin_unlock_bh(&cgroup_idr_lock);
}

static bool cgroup_has_tasks(struct cgroup *cgrp)
{
	return cgrp->nr_populated_csets;
}

bool cgroup_is_threaded(struct cgroup *cgrp)
{
	return cgrp->dom_cgrp != cgrp;
}

/* can @cgrp host both domain and threaded children? */
static bool cgroup_is_mixable(struct cgroup *cgrp)
{
	/*
	 * Root isn't under domain level resource control exempting it from
	 * the no-internal-process constraint, so it can serve as a thread
	 * root and a parent of resource domains at the same time.
	 */
	return !cgroup_parent(cgrp);
}

/* can @cgrp become a thread root? should always be true for a thread root */
static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
{
	/* mixables don't care */
	if (cgroup_is_mixable(cgrp))
		return true;

	/* domain roots can't be nested under threaded */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* can only have either domain or threaded children */
	if (cgrp->nr_populated_domain_children)
		return false;

	/* and no domain controllers can be enabled */
	if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
		return false;

	return true;
}

/* is @cgrp root of a threaded subtree? */
bool cgroup_is_thread_root(struct cgroup *cgrp)
{
	/* thread root should be a domain */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* a domain w/ threaded children is a thread root */
	if (cgrp->nr_threaded_children)
		return true;

	/*
	 * A domain which has tasks and explicit threaded controllers
	 * enabled is a thread root.
	 */
	if (cgroup_has_tasks(cgrp) &&
	    (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask))
		return true;

	return false;
}

/* a domain which isn't connected to the root without breakage can't be used */
static bool cgroup_is_valid_domain(struct cgroup *cgrp)
{
	/* the cgroup itself can be a thread root */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* but the ancestors can't be unless mixable */
	while ((cgrp = cgroup_parent(cgrp))) {
		if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp))
			return false;
		if (cgroup_is_threaded(cgrp))
			return false;
	}

	return true;
}

/* subsystems visibly enabled on a cgroup */
static u16 cgroup_control(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	u16 root_ss_mask = cgrp->root->subsys_mask;

	if (parent) {
		u16 ss_mask = parent->subtree_control;

		/* threaded cgroups can only have threaded controllers */
		if (cgroup_is_threaded(cgrp))
			ss_mask &= cgrp_dfl_threaded_ss_mask;
		return ss_mask;
	}

	if (cgroup_on_dfl(cgrp))
		root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
				  cgrp_dfl_implicit_ss_mask);
	return root_ss_mask;
}

/* subsystems enabled on a cgroup */
static u16 cgroup_ss_mask(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);

	if (parent) {
		u16 ss_mask = parent->subtree_ss_mask;

		/* threaded cgroups can only have threaded controllers */
		if (cgroup_is_threaded(cgrp))
			ss_mask &= cgrp_dfl_threaded_ss_mask;
		return ss_mask;
	}

	return cgrp->root->subsys_mask;
}
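
/**
 * cgroup_css - obtain a cgroup's css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
 *
 * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
 * function must be called either under cgroup_mutex or rcu_read_lock() and
 * the caller is responsible for pinning the returned css if it wants to
 * keep accessing it outside the said locks.  This function may return
 * %NULL if @cgrp doesn't have @ss enabled.
 */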
static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
					      struct cgroup_subsys *ss)
{
	if (ss)
		return rcu_dereference_check(cgrp->subsys[ss->id],
					lockdep_is_held(&cgroup_mutex));
	else
		return &cgrp->self;
}
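
/**
 * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get @cgrp's css associated with @ss.  If the css doesn't exist
 * or is offline, %NULL is returned.
 */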
static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
						     struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();
	css = cgroup_css(cgrp, ss);
	if (!css || !css_tryget_online(css))
		css = NULL;
	rcu_read_unlock();

	return css;
}
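
/**
 * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
 *
 * Similar to cgroup_css() but returns the effective css, which is defined
 * as the matching css of the nearest ancestor including self which has @ss
 * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
 * function is guaranteed to return non-NULL css.
 */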
static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
							struct cgroup_subsys *ss)
{
	lockdep_assert_held(&cgroup_mutex);

	if (!ss)
		return &cgrp->self;

	/*
	 * This function is used while updating css associations and thus
	 * can't test the csses directly.  Test ss_mask.
	 */
	while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
		cgrp = cgroup_parent(cgrp);
		if (!cgrp)
			return NULL;
	}

	return cgroup_css(cgrp, ss);
}
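
/**
 * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get the effective css of @cgrp for @ss.  The effective css is
 * defined as the matching css of the nearest ancestor including self which
 * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
 * the root css is returned, so this function always returns a valid css.
 *
 * The returned css is not pinned and is only valid while @cgrp is online.
 */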
struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
					 struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	do {
		css = cgroup_css(cgrp, ss);

		if (css)
			return css;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);

	return init_css_set.subsys[ss->id];
}
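
/**
 * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get the effective css of @cgrp for @ss.  The effective css is
 * defined as the matching css of the nearest ancestor including self which
 * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
 * the root css is returned, so this function always returns a valid css.
 * The returned css must be put using css_put().
 */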
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
					     struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();

	do {
		css = cgroup_css(cgrp, ss);

		if (css && css_tryget_online(css))
			goto out_unlock;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);

	css = init_css_set.subsys[ss->id];
	css_get(css);
out_unlock:
	rcu_read_unlock();
	return css;
}

static void cgroup_get_live(struct cgroup *cgrp)
{
	WARN_ON_ONCE(cgroup_is_dead(cgrp));
	css_get(&cgrp->self);
}
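
/**
 * __cgroup_task_count - count the number of tasks in a cgroup. The caller
 * has to hold the css_set_lock.
 * @cgrp: the cgroup in question
 */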
int __cgroup_task_count(const struct cgroup *cgrp)
{
	int count = 0;
	struct cgrp_cset_link *link;

	lockdep_assert_held(&css_set_lock);

	list_for_each_entry(link, &cgrp->cset_links, cset_link)
		count += link->cset->nr_tasks;

	return count;
}

/**
 * cgroup_task_count - count the number of tasks in a cgroup.
 * @cgrp: the cgroup in question
 */
int cgroup_task_count(const struct cgroup *cgrp)
{
	int count;

	spin_lock_irq(&css_set_lock);
	count = __cgroup_task_count(cgrp);
	spin_unlock_irq(&css_set_lock);

	return count;
}

struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
{
	struct cgroup *cgrp = of->kn->parent->priv;
	struct cftype *cft = of_cft(of);

	/*
	 * This is open and unprotected implementation of cgroup_css().
	 * seq_css() is only called from a kernfs file operation which takes
	 * an active reference of @of->kn (which ensures @of->kn is staying
	 * alive) and thus can be used from any context.  The file operation
	 * must be feeding in a live css and @of->kn can't be removed while
	 * this function is in progress.
	 */
	if (cft->ss)
		return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
	else
		return &cgrp->self;
}
EXPORT_SYMBOL_GPL(of_css);

/**
 * for_each_css - iterate all css's of a cgroup
 * @css: the iteration cursor
 * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
 * @cgrp: the target cgroup to iterate css's of
 *
 * Should be called under cgroup_mutex.
 */
#define for_each_css(css, ssid, cgrp)					\
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
		if (!((css) = rcu_dereference_check(			\
				(cgrp)->subsys[(ssid)],			\
				lockdep_is_held(&cgroup_mutex)))) { }	\
		else

/**
 * for_each_e_css - iterate all effective css's of a cgroup
 * @css: the iteration cursor
 * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
 * @cgrp: the target cgroup to iterate css's of
 *
 * Should be called under cgroup_mutex.
 */
#define for_each_e_css(css, ssid, cgrp)					    \
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	    \
		if (!((css) = cgroup_e_css_by_mask(cgrp,		    \
						   cgroup_subsys[(ssid)]))) \
			;						    \
		else

/**
 * do_each_subsys_mask - filter for_each_subsys with a bitmask
 * @ss: the iteration cursor
 * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
 * @ss_mask: the bitmask
 *
 * The block will only run for cases where the ssid-th bit (1 << ssid) of
 * @ss_mask is set.
 */
#define do_each_subsys_mask(ss, ssid, ss_mask) do {			\
	unsigned long __ss_mask = (ss_mask);				\
	if (!CGROUP_SUBSYS_COUNT) {					\
		(ssid) = 0;						\
		break;							\
	}								\
	for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) {	\
		(ss) = cgroup_subsys[ssid];				\
		{

#define while_each_subsys_mask()					\
		}							\
	}								\
} while (false)

/* iterate over child cgrps, lock should be held throughout iteration */
#define cgroup_for_each_live_child(child, cgrp)				\
	list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       cgroup_is_dead(child); }))			\
			;						\
		else

/* walk live descendants in preorder */
#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)		\
	css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))	\
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       (dsct) = (d_css)->cgroup;			\
		       cgroup_is_dead(dsct); }))			\
			;						\
		else

/* walk live descendants in postorder */
#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp)	\
	css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       (dsct) = (d_css)->cgroup;			\
		       cgroup_is_dead(dsct); }))			\
			;						\
		else

/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted.  It contains a pointer to the root state
 * for each registered subsystem and also anchors the list of css_sets.
 */
struct css_set init_css_set = {
	.refcount		= REFCOUNT_INIT(1),
	.dom_cset		= &init_css_set,
	.tasks			= LIST_HEAD_INIT(init_css_set.tasks),
	.mg_tasks		= LIST_HEAD_INIT(init_css_set.mg_tasks),
	.dying_tasks		= LIST_HEAD_INIT(init_css_set.dying_tasks),
	.task_iters		= LIST_HEAD_INIT(init_css_set.task_iters),
	.threaded_csets		= LIST_HEAD_INIT(init_css_set.threaded_csets),
	.cgrp_links		= LIST_HEAD_INIT(init_css_set.cgrp_links),
	.mg_preload_node	= LIST_HEAD_INIT(init_css_set.mg_preload_node),
	.mg_node		= LIST_HEAD_INIT(init_css_set.mg_node),

	/*
	 * The following field is re-initialized when this cset gets linked
	 * in cgroup_init().  However, let's initialize the field
	 * statically too so that the default cgroup can be accessed safely
	 * early during boot.
	 */
	.dfl_cgrp		= &cgrp_dfl_root.cgrp,
};

static int css_set_count	= 1;	/* 1 for init_css_set */

static bool css_set_threaded(struct css_set *cset)
{
	return cset->dom_cset != cset;
}

/**
 * css_set_populated - does a css_set contain any tasks?
 * @cset: target css_set
 *
 * css_set_populated() should be the same as !!cset->nr_tasks at steady
 * state.  However, css_set_populated() can be called while a task is being
 * added to or removed from the linked list before the nr_tasks is
 * properly updated.  Hence, it can't be used reliably during such a time.
 */
static bool css_set_populated(struct css_set *cset)
{
	lockdep_assert_held(&css_set_lock);

	return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks);
}

/**
 * cgroup_update_populated - update the populated count of a cgroup
 * @cgrp: the target cgroup
 * @populated: inc or dec populated count
 *
 * One of the css_sets associated with @cgrp is either getting its first
 * task or losing the last.  Update @cgrp->nr_populated_* accordingly.  The
 * count is propagated towards root so that a given cgroup's
 * nr_populated_children is zero iff none of its descendants contain any
 * tasks.
 *
 * @cgrp's interface file "cgroup.events" reports "populated" as zero if
 * both @cgrp->nr_populated_csets and @cgrp->nr_populated_children are
 * zero and 1 otherwise.  When the value changes, userland is notified
 * that the content of the interface file has changed.  This can be used
 * to detect when @cgrp and its descendants become populated or empty.
 */
static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
{
	struct cgroup *child = NULL;
	int adj = populated ? 1 : -1;

	lockdep_assert_held(&css_set_lock);

	do {
		bool was_populated = cgroup_is_populated(cgrp);

		if (!child) {
			cgrp->nr_populated_csets += adj;
		} else {
			if (cgroup_is_threaded(child))
				cgrp->nr_populated_threaded_children += adj;
			else
				cgrp->nr_populated_domain_children += adj;
		}

		if (was_populated == cgroup_is_populated(cgrp))
			break;

		cgroup1_check_for_release(cgrp);
		TRACE_CGROUP_PATH(notify_populated, cgrp,
				  cgroup_is_populated(cgrp));
		cgroup_file_notify(&cgrp->events_file);

		child = cgrp;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);
}

/**
 * css_set_update_populated - update populated state of a css_set
 * @cset: target css_set
 * @populated: whether @cset is populated or depopulated
 *
 * @cset is either getting the first task or losing the last.  Update the
 * populated counters of all associated cgroups accordingly.
 */
static void css_set_update_populated(struct css_set *cset, bool populated)
{
	struct cgrp_cset_link *link;

	lockdep_assert_held(&css_set_lock);

	list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
		cgroup_update_populated(link->cgrp, populated);
}

/*
 * @task is leaving, advance task iterators which are pointing to it so
 * that they can resume at the next position.  Advancing an iterator might
 * remove it from the list, use safe walk.  See css_task_iter_skip() for
 * details.
 */
static void css_set_skip_task_iters(struct css_set *cset,
				    struct task_struct *task)
{
	struct css_task_iter *it, *pos;

	list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
		css_task_iter_skip(it, task);
}
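
/**
 * css_set_move_task - move a task from one css_set to another
 * @task: task being moved
 * @from_cset: css_set @task currently belongs to (may be NULL)
 * @to_cset: new css_set @task is being moved to (may be NULL)
 * @use_mg_tasks: move to @to_cset->mg_tasks instead of ->tasks
 *
 * Move @task from @from_cset to @to_cset.  If @task didn't belong to any
 * css_set, @from_cset can be NULL.  If @task is being disassociated
 * instead of moved, @to_cset can be NULL.
 *
 * This function automatically handles populated counter updates and
 * css_task_iter adjustments but the caller is responsible for managing
 * @from_cset and @to_cset's reference counts.
 */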
static void css_set_move_task(struct task_struct *task,
			      struct css_set *from_cset, struct css_set *to_cset,
			      bool use_mg_tasks)
{
	lockdep_assert_held(&css_set_lock);

	if (to_cset && !css_set_populated(to_cset))
		css_set_update_populated(to_cset, true);

	if (from_cset) {
		WARN_ON_ONCE(list_empty(&task->cg_list));

		css_set_skip_task_iters(from_cset, task);
		list_del_init(&task->cg_list);
		if (!css_set_populated(from_cset))
			css_set_update_populated(from_cset, false);
	} else {
		WARN_ON_ONCE(!list_empty(&task->cg_list));
	}

	if (to_cset) {
		/*
		 * We are synchronized through cgroup_threadgroup_rwsem
		 * against PF_EXITING setting such that we can't race
		 * against cgroup_exit()/cgroup_free() dropping the css_set.
		 */
		WARN_ON_ONCE(task->flags & PF_EXITING);

		cgroup_move_task(task, to_cset);
		list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
							     &to_cset->tasks);
	}
}

/*
 * hash table for cgroup groups.  This improves the performance to find
 * an existing css_set.  This hash doesn't (currently) take into
 * account cgroups in empty hierarchies.
 */
#define CSS_SET_HASH_BITS	7
static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);

static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
{
	unsigned long key = 0UL;
	struct cgroup_subsys *ss;
	int i;

	for_each_subsys(ss, i)
		key += (unsigned long)css[i];
	key = (key >> 16) ^ key;

	return key;
}

void put_css_set_locked(struct css_set *cset)
{
	struct cgrp_cset_link *link, *tmp_link;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&css_set_lock);

	if (!refcount_dec_and_test(&cset->refcount))
		return;

	WARN_ON_ONCE(!list_empty(&cset->threaded_csets));

	/* This css_set is dead.  Unlink it and release cgroup and css refs */
	for_each_subsys(ss, ssid) {
		list_del(&cset->e_cset_node[ssid]);
		css_put(cset->subsys[ssid]);
	}
	hash_del(&cset->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		if (cgroup_parent(link->cgrp))
			cgroup_put(link->cgrp);
		kfree(link);
	}

	if (css_set_threaded(cset)) {
		list_del(&cset->threaded_csets_node);
		put_css_set_locked(cset->dom_cset);
	}

	kfree_rcu(cset, rcu_head);
}
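
/**
 * compare_css_sets - helper function for find_existing_css_set().
 * @cset: candidate css_set being tested
 * @old_cset: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if "cset" matches "old_cset" except for the hierarchy
 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
 */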
static bool compare_css_sets(struct css_set *cset,
			     struct css_set *old_cset,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct cgroup *new_dfl_cgrp;
	struct list_head *l1, *l2;

	/*
	 * On the default hierarchy, there can be csets which are
	 * associated with the same set of cgroups but different csses.
	 * Let's first ensure that csses match.
	 */
	if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
		return false;

	/* @cset's domain should match the default cgroup's */
	if (cgroup_on_dfl(new_cgrp))
		new_dfl_cgrp = new_cgrp;
	else
		new_dfl_cgrp = old_cset->dfl_cgrp;

	if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp)
		return false;

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies.  As different cgroups may
	 * share the same effective css, this comparison is always
	 * necessary.
	 */
	l1 = &cset->cgrp_links;
	l2 = &old_cset->cgrp_links;
	while (1) {
		struct cgrp_cset_link *link1, *link2;
		struct cgroup *cgrp1, *cgrp2;

		l1 = l1->next;
		l2 = l2->next;
		/* See if we reached the end - both lists are equal length */
		if (l1 == &cset->cgrp_links) {
			BUG_ON(l2 != &old_cset->cgrp_links);
			break;
		} else {
			BUG_ON(l2 == &old_cset->cgrp_links);
		}
		/* Locate the cgroups associated with these links */
		link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
		link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
		cgrp1 = link1->cgrp;
		cgrp2 = link2->cgrp;
		/* Hierarchies should be linked in the same order */
		BUG_ON(cgrp1->root != cgrp2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cgrp1->root == new_cgrp->root) {
			if (cgrp1 != new_cgrp)
				return false;
		} else {
			if (cgrp1 != cgrp2)
				return false;
		}
	}
	return true;
}

/**
 * find_existing_css_set - init css array and find the matching css_set
 * @old_cset: the css_set that we're using before the cgroup transition
 * @cgrp: the cgroup that we're moving into
 * @template: out param for the new set of csses, should be clear on entry
 */
static struct css_set *find_existing_css_set(struct css_set *old_cset,
					     struct cgroup *cgrp,
					     struct cgroup_subsys_state *template[])
{
	struct cgroup_root *root = cgrp->root;
	struct cgroup_subsys *ss;
	struct css_set *cset;
	unsigned long key;
	int i;

	/*
	 * Build the set of subsystem state objects that we want to see in
	 * the new css_set.  While subsystems can change globally, the
	 * entries here won't change, so no need for locking.
	 */
	for_each_subsys(ss, i) {
		if (root->subsys_mask & (1UL << i)) {
			/*
			 * @ss is in this hierarchy, so we want the
			 * effective css from @cgrp.
			 */
			template[i] = cgroup_e_css_by_mask(cgrp, ss);
		} else {
			/*
			 * @ss is not in this hierarchy, so we don't want
			 * to change the css.
			 */
			template[i] = old_cset->subsys[i];
		}
	}

	key = css_set_hash(template);
	hash_for_each_possible(css_set_table, cset, hlist, key) {
		if (!compare_css_sets(cset, old_cset, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cset;
	}

	/* No existing cgroup group matched */
	return NULL;
}

static void free_cgrp_cset_links(struct list_head *links_to_free)
{
	struct cgrp_cset_link *link, *tmp_link;

	list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
		list_del(&link->cset_link);
		kfree(link);
	}
}

/**
 * allocate_cgrp_cset_links - allocate cgrp_cset_links
 * @count: the number of links to allocate
 * @tmp_links: list_head the allocated links are put on
 *
 * Allocate @count cgrp_cset_link structures and chain them on @tmp_links
 * through ->cset_link.  Returns 0 on success or -errno.
 */
static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
{
	struct cgrp_cset_link *link;
	int i;

	INIT_LIST_HEAD(tmp_links);

	for (i = 0; i < count; i++) {
		link = kzalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			free_cgrp_cset_links(tmp_links);
			return -ENOMEM;
		}
		list_add(&link->cset_link, tmp_links);
	}
	return 0;
}

/**
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
 * @cset: the css_set to be linked
 * @cgrp: the destination cgroup
 */
static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
			 struct cgroup *cgrp)
{
	struct cgrp_cset_link *link;

	BUG_ON(list_empty(tmp_links));

	if (cgroup_on_dfl(cgrp))
		cset->dfl_cgrp = cgrp;

	link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
	link->cset = cset;
	link->cgrp = cgrp;

	/*
	 * Always add links to the tail of the lists so that the lists are
	 * in chronological order.
	 */
	list_move_tail(&link->cset_link, &cgrp->cset_links);
	list_add_tail(&link->cgrp_link, &cset->cgrp_links);

	if (cgroup_parent(cgrp))
		cgroup_get_live(cgrp);
}
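
/**
 * find_css_set - return a new css_set with one cgroup updated
 * @old_cset: the baseline css_set
 * @cgrp: the cgroup to be updated
 *
 * Return a new css_set that's equivalent to @old_cset, but with @cgrp
 * substituted into the appropriate hierarchy.
 */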
static struct css_set *find_css_set(struct css_set *old_cset,
				    struct cgroup *cgrp)
{
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
	struct css_set *cset;
	struct list_head tmp_links;
	struct cgrp_cset_link *link;
	struct cgroup_subsys *ss;
	unsigned long key;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	/* First see if we already have a cgroup group that matches
	 * the desired set */
	spin_lock_irq(&css_set_lock);
	cset = find_existing_css_set(old_cset, cgrp, template);
	if (cset)
		get_css_set(cset);
	spin_unlock_irq(&css_set_lock);

	if (cset)
		return cset;

	cset = kzalloc(sizeof(*cset), GFP_KERNEL);
	if (!cset)
		return NULL;

	/* Allocate all the cgrp_cset_link objects that we'll need */
	if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
		kfree(cset);
		return NULL;
	}

	refcount_set(&cset->refcount, 1);
	cset->dom_cset = cset;
	INIT_LIST_HEAD(&cset->tasks);
	INIT_LIST_HEAD(&cset->mg_tasks);
	INIT_LIST_HEAD(&cset->dying_tasks);
	INIT_LIST_HEAD(&cset->task_iters);
	INIT_LIST_HEAD(&cset->threaded_csets);
	INIT_HLIST_NODE(&cset->hlist);
	INIT_LIST_HEAD(&cset->cgrp_links);
	INIT_LIST_HEAD(&cset->mg_preload_node);
	INIT_LIST_HEAD(&cset->mg_node);

	/* Copy the set of subsystem state objects generated in
	 * find_existing_css_set() */
	memcpy(cset->subsys, template, sizeof(cset->subsys));

	spin_lock_irq(&css_set_lock);
	/* Add reference counts and links from the new css_set */
	list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
		struct cgroup *c = link->cgrp;

		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_links, cset, c);
	}

	BUG_ON(!list_empty(&tmp_links));

	css_set_count++;

	/* Add @cset to the hash table */
	key = css_set_hash(cset->subsys);
	hash_add(css_set_table, &cset->hlist, key);

	for_each_subsys(ss, ssid) {
		struct cgroup_subsys_state *css = cset->subsys[ssid];

		list_add_tail(&cset->e_cset_node[ssid],
			      &css->cgroup->e_csets[ssid]);
		css_get(css);
	}

	spin_unlock_irq(&css_set_lock);

	/*
	 * If @cset should be threaded, look up the matching dom_cset and
	 * link them up.  We first fully initialize @cset then look for the
	 * dom_cset.  It's simpler this way and safe as @cset is guaranteed
	 * to stay empty until all tasks are moved in.
	 */
	if (cgroup_is_threaded(cset->dfl_cgrp)) {
		struct css_set *dcset;

		dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp);
		if (!dcset) {
			put_css_set(cset);
			return NULL;
		}

		spin_lock_irq(&css_set_lock);
		cset->dom_cset = dcset;
		list_add_tail(&cset->threaded_csets_node,
			      &dcset->threaded_csets);
		spin_unlock_irq(&css_set_lock);
	}

	return cset;
}

struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
{
	struct cgroup *root_cgrp = kf_root->kn->priv;

	return root_cgrp->root;
}

static int cgroup_init_root_id(struct cgroup_root *root)
{
	int id;

	lockdep_assert_held(&cgroup_mutex);

	id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
	if (id < 0)
		return id;

	root->hierarchy_id = id;
	return 0;
}

static void cgroup_exit_root_id(struct cgroup_root *root)
{
	lockdep_assert_held(&cgroup_mutex);

	idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
}

void cgroup_free_root(struct cgroup_root *root)
{
	if (root) {
		idr_destroy(&root->cgroup_idr);
		kfree(root);
	}
}

static void cgroup_destroy_root(struct cgroup_root *root)
{
	struct cgroup *cgrp = &root->cgrp;
	struct cgrp_cset_link *link, *tmp_link;

	trace_cgroup_destroy_root(root);

	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

	BUG_ON(atomic_read(&root->nr_cgrps));
	BUG_ON(!list_empty(&cgrp->self.children));

	/* Rebind all subsystems back to the default hierarchy */
	WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));

	/*
	 * Release all the links from cset_links to this hierarchy's
	 * root cgroup
	 */
	spin_lock_irq(&css_set_lock);

	list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		kfree(link);
	}

	spin_unlock_irq(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		cgroup_root_count--;
	}

	cgroup_exit_root_id(root);

	mutex_unlock(&cgroup_mutex);

	kernfs_destroy_root(root->kf_root);
	cgroup_free_root(root);
}

/*
 * look up cgroup associated with current task's cgroup namespace on the
 * specified hierarchy
 */
static struct cgroup *
current_cgns_cgroup_from_root(struct cgroup_root *root)
{
	struct cgroup *res = NULL;
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	rcu_read_lock();

	cset = current->nsproxy->cgroup_ns->root_cset;
	if (cset == &init_css_set) {
		res = &root->cgrp;
	} else {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	rcu_read_unlock();

	BUG_ON(!res);
	return res;
}

/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
					    struct cgroup_root *root)
{
	struct cgroup *res = NULL;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&css_set_lock);

	if (cset == &init_css_set) {
		res = &root->cgrp;
	} else if (root == &cgrp_dfl_root) {
		res = cset->dfl_cgrp;
	} else {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}

	BUG_ON(!res);
	return res;
}

/*
 * Return the cgroup for "task" from the given hierarchy.  Must be
 * called with cgroup_mutex and css_set_lock held.
 */
struct cgroup *task_cgroup_from_root(struct task_struct *task,
				     struct cgroup_root *root)
{
	/*
	 * No need to lock the task - since we hold css_set_lock the
	 * task can't change groups.
	 */
	return cset_cgroup_from_root(task_css_set(task), root);
}

/*
 * A task must hold cgroup_mutex to modify cgroups.
 *
 * Any task can increment and decrement the count field without lock.
 * So in general, code holding cgroup_mutex can't rely on the count
 * field not changing.  However, if the count goes to zero, then only
 * cgroup_attach_task() can increment it again.  Because a count of zero
 * means that no tasks are currently attached, therefore there is no
 * way a task attached to that cgroup can fork (the other way to
 * increment the count).  So code holding cgroup_mutex can safely
 * assume that if the count is zero, it will stay zero.  Similarly, if
 * a task holds cgroup_mutex on a cgroup with zero count, it
 * knows that the cgroup won't be removed, as cgroup_rmdir()
 * needs that mutex.
 *
 * A cgroup can only be deleted if both its 'count' of using tasks
 * is zero, and its list of 'children' cgroups is empty.  Since all
 * tasks in the system use _some_ cgroup, and since there is always at
 * least one task in the system (init, pid == 1), therefore, the top
 * cgroup always has either children cgroups and/or using tasks.  So
 * we don't need a special hack to ensure that it cannot be deleted.
 *
 * P.S.  One more locking exception.  RCU is used to guard the
 * update of a tasks cgroup pointer by cgroup_attach_task()
 */

static struct kernfs_syscall_ops cgroup_kf_syscall_ops;

static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
			      char *buf)
{
	struct cgroup_subsys *ss = cft->ss;

	if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
	    !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
		const char *dbg = (cft->flags & CFTYPE_DEBUG) ? ".__DEBUG__." : "";

		snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s",
			 dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
			 cft->name);
	} else {
		strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
	}
	return buf;
}

/**
 * cgroup_file_mode - deduce file mode of a control file
 * @cft: the control file in question
 *
 * S_IRUGO for read, S_IWUSR for write.
 */
static umode_t cgroup_file_mode(const struct cftype *cft)
{
	umode_t mode = 0;

	if (cft->read_u64 || cft->read_s64 || cft->seq_show)
		mode |= S_IRUGO;

	if (cft->write_u64 || cft->write_s64 || cft->write) {
		if (cft->flags & CFTYPE_WORLD_WRITABLE)
			mode |= S_IWUGO;
		else
			mode |= S_IWUSR;
	}

	return mode;
}
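
/**
 * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask
 * @subtree_control: the new subtree_control mask to consider
 * @this_ss_mask: available subsystems
 *
 * On the default hierarchy, a subsystem may request other subsystems to be
 * enabled together through its ->depends_on mask.  In such cases, more
 * subsystems than specified in "cgroup.subtree_control" may be enabled.
 *
 * This function calculates which subsystems need to be enabled if
 * @subtree_control is to be applied while restricted to @this_ss_mask.
 */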
static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
{
	u16 cur_ss_mask = subtree_control;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	cur_ss_mask |= cgrp_dfl_implicit_ss_mask;

	while (true) {
		u16 new_ss_mask = cur_ss_mask;

		do_each_subsys_mask(ss, ssid, cur_ss_mask) {
			new_ss_mask |= ss->depends_on;
		} while_each_subsys_mask();

		/*
		 * Mask out subsystems which aren't available.  This can
		 * happen only if some depended-upon subsystems were bound
		 * to non-default hierarchies.
		 */
		new_ss_mask &= this_ss_mask;

		if (new_ss_mask == cur_ss_mask)
			break;
		cur_ss_mask = new_ss_mask;
	}

	return cur_ss_mask;
}

/**
 * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
 * @kn: the kernfs_node being serviced
 *
 * This helper undoes cgroup_kn_lock_live() and should be invoked before
 * the method finishes if locking succeeded.  Note that once this function
 * returns the cgroup returned by cgroup_kn_lock_live() may become
 * inaccessible any time.  If the caller intends to continue to access the
 * cgroup, it should pin it before invoking this function.
 */
void cgroup_kn_unlock(struct kernfs_node *kn)
{
	struct cgroup *cgrp;

	if (kernfs_type(kn) == KERNFS_DIR)
		cgrp = kn->priv;
	else
		cgrp = kn->parent->priv;

	mutex_unlock(&cgroup_mutex);

	kernfs_unbreak_active_protection(kn);
	cgroup_put(cgrp);
}
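
/**
 * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
 * @kn: the kernfs_node being serviced
 * @drain_offline: perform offline draining on the cgroup
 *
 * This helper is to be used by a cgroup kernfs method currently servicing
 * @kn.  It breaks the active protection, performs cgroup locking and
 * verifies that the associated cgroup is alive.  Returns the cgroup if
 * alive; otherwise, %NULL.  A successful return should be undone by a
 * matching cgroup_kn_unlock() invocation.  If @drain_offline is %true, the
 * cgroup is drained of offlining csses before return.
 *
 * Any cgroup kernfs method implementation which requires locking the
 * associated cgroup should use this helper.  It avoids nesting cgroup
 * locking under kernfs active protection and allows all kernfs operations
 * including self-removal.
 */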
struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
{
	struct cgroup *cgrp;

	if (kernfs_type(kn) == KERNFS_DIR)
		cgrp = kn->priv;
	else
		cgrp = kn->parent->priv;

	/*
	 * We're gonna grab cgroup_mutex which nests outside kernfs
	 * active_ref.  cgroup liveliness check alone provides enough
	 * protection against removal.  Ensure @cgrp stays accessible and
	 * break the active_ref protection.
	 */
	if (!cgroup_tryget(cgrp))
		return NULL;
	kernfs_break_active_protection(kn);

	if (drain_offline)
		cgroup_lock_and_drain_offline(cgrp);
	else
		mutex_lock(&cgroup_mutex);

	if (!cgroup_is_dead(cgrp))
		return cgrp;

	cgroup_kn_unlock(kn);
	return NULL;
}

static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
{
	char name[CGROUP_FILE_NAME_MAX];

	lockdep_assert_held(&cgroup_mutex);

	if (cft->file_offset) {
		struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
		struct cgroup_file *cfile = (void *)css + cft->file_offset;

		spin_lock_irq(&cgroup_file_kn_lock);
		cfile->kn = NULL;
		spin_unlock_irq(&cgroup_file_kn_lock);

		del_timer_sync(&cfile->notify_timer);
	}

	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
}

/**
 * css_clear_dir - remove subsys files in a cgroup directory
 * @css: target css
 */
static void css_clear_dir(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cftype *cfts;

	if (!(css->flags & CSS_VISIBLE))
		return;

	css->flags &= ~CSS_VISIBLE;

	if (!css->ss) {
		if (cgroup_on_dfl(cgrp))
			cfts = cgroup_base_files;
		else
			cfts = cgroup1_base_files;

		cgroup_addrm_files(css, cgrp, cfts, false);
	} else {
		list_for_each_entry(cfts, &css->ss->cfts, node)
			cgroup_addrm_files(css, cgrp, cfts, false);
	}
}

/**
 * css_populate_dir - create subsys files in a cgroup directory
 * @css: target css
 *
 * On failure, no file is created.
 */
static int css_populate_dir(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cftype *cfts, *failed_cfts;
	int ret;

	if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
		return 0;

	if (!css->ss) {
		if (cgroup_on_dfl(cgrp))
			cfts = cgroup_base_files;
		else
			cfts = cgroup1_base_files;

		ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
		if (ret < 0)
			return ret;
	} else {
		list_for_each_entry(cfts, &css->ss->cfts, node) {
			ret = cgroup_addrm_files(css, cgrp, cfts, true);
			if (ret < 0) {
				failed_cfts = cfts;
				goto err;
			}
		}
	}

	css->flags |= CSS_VISIBLE;

	return 0;
err:
	list_for_each_entry(cfts, &css->ss->cfts, node) {
		if (cfts == failed_cfts)
			break;
		cgroup_addrm_files(css, cgrp, cfts, false);
	}
	return ret;
}

int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
{
	struct cgroup *dcgrp = &dst_root->cgrp;
	struct cgroup_subsys *ss;
	int ssid, i, ret;

	lockdep_assert_held(&cgroup_mutex);

	do_each_subsys_mask(ss, ssid, ss_mask) {
		/*
		 * If @ss has non-root csses attached to it, can't move.
		 * If @ss is an implicit controller, it is exempt from this
		 * rule and can be stolen.
		 */
		if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
		    !ss->implicit_on_dfl)
			return -EBUSY;

		/* can't move between two non-dummy roots either */
		if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
			return -EBUSY;
	} while_each_subsys_mask();

	do_each_subsys_mask(ss, ssid, ss_mask) {
		struct cgroup_root *src_root = ss->root;
		struct cgroup *scgrp = &src_root->cgrp;
		struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
		struct css_set *cset;

		WARN_ON(!css || cgroup_css(dcgrp, ss));

		/* disable from the source */
		src_root->subsys_mask &= ~(1 << ssid);
		WARN_ON(cgroup_apply_control(scgrp));
		cgroup_finalize_control(scgrp, 0);

		/* rebind */
		RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
		rcu_assign_pointer(dcgrp->subsys[ssid], css);
		ss->root = dst_root;
		css->cgroup = dcgrp;

		spin_lock_irq(&css_set_lock);
		hash_for_each(css_set_table, i, cset, hlist)
			list_move_tail(&cset->e_cset_node[ss->id],
				       &dcgrp->e_csets[ss->id]);
		spin_unlock_irq(&css_set_lock);

		/* default hierarchy doesn't enable controllers by default */
		dst_root->subsys_mask |= 1 << ssid;
		if (dst_root == &cgrp_dfl_root) {
			static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
		} else {
			dcgrp->subtree_control |= 1 << ssid;
			static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
		}

		ret = cgroup_apply_control(dcgrp);
		if (ret)
			pr_warn("partial failure to rebind %s controller (err=%d)\n",
				ss->name, ret);

		if (ss->bind)
			ss->bind(css);
	} while_each_subsys_mask();

	kernfs_activate(dcgrp->kn);
	return 0;
}

int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
		     struct kernfs_root *kf_root)
{
	int len = 0;
	char *buf = NULL;
	struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
	struct cgroup *ns_cgroup;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	spin_lock_irq(&css_set_lock);
	ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
	len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
	spin_unlock_irq(&css_set_lock);

	if (len >= PATH_MAX)
		len = -ERANGE;
	else if (len > 0) {
		seq_escape(sf, buf, " \t\n\\");
		len = 0;
	}
	kfree(buf);
	return len;
}

enum cgroup2_param {
	Opt_nsdelegate,
	Opt_memory_localevents,
	nr__cgroup2_params
};

static const struct fs_parameter_spec cgroup2_param_specs[] = {
	fsparam_flag("nsdelegate",		Opt_nsdelegate),
	fsparam_flag("memory_localevents",	Opt_memory_localevents),
	{}
};

static const struct fs_parameter_description cgroup2_fs_parameters = {
	.name		= "cgroup2",
	.specs		= cgroup2_param_specs,
};

static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	struct fs_parse_result result;
	int opt;

	opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_nsdelegate:
		ctx->flags |= CGRP_ROOT_NS_DELEGATE;
		return 0;
	case Opt_memory_localevents:
		ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
		return 0;
	}
	return -EINVAL;
}

static void apply_cgroup_root_flags(unsigned int root_flags)
{
	if (current->nsproxy->cgroup_ns == &init_cgroup_ns) {
		if (root_flags & CGRP_ROOT_NS_DELEGATE)
			cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
		else
			cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;

		if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
			cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
		else
			cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_LOCAL_EVENTS;
	}
}

static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
{
	if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
		seq_puts(seq, ",nsdelegate");
	if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
		seq_puts(seq, ",memory_localevents");
	return 0;
}

static int cgroup_reconfigure(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);

	apply_cgroup_root_flags(ctx->flags);
	return 0;
}

/*
 * To reduce the fork() overhead for systems that are not actually using
 * their cgroups capability, we don't maintain the lists running through
 * each css_set to its tasks until we see the list actually used - in other
 * words after the first mount.
 */
static bool use_task_css_set_links __read_mostly;

static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;

	/*
	 * We need tasklist_lock because RCU is not safe against
	 * while_each_thread().  Besides, a forking task that has passed
	 * cgroup_post_fork() without seeing use_task_css_set_links = 1
	 * is not guaranteed to have its child immediately visible in the
	 * tasklist if we walk through it with RCU.
	 */
	read_lock(&tasklist_lock);
	spin_lock_irq(&css_set_lock);

	if (use_task_css_set_links)
		goto out_unlock;

	use_task_css_set_links = true;

	do_each_thread(g, p) {
		WARN_ON_ONCE(!list_empty(&p->cg_list) ||
			     task_css_set(p) != &init_css_set);

		/*
		 * We should check if the process is exiting, otherwise
		 * it will race with cgroup_exit() in that the list
		 * entry won't be deleted though the process has exited.
		 * Do it while holding siglock so that we don't end up
		 * racing against cgroup_exit().
		 *
		 * Interrupts were already disabled while acquiring
		 * the css_set_lock, so we do not need to disable it
		 * again when acquiring the sighand->siglock here.
		 */
		spin_lock(&p->sighand->siglock);
		if (!(p->flags & PF_EXITING)) {
			struct css_set *cset = task_css_set(p);

			if (!css_set_populated(cset))
				css_set_update_populated(cset, true);
			list_add_tail(&p->cg_list, &cset->tasks);
			get_css_set(cset);
			cset->nr_tasks++;
		}
		spin_unlock(&p->sighand->siglock);
	} while_each_thread(g, p);
out_unlock:
	spin_unlock_irq(&css_set_lock);
	read_unlock(&tasklist_lock);
}

static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	int ssid;

	INIT_LIST_HEAD(&cgrp->self.sibling);
	INIT_LIST_HEAD(&cgrp->self.children);
	INIT_LIST_HEAD(&cgrp->cset_links);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	cgrp->self.cgroup = cgrp;
	cgrp->self.flags |= CSS_ONLINE;
	cgrp->dom_cgrp = cgrp;
	cgrp->max_descendants = INT_MAX;
	cgrp->max_depth = INT_MAX;
	INIT_LIST_HEAD(&cgrp->rstat_css_list);
	prev_cputime_init(&cgrp->prev_cputime);

	for_each_subsys(ss, ssid)
		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);

	init_waitqueue_head(&cgrp->offline_waitq);
	INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
}

void init_cgroup_root(struct cgroup_fs_context *ctx)
{
	struct cgroup_root *root = ctx->root;
	struct cgroup *cgrp = &root->cgrp;

	INIT_LIST_HEAD(&root->root_list);
	atomic_set(&root->nr_cgrps, 1);
	cgrp->root = root;
	init_cgroup_housekeeping(cgrp);
	idr_init(&root->cgroup_idr);

	root->flags = ctx->flags;
	if (ctx->release_agent)
		strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
	if (ctx->name)
		strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
	if (ctx->cpuset_clone_children)
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
}

int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
{
	LIST_HEAD(tmp_links);
	struct cgroup *root_cgrp = &root->cgrp;
	struct kernfs_syscall_ops *kf_sops;
	struct css_set *cset;
	int i, ret;

	lockdep_assert_held(&cgroup_mutex);

	ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
	if (ret < 0)
		goto out;
	root_cgrp->id = ret;
	root_cgrp->ancestor_ids[0] = ret;

	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
			      0, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We're accessing css_set_count without locking css_set_lock here,
	 * but that's OK - it can only be increased by someone holding
	 * cgroup_lock, and that's us.  Later rebinding may disable
	 * controllers on the default hierarchy and thus create new csets,
	 * which can't be more than the existing ones.  Allocate 2x.
	 */
	ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
	if (ret)
		goto cancel_ref;

	ret = cgroup_init_root_id(root);
	if (ret)
		goto cancel_ref;

	kf_sops = root == &cgrp_dfl_root ?
		&cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops;

	root->kf_root = kernfs_create_root(kf_sops,
					   KERNFS_ROOT_CREATE_DEACTIVATED |
					   KERNFS_ROOT_SUPPORT_EXPORTOP,
					   root_cgrp);
	if (IS_ERR(root->kf_root)) {
		ret = PTR_ERR(root->kf_root);
		goto exit_root_id;
	}
	root_cgrp->kn = root->kf_root->kn;

	ret = css_populate_dir(&root_cgrp->self);
	if (ret)
		goto destroy_root;

	ret = rebind_subsystems(root, ss_mask);
	if (ret)
		goto destroy_root;

	ret = cgroup_bpf_inherit(root_cgrp);
	WARN_ON_ONCE(ret);

	trace_cgroup_setup_root(root);

	/*
	 * There must be no failure case after here, since rebinding takes
	 * care of subsystems' refcounts, which are explicitly dropped in
	 * the failure exit path.
	 */
	list_add(&root->root_list, &cgroup_roots);
	cgroup_root_count++;

	/*
	 * Link the root cgroup in this hierarchy into all the css_set
	 * objects.
	 */
	spin_lock_irq(&css_set_lock);
	hash_for_each(css_set_table, i, cset, hlist) {
		link_css_set(&tmp_links, cset, root_cgrp);
		if (css_set_populated(cset))
			cgroup_update_populated(root_cgrp, true);
	}
	spin_unlock_irq(&css_set_lock);

	BUG_ON(!list_empty(&root_cgrp->self.children));
	BUG_ON(atomic_read(&root->nr_cgrps) != 1);

	kernfs_activate(root_cgrp->kn);
	ret = 0;
	goto out;

destroy_root:
	kernfs_destroy_root(root->kf_root);
	root->kf_root = NULL;
exit_root_id:
	cgroup_exit_root_id(root);
cancel_ref:
	percpu_ref_exit(&root_cgrp->self.refcnt);
out:
	free_cgrp_cset_links(&tmp_links);
	return ret;
}

int cgroup_do_get_tree(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	int ret;

	ctx->kfc.root = ctx->root->kf_root;
	if (fc->fs_type == &cgroup2_fs_type)
		ctx->kfc.magic = CGROUP2_SUPER_MAGIC;
	else
		ctx->kfc.magic = CGROUP_SUPER_MAGIC;
	ret = kernfs_get_tree(fc);

	/*
	 * In non-init cgroup namespace, instead of root cgroup's dentry,
	 * we return the dentry corresponding to the cgroupns->root_cgrp.
	 */
	if (!ret && ctx->ns != &init_cgroup_ns) {
		struct dentry *nsdentry;
		struct super_block *sb = fc->root->d_sb;
		struct cgroup *cgrp;

		mutex_lock(&cgroup_mutex);
		spin_lock_irq(&css_set_lock);

		cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);

		spin_unlock_irq(&css_set_lock);
		mutex_unlock(&cgroup_mutex);

		nsdentry = kernfs_node_dentry(cgrp->kn, sb);
		dput(fc->root);
		fc->root = nsdentry;
		if (IS_ERR(nsdentry)) {
			ret = PTR_ERR(nsdentry);
			deactivate_locked_super(sb);
		}
	}

	if (!ctx->kfc.new_sb_created)
		cgroup_put(&ctx->root->cgrp);

	return ret;
}

/*
 * Destroy a cgroup filesystem context.
 */
static void cgroup_fs_context_free(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);

	kfree(ctx->name);
	kfree(ctx->release_agent);
	put_cgroup_ns(ctx->ns);
	kernfs_free_fs_context(fc);
	kfree(ctx);
}

static int cgroup_get_tree(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	int ret;

	cgrp_dfl_visible = true;
	cgroup_get_live(&cgrp_dfl_root.cgrp);
	ctx->root = &cgrp_dfl_root;

	ret = cgroup_do_get_tree(fc);
	if (!ret)
		apply_cgroup_root_flags(ctx->flags);
	return ret;
}

static const struct fs_context_operations cgroup_fs_context_ops = {
	.free		= cgroup_fs_context_free,
	.parse_param	= cgroup2_parse_param,
	.get_tree	= cgroup_get_tree,
	.reconfigure	= cgroup_reconfigure,
};

static const struct fs_context_operations cgroup1_fs_context_ops = {
	.free		= cgroup_fs_context_free,
	.parse_param	= cgroup1_parse_param,
	.get_tree	= cgroup1_get_tree,
	.reconfigure	= cgroup1_reconfigure,
};

/*
 * Initialise the cgroup filesystem creation/reconfiguration context.
 * Notably, we select the namespace we're going to use.
 */
static int cgroup_init_fs_context(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx;

	ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	/*
	 * The first time anyone tries to mount a cgroup, enable the list
	 * linking each css_set to its tasks and fix up all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	ctx->ns = current->nsproxy->cgroup_ns;
	get_cgroup_ns(ctx->ns);
	fc->fs_private = &ctx->kfc;
	if (fc->fs_type == &cgroup2_fs_type)
		fc->ops = &cgroup_fs_context_ops;
	else
		fc->ops = &cgroup1_fs_context_ops;
	put_user_ns(fc->user_ns);
	fc->user_ns = get_user_ns(ctx->ns->user_ns);
	fc->global = true;
	return 0;
}

static void cgroup_kill_sb(struct super_block *sb)
{
	struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
	struct cgroup_root *root = cgroup_root_from_kf(kf_root);

	/*
	 * If @root doesn't have any children, start killing it.
	 * This prevents new mounts by disabling percpu_ref_tryget_live().
	 *
	 * And don't kill the default root.
	 */
	if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
	    !percpu_ref_is_dying(&root->cgrp.self.refcnt))
		percpu_ref_kill(&root->cgrp.self.refcnt);
	cgroup_put(&root->cgrp);
	kernfs_kill_sb(sb);
}

struct file_system_type cgroup_fs_type = {
	.name			= "cgroup",
	.init_fs_context	= cgroup_init_fs_context,
	.parameters		= &cgroup1_fs_parameters,
	.kill_sb		= cgroup_kill_sb,
	.fs_flags		= FS_USERNS_MOUNT,
};

static struct file_system_type cgroup2_fs_type = {
	.name			= "cgroup2",
	.init_fs_context	= cgroup_init_fs_context,
	.parameters		= &cgroup2_fs_parameters,
	.kill_sb		= cgroup_kill_sb,
	.fs_flags		= FS_USERNS_MOUNT,
};

#ifdef CONFIG_CPUSETS
static const struct fs_context_operations cpuset_fs_context_ops = {
	.get_tree	= cgroup1_get_tree,
	.free		= cgroup_fs_context_free,
};

/*
 * This is ugly, but preserves the userspace API for existing cpuset
 * users.  If someone tries to mount the "cpuset" filesystem, we
 * silently switch it to mount "cgroup" instead.
 */
static int cpuset_init_fs_context(struct fs_context *fc)
{
	char *agent = kstrdup("/sbin/cpuset_release_agent", GFP_USER);
	struct cgroup_fs_context *ctx;
	int err;

	err = cgroup_init_fs_context(fc);
	if (err) {
		kfree(agent);
		return err;
	}

	fc->ops = &cpuset_fs_context_ops;

	ctx = cgroup_fc2context(fc);
	ctx->subsys_mask = 1 << cpuset_cgrp_id;
	ctx->flags |= CGRP_ROOT_NOPREFIX;
	ctx->release_agent = agent;

	get_filesystem(&cgroup_fs_type);
	put_filesystem(fc->fs_type);
	fc->fs_type = &cgroup_fs_type;

	return 0;
}

static struct file_system_type cpuset_fs_type = {
	.name			= "cpuset",
	.init_fs_context	= cpuset_init_fs_context,
	.fs_flags		= FS_USERNS_MOUNT,
};
#endif

int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
			  struct cgroup_namespace *ns)
{
	struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);

	return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
}

int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
		   struct cgroup_namespace *ns)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(cgroup_path_ns);
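
/**
 * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
 * @task: target task
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
 * Determine @task's cgroup on the first (the one with the lowest non-zero
 * hierarchy ID) cgroup hierarchy and copy its path into @buf.  This
 * function grabs cgroup_mutex and shouldn't be used inside locks used by
 * cgroup controller callbacks.
 *
 * Return value is the same as kernfs_path().
 */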
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
	struct cgroup_root *root;
	struct cgroup *cgrp;
	int hierarchy_id = 1;
	int ret;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);

	if (root) {
		cgrp = task_cgroup_from_root(task, root);
		ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
	} else {
		/* if no hierarchy exists, everyone is in "/" */
		ret = strlcpy(buf, "/", buflen);
	}

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);

/**
 * cgroup_migrate_add_task - add a migration target task to a migration context
 * @task: target task
 * @mgctx: target migration context
 *
 * Add @task, which is a migration target, to @mgctx->tset.  This function
 * becomes noop if @task doesn't need to be migrated.  @task's css_set
 * should have been added as a migration source and @task->cg_list will be
 * moved from the css_set's tasks list to mg_tasks one.
 */
static void cgroup_migrate_add_task(struct task_struct *task,
				    struct cgroup_mgctx *mgctx)
{
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	/* @task either already exited or can't exit until the end */
	if (task->flags & PF_EXITING)
		return;

	/* leave @task alone if post_fork() hasn't linked it yet */
	if (list_empty(&task->cg_list))
		return;

	cset = task_css_set(task);
	if (!cset->mg_src_cgrp)
		return;

	mgctx->tset.nr_tasks++;

	list_move_tail(&task->cg_list, &cset->mg_tasks);
	if (list_empty(&cset->mg_node))
		list_add_tail(&cset->mg_node,
			      &mgctx->tset.src_csets);
	if (list_empty(&cset->mg_dst_cset->mg_node))
		list_add_tail(&cset->mg_dst_cset->mg_node,
			      &mgctx->tset.dst_csets);
}

/**
 * cgroup_taskset_first - reset taskset and return the first task
 * @tset: taskset of interest
 * @dst_cssp: output variable for the destination css
 *
 * @tset iteration is initialized and the first task is returned.
 */
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
					 struct cgroup_subsys_state **dst_cssp)
{
	tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
	tset->cur_task = NULL;

	return cgroup_taskset_next(tset, dst_cssp);
}

/**
 * cgroup_taskset_next - iterate to the next task in taskset
 * @tset: taskset of interest
 * @dst_cssp: output variable for the destination css
 *
 * Return the next task in @tset.  Iteration must have been initialized
 * with cgroup_taskset_first().
 */
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
					struct cgroup_subsys_state **dst_cssp)
{
	struct css_set *cset = tset->cur_cset;
	struct task_struct *task = tset->cur_task;

	while (&cset->mg_node != tset->csets) {
		if (!task)
			task = list_first_entry(&cset->mg_tasks,
						struct task_struct, cg_list);
		else
			task = list_next_entry(task, cg_list);

		if (&task->cg_list != &cset->mg_tasks) {
			tset->cur_cset = cset;
			tset->cur_task = task;

			/*
			 * This function may be called both before and
			 * after cgroup_migrate_execute().  The two cases
			 * can be distinguished by looking at whether @cset
			 * has its ->mg_dst_cset set.
			 */
			if (cset->mg_dst_cset)
				*dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
			else
				*dst_cssp = cset->subsys[tset->ssid];

			return task;
		}

		cset = list_next_entry(cset, mg_node);
		task = NULL;
	}

	return NULL;
}
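
/**
 * cgroup_migrate_execute - migrate a taskset
 * @mgctx: migration context
 *
 * Migrate tasks in @mgctx as setup by migration preparation functions.
 * This function fails iff one of the ->can_attach callbacks fails and
 * guarantees that either all or none of the tasks in @mgctx are migrated.
 * @mgctx is consumed regardless of success or failure.
 */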
static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
{
	struct cgroup_taskset *tset = &mgctx->tset;
	struct cgroup_subsys *ss;
	struct task_struct *task, *tmp_task;
	struct css_set *cset, *tmp_cset;
	int ssid, failed_ssid, ret;

	/* check that we can legitimately attach to the cgroup */
	if (tset->nr_tasks) {
		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
			if (ss->can_attach) {
				tset->ssid = ssid;
				ret = ss->can_attach(tset);
				if (ret) {
					failed_ssid = ssid;
					goto out_cancel_attach;
				}
			}
		} while_each_subsys_mask();
	}

	/*
	 * Now that we're guaranteed success, proceed to move all tasks to
	 * the new cgroup.  There are no failure cases after here, so this
	 * is the commit point.
	 */
	spin_lock_irq(&css_set_lock);
	list_for_each_entry(cset, &tset->src_csets, mg_node) {
		list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
			struct css_set *from_cset = task_css_set(task);
			struct css_set *to_cset = cset->mg_dst_cset;

			get_css_set(to_cset);
			to_cset->nr_tasks++;
			css_set_move_task(task, from_cset, to_cset, true);
			from_cset->nr_tasks--;
			/*
			 * If the source or destination cgroup is frozen,
			 * the task might require to change its state.
			 */
			cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
						    to_cset->dfl_cgrp);
			put_css_set_locked(from_cset);
		}
	}
	spin_unlock_irq(&css_set_lock);

	/*
	 * Migration is committed, all target tasks are now on dst_csets.
	 * Nothing is sensitive to fork() after this point.  Notify
	 * controllers that migration is complete.
	 */
	tset->csets = &tset->dst_csets;

	if (tset->nr_tasks) {
		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
			if (ss->attach) {
				tset->ssid = ssid;
				ss->attach(tset);
			}
		} while_each_subsys_mask();
	}

	ret = 0;
	goto out_release_tset;

out_cancel_attach:
	if (tset->nr_tasks) {
		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
			if (ssid == failed_ssid)
				break;
			if (ss->cancel_attach) {
				tset->ssid = ssid;
				ss->cancel_attach(tset);
			}
		} while_each_subsys_mask();
	}
out_release_tset:
	spin_lock_irq(&css_set_lock);
	list_splice_init(&tset->dst_csets, &tset->src_csets);
	list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
		list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
		list_del_init(&cset->mg_node);
	}
	spin_unlock_irq(&css_set_lock);

	/*
	 * Re-initialize the cgroup_taskset structure in case it is reused
	 * again in another cgroup_migrate_add_task()/cgroup_migrate_execute()
	 * iteration.
	 */
	tset->nr_tasks = 0;
	tset->csets = &tset->src_csets;
	return ret;
}

/**
 * cgroup_migrate_vet_dst - verify whether a cgroup can be migration destination
 * @dst_cgrp: destination cgroup to test
 *
 * On the default hierarchy, except for the mixable, (possible) thread root
 * and threaded cgroups, subtree_control must be zero for migration
 * destination cgroups with tasks so that child cgroups don't compete
 * against tasks.
 */
int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
{
	/* v1 doesn't have any restriction */
	if (!cgroup_on_dfl(dst_cgrp))
		return 0;

	/* verify @dst_cgrp can host resources */
	if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
		return -EOPNOTSUPP;

	/* mixables don't care */
	if (cgroup_is_mixable(dst_cgrp))
		return 0;

	/*
	 * If @dst_cgrp is already or can become a thread root or is
	 * threaded, it doesn't matter.
	 */
	if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp))
		return 0;

	/* apply no-internal-process constraint */
	if (dst_cgrp->subtree_control)
		return -EBUSY;

	return 0;
}

/**
 * cgroup_migrate_finish - cleanup after attach
 * @mgctx: migration context
 *
 * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst().  See
 * those functions for details.
 */
void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
{
	LIST_HEAD(preloaded);
	struct css_set *cset, *tmp_cset;

	lockdep_assert_held(&cgroup_mutex);

	spin_lock_irq(&css_set_lock);

	list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
	list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);

	list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
		cset->mg_src_cgrp = NULL;
		cset->mg_dst_cgrp = NULL;
		cset->mg_dst_cset = NULL;
		list_del_init(&cset->mg_preload_node);
		put_css_set_locked(cset);
	}

	spin_unlock_irq(&css_set_lock);
}

/**
 * cgroup_migrate_add_src - add a migration source css_set
 * @src_cset: the source css_set to add
 * @dst_cgrp: the destination cgroup
 * @mgctx: migration context
 *
 * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp.  Pin
 * @src_cset and add it to @mgctx->preloaded_src_csets, which should later
 * be cleaned up by cgroup_migrate_finish().
 *
 * This function may be called without holding cgroup_threadgroup_rwsem
 * even if the target is a process.  Threads may be created and destroyed
 * but as long as cgroup_mutex is not dropped, no new css_set can be put
 * into play and the preloaded css_sets are guaranteed to cover all
 * migrations.
 */
void cgroup_migrate_add_src(struct css_set *src_cset,
			    struct cgroup *dst_cgrp,
			    struct cgroup_mgctx *mgctx)
{
	struct cgroup *src_cgrp;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&css_set_lock);

	/*
	 * If ->dead, @src_set is associated with one or more dead cgroups
	 * and doesn't contain any migratable tasks.  Ignore it early so
	 * that the rest of migration path doesn't get confused by it.
	 */
	if (src_cset->dead)
		return;

	src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);

	if (!list_empty(&src_cset->mg_preload_node))
		return;

	WARN_ON(src_cset->mg_src_cgrp);
	WARN_ON(src_cset->mg_dst_cgrp);
	WARN_ON(!list_empty(&src_cset->mg_tasks));
	WARN_ON(!list_empty(&src_cset->mg_node));

	src_cset->mg_src_cgrp = src_cgrp;
	src_cset->mg_dst_cgrp = dst_cgrp;
	get_css_set(src_cset);
	list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
}

/**
 * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
 * @mgctx: migration context
 *
 * Tasks are about to be moved and all the source css_sets have been
 * preloaded to @mgctx->preloaded_src_csets.  This function looks up and
 * pins all destination css_sets, links each to its source, and appends
 * them to @mgctx->preloaded_dst_csets.
 *
 * This function must be called after cgroup_migrate_add_src() has been
 * called on each migration source css_set.  After migration is performed
 * using cgroup_migrate(), cgroup_migrate_finish() must be called on
 * @mgctx.
 */
int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
{
	struct css_set *src_cset, *tmp_cset;

	lockdep_assert_held(&cgroup_mutex);

	/* look up the dst cset for each src cset and link it to src */
	list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
				 mg_preload_node) {
		struct css_set *dst_cset;
		struct cgroup_subsys *ss;
		int ssid;

		dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
		if (!dst_cset)
			return -ENOMEM;

		WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);

		/*
		 * If src cset equals dst, it's noop.  Drop the src.
		 * cgroup_migrate() will skip the cset too.  Note that we
		 * can't handle src == dst as some nodes are used by both.
		 */
		if (src_cset == dst_cset) {
			src_cset->mg_src_cgrp = NULL;
			src_cset->mg_dst_cgrp = NULL;
			list_del_init(&src_cset->mg_preload_node);
			put_css_set(src_cset);
			put_css_set(dst_cset);
			continue;
		}

		src_cset->mg_dst_cset = dst_cset;

		if (list_empty(&dst_cset->mg_preload_node))
			list_add_tail(&dst_cset->mg_preload_node,
				      &mgctx->preloaded_dst_csets);
		else
			put_css_set(dst_cset);

		for_each_subsys(ss, ssid)
			if (src_cset->subsys[ssid] != dst_cset->subsys[ssid])
				mgctx->ss_mask |= 1 << ssid;
	}

	return 0;
}
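
/**
 * cgroup_migrate - migrate a process or task to a cgroup
 * @leader: the leader of the process or the task to migrate
 * @threadgroup: whether @leader points to the whole process or a single task
 * @mgctx: migration context
 *
 * Migrate a process or task denoted by @leader.  If migrating a process,
 * the caller must be holding cgroup_threadgroup_rwsem.  The caller is also
 * responsible for invoking cgroup_migrate_add_src() and
 * cgroup_migrate_prepare_dst() on the targets before invoking this
 * function and following up with cgroup_migrate_finish().
 *
 * As long as a controller's ->can_attach() doesn't fail, this function is
 * guaranteed to succeed.  This means that, excluding ->can_attach()
 * failure, when migrating multiple targets, the success or failure can be
 * decided for all targets by invoking cgroup_migrate_prepare_dst() before
 * actually starting migrating.
 */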
int cgroup_migrate(struct task_struct *leader, bool threadgroup,
		   struct cgroup_mgctx *mgctx)
{
	struct task_struct *task;

	/*
	 * Prevent freeing of tasks while we take a snapshot.  Tasks that are
	 * already PF_EXITING could be freed from underneath us unless we
	 * take an rcu_read_lock.
	 */
	spin_lock_irq(&css_set_lock);
	rcu_read_lock();
	task = leader;
	do {
		cgroup_migrate_add_task(task, mgctx);
		if (!threadgroup)
			break;
	} while_each_thread(leader, task);
	rcu_read_unlock();
	spin_unlock_irq(&css_set_lock);

	return cgroup_migrate_execute(mgctx);
}

/**
 * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
 * @dst_cgrp: the cgroup to attach to
 * @leader: the task or the leader of the threadgroup to be attached
 * @threadgroup: attach the whole threadgroup?
 *
 * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
 */
int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
		       bool threadgroup)
{
	DEFINE_CGROUP_MGCTX(mgctx);
	struct task_struct *task;
	int ret;

	ret = cgroup_migrate_vet_dst(dst_cgrp);
	if (ret)
		return ret;

	/* look up all src csets */
	spin_lock_irq(&css_set_lock);
	rcu_read_lock();
	task = leader;
	do {
		cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
		if (!threadgroup)
			break;
	} while_each_thread(leader, task);
	rcu_read_unlock();
	spin_unlock_irq(&css_set_lock);

	/* prepare dst csets and commit */
	ret = cgroup_migrate_prepare_dst(&mgctx);
	if (!ret)
		ret = cgroup_migrate(leader, threadgroup, &mgctx);

	cgroup_migrate_finish(&mgctx);

	if (!ret)
		TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);

	return ret;
}
2826
2827struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
2828 __acquires(&cgroup_threadgroup_rwsem)
2829{
2830 struct task_struct *tsk;
2831 pid_t pid;
2832
2833 if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
2834 return ERR_PTR(-EINVAL);
2835
2836 percpu_down_write(&cgroup_threadgroup_rwsem);
2837
2838 rcu_read_lock();
2839 if (pid) {
2840 tsk = find_task_by_vpid(pid);
2841 if (!tsk) {
2842 tsk = ERR_PTR(-ESRCH);
2843 goto out_unlock_threadgroup;
2844 }
2845 } else {
2846 tsk = current;
2847 }
2848
2849 if (threadgroup)
2850 tsk = tsk->group_leader;
2851
/*
 * kthreads may acquire PF_NO_SETAFFINITY during initialization.
 * If userland migrates such a kthread to a non-root cgroup, it can
 * become trapped in a cpuset, or an RT kthread may be born in a
 * cgroup with no rt_runtime allocated.  Just say no.
 */
2858 if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
2859 tsk = ERR_PTR(-EINVAL);
2860 goto out_unlock_threadgroup;
2861 }
2862
2863 get_task_struct(tsk);
2864 goto out_unlock_rcu;
2865
2866out_unlock_threadgroup:
2867 percpu_up_write(&cgroup_threadgroup_rwsem);
2868out_unlock_rcu:
2869 rcu_read_unlock();
2870 return tsk;
2871}
2872
2873void cgroup_procs_write_finish(struct task_struct *task)
2874 __releases(&cgroup_threadgroup_rwsem)
2875{
2876 struct cgroup_subsys *ss;
2877 int ssid;
2878
2879
2880 put_task_struct(task);
2881
2882 percpu_up_write(&cgroup_threadgroup_rwsem);
2883 for_each_subsys(ss, ssid)
2884 if (ss->post_attach)
2885 ss->post_attach();
2886}
2887
2888static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
2889{
2890 struct cgroup_subsys *ss;
2891 bool printed = false;
2892 int ssid;
2893
2894 do_each_subsys_mask(ss, ssid, ss_mask) {
2895 if (printed)
2896 seq_putc(seq, ' ');
2897 seq_printf(seq, "%s", ss->name);
2898 printed = true;
2899 } while_each_subsys_mask();
2900 if (printed)
2901 seq_putc(seq, '\n');
2902}
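
/*
 * For example, an @ss_mask with the cpu and memory controllers set makes
 * the function above emit "cpu memory" followed by a newline; an empty
 * mask produces no output, not even the trailing newline.
 */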
2903
2904
2905static int cgroup_controllers_show(struct seq_file *seq, void *v)
2906{
2907 struct cgroup *cgrp = seq_css(seq)->cgroup;
2908
2909 cgroup_print_ss_mask(seq, cgroup_control(cgrp));
2910 return 0;
2911}
2912
2913
2914static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
2915{
2916 struct cgroup *cgrp = seq_css(seq)->cgroup;
2917
2918 cgroup_print_ss_mask(seq, cgrp->subtree_control);
2919 return 0;
2920}
2921
/**
 * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
 * @cgrp: root of the target subtree
 *
 * The effective csses of @cgrp's subtree have changed.  Look up the
 * css_set each task in the subtree should belong to, create the missing
 * ones, and migrate the tasks over so that every task ends up associated
 * with the correct set of effective csses.  Returns 0 on success, -errno
 * on failure.
 */
2931static int cgroup_update_dfl_csses(struct cgroup *cgrp)
2932{
2933 DEFINE_CGROUP_MGCTX(mgctx);
2934 struct cgroup_subsys_state *d_css;
2935 struct cgroup *dsct;
2936 struct css_set *src_cset;
2937 int ret;
2938
2939 lockdep_assert_held(&cgroup_mutex);
2940
2941 percpu_down_write(&cgroup_threadgroup_rwsem);
2942
/* look up all csses currently attached to @cgrp's subtree */
2944 spin_lock_irq(&css_set_lock);
2945 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
2946 struct cgrp_cset_link *link;
2947
2948 list_for_each_entry(link, &dsct->cset_links, cset_link)
2949 cgroup_migrate_add_src(link->cset, dsct, &mgctx);
2950 }
2951 spin_unlock_irq(&css_set_lock);
2952
2953
2954 ret = cgroup_migrate_prepare_dst(&mgctx);
2955 if (ret)
2956 goto out_finish;
2957
2958 spin_lock_irq(&css_set_lock);
2959 list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
2960 struct task_struct *task, *ntask;
2961
/* all tasks in src_csets need to be migrated */
2963 list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
2964 cgroup_migrate_add_task(task, &mgctx);
2965 }
2966 spin_unlock_irq(&css_set_lock);
2967
2968 ret = cgroup_migrate_execute(&mgctx);
2969out_finish:
2970 cgroup_migrate_finish(&mgctx);
2971 percpu_up_write(&cgroup_threadgroup_rwsem);
2972 return ret;
2973}
2974
/**
 * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
 * @cgrp: root of the target subtree
 *
 * Because css offlining is asynchronous, userland may try to re-enable a
 * controller while the previous css is still around.  This function grabs
 * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
 */
2983void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
2984 __acquires(&cgroup_mutex)
2985{
2986 struct cgroup *dsct;
2987 struct cgroup_subsys_state *d_css;
2988 struct cgroup_subsys *ss;
2989 int ssid;
2990
2991restart:
2992 mutex_lock(&cgroup_mutex);
2993
2994 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
2995 for_each_subsys(ss, ssid) {
2996 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
2997 DEFINE_WAIT(wait);
2998
2999 if (!css || !percpu_ref_is_dying(&css->refcnt))
3000 continue;
3001
3002 cgroup_get_live(dsct);
3003 prepare_to_wait(&dsct->offline_waitq, &wait,
3004 TASK_UNINTERRUPTIBLE);
3005
3006 mutex_unlock(&cgroup_mutex);
3007 schedule();
3008 finish_wait(&dsct->offline_waitq, &wait);
3009
3010 cgroup_put(dsct);
3011 goto restart;
3012 }
3013 }
3014}
3015
/**
 * cgroup_save_control - save control masks and dom_cgrp of a subtree
 * @cgrp: root of the target subtree
 *
 * Save ->subtree_control, ->subtree_ss_mask and ->dom_cgrp to the
 * respective old_ prefixed fields for @cgrp's subtree including @cgrp
 * itself.
 */
3024static void cgroup_save_control(struct cgroup *cgrp)
3025{
3026 struct cgroup *dsct;
3027 struct cgroup_subsys_state *d_css;
3028
3029 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3030 dsct->old_subtree_control = dsct->subtree_control;
3031 dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
3032 dsct->old_dom_cgrp = dsct->dom_cgrp;
3033 }
3034}
3035
/**
 * cgroup_propagate_control - refresh control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * For @cgrp and its whole subtree, ensure ->subtree_ss_mask matches
 * ->subtree_control and propagate controller availability down the
 * subtree so that descendants don't have unavailable controllers enabled.
 */
3044static void cgroup_propagate_control(struct cgroup *cgrp)
3045{
3046 struct cgroup *dsct;
3047 struct cgroup_subsys_state *d_css;
3048
3049 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3050 dsct->subtree_control &= cgroup_control(dsct);
3051 dsct->subtree_ss_mask =
3052 cgroup_calc_subtree_ss_mask(dsct->subtree_control,
3053 cgroup_ss_mask(dsct));
3054 }
3055}
3056
/**
 * cgroup_restore_control - restore control masks and dom_cgrp of a subtree
 * @cgrp: root of the target subtree
 *
 * Restore ->subtree_control, ->subtree_ss_mask and ->dom_cgrp from the
 * respective old_ prefixed fields for @cgrp's subtree including @cgrp
 * itself.
 */
3065static void cgroup_restore_control(struct cgroup *cgrp)
3066{
3067 struct cgroup *dsct;
3068 struct cgroup_subsys_state *d_css;
3069
3070 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3071 dsct->subtree_control = dsct->old_subtree_control;
3072 dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
3073 dsct->dom_cgrp = dsct->old_dom_cgrp;
3074 }
3075}
3076
3077static bool css_visible(struct cgroup_subsys_state *css)
3078{
3079 struct cgroup_subsys *ss = css->ss;
3080 struct cgroup *cgrp = css->cgroup;
3081
3082 if (cgroup_control(cgrp) & (1 << ss->id))
3083 return true;
3084 if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
3085 return false;
3086 return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
3087}
3088
/**
 * cgroup_apply_control_enable - enable or show csses according to control
 * @cgrp: root of the target subtree
 *
 * Walk @cgrp's subtree and create new csses or make the existing ones
 * visible.  A css is created invisible if it's being implicitly enabled
 * through dependency.  An invisible css is made visible when the userland
 * explicitly enables it.
 *
 * Returns 0 on success, -errno on failure.  On failure, csses which have
 * been processed already aren't cleaned up.  The caller is responsible for
 * cleaning up with cgroup_apply_control_disable().
 */
3102static int cgroup_apply_control_enable(struct cgroup *cgrp)
3103{
3104 struct cgroup *dsct;
3105 struct cgroup_subsys_state *d_css;
3106 struct cgroup_subsys *ss;
3107 int ssid, ret;
3108
3109 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3110 for_each_subsys(ss, ssid) {
3111 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3112
3113 WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
3114
3115 if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
3116 continue;
3117
3118 if (!css) {
3119 css = css_create(dsct, ss);
3120 if (IS_ERR(css))
3121 return PTR_ERR(css);
3122 }
3123
3124 if (css_visible(css)) {
3125 ret = css_populate_dir(css);
3126 if (ret)
3127 return ret;
3128 }
3129 }
3130 }
3131
3132 return 0;
3133}
3134
/**
 * cgroup_apply_control_disable - kill or hide csses according to control
 * @cgrp: root of the target subtree
 *
 * Walk @cgrp's subtree and kill and hide csses so that they match
 * cgroup_ss_mask() and css_visible().
 *
 * A css is hidden when the userland requests it to be disabled while other
 * subsystems are still depending on it.  The css must not actively control
 * resources and be in the vanilla state if it's made visible again later.
 * Controllers which may be depended upon should provide ->css_reset() for
 * this purpose.
 */
3148static void cgroup_apply_control_disable(struct cgroup *cgrp)
3149{
3150 struct cgroup *dsct;
3151 struct cgroup_subsys_state *d_css;
3152 struct cgroup_subsys *ss;
3153 int ssid;
3154
3155 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3156 for_each_subsys(ss, ssid) {
3157 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3158
3159 WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
3160
3161 if (!css)
3162 continue;
3163
3164 if (css->parent &&
3165 !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
3166 kill_css(css);
3167 } else if (!css_visible(css)) {
3168 css_clear_dir(css);
3169 if (ss->css_reset)
3170 ss->css_reset(css);
3171 }
3172 }
3173 }
3174}
3175
/**
 * cgroup_apply_control - apply control mask updates to the subtree
 * @cgrp: root of the target subtree
 *
 * subsystems can be enabled and disabled in a subtree using the following
 * steps.
 *
 * 1. Call cgroup_save_control() to stash the current state.
 * 2. Update ->subtree_control masks in the subtree as desired.
 * 3. Call cgroup_apply_control() to apply the changes.
 * 4. Optionally perform other related operations.
 * 5. Call cgroup_finalize_control() to finish up.
 *
 * This function implements step 3 and propagates the mask changes
 * throughout @cgrp's subtree, updates csses accordingly and performs
 * process migrations.
 */
3193static int cgroup_apply_control(struct cgroup *cgrp)
3194{
3195 int ret;
3196
3197 cgroup_propagate_control(cgrp);
3198
3199 ret = cgroup_apply_control_enable(cgrp);
3200 if (ret)
3201 return ret;
3202
/*
 * At this point, effective css lookups already reflect the new
 * csses, making the following cgroup_update_dfl_csses() properly
 * update css associations of all tasks in the subtree.
 */
3208 ret = cgroup_update_dfl_csses(cgrp);
3209 if (ret)
3210 return ret;
3211
3212 return 0;
3213}
3214
/**
 * cgroup_finalize_control - finalize control mask update
 * @cgrp: root of the target subtree
 * @ret: the result of the update
 *
 * Finalize control mask update.  See cgroup_apply_control() for more info.
 */
3222static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
3223{
3224 if (ret) {
3225 cgroup_restore_control(cgrp);
3226 cgroup_propagate_control(cgrp);
3227 }
3228
3229 cgroup_apply_control_disable(cgrp);
3230}
3231
3232static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable)
3233{
3234 u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask;
3235
/* if nothing is getting enabled, nothing to worry about */
3237 if (!enable)
3238 return 0;
3239
/* can @cgrp host any resources? */
3241 if (!cgroup_is_valid_domain(cgrp->dom_cgrp))
3242 return -EOPNOTSUPP;
3243
/* mixables don't care */
3245 if (cgroup_is_mixable(cgrp))
3246 return 0;
3247
3248 if (domain_enable) {
/* can't enable domain controllers inside a thread subtree */
3250 if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp))
3251 return -EOPNOTSUPP;
3252 } else {
/*
 * Threaded controllers can handle internal competitions
 * and are always allowed inside a (prospective) thread
 * subtree.
 */
3258 if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp))
3259 return 0;
3260 }
3261
/*
 * Controllers can't be enabled for a cgroup with tasks to
 * avoid child cgroups competing against tasks.
 */
3266 if (cgroup_has_tasks(cgrp))
3267 return -EBUSY;
3268
3269 return 0;
3270}
3271
3272
3273static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
3274 char *buf, size_t nbytes,
3275 loff_t off)
3276{
3277 u16 enable = 0, disable = 0;
3278 struct cgroup *cgrp, *child;
3279 struct cgroup_subsys *ss;
3280 char *tok;
3281 int ssid, ret;
3282
/*
 * Parse input - space separated list of subsystem names prefixed
 * with either + or -.
 */
3287 buf = strstrip(buf);
3288 while ((tok = strsep(&buf, " "))) {
3289 if (tok[0] == '\0')
3290 continue;
3291 do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
3292 if (!cgroup_ssid_enabled(ssid) ||
3293 strcmp(tok + 1, ss->name))
3294 continue;
3295
3296 if (*tok == '+') {
3297 enable |= 1 << ssid;
3298 disable &= ~(1 << ssid);
3299 } else if (*tok == '-') {
3300 disable |= 1 << ssid;
3301 enable &= ~(1 << ssid);
3302 } else {
3303 return -EINVAL;
3304 }
3305 break;
3306 } while_each_subsys_mask();
3307 if (ssid == CGROUP_SUBSYS_COUNT)
3308 return -EINVAL;
3309 }
3310
3311 cgrp = cgroup_kn_lock_live(of->kn, true);
3312 if (!cgrp)
3313 return -ENODEV;
3314
3315 for_each_subsys(ss, ssid) {
3316 if (enable & (1 << ssid)) {
3317 if (cgrp->subtree_control & (1 << ssid)) {
3318 enable &= ~(1 << ssid);
3319 continue;
3320 }
3321
3322 if (!(cgroup_control(cgrp) & (1 << ssid))) {
3323 ret = -ENOENT;
3324 goto out_unlock;
3325 }
3326 } else if (disable & (1 << ssid)) {
3327 if (!(cgrp->subtree_control & (1 << ssid))) {
3328 disable &= ~(1 << ssid);
3329 continue;
3330 }
3331
/* a child has it enabled? */
3333 cgroup_for_each_live_child(child, cgrp) {
3334 if (child->subtree_control & (1 << ssid)) {
3335 ret = -EBUSY;
3336 goto out_unlock;
3337 }
3338 }
3339 }
3340 }
3341
3342 if (!enable && !disable) {
3343 ret = 0;
3344 goto out_unlock;
3345 }
3346
3347 ret = cgroup_vet_subtree_control_enable(cgrp, enable);
3348 if (ret)
3349 goto out_unlock;
3350
/* save and update control masks and prepare csses */
3352 cgroup_save_control(cgrp);
3353
3354 cgrp->subtree_control |= enable;
3355 cgrp->subtree_control &= ~disable;
3356
3357 ret = cgroup_apply_control(cgrp);
3358 cgroup_finalize_control(cgrp, ret);
3359 if (ret)
3360 goto out_unlock;
3361
3362 kernfs_activate(cgrp->kn);
3363out_unlock:
3364 cgroup_kn_unlock(of->kn);
3365 return ret ?: nbytes;
3366}
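
/*
 * Example interaction with the interface file implemented above, from a
 * shell (the path under /sys/fs/cgroup is illustrative):
 *
 *	# echo "+memory -pids" > /sys/fs/cgroup/parent/cgroup.subtree_control
 *
 * enables the memory controller and disables the pids controller for the
 * children of "parent".
 */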
3367
/**
 * cgroup_enable_threaded - make @cgrp threaded
 * @cgrp: the target cgroup
 *
 * Called when "threaded" is written to the cgroup.type interface file and
 * tries to make @cgrp threaded and join the parent's resource domain.
 * This function is never called on the root cgroup as cgroup.type doesn't
 * exist on it.
 */
3377static int cgroup_enable_threaded(struct cgroup *cgrp)
3378{
3379 struct cgroup *parent = cgroup_parent(cgrp);
3380 struct cgroup *dom_cgrp = parent->dom_cgrp;
3381 struct cgroup *dsct;
3382 struct cgroup_subsys_state *d_css;
3383 int ret;
3384
3385 lockdep_assert_held(&cgroup_mutex);
3386
/* noop if already threaded */
3388 if (cgroup_is_threaded(cgrp))
3389 return 0;
3390
/*
 * If @cgrp is populated or has domain controllers enabled, it
 * can't be switched.  While the below cgroup_can_be_thread_root()
 * test can catch the same conditions, that's only when @parent is
 * not mixable, so let's check it explicitly.
 */
3397 if (cgroup_is_populated(cgrp) ||
3398 cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
3399 return -EOPNOTSUPP;
3400
/* we're joining the parent's domain, ensure its validity */
3402 if (!cgroup_is_valid_domain(dom_cgrp) ||
3403 !cgroup_can_be_thread_root(dom_cgrp))
3404 return -EOPNOTSUPP;
3405
/*
 * The following shouldn't cause actual migrations and should
 * always succeed.
 */
3410 cgroup_save_control(cgrp);
3411
3412 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)
3413 if (dsct == cgrp || cgroup_is_threaded(dsct))
3414 dsct->dom_cgrp = dom_cgrp;
3415
3416 ret = cgroup_apply_control(cgrp);
3417 if (!ret)
3418 parent->nr_threaded_children++;
3419
3420 cgroup_finalize_control(cgrp, ret);
3421 return ret;
3422}
3423
3424static int cgroup_type_show(struct seq_file *seq, void *v)
3425{
3426 struct cgroup *cgrp = seq_css(seq)->cgroup;
3427
3428 if (cgroup_is_threaded(cgrp))
3429 seq_puts(seq, "threaded\n");
3430 else if (!cgroup_is_valid_domain(cgrp))
3431 seq_puts(seq, "domain invalid\n");
3432 else if (cgroup_is_thread_root(cgrp))
3433 seq_puts(seq, "domain threaded\n");
3434 else
3435 seq_puts(seq, "domain\n");
3436
3437 return 0;
3438}
3439
3440static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
3441 size_t nbytes, loff_t off)
3442{
3443 struct cgroup *cgrp;
3444 int ret;
3445
/* only switching to threaded mode is allowed */
3447 if (strcmp(strstrip(buf), "threaded"))
3448 return -EINVAL;
3449
3450 cgrp = cgroup_kn_lock_live(of->kn, false);
3451 if (!cgrp)
3452 return -ENOENT;
3453
3454
3455 ret = cgroup_enable_threaded(cgrp);
3456
3457 cgroup_kn_unlock(of->kn);
3458 return ret ?: nbytes;
3459}
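
/*
 * Example: a cgroup joins the threaded mode from a shell with
 *
 *	# echo threaded > /sys/fs/cgroup/parent/child/cgroup.type
 *
 * (path illustrative).  Writing any other string fails with -EINVAL, and
 * the switch itself may fail with -EOPNOTSUPP per the checks in
 * cgroup_enable_threaded() above.
 */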
3460
3461static int cgroup_max_descendants_show(struct seq_file *seq, void *v)
3462{
3463 struct cgroup *cgrp = seq_css(seq)->cgroup;
3464 int descendants = READ_ONCE(cgrp->max_descendants);
3465
3466 if (descendants == INT_MAX)
3467 seq_puts(seq, "max\n");
3468 else
3469 seq_printf(seq, "%d\n", descendants);
3470
3471 return 0;
3472}
3473
3474static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of,
3475 char *buf, size_t nbytes, loff_t off)
3476{
3477 struct cgroup *cgrp;
3478 int descendants;
3479 ssize_t ret;
3480
3481 buf = strstrip(buf);
3482 if (!strcmp(buf, "max")) {
3483 descendants = INT_MAX;
3484 } else {
3485 ret = kstrtoint(buf, 0, &descendants);
3486 if (ret)
3487 return ret;
3488 }
3489
3490 if (descendants < 0)
3491 return -ERANGE;
3492
3493 cgrp = cgroup_kn_lock_live(of->kn, false);
3494 if (!cgrp)
3495 return -ENOENT;
3496
3497 cgrp->max_descendants = descendants;
3498
3499 cgroup_kn_unlock(of->kn);
3500
3501 return nbytes;
3502}
3503
3504static int cgroup_max_depth_show(struct seq_file *seq, void *v)
3505{
3506 struct cgroup *cgrp = seq_css(seq)->cgroup;
3507 int depth = READ_ONCE(cgrp->max_depth);
3508
3509 if (depth == INT_MAX)
3510 seq_puts(seq, "max\n");
3511 else
3512 seq_printf(seq, "%d\n", depth);
3513
3514 return 0;
3515}
3516
3517static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
3518 char *buf, size_t nbytes, loff_t off)
3519{
3520 struct cgroup *cgrp;
3521 ssize_t ret;
3522 int depth;
3523
3524 buf = strstrip(buf);
3525 if (!strcmp(buf, "max")) {
3526 depth = INT_MAX;
3527 } else {
3528 ret = kstrtoint(buf, 0, &depth);
3529 if (ret)
3530 return ret;
3531 }
3532
3533 if (depth < 0)
3534 return -ERANGE;
3535
3536 cgrp = cgroup_kn_lock_live(of->kn, false);
3537 if (!cgrp)
3538 return -ENOENT;
3539
3540 cgrp->max_depth = depth;
3541
3542 cgroup_kn_unlock(of->kn);
3543
3544 return nbytes;
3545}
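
/*
 * Example: "echo 3 > cgroup.max.depth" limits the subtree below this
 * cgroup to three levels while "echo max > cgroup.max.depth" removes the
 * limit (INT_MAX internally).  cgroup.max.descendants above works the
 * same way for the total number of descendants.
 */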
3546
3547static int cgroup_events_show(struct seq_file *seq, void *v)
3548{
3549 struct cgroup *cgrp = seq_css(seq)->cgroup;
3550
3551 seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
3552 seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
3553
3554 return 0;
3555}
3556
3557static int cgroup_stat_show(struct seq_file *seq, void *v)
3558{
3559 struct cgroup *cgroup = seq_css(seq)->cgroup;
3560
3561 seq_printf(seq, "nr_descendants %d\n",
3562 cgroup->nr_descendants);
3563 seq_printf(seq, "nr_dying_descendants %d\n",
3564 cgroup->nr_dying_descendants);
3565
3566 return 0;
3567}
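
/*
 * Example cgroup.stat output produced by the function above (values
 * illustrative):
 *
 *	nr_descendants 3
 *	nr_dying_descendants 0
 */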
3568
3569static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
3570 struct cgroup *cgrp, int ssid)
3571{
3572 struct cgroup_subsys *ss = cgroup_subsys[ssid];
3573 struct cgroup_subsys_state *css;
3574 int ret;
3575
3576 if (!ss->css_extra_stat_show)
3577 return 0;
3578
3579 css = cgroup_tryget_css(cgrp, ss);
3580 if (!css)
3581 return 0;
3582
3583 ret = ss->css_extra_stat_show(seq, css);
3584 css_put(css);
3585 return ret;
3586}
3587
3588static int cpu_stat_show(struct seq_file *seq, void *v)
3589{
3590 struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
3591 int ret = 0;
3592
3593 cgroup_base_stat_cputime_show(seq);
3594#ifdef CONFIG_CGROUP_SCHED
3595 ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
3596#endif
3597 return ret;
3598}
3599
3600#ifdef CONFIG_PSI
3601static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
3602{
3603 struct cgroup *cgroup = seq_css(seq)->cgroup;
3604 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3605
3606 return psi_show(seq, psi, PSI_IO);
3607}
3608static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
3609{
3610 struct cgroup *cgroup = seq_css(seq)->cgroup;
3611 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3612
3613 return psi_show(seq, psi, PSI_MEM);
3614}
3615static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
3616{
3617 struct cgroup *cgroup = seq_css(seq)->cgroup;
3618 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3619
3620 return psi_show(seq, psi, PSI_CPU);
3621}
3622
3623static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
3624 size_t nbytes, enum psi_res res)
3625{
3626 struct psi_trigger *new;
3627 struct cgroup *cgrp;
3628
3629 cgrp = cgroup_kn_lock_live(of->kn, false);
3630 if (!cgrp)
3631 return -ENODEV;
3632
3633 cgroup_get(cgrp);
3634 cgroup_kn_unlock(of->kn);
3635
3636 new = psi_trigger_create(&cgrp->psi, buf, nbytes, res);
3637 if (IS_ERR(new)) {
3638 cgroup_put(cgrp);
3639 return PTR_ERR(new);
3640 }
3641
3642 psi_trigger_replace(&of->priv, new);
3643
3644 cgroup_put(cgrp);
3645
3646 return nbytes;
3647}
3648
3649static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of,
3650 char *buf, size_t nbytes,
3651 loff_t off)
3652{
3653 return cgroup_pressure_write(of, buf, nbytes, PSI_IO);
3654}
3655
3656static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of,
3657 char *buf, size_t nbytes,
3658 loff_t off)
3659{
3660 return cgroup_pressure_write(of, buf, nbytes, PSI_MEM);
3661}
3662
3663static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
3664 char *buf, size_t nbytes,
3665 loff_t off)
3666{
3667 return cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
3668}
3669
3670static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
3671 poll_table *pt)
3672{
3673 return psi_trigger_poll(&of->priv, of->file, pt);
3674}
3675
3676static void cgroup_pressure_release(struct kernfs_open_file *of)
3677{
3678 psi_trigger_replace(&of->priv, NULL);
3679}
3680#endif
3681
3682static int cgroup_freeze_show(struct seq_file *seq, void *v)
3683{
3684 struct cgroup *cgrp = seq_css(seq)->cgroup;
3685
3686 seq_printf(seq, "%d\n", cgrp->freezer.freeze);
3687
3688 return 0;
3689}
3690
3691static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
3692 char *buf, size_t nbytes, loff_t off)
3693{
3694 struct cgroup *cgrp;
3695 ssize_t ret;
3696 int freeze;
3697
3698 ret = kstrtoint(strstrip(buf), 0, &freeze);
3699 if (ret)
3700 return ret;
3701
3702 if (freeze < 0 || freeze > 1)
3703 return -ERANGE;
3704
3705 cgrp = cgroup_kn_lock_live(of->kn, false);
3706 if (!cgrp)
3707 return -ENOENT;
3708
3709 cgroup_freeze(cgrp, freeze);
3710
3711 cgroup_kn_unlock(of->kn);
3712
3713 return nbytes;
3714}
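
/*
 * Example: "echo 1 > cgroup.freeze" freezes the cgroup and
 * "echo 0 > cgroup.freeze" thaws it; any other integer fails with -ERANGE.
 */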
3715
3716static int cgroup_file_open(struct kernfs_open_file *of)
3717{
3718 struct cftype *cft = of->kn->priv;
3719
3720 if (cft->open)
3721 return cft->open(of);
3722 return 0;
3723}
3724
3725static void cgroup_file_release(struct kernfs_open_file *of)
3726{
3727 struct cftype *cft = of->kn->priv;
3728
3729 if (cft->release)
3730 cft->release(of);
3731}
3732
3733static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
3734 size_t nbytes, loff_t off)
3735{
3736 struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
3737 struct cgroup *cgrp = of->kn->parent->priv;
3738 struct cftype *cft = of->kn->priv;
3739 struct cgroup_subsys_state *css;
3740 int ret;
3741
/*
 * If namespaces are delegation boundaries, disallow writes to
 * files in a non-init namespace root from inside the namespace
 * except for the files explicitly marked delegatable -
 * "cgroup.procs", "cgroup.threads" and "cgroup.subtree_control".
 */
3748 if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
3749 !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
3750 ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
3751 return -EPERM;
3752
3753 if (cft->write)
3754 return cft->write(of, buf, nbytes, off);
3755
/*
 * kernfs guarantees that a file isn't deleted with operations in
 * flight, which means that the matching css is and stays alive and
 * doesn't need to be pinned.  The RCU locking is not necessary
 * either.  It's just for the convenience of using cgroup_css().
 */
3762 rcu_read_lock();
3763 css = cgroup_css(cgrp, cft->ss);
3764 rcu_read_unlock();
3765
3766 if (cft->write_u64) {
3767 unsigned long long v;
3768 ret = kstrtoull(buf, 0, &v);
3769 if (!ret)
3770 ret = cft->write_u64(css, cft, v);
3771 } else if (cft->write_s64) {
3772 long long v;
3773 ret = kstrtoll(buf, 0, &v);
3774 if (!ret)
3775 ret = cft->write_s64(css, cft, v);
3776 } else {
3777 ret = -EINVAL;
3778 }
3779
3780 return ret ?: nbytes;
3781}
3782
3783static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
3784{
3785 struct cftype *cft = of->kn->priv;
3786
3787 if (cft->poll)
3788 return cft->poll(of, pt);
3789
3790 return kernfs_generic_poll(of, pt);
3791}
3792
3793static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
3794{
3795 return seq_cft(seq)->seq_start(seq, ppos);
3796}
3797
3798static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
3799{
3800 return seq_cft(seq)->seq_next(seq, v, ppos);
3801}
3802
3803static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
3804{
3805 if (seq_cft(seq)->seq_stop)
3806 seq_cft(seq)->seq_stop(seq, v);
3807}
3808
3809static int cgroup_seqfile_show(struct seq_file *m, void *arg)
3810{
3811 struct cftype *cft = seq_cft(m);
3812 struct cgroup_subsys_state *css = seq_css(m);
3813
3814 if (cft->seq_show)
3815 return cft->seq_show(m, arg);
3816
3817 if (cft->read_u64)
3818 seq_printf(m, "%llu\n", cft->read_u64(css, cft));
3819 else if (cft->read_s64)
3820 seq_printf(m, "%lld\n", cft->read_s64(css, cft));
3821 else
3822 return -EINVAL;
3823 return 0;
3824}
3825
3826static struct kernfs_ops cgroup_kf_single_ops = {
3827 .atomic_write_len = PAGE_SIZE,
3828 .open = cgroup_file_open,
3829 .release = cgroup_file_release,
3830 .write = cgroup_file_write,
3831 .poll = cgroup_file_poll,
3832 .seq_show = cgroup_seqfile_show,
3833};
3834
3835static struct kernfs_ops cgroup_kf_ops = {
3836 .atomic_write_len = PAGE_SIZE,
3837 .open = cgroup_file_open,
3838 .release = cgroup_file_release,
3839 .write = cgroup_file_write,
3840 .poll = cgroup_file_poll,
3841 .seq_start = cgroup_seqfile_start,
3842 .seq_next = cgroup_seqfile_next,
3843 .seq_stop = cgroup_seqfile_stop,
3844 .seq_show = cgroup_seqfile_show,
3845};
3846
/* set uid and gid of cgroup dirs and files to that of the creator */
3848static int cgroup_kn_set_ugid(struct kernfs_node *kn)
3849{
3850 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
3851 .ia_uid = current_fsuid(),
3852 .ia_gid = current_fsgid(), };
3853
3854 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
3855 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
3856 return 0;
3857
3858 return kernfs_setattr(kn, &iattr);
3859}
3860
3861static void cgroup_file_notify_timer(struct timer_list *timer)
3862{
3863 cgroup_file_notify(container_of(timer, struct cgroup_file,
3864 notify_timer));
3865}
3866
3867static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
3868 struct cftype *cft)
3869{
3870 char name[CGROUP_FILE_NAME_MAX];
3871 struct kernfs_node *kn;
3872 struct lock_class_key *key = NULL;
3873 int ret;
3874
3875#ifdef CONFIG_DEBUG_LOCK_ALLOC
3876 key = &cft->lockdep_key;
3877#endif
3878 kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
3879 cgroup_file_mode(cft),
3880 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
3881 0, cft->kf_ops, cft,
3882 NULL, key);
3883 if (IS_ERR(kn))
3884 return PTR_ERR(kn);
3885
3886 ret = cgroup_kn_set_ugid(kn);
3887 if (ret) {
3888 kernfs_remove(kn);
3889 return ret;
3890 }
3891
3892 if (cft->file_offset) {
3893 struct cgroup_file *cfile = (void *)css + cft->file_offset;
3894
3895 timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
3896
3897 spin_lock_irq(&cgroup_file_kn_lock);
3898 cfile->kn = kn;
3899 spin_unlock_irq(&cgroup_file_kn_lock);
3900 }
3901
3902 return 0;
3903}
3904
/**
 * cgroup_addrm_files - add or remove files to a cgroup directory
 * @css: the target css
 * @cgrp: the target cgroup (usually css->cgroup)
 * @cfts: array of cftypes to be added
 * @is_add: whether to add or remove
 *
 * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
 * For removals, this function never fails.
 */
3915static int cgroup_addrm_files(struct cgroup_subsys_state *css,
3916 struct cgroup *cgrp, struct cftype cfts[],
3917 bool is_add)
3918{
3919 struct cftype *cft, *cft_end = NULL;
3920 int ret = 0;
3921
3922 lockdep_assert_held(&cgroup_mutex);
3923
3924restart:
3925 for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
3926
3927 if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
3928 continue;
3929 if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
3930 continue;
3931 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
3932 continue;
3933 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
3934 continue;
3935 if ((cft->flags & CFTYPE_DEBUG) && !cgroup_debug)
3936 continue;
3937 if (is_add) {
3938 ret = cgroup_add_file(css, cgrp, cft);
3939 if (ret) {
3940 pr_warn("%s: failed to add %s, err=%d\n",
3941 __func__, cft->name, ret);
3942 cft_end = cft;
3943 is_add = false;
3944 goto restart;
3945 }
3946 } else {
3947 cgroup_rm_file(cgrp, cft);
3948 }
3949 }
3950 return ret;
3951}
3952
3953static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
3954{
3955 struct cgroup_subsys *ss = cfts[0].ss;
3956 struct cgroup *root = &ss->root->cgrp;
3957 struct cgroup_subsys_state *css;
3958 int ret = 0;
3959
3960 lockdep_assert_held(&cgroup_mutex);
3961
3962
3963 css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
3964 struct cgroup *cgrp = css->cgroup;
3965
3966 if (!(css->flags & CSS_VISIBLE))
3967 continue;
3968
3969 ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
3970 if (ret)
3971 break;
3972 }
3973
3974 if (is_add && !ret)
3975 kernfs_activate(root->kn);
3976 return ret;
3977}
3978
3979static void cgroup_exit_cftypes(struct cftype *cfts)
3980{
3981 struct cftype *cft;
3982
3983 for (cft = cfts; cft->name[0] != '\0'; cft++) {
3984
3985 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
3986 kfree(cft->kf_ops);
3987 cft->kf_ops = NULL;
3988 cft->ss = NULL;
3989
3990
3991 cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
3992 }
3993}
3994
3995static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3996{
3997 struct cftype *cft;
3998
3999 for (cft = cfts; cft->name[0] != '\0'; cft++) {
4000 struct kernfs_ops *kf_ops;
4001
4002 WARN_ON(cft->ss || cft->kf_ops);
4003
4004 if (cft->seq_start)
4005 kf_ops = &cgroup_kf_ops;
4006 else
4007 kf_ops = &cgroup_kf_single_ops;
4008
4009
4010
4011
4012
4013 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
4014 kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
4015 if (!kf_ops) {
4016 cgroup_exit_cftypes(cfts);
4017 return -ENOMEM;
4018 }
4019 kf_ops->atomic_write_len = cft->max_write_len;
4020 }
4021
4022 cft->kf_ops = kf_ops;
4023 cft->ss = ss;
4024 }
4025
4026 return 0;
4027}
4028
4029static int cgroup_rm_cftypes_locked(struct cftype *cfts)
4030{
4031 lockdep_assert_held(&cgroup_mutex);
4032
4033 if (!cfts || !cfts[0].ss)
4034 return -ENOENT;
4035
4036 list_del(&cfts->node);
4037 cgroup_apply_cftypes(cfts, false);
4038 cgroup_exit_cftypes(cfts);
4039 return 0;
4040}
4041
/**
 * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Unregister @cfts.  Files described by @cfts are removed from all
 * existing cgroups and all future cgroups won't have them either.  This
 * function can be called anytime whether @cfts' subsys is attached or not.
 *
 * Returns 0 on successful unregistration, -ENOENT if @cfts is not
 * registered.
 */
4053int cgroup_rm_cftypes(struct cftype *cfts)
4054{
4055 int ret;
4056
4057 mutex_lock(&cgroup_mutex);
4058 ret = cgroup_rm_cftypes_locked(cfts);
4059 mutex_unlock(&cgroup_mutex);
4060 return ret;
4061}
4062
/**
 * cgroup_add_cftypes - add an array of cftypes to a subsystem
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Register @cfts to @ss.  Files described by @cfts are created for all
 * existing cgroups to which @ss is attached and all future cgroups will
 * have them too.  This function can be called anytime whether @ss is
 * attached or not.
 *
 * Returns 0 on successful registration, -errno on failure.  Note that this
 * function currently returns 0 as long as @cfts registration is successful
 * even if some file creation attempts on existing cgroups fail.
 */
4077static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4078{
4079 int ret;
4080
4081 if (!cgroup_ssid_enabled(ss->id))
4082 return 0;
4083
4084 if (!cfts || cfts[0].name[0] == '\0')
4085 return 0;
4086
4087 ret = cgroup_init_cftypes(ss, cfts);
4088 if (ret)
4089 return ret;
4090
4091 mutex_lock(&cgroup_mutex);
4092
4093 list_add_tail(&cfts->node, &ss->cfts);
4094 ret = cgroup_apply_cftypes(cfts, true);
4095 if (ret)
4096 cgroup_rm_cftypes_locked(cfts);
4097
4098 mutex_unlock(&cgroup_mutex);
4099 return ret;
4100}
4101
/**
 * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Similar to cgroup_add_cftypes() but the added files are only used for
 * the default hierarchy.
 */
4110int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4111{
4112 struct cftype *cft;
4113
4114 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
4115 cft->flags |= __CFTYPE_ONLY_ON_DFL;
4116 return cgroup_add_cftypes(ss, cfts);
4117}
4118
/**
 * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Similar to cgroup_add_cftypes() but the added files are only used for
 * the legacy hierarchies.
 */
4127int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4128{
4129 struct cftype *cft;
4130
4131 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
4132 cft->flags |= __CFTYPE_NOT_ON_DFL;
4133 return cgroup_add_cftypes(ss, cfts);
4134}
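
/*
 * Illustrative registration by a controller (a sketch; the "my_" names
 * are placeholders, not kernel symbols).  The array is terminated by a
 * zero-length name, as with cgroup_base_files below:
 *
 *	static struct cftype my_files[] = {
 *		{
 *			.name = "my.value",
 *			.seq_show = my_value_show,
 *			.write = my_value_write,
 *		},
 *		{ }
 *	};
 *
 *	cgroup_add_dfl_cftypes(&my_cgrp_subsys, my_files);
 */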
4135
/**
 * cgroup_file_notify - generate a file modified event for a cgroup_file
 * @cfile: target cgroup_file
 *
 * @cfile must have been obtained by setting cftype->file_offset.
 */
4142void cgroup_file_notify(struct cgroup_file *cfile)
4143{
4144 unsigned long flags;
4145
4146 spin_lock_irqsave(&cgroup_file_kn_lock, flags);
4147 if (cfile->kn) {
4148 unsigned long last = cfile->notified_at;
4149 unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
4150
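
/*
 * Rate-limit the notifications: if the previous one fired less
 * than CGROUP_FILE_NOTIFY_MIN_INTV (roughly 10ms) ago, push this
 * one to @next via the notify timer instead of calling
 * kernfs_notify() again right away.
 */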
4151 if (time_in_range(jiffies, last, next)) {
4152 timer_reduce(&cfile->notify_timer, next);
4153 } else {
4154 kernfs_notify(cfile->kn);
4155 cfile->notified_at = jiffies;
4156 }
4157 }
4158 spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
4159}
4160
/**
 * css_next_child - find the next child of a given css
 * @pos: the current position (%NULL to initiate traversal)
 * @parent: css whose children to walk
 *
 * This function returns the next child of @parent and should be called
 * under either cgroup_mutex or RCU read lock.  The only requirement is
 * that @parent and @pos are accessible.  The next sibling is guaranteed to
 * be returned regardless of their states.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is
 * released.  A css which hasn't finished ->css_online() or already
 * finished ->css_offline() may show up during traversal.  It's each
 * subsystem's responsibility to synchronize against on/offlining.
 */
4178struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
4179 struct cgroup_subsys_state *parent)
4180{
4181 struct cgroup_subsys_state *next;
4182
4183 cgroup_assert_mutex_or_rcu_locked();
4184
/*
 * @pos could already have been unlinked from the sibling list.
 * Once a cgroup is removed, its ->sibling.next is no longer
 * updated when its next sibling changes.  CSS_RELEASED is set when
 * @pos is taken off list, in which case its next pointer no
 * longer points to the expected next sibling.
 *
 * Siblings are linked in ascending ->serial_nr order, so when
 * CSS_RELEASED is set, walk @parent's children and pick the first
 * live css whose serial number is higher than @pos's - that is
 * the css which would have followed @pos had it stayed on the
 * list.
 */
4205 if (!pos) {
4206 next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
4207 } else if (likely(!(pos->flags & CSS_RELEASED))) {
4208 next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
4209 } else {
4210 list_for_each_entry_rcu(next, &parent->children, sibling)
4211 if (next->serial_nr > pos->serial_nr)
4212 break;
4213 }
4214
/*
 * @next, if not pointing to the head, can be dereferenced and is
 * the next sibling.
 */
4219 if (&next->sibling != &parent->children)
4220 return next;
4221 return NULL;
4222}
4223
/**
 * css_next_descendant_pre - find the next descendant for pre-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * To be used by css_for_each_descendant_pre().  Find the next descendant
 * to visit for pre-order traversal of @root's descendants.  @root is
 * included in the iteration and the first node to be visited.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.  This function will return the correct next descendant as long
 * as both @pos and @root are accessible and @pos is a descendant of @root.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is
 * released.  A css which hasn't finished ->css_online() or already
 * finished ->css_offline() may show up during traversal.  It's each
 * subsystem's responsibility to synchronize against on/offlining.
 */
4245struct cgroup_subsys_state *
4246css_next_descendant_pre(struct cgroup_subsys_state *pos,
4247 struct cgroup_subsys_state *root)
4248{
4249 struct cgroup_subsys_state *next;
4250
4251 cgroup_assert_mutex_or_rcu_locked();
4252
4253
4254 if (!pos)
4255 return root;
4256
4257
4258 next = css_next_child(NULL, pos);
4259 if (next)
4260 return next;
4261
4262
4263 while (pos != root) {
4264 next = css_next_child(pos, pos->parent);
4265 if (next)
4266 return next;
4267 pos = pos->parent;
4268 }
4269
4270 return NULL;
4271}
4272EXPORT_SYMBOL_GPL(css_next_descendant_pre);
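
/*
 * Illustrative walk (sketch): css_for_each_descendant_pre() from
 * include/linux/cgroup.h wraps the function above; "my_visit" is a
 * placeholder:
 *
 *	struct cgroup_subsys_state *pos;
 *
 *	rcu_read_lock();
 *	css_for_each_descendant_pre(pos, root_css)
 *		my_visit(pos);
 *	rcu_read_unlock();
 */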
4273
/**
 * css_rightmost_descendant - return the rightmost descendant of a css
 * @pos: css of interest
 *
 * Return the rightmost descendant of @pos.  If there's no descendant, @pos
 * is returned.  This can be used during pre-order traversal to skip a
 * subtree of @pos.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.  This function will return the correct rightmost descendant as
 * long as @pos is accessible.
 */
4287struct cgroup_subsys_state *
4288css_rightmost_descendant(struct cgroup_subsys_state *pos)
4289{
4290 struct cgroup_subsys_state *last, *tmp;
4291
4292 cgroup_assert_mutex_or_rcu_locked();
4293
4294 do {
4295 last = pos;
4296
4297 pos = NULL;
4298 css_for_each_child(tmp, last)
4299 pos = tmp;
4300 } while (pos);
4301
4302 return last;
4303}
4304
4305static struct cgroup_subsys_state *
4306css_leftmost_descendant(struct cgroup_subsys_state *pos)
4307{
4308 struct cgroup_subsys_state *last;
4309
4310 do {
4311 last = pos;
4312 pos = css_next_child(NULL, pos);
4313 } while (pos);
4314
4315 return last;
4316}
4317
/**
 * css_next_descendant_post - find the next descendant for post-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * To be used by css_for_each_descendant_post().  Find the next descendant
 * to visit for post-order traversal of @root's descendants.  @root is
 * included in the iteration and the last node to be visited.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.  This function will return the correct next descendant as long
 * as both @pos and @root are accessible and @pos is a descendant of @root.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is
 * released.  A css which hasn't finished ->css_online() or already
 * finished ->css_offline() may show up during traversal.  It's each
 * subsystem's responsibility to synchronize against on/offlining.
 */
4340struct cgroup_subsys_state *
4341css_next_descendant_post(struct cgroup_subsys_state *pos,
4342 struct cgroup_subsys_state *root)
4343{
4344 struct cgroup_subsys_state *next;
4345
4346 cgroup_assert_mutex_or_rcu_locked();
4347
4348
4349 if (!pos)
4350 return css_leftmost_descendant(root);
4351
4352
4353 if (pos == root)
4354 return NULL;
4355
4356
4357 next = css_next_child(pos, pos->parent);
4358 if (next)
4359 return css_leftmost_descendant(next);
4360
4361
4362 return pos->parent;
4363}
4364
/**
 * css_has_online_children - does a css have online children
 * @css: the target css
 *
 * Returns %true if @css has any online children; otherwise, %false.  This
 * function can be called from any context but the caller is responsible
 * for synchronizing against on/offlining as necessary.
 */
4373bool css_has_online_children(struct cgroup_subsys_state *css)
4374{
4375 struct cgroup_subsys_state *child;
4376 bool ret = false;
4377
4378 rcu_read_lock();
4379 css_for_each_child(child, css) {
4380 if (child->flags & CSS_ONLINE) {
4381 ret = true;
4382 break;
4383 }
4384 }
4385 rcu_read_unlock();
4386 return ret;
4387}
4388
4389static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
4390{
4391 struct list_head *l;
4392 struct cgrp_cset_link *link;
4393 struct css_set *cset;
4394
4395 lockdep_assert_held(&css_set_lock);
4396
4397
4398 if (it->tcset_pos) {
4399 l = it->tcset_pos->next;
4400
4401 if (l != it->tcset_head) {
4402 it->tcset_pos = l;
4403 return container_of(l, struct css_set,
4404 threaded_csets_node);
4405 }
4406
4407 it->tcset_pos = NULL;
4408 }
4409
4410
4411 l = it->cset_pos;
4412 l = l->next;
4413 if (l == it->cset_head) {
4414 it->cset_pos = NULL;
4415 return NULL;
4416 }
4417
4418 if (it->ss) {
4419 cset = container_of(l, struct css_set, e_cset_node[it->ss->id]);
4420 } else {
4421 link = list_entry(l, struct cgrp_cset_link, cset_link);
4422 cset = link->cset;
4423 }
4424
4425 it->cset_pos = l;
4426
4427
4428 if (it->flags & CSS_TASK_ITER_THREADED) {
4429 if (it->cur_dcset)
4430 put_css_set_locked(it->cur_dcset);
4431 it->cur_dcset = cset;
4432 get_css_set(cset);
4433
4434 it->tcset_head = &cset->threaded_csets;
4435 it->tcset_pos = &cset->threaded_csets;
4436 }
4437
4438 return cset;
4439}
4440
/**
 * css_task_iter_advance_css_set - advance a task iterator to the next css_set
 * @it: the iterator to advance
 *
 * Advance @it to the next css_set to walk.
 */
4447static void css_task_iter_advance_css_set(struct css_task_iter *it)
4448{
4449 struct css_set *cset;
4450
4451 lockdep_assert_held(&css_set_lock);
4452
4453
4454 do {
4455 cset = css_task_iter_next_css_set(it);
4456 if (!cset) {
4457 it->task_pos = NULL;
4458 return;
4459 }
4460 } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
4461
4462 if (!list_empty(&cset->tasks))
4463 it->task_pos = cset->tasks.next;
4464 else if (!list_empty(&cset->mg_tasks))
4465 it->task_pos = cset->mg_tasks.next;
4466 else
4467 it->task_pos = cset->dying_tasks.next;
4468
4469 it->tasks_head = &cset->tasks;
4470 it->mg_tasks_head = &cset->mg_tasks;
4471 it->dying_tasks_head = &cset->dying_tasks;
4472
/*
 * We don't keep css_sets locked across iteration steps and thus
 * need to take steps to ensure that iteration can be resumed after
 * the lock is re-acquired.  Iteration is performed at two levels -
 * css_sets and tasks in them.
 *
 * Once created, a css_set never leaves its cgroup lists, so a
 * pinned css_set is guaranteed to stay put and we can resume
 * iteration afterwards.
 *
 * Tasks may leave @cset across iteration steps.  This is resolved
 * by registering each iterator with the css_set currently being
 * walked and making css_set_move_task() advance iterators whose
 * next task is leaving.
 */
4488 if (it->cur_cset) {
4489 list_del(&it->iters_node);
4490 put_css_set_locked(it->cur_cset);
4491 }
4492 get_css_set(cset);
4493 it->cur_cset = cset;
4494 list_add(&it->iters_node, &cset->task_iters);
4495}
4496
4497static void css_task_iter_skip(struct css_task_iter *it,
4498 struct task_struct *task)
4499{
4500 lockdep_assert_held(&css_set_lock);
4501
4502 if (it->task_pos == &task->cg_list) {
4503 it->task_pos = it->task_pos->next;
4504 it->flags |= CSS_TASK_ITER_SKIPPED;
4505 }
4506}
4507
4508static void css_task_iter_advance(struct css_task_iter *it)
4509{
4510 struct task_struct *task;
4511
4512 lockdep_assert_held(&css_set_lock);
4513repeat:
4514 if (it->task_pos) {
/*
 * Advance iterator to find next entry.  @cset->tasks is
 * consumed first, then ->mg_tasks and then ->dying_tasks;
 * when all are consumed, we move onto the next cset.
 */
4520 if (it->flags & CSS_TASK_ITER_SKIPPED)
4521 it->flags &= ~CSS_TASK_ITER_SKIPPED;
4522 else
4523 it->task_pos = it->task_pos->next;
4524
4525 if (it->task_pos == it->tasks_head)
4526 it->task_pos = it->mg_tasks_head->next;
4527 if (it->task_pos == it->mg_tasks_head)
4528 it->task_pos = it->dying_tasks_head->next;
4529 if (it->task_pos == it->dying_tasks_head)
4530 css_task_iter_advance_css_set(it);
4531 } else {
4532
4533 css_task_iter_advance_css_set(it);
4534 }
4535
4536 if (!it->task_pos)
4537 return;
4538
4539 task = list_entry(it->task_pos, struct task_struct, cg_list);
4540
4541 if (it->flags & CSS_TASK_ITER_PROCS) {
4542
4543 if (!thread_group_leader(task))
4544 goto repeat;
4545
4546
4547 if (!atomic_read(&task->signal->live))
4548 goto repeat;
4549 } else {
4550
4551 if (task->flags & PF_EXITING)
4552 goto repeat;
4553 }
4554}
4555
/**
 * css_task_iter_start - initiate task iteration
 * @css: the css to walk tasks of
 * @flags: CSS_TASK_ITER_* flags
 * @it: the task iterator to use
 *
 * Initiate iteration through the tasks of @css.  The caller can call
 * css_task_iter_next() to walk through the tasks until the function
 * returns NULL.  On completion of iteration, css_task_iter_end() must be
 * called.
 */
4567void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
4568 struct css_task_iter *it)
4569{
4570
4571 WARN_ON_ONCE(!use_task_css_set_links);
4572
4573 memset(it, 0, sizeof(*it));
4574
4575 spin_lock_irq(&css_set_lock);
4576
4577 it->ss = css->ss;
4578 it->flags = flags;
4579
4580 if (it->ss)
4581 it->cset_pos = &css->cgroup->e_csets[css->ss->id];
4582 else
4583 it->cset_pos = &css->cgroup->cset_links;
4584
4585 it->cset_head = it->cset_pos;
4586
4587 css_task_iter_advance(it);
4588
4589 spin_unlock_irq(&css_set_lock);
4590}
4591
/**
 * css_task_iter_next - return the next task for the iterator
 * @it: the task iterator being iterated
 *
 * The "next" function for task iteration.  @it should have been
 * initialized via css_task_iter_start().  Returns NULL when the iteration
 * reaches the end.
 */
4600struct task_struct *css_task_iter_next(struct css_task_iter *it)
4601{
4602 if (it->cur_task) {
4603 put_task_struct(it->cur_task);
4604 it->cur_task = NULL;
4605 }
4606
4607 spin_lock_irq(&css_set_lock);
4608
4609
4610 if (it->flags & CSS_TASK_ITER_SKIPPED)
4611 css_task_iter_advance(it);
4612
4613 if (it->task_pos) {
4614 it->cur_task = list_entry(it->task_pos, struct task_struct,
4615 cg_list);
4616 get_task_struct(it->cur_task);
4617 css_task_iter_advance(it);
4618 }
4619
4620 spin_unlock_irq(&css_set_lock);
4621
4622 return it->cur_task;
4623}
4624
/**
 * css_task_iter_end - finish task iteration
 * @it: the task iterator to finish
 *
 * Finish task iteration started by css_task_iter_start().
 */
4631void css_task_iter_end(struct css_task_iter *it)
4632{
4633 if (it->cur_cset) {
4634 spin_lock_irq(&css_set_lock);
4635 list_del(&it->iters_node);
4636 put_css_set_locked(it->cur_cset);
4637 spin_unlock_irq(&css_set_lock);
4638 }
4639
4640 if (it->cur_dcset)
4641 put_css_set(it->cur_dcset);
4642
4643 if (it->cur_task)
4644 put_task_struct(it->cur_task);
4645}
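
/*
 * Illustrative usage of the iterator above (sketch; "my_visit" is a
 * placeholder):
 *
 *	struct css_task_iter it;
 *	struct task_struct *task;
 *
 *	css_task_iter_start(css, 0, &it);
 *	while ((task = css_task_iter_next(&it)))
 *		my_visit(task);
 *	css_task_iter_end(&it);
 */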
4646
4647static void cgroup_procs_release(struct kernfs_open_file *of)
4648{
4649 if (of->priv) {
4650 css_task_iter_end(of->priv);
4651 kfree(of->priv);
4652 }
4653}
4654
4655static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
4656{
4657 struct kernfs_open_file *of = s->private;
4658 struct css_task_iter *it = of->priv;
4659
4660 return css_task_iter_next(it);
4661}
4662
4663static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
4664 unsigned int iter_flags)
4665{
4666 struct kernfs_open_file *of = s->private;
4667 struct cgroup *cgrp = seq_css(s)->cgroup;
4668 struct css_task_iter *it = of->priv;
4669
/*
 * When a seq_file is seeked, it's always traversed sequentially
 * from position 0, so we can simply keep iterating on !0 *pos.
 */
4674 if (!it) {
4675 if (WARN_ON_ONCE((*pos)++))
4676 return ERR_PTR(-EINVAL);
4677
4678 it = kzalloc(sizeof(*it), GFP_KERNEL);
4679 if (!it)
4680 return ERR_PTR(-ENOMEM);
4681 of->priv = it;
4682 css_task_iter_start(&cgrp->self, iter_flags, it);
4683 } else if (!(*pos)++) {
4684 css_task_iter_end(it);
4685 css_task_iter_start(&cgrp->self, iter_flags, it);
4686 }
4687
4688 return cgroup_procs_next(s, NULL, NULL);
4689}
4690
4691static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
4692{
4693 struct cgroup *cgrp = seq_css(s)->cgroup;
4694
/*
 * All processes of a threaded subtree belong to the domain cgroup
 * of the subtree.  Only threads can be distributed across the
 * subtree.  Reject reads on cgroup.procs in the subtree proper.
 * They're always empty anyway.
 */
4701 if (cgroup_is_threaded(cgrp))
4702 return ERR_PTR(-EOPNOTSUPP);
4703
4704 return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS |
4705 CSS_TASK_ITER_THREADED);
4706}
4707
4708static int cgroup_procs_show(struct seq_file *s, void *v)
4709{
4710 seq_printf(s, "%d\n", task_pid_vnr(v));
4711 return 0;
4712}
4713
4714static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
4715 struct cgroup *dst_cgrp,
4716 struct super_block *sb)
4717{
4718 struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
4719 struct cgroup *com_cgrp = src_cgrp;
4720 struct inode *inode;
4721 int ret;
4722
4723 lockdep_assert_held(&cgroup_mutex);
4724
4725
4726 while (!cgroup_is_descendant(dst_cgrp, com_cgrp))
4727 com_cgrp = cgroup_parent(com_cgrp);
4728
4729
4730 inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn);
4731 if (!inode)
4732 return -ENOMEM;
4733
4734 ret = inode_permission(inode, MAY_WRITE);
4735 iput(inode);
4736 if (ret)
4737 return ret;
4738
/*
 * If namespaces are delegation boundaries, %current must be able
 * to see both source and destination cgroups from its namespace.
 */
4743 if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) &&
4744 (!cgroup_is_descendant(src_cgrp, ns->root_cset->dfl_cgrp) ||
4745 !cgroup_is_descendant(dst_cgrp, ns->root_cset->dfl_cgrp)))
4746 return -ENOENT;
4747
4748 return 0;
4749}
4750
4751static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
4752 char *buf, size_t nbytes, loff_t off)
4753{
4754 struct cgroup *src_cgrp, *dst_cgrp;
4755 struct task_struct *task;
4756 ssize_t ret;
4757
4758 dst_cgrp = cgroup_kn_lock_live(of->kn, false);
4759 if (!dst_cgrp)
4760 return -ENODEV;
4761
4762 task = cgroup_procs_write_start(buf, true);
4763 ret = PTR_ERR_OR_ZERO(task);
4764 if (ret)
4765 goto out_unlock;
4766
4767
4768 spin_lock_irq(&css_set_lock);
4769 src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
4770 spin_unlock_irq(&css_set_lock);
4771
4772 ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
4773 of->file->f_path.dentry->d_sb);
4774 if (ret)
4775 goto out_finish;
4776
4777 ret = cgroup_attach_task(dst_cgrp, task, true);
4778
4779out_finish:
4780 cgroup_procs_write_finish(task);
4781out_unlock:
4782 cgroup_kn_unlock(of->kn);
4783
4784 return ret ?: nbytes;
4785}
4786
4787static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
4788{
4789 return __cgroup_procs_start(s, pos, 0);
4790}
4791
4792static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
4793 char *buf, size_t nbytes, loff_t off)
4794{
4795 struct cgroup *src_cgrp, *dst_cgrp;
4796 struct task_struct *task;
4797 ssize_t ret;
4798
4799 buf = strstrip(buf);
4800
4801 dst_cgrp = cgroup_kn_lock_live(of->kn, false);
4802 if (!dst_cgrp)
4803 return -ENODEV;
4804
4805 task = cgroup_procs_write_start(buf, false);
4806 ret = PTR_ERR_OR_ZERO(task);
4807 if (ret)
4808 goto out_unlock;
4809
4810
4811 spin_lock_irq(&css_set_lock);
4812 src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
4813 spin_unlock_irq(&css_set_lock);
4814
4815
4816 ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
4817 of->file->f_path.dentry->d_sb);
4818 if (ret)
4819 goto out_finish;
4820
4821
4822 ret = -EOPNOTSUPP;
4823 if (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp)
4824 goto out_finish;
4825
4826 ret = cgroup_attach_task(dst_cgrp, task, false);
4827
4828out_finish:
4829 cgroup_procs_write_finish(task);
4830out_unlock:
4831 cgroup_kn_unlock(of->kn);
4832
4833 return ret ?: nbytes;
4834}
4835
4836
4837static struct cftype cgroup_base_files[] = {
4838 {
4839 .name = "cgroup.type",
4840 .flags = CFTYPE_NOT_ON_ROOT,
4841 .seq_show = cgroup_type_show,
4842 .write = cgroup_type_write,
4843 },
4844 {
4845 .name = "cgroup.procs",
4846 .flags = CFTYPE_NS_DELEGATABLE,
4847 .file_offset = offsetof(struct cgroup, procs_file),
4848 .release = cgroup_procs_release,
4849 .seq_start = cgroup_procs_start,
4850 .seq_next = cgroup_procs_next,
4851 .seq_show = cgroup_procs_show,
4852 .write = cgroup_procs_write,
4853 },
4854 {
4855 .name = "cgroup.threads",
4856 .flags = CFTYPE_NS_DELEGATABLE,
4857 .release = cgroup_procs_release,
4858 .seq_start = cgroup_threads_start,
4859 .seq_next = cgroup_procs_next,
4860 .seq_show = cgroup_procs_show,
4861 .write = cgroup_threads_write,
4862 },
4863 {
4864 .name = "cgroup.controllers",
4865 .seq_show = cgroup_controllers_show,
4866 },
4867 {
4868 .name = "cgroup.subtree_control",
4869 .flags = CFTYPE_NS_DELEGATABLE,
4870 .seq_show = cgroup_subtree_control_show,
4871 .write = cgroup_subtree_control_write,
4872 },
4873 {
4874 .name = "cgroup.events",
4875 .flags = CFTYPE_NOT_ON_ROOT,
4876 .file_offset = offsetof(struct cgroup, events_file),
4877 .seq_show = cgroup_events_show,
4878 },
4879 {
4880 .name = "cgroup.max.descendants",
4881 .seq_show = cgroup_max_descendants_show,
4882 .write = cgroup_max_descendants_write,
4883 },
4884 {
4885 .name = "cgroup.max.depth",
4886 .seq_show = cgroup_max_depth_show,
4887 .write = cgroup_max_depth_write,
4888 },
4889 {
4890 .name = "cgroup.stat",
4891 .seq_show = cgroup_stat_show,
4892 },
4893 {
4894 .name = "cgroup.freeze",
4895 .flags = CFTYPE_NOT_ON_ROOT,
4896 .seq_show = cgroup_freeze_show,
4897 .write = cgroup_freeze_write,
4898 },
4899 {
4900 .name = "cpu.stat",
4901 .flags = CFTYPE_NOT_ON_ROOT,
4902 .seq_show = cpu_stat_show,
4903 },
4904#ifdef CONFIG_PSI
4905 {
4906 .name = "io.pressure",
4907 .seq_show = cgroup_io_pressure_show,
4908 .write = cgroup_io_pressure_write,
4909 .poll = cgroup_pressure_poll,
4910 .release = cgroup_pressure_release,
4911 },
4912 {
4913 .name = "memory.pressure",
4914 .seq_show = cgroup_memory_pressure_show,
4915 .write = cgroup_memory_pressure_write,
4916 .poll = cgroup_pressure_poll,
4917 .release = cgroup_pressure_release,
4918 },
4919 {
4920 .name = "cpu.pressure",
4921 .seq_show = cgroup_cpu_pressure_show,
4922 .write = cgroup_cpu_pressure_write,
4923 .poll = cgroup_pressure_poll,
4924 .release = cgroup_pressure_release,
4925 },
4926#endif
4927 { }
4928};
4929
/*
 * css destruction is a four-stage process.
 *
 * 1. Destruction starts.  Killing of the percpu_ref is initiated.
 *    Implemented in kill_css().
 *
 * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
 *    and thus css_tryget_online() is guaranteed to be failing, the css can
 *    be offlined by invoking offline_css().  After offlining, the base ref
 *    is put.  Implemented in css_killed_work_fn().
 *
 * 3. When the percpu_ref reaches zero, the only possible remaining
 *    accessors are inside RCU read sections.  css_release() schedules the
 *    RCU callback.
 *
 * 4. After the grace period, the css can be freed.  Implemented in
 *    css_free_rwork_fn().
 *
 * It is actually hairier because both step 2 and 4 require process context
 * and thus involve punting to css->destroy_work adding two additional
 * steps to the already complex sequence.
 */
4952static void css_free_rwork_fn(struct work_struct *work)
4953{
4954 struct cgroup_subsys_state *css = container_of(to_rcu_work(work),
4955 struct cgroup_subsys_state, destroy_rwork);
4956 struct cgroup_subsys *ss = css->ss;
4957 struct cgroup *cgrp = css->cgroup;
4958
4959 percpu_ref_exit(&css->refcnt);
4960
4961 if (ss) {
4962
4963 struct cgroup_subsys_state *parent = css->parent;
4964 int id = css->id;
4965
4966 ss->css_free(css);
4967 cgroup_idr_remove(&ss->css_idr, id);
4968 cgroup_put(cgrp);
4969
4970 if (parent)
4971 css_put(parent);
4972 } else {
4973
4974 atomic_dec(&cgrp->root->nr_cgrps);
4975 cgroup1_pidlist_destroy_all(cgrp);
4976 cancel_work_sync(&cgrp->release_agent_work);
4977
4978 if (cgroup_parent(cgrp)) {
/*
 * We get a ref to the parent, and put the ref when
 * this cgroup is being freed, so it's guaranteed
 * that the parent won't be destroyed before its
 * children.
 */
4985 cgroup_put(cgroup_parent(cgrp));
4986 kernfs_put(cgrp->kn);
4987 psi_cgroup_free(cgrp);
4988 if (cgroup_on_dfl(cgrp))
4989 cgroup_rstat_exit(cgrp);
4990 kfree(cgrp);
4991 } else {
/*
 * This is root cgroup's refcnt reaching zero,
 * which indicates that the root should be
 * released.
 */
4997 cgroup_destroy_root(cgrp->root);
4998 }
4999 }
5000}
5001
5002static void css_release_work_fn(struct work_struct *work)
5003{
5004 struct cgroup_subsys_state *css =
5005 container_of(work, struct cgroup_subsys_state, destroy_work);
5006 struct cgroup_subsys *ss = css->ss;
5007 struct cgroup *cgrp = css->cgroup;
5008
5009 mutex_lock(&cgroup_mutex);
5010
5011 css->flags |= CSS_RELEASED;
5012 list_del_rcu(&css->sibling);
5013
5014 if (ss) {
5015
5016 if (!list_empty(&css->rstat_css_node)) {
5017 cgroup_rstat_flush(cgrp);
5018 list_del_rcu(&css->rstat_css_node);
5019 }
5020
5021 cgroup_idr_replace(&ss->css_idr, NULL, css->id);
5022 if (ss->css_released)
5023 ss->css_released(css);
5024 } else {
5025 struct cgroup *tcgrp;
5026
5027
5028 TRACE_CGROUP_PATH(release, cgrp);
5029
5030 if (cgroup_on_dfl(cgrp))
5031 cgroup_rstat_flush(cgrp);
5032
5033 spin_lock_irq(&css_set_lock);
5034 for (tcgrp = cgroup_parent(cgrp); tcgrp;
5035 tcgrp = cgroup_parent(tcgrp))
5036 tcgrp->nr_dying_descendants--;
5037 spin_unlock_irq(&css_set_lock);
5038
5039 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
5040 cgrp->id = -1;
5041
/*
 * There are two control paths which try to determine
 * cgroup from dentry without going through kernfs -
 * cgroupstats_build() and css_tryget_online_from_dir().
 * Those are supported by RCU protecting clearing of
 * cgrp->kn->priv backpointer.
 */
5049 if (cgrp->kn)
5050 RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
5051 NULL);
5052 }
5053
5054 mutex_unlock(&cgroup_mutex);
5055
5056 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
5057 queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
5058}
5059
5060static void css_release(struct percpu_ref *ref)
5061{
5062 struct cgroup_subsys_state *css =
5063 container_of(ref, struct cgroup_subsys_state, refcnt);
5064
5065 INIT_WORK(&css->destroy_work, css_release_work_fn);
5066 queue_work(cgroup_destroy_wq, &css->destroy_work);
5067}
5068
5069static void init_and_link_css(struct cgroup_subsys_state *css,
5070 struct cgroup_subsys *ss, struct cgroup *cgrp)
5071{
5072 lockdep_assert_held(&cgroup_mutex);
5073
5074 cgroup_get_live(cgrp);
5075
5076 memset(css, 0, sizeof(*css));
5077 css->cgroup = cgrp;
5078 css->ss = ss;
5079 css->id = -1;
5080 INIT_LIST_HEAD(&css->sibling);
5081 INIT_LIST_HEAD(&css->children);
5082 INIT_LIST_HEAD(&css->rstat_css_node);
5083 css->serial_nr = css_serial_nr_next++;
5084 atomic_set(&css->online_cnt, 0);
5085
5086 if (cgroup_parent(cgrp)) {
5087 css->parent = cgroup_css(cgroup_parent(cgrp), ss);
5088 css_get(css->parent);
5089 }
5090
5091 if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush)
5092 list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
5093
5094 BUG_ON(cgroup_css(cgrp, ss));
5095}
5096
5097
5098static int online_css(struct cgroup_subsys_state *css)
5099{
5100 struct cgroup_subsys *ss = css->ss;
5101 int ret = 0;
5102
5103 lockdep_assert_held(&cgroup_mutex);
5104
5105 if (ss->css_online)
5106 ret = ss->css_online(css);
5107 if (!ret) {
5108 css->flags |= CSS_ONLINE;
5109 rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
5110
5111 atomic_inc(&css->online_cnt);
5112 if (css->parent)
5113 atomic_inc(&css->parent->online_cnt);
5114 }
5115 return ret;
5116}
5117
5118
5119static void offline_css(struct cgroup_subsys_state *css)
5120{
5121 struct cgroup_subsys *ss = css->ss;
5122
5123 lockdep_assert_held(&cgroup_mutex);
5124
5125 if (!(css->flags & CSS_ONLINE))
5126 return;
5127
5128 if (ss->css_offline)
5129 ss->css_offline(css);
5130
5131 css->flags &= ~CSS_ONLINE;
5132 RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
5133
5134 wake_up_all(&css->cgroup->offline_waitq);
5135}
5136
/**
 * css_create - create a cgroup_subsys_state
 * @cgrp: the cgroup new css will be associated with
 * @ss: the subsys of new css
 *
 * Create a new css associated with @cgrp - @ss pair.  On success, the new
 * css is online and installed in @cgrp.  This function doesn't create the
 * interface files.  Returns the new css on success, ERR_PTR(-errno) on
 * failure.
 */
5146static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
5147 struct cgroup_subsys *ss)
5148{
5149 struct cgroup *parent = cgroup_parent(cgrp);
5150 struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
5151 struct cgroup_subsys_state *css;
5152 int err;
5153
5154 lockdep_assert_held(&cgroup_mutex);
5155
5156 css = ss->css_alloc(parent_css);
5157 if (!css)
5158 css = ERR_PTR(-ENOMEM);
5159 if (IS_ERR(css))
5160 return css;
5161
5162 init_and_link_css(css, ss, cgrp);
5163
5164 err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
5165 if (err)
5166 goto err_free_css;
5167
5168 err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
5169 if (err < 0)
5170 goto err_free_css;
5171 css->id = err;
5172
5173
5174 list_add_tail_rcu(&css->sibling, &parent_css->children);
5175 cgroup_idr_replace(&ss->css_idr, css, css->id);
5176
5177 err = online_css(css);
5178 if (err)
5179 goto err_list_del;
5180
5181 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
5182 cgroup_parent(parent)) {
5183 pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
5184 current->comm, current->pid, ss->name);
5185 if (!strcmp(ss->name, "memory"))
5186 pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n");
5187 ss->warned_broken_hierarchy = true;
5188 }
5189
5190 return css;
5191
5192err_list_del:
5193 list_del_rcu(&css->sibling);
5194err_free_css:
5195 list_del_rcu(&css->rstat_css_node);
5196 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
5197 queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
5198 return ERR_PTR(err);
5199}
5200
/*
 * The returned cgroup is fully initialized including its control mask, but
 * it isn't associated with its kernfs_node and doesn't have the control
 * mask applied.
 */
5206static struct cgroup *cgroup_create(struct cgroup *parent)
5207{
5208 struct cgroup_root *root = parent->root;
5209 struct cgroup *cgrp, *tcgrp;
5210 int level = parent->level + 1;
5211 int ret;
5212
5213
5214 cgrp = kzalloc(struct_size(cgrp, ancestor_ids, (level + 1)),
5215 GFP_KERNEL);
5216 if (!cgrp)
5217 return ERR_PTR(-ENOMEM);
5218
5219 ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
5220 if (ret)
5221 goto out_free_cgrp;
5222
5223 if (cgroup_on_dfl(parent)) {
5224 ret = cgroup_rstat_init(cgrp);
5225 if (ret)
5226 goto out_cancel_ref;
5227 }
5228
/*
 * Temporarily set the pointer to NULL, so idr_find() won't return
 * a half-baked cgroup.
 */
5233 cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
5234 if (cgrp->id < 0) {
5235 ret = -ENOMEM;
5236 goto out_stat_exit;
5237 }
5238
5239 init_cgroup_housekeeping(cgrp);
5240
5241 cgrp->self.parent = &parent->self;
5242 cgrp->root = root;
5243 cgrp->level = level;
5244
5245 ret = psi_cgroup_alloc(cgrp);
5246 if (ret)
5247 goto out_idr_free;
5248
5249 ret = cgroup_bpf_inherit(cgrp);
5250 if (ret)
5251 goto out_psi_free;
5252
/*
 * New cgroup inherits effective freeze counter, and
 * if the parent has to be frozen, the child has too.
 */
5257 cgrp->freezer.e_freeze = parent->freezer.e_freeze;
5258 if (cgrp->freezer.e_freeze) {
/*
 * Set the CGRP_FREEZE flag, so when a process will be
 * attached to the child cgroup, it will become frozen.
 * At this point the new cgroup is unpopulated, so we can
 * consider it frozen immediately.
 */
5265 set_bit(CGRP_FREEZE, &cgrp->flags);
5266 set_bit(CGRP_FROZEN, &cgrp->flags);
5267 }
5268
5269 spin_lock_irq(&css_set_lock);
5270 for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5271 cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
5272
5273 if (tcgrp != cgrp) {
5274 tcgrp->nr_descendants++;
5275
/*
 * If the new cgroup is frozen, all ancestor cgroups
 * get a new frozen descendant, but their state can't
 * change because of this.
 */
5281 if (cgrp->freezer.e_freeze)
5282 tcgrp->freezer.nr_frozen_descendants++;
5283 }
5284 }
5285 spin_unlock_irq(&css_set_lock);
5286
5287 if (notify_on_release(parent))
5288 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
5289
5290 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
5291 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
5292
5293 cgrp->self.serial_nr = css_serial_nr_next++;
5294
5295
5296 list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
5297 atomic_inc(&root->nr_cgrps);
5298 cgroup_get_live(parent);
5299
/*
 * @cgrp is now fully operational.  If something fails after this
 * point, it'll be released via the normal destruction path.
 */
5304 cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
5305
/*
 * On the default hierarchy, a child doesn't automatically inherit
 * subtree_control from the parent.  Each is configured manually.
 */
5310 if (!cgroup_on_dfl(cgrp))
5311 cgrp->subtree_control = cgroup_control(cgrp);
5312
5313 cgroup_propagate_control(cgrp);
5314
5315 return cgrp;
5316
5317out_psi_free:
5318 psi_cgroup_free(cgrp);
5319out_idr_free:
5320 cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
5321out_stat_exit:
5322 if (cgroup_on_dfl(parent))
5323 cgroup_rstat_exit(cgrp);
5324out_cancel_ref:
5325 percpu_ref_exit(&cgrp->self.refcnt);
5326out_free_cgrp:
5327 kfree(cgrp);
5328 return ERR_PTR(ret);
5329}
5330
5331static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
5332{
5333 struct cgroup *cgroup;
5334 int ret = false;
5335 int level = 1;
5336
5337 lockdep_assert_held(&cgroup_mutex);
5338
5339 for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) {
5340 if (cgroup->nr_descendants >= cgroup->max_descendants)
5341 goto fail;
5342
5343 if (level > cgroup->max_depth)
5344 goto fail;
5345
5346 level++;
5347 }
5348
5349 ret = true;
5350fail:
5351 return ret;
5352}
5353
5354int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
5355{
5356 struct cgroup *parent, *cgrp;
5357 struct kernfs_node *kn;
5358 int ret;
5359
5360
5361 if (strchr(name, '\n'))
5362 return -EINVAL;
5363
5364 parent = cgroup_kn_lock_live(parent_kn, false);
5365 if (!parent)
5366 return -ENODEV;
5367
5368 if (!cgroup_check_hierarchy_limits(parent)) {
5369 ret = -EAGAIN;
5370 goto out_unlock;
5371 }
5372
5373 cgrp = cgroup_create(parent);
5374 if (IS_ERR(cgrp)) {
5375 ret = PTR_ERR(cgrp);
5376 goto out_unlock;
5377 }
5378
5379
5380 kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
5381 if (IS_ERR(kn)) {
5382 ret = PTR_ERR(kn);
5383 goto out_destroy;
5384 }
5385 cgrp->kn = kn;
5386
/*
 * This extra ref will be put in css_free_rwork_fn() and guarantees
 * that @cgrp->kn is always accessible.
 */
5391 kernfs_get(kn);
5392
5393 ret = cgroup_kn_set_ugid(kn);
5394 if (ret)
5395 goto out_destroy;
5396
5397 ret = css_populate_dir(&cgrp->self);
5398 if (ret)
5399 goto out_destroy;
5400
5401 ret = cgroup_apply_control_enable(cgrp);
5402 if (ret)
5403 goto out_destroy;
5404
5405 TRACE_CGROUP_PATH(mkdir, cgrp);
5406
5407
5408 kernfs_activate(kn);
5409
5410 ret = 0;
5411 goto out_unlock;
5412
5413out_destroy:
5414 cgroup_destroy_locked(cgrp);
5415out_unlock:
5416 cgroup_kn_unlock(parent_kn);
5417 return ret;
5418}
5419
/*
 * This is called when the refcnt of a css is confirmed to be killed.
 * css_tryget_online() is now guaranteed to fail.  Tell the subsystem to
 * initiate destruction and put the css ref from kill_css().
 */
5425static void css_killed_work_fn(struct work_struct *work)
5426{
5427 struct cgroup_subsys_state *css =
5428 container_of(work, struct cgroup_subsys_state, destroy_work);
5429
5430 mutex_lock(&cgroup_mutex);
5431
5432 do {
5433 offline_css(css);
5434 css_put(css);
5435
5436 css = css->parent;
5437 } while (css && atomic_dec_and_test(&css->online_cnt));
5438
5439 mutex_unlock(&cgroup_mutex);
5440}
5441
5442
5443static void css_killed_ref_fn(struct percpu_ref *ref)
5444{
5445 struct cgroup_subsys_state *css =
5446 container_of(ref, struct cgroup_subsys_state, refcnt);
5447
5448 if (atomic_dec_and_test(&css->online_cnt)) {
5449 INIT_WORK(&css->destroy_work, css_killed_work_fn);
5450 queue_work(cgroup_destroy_wq, &css->destroy_work);
5451 }
5452}
5453
/**
 * kill_css - destroy a css
 * @css: css to destroy
 *
 * This function initiates destruction of @css by removing cgroup interface
 * files and putting its base reference.  ->css_offline() will be invoked
 * asynchronously once css_tryget_online() is guaranteed to fail and when
 * the reference count reaches zero, @css will be released.
 */
5463static void kill_css(struct cgroup_subsys_state *css)
5464{
5465 lockdep_assert_held(&cgroup_mutex);
5466
5467 if (css->flags & CSS_DYING)
5468 return;
5469
5470 css->flags |= CSS_DYING;
5471
/*
 * This must happen before css is disassociated with its cgroup.
 * See seq_css() for details.
 */
5476 css_clear_dir(css);
5477
/*
 * Killing would put the base ref, but we need to keep it alive
 * until after ->css_offline().
 */
5482 css_get(css);
5483
/*
 * cgroup core guarantees that, by the time ->css_offline() is
 * invoked, no new css reference will be given out via
 * css_tryget_online().  We can't simply call percpu_ref_kill() and
 * proceed to offlining css's because percpu_ref_kill() doesn't
 * guarantee that the ref is seen as killed on all CPUs on return.
 *
 * Use percpu_ref_kill_and_confirm() to get notifications as each
 * css is confirmed to be seen as killed on all CPUs.
 */
5494 percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
5495}
5496
/**
 * cgroup_destroy_locked - the first stage of cgroup destruction
 * @cgrp: cgroup to be destroyed
 *
 * css's make use of percpu refcnts whose killing latency shouldn't be
 * exposed to userland and are RCU protected.  Also, cgroup core needs to
 * guarantee that css_tryget_online() won't succeed by the time
 * ->css_offline() is invoked.  To satisfy all the requirements,
 * destruction is implemented in the following two steps.
 *
 * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
 *     userland visible parts and start killing the percpu refcnts of
 *     css's.  Set up so that the next stage will be kicked off once all
 *     the percpu refcnts are confirmed to be killed.
 *
 * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
 *     rest of destruction.  Once all cgroup references are gone, the
 *     cgroup is RCU-freed.
 *
 * This function implements s1.  After this step, @cgrp is gone as far as
 * the userland is concerned and a new cgroup with the same name may be
 * created.  As cgroup doesn't care about the names internally, this
 * doesn't cause any problem.
 */
5521static int cgroup_destroy_locked(struct cgroup *cgrp)
5522 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
5523{
5524 struct cgroup *tcgrp, *parent = cgroup_parent(cgrp);
5525 struct cgroup_subsys_state *css;
5526 struct cgrp_cset_link *link;
5527 int ssid;
5528
5529 lockdep_assert_held(&cgroup_mutex);
5530
/*
 * Only migration can raise populated from zero and we're already
 * holding cgroup_mutex.
 */
5535 if (cgroup_is_populated(cgrp))
5536 return -EBUSY;
5537
5538
5539
5540
5541
5542
5543 if (css_has_online_children(&cgrp->self))
5544 return -EBUSY;
5545
5546
5547
5548
5549
5550
5551
5552 cgrp->self.flags &= ~CSS_ONLINE;
5553
5554 spin_lock_irq(&css_set_lock);
5555 list_for_each_entry(link, &cgrp->cset_links, cset_link)
5556 link->cset->dead = true;
5557 spin_unlock_irq(&css_set_lock);
5558
5559
5560 for_each_css(css, ssid, cgrp)
5561 kill_css(css);
5562
5563
5564 css_clear_dir(&cgrp->self);
5565 kernfs_remove(cgrp->kn);
5566
5567 if (parent && cgroup_is_threaded(cgrp))
5568 parent->nr_threaded_children--;
5569
5570 spin_lock_irq(&css_set_lock);
5571 for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5572 tcgrp->nr_descendants--;
5573 tcgrp->nr_dying_descendants++;
5574
5575
5576
5577
5578 if (test_bit(CGRP_FROZEN, &cgrp->flags))
5579 tcgrp->freezer.nr_frozen_descendants--;
5580 }
5581 spin_unlock_irq(&css_set_lock);
5582
5583 cgroup1_check_for_release(parent);
5584
5585 cgroup_bpf_offline(cgrp);
5586
5587
5588 percpu_ref_kill(&cgrp->self.refcnt);
5589
5590 return 0;
5591};

int cgroup_rmdir(struct kernfs_node *kn)
{
	struct cgroup *cgrp;
	int ret = 0;

	cgrp = cgroup_kn_lock_live(kn, false);
	if (!cgrp)
		return 0;

	ret = cgroup_destroy_locked(cgrp);
	if (!ret)
		TRACE_CGROUP_PATH(rmdir, cgrp);

	cgroup_kn_unlock(kn);
	return ret;
}

static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
	.show_options = cgroup_show_options,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.show_path = cgroup_show_path,
};
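
/*
 * Editor's note -- illustrative, not part of the original source: kernfs
 * dispatches directory syscalls on a cgroup mount through these callbacks,
 * so a plain "mkdir /sys/fs/cgroup/foo" from userspace ends up in
 * cgroup_mkdir() above and "rmdir /sys/fs/cgroup/foo" in cgroup_rmdir().
 */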

static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
{
	struct cgroup_subsys_state *css;

	pr_debug("Initializing cgroup subsys %s\n", ss->name);

	mutex_lock(&cgroup_mutex);

	idr_init(&ss->css_idr);
	INIT_LIST_HEAD(&ss->cfts);

	/* Create the root cgroup state for this subsystem */
	ss->root = &cgrp_dfl_root;
	css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
	/* We don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);

	/*
	 * Root csses are never destroyed and we can't initialize
	 * percpu_ref during early init.  Disable refcnting.
	 */
	css->flags |= CSS_NO_REF;

	if (early) {
		/* allocation can't be done safely during early init */
		css->id = 1;
	} else {
		css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
		BUG_ON(css->id < 0);
	}

	/*
	 * Update the init_css_set to contain a subsys pointer to this
	 * state - since the subsystem is newly registered, all tasks and
	 * hence the init_css_set is in the subsystem's root cgroup.
	 */
	init_css_set.subsys[ss->id] = css;

	have_fork_callback |= (bool)ss->fork << ss->id;
	have_exit_callback |= (bool)ss->exit << ss->id;
	have_release_callback |= (bool)ss->release << ss->id;
	have_canfork_callback |= (bool)ss->can_fork << ss->id;

	/*
	 * At system boot, before all subsystems have been registered, no
	 * tasks have been forked, so we don't need to invoke fork
	 * callbacks here.
	 */
	BUG_ON(!list_empty(&init_task.tasks));

	BUG_ON(online_css(css));

	mutex_unlock(&cgroup_mutex);
}

/**
 * cgroup_init_early - cgroup initialization at system boot
 *
 * Initialize cgroups at system boot, and initialize any
 * subsystems that request early init.
 */
int __init cgroup_init_early(void)
{
	static struct cgroup_fs_context __initdata ctx;
	struct cgroup_subsys *ss;
	int i;

	ctx.root = &cgrp_dfl_root;
	init_cgroup_root(&ctx);
	cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;

	RCU_INIT_POINTER(init_task.cgroups, &init_css_set);

	for_each_subsys(ss, i) {
		WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
		     "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
		     i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
		     ss->id, ss->name);
		WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
		     "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);

		ss->id = i;
		ss->name = cgroup_subsys_name[i];
		if (!ss->legacy_name)
			ss->legacy_name = cgroup_subsys_name[i];

		if (ss->early_init)
			cgroup_init_subsys(ss, true);
	}
	return 0;
}

static u16 cgroup_disable_mask __initdata;

/**
 * cgroup_init - cgroup initialization
 *
 * Register cgroup filesystem and /proc file, and initialize
 * any subsystems that didn't request early init.
 */
int __init cgroup_init(void)
{
	struct cgroup_subsys *ss;
	int ssid;

	BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
	BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
	BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));

	cgroup_rstat_boot();

	/*
	 * The latency of the synchronize_rcu() is too high for cgroups,
	 * avoid it at the cost of forcing all readers into the slow path.
	 */
	rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);

	get_user_ns(init_cgroup_ns.user_ns);

	mutex_lock(&cgroup_mutex);

	/*
	 * Add init_css_set to the hash table so that dfl_root can link to
	 * it during init.
	 */
	hash_add(css_set_table, &init_css_set.hlist,
		 css_set_hash(init_css_set.subsys));

	BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));

	mutex_unlock(&cgroup_mutex);

	for_each_subsys(ss, ssid) {
		if (ss->early_init) {
			struct cgroup_subsys_state *css =
				init_css_set.subsys[ss->id];

			css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
						   GFP_KERNEL);
			BUG_ON(css->id < 0);
		} else {
			cgroup_init_subsys(ss, false);
		}

		list_add_tail(&init_css_set.e_cset_node[ssid],
			      &cgrp_dfl_root.cgrp.e_csets[ssid]);

		/*
		 * Setting dfl_root subsys_mask needs to consider the
		 * disabled flag and cftype registration needs kmalloc,
		 * both of which aren't available during early_init.
		 */
		if (cgroup_disable_mask & (1 << ssid)) {
			static_branch_disable(cgroup_subsys_enabled_key[ssid]);
			printk(KERN_INFO "Disabling %s control group subsystem\n",
			       ss->name);
			continue;
		}

		if (cgroup1_ssid_disabled(ssid))
			printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
			       ss->name);

		cgrp_dfl_root.subsys_mask |= 1 << ss->id;

		/* implicit controllers must be threaded too */
		WARN_ON(ss->implicit_on_dfl && !ss->threaded);

		if (ss->implicit_on_dfl)
			cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
		else if (!ss->dfl_cftypes)
			cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;

		if (ss->threaded)
			cgrp_dfl_threaded_ss_mask |= 1 << ss->id;

		if (ss->dfl_cftypes == ss->legacy_cftypes) {
			WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
		} else {
			WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
			WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
		}

		if (ss->bind)
			ss->bind(init_css_set.subsys[ssid]);

		mutex_lock(&cgroup_mutex);
		css_populate_dir(init_css_set.subsys[ssid]);
		mutex_unlock(&cgroup_mutex);
	}

	/* init_css_set.subsys[] has been updated, re-hash */
	hash_del(&init_css_set.hlist);
	hash_add(css_set_table, &init_css_set.hlist,
		 css_set_hash(init_css_set.subsys));

	WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
	WARN_ON(register_filesystem(&cgroup_fs_type));
	WARN_ON(register_filesystem(&cgroup2_fs_type));
	WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));
#ifdef CONFIG_CPUSETS
	WARN_ON(register_filesystem(&cpuset_fs_type));
#endif

	return 0;
}

static int __init cgroup_wq_init(void)
{
	/*
	 * There isn't much point in executing destruction path in
	 * parallel.  Good chunk is serialized with cgroup_mutex anyway.
	 * Use 1 for @max_active.
	 *
	 * We would prefer to do this in cgroup_init() above, but that
	 * is called before init_workqueues(): so leave this until after.
	 */
	cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
	BUG_ON(!cgroup_destroy_wq);
	return 0;
}
core_initcall(cgroup_wq_init);

/*
 * Look up the cgroup on the default hierarchy matching @id and store its
 * path in @buf.  @buf is left untouched if the id no longer resolves to a
 * kernfs node.
 */
void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
					char *buf, size_t buflen)
{
	struct kernfs_node *kn;

	kn = kernfs_get_node_by_id(cgrp_dfl_root.kf_root, id);
	if (!kn)
		return;
	kernfs_path(kn, buf, buflen);
	kernfs_put(kn);
}

/*
 * proc_cgroup_show()
 *  - Print task's cgroup paths into seq_file, one line for each hierarchy
 *  - Used for /proc/<pid>/cgroup.
 */
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
		     struct pid *pid, struct task_struct *tsk)
{
	char *buf;
	int retval;
	struct cgroup_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	for_each_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int ssid, count = 0;

		if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
			continue;

		seq_printf(m, "%d:", root->hierarchy_id);
		if (root != &cgrp_dfl_root)
			for_each_subsys(ss, ssid)
				if (root->subsys_mask & (1 << ssid))
					seq_printf(m, "%s%s", count++ ? "," : "",
						   ss->legacy_name);
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		seq_putc(m, ':');

		cgrp = task_cgroup_from_root(tsk, root);

		/*
		 * On traditional hierarchies, all zombie tasks show up as
		 * belonging to the root cgroup.  On the default hierarchy,
		 * while a zombie doesn't show up in "cgroup.procs" and
		 * thus can't be migrated, its /proc/PID/cgroup keeps
		 * reporting the cgroup it belonged to before exiting.  If
		 * the cgroup is removed before the zombie is reaped,
		 * " (deleted)" is appended to the path.
		 */
		if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
			retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
						current->nsproxy->cgroup_ns);
			if (retval >= PATH_MAX)
				retval = -ENAMETOOLONG;
			if (retval < 0)
				goto out_unlock;

			seq_puts(m, buf);
		} else {
			seq_puts(m, "/");
		}

		if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
			seq_puts(m, " (deleted)\n");
		else
			seq_putc(m, '\n');
	}

	retval = 0;
out_unlock:
	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);
	kfree(buf);
out:
	return retval;
}
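
/*
 * Editor's note -- illustrative output, not part of the original source.
 * On a system with one named v1 hierarchy plus the v2 mount, one line is
 * emitted per visible hierarchy, e.g.:
 *
 *	$ cat /proc/self/cgroup
 *	1:name=systemd:/user.slice/user-1000.slice/session-2.scope
 *	0::/user.slice/user-1000.slice/session-2.scope
 */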

/**
 * cgroup_fork - initialize cgroup related fields during copy_process()
 * @child: pointer to task_struct of forking parent process.
 *
 * A task is associated with the init_css_set until cgroup_post_fork()
 * attaches it to the parent's css_set.  Empty cg_list indicates that
 * @child isn't holding reference to its css_set.
 */
void cgroup_fork(struct task_struct *child)
{
	RCU_INIT_POINTER(child->cgroups, &init_css_set);
	INIT_LIST_HEAD(&child->cg_list);
}

/**
 * cgroup_can_fork - called on a new task before the process is exposed
 * @child: the task in question.
 *
 * This calls the subsystem can_fork() callbacks. If the can_fork() callback
 * returns an error, the fork aborts with that error code. This allows for
 * a cgroup subsystem to conditionally allow or deny new forks.
 */
int cgroup_can_fork(struct task_struct *child)
{
	struct cgroup_subsys *ss;
	int i, j, ret;

	do_each_subsys_mask(ss, i, have_canfork_callback) {
		ret = ss->can_fork(child);
		if (ret)
			goto out_revert;
	} while_each_subsys_mask();

	return 0;

out_revert:
	for_each_subsys(ss, j) {
		if (j >= i)
			break;
		if (ss->cancel_fork)
			ss->cancel_fork(child);
	}

	return ret;
}

/**
 * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
 * @child: the task in question
 *
 * This calls the subsystem cancel_fork() callbacks if a fork failed *after*
 * cgroup_can_fork() succeeded.
 */
void cgroup_cancel_fork(struct task_struct *child)
{
	struct cgroup_subsys *ss;
	int i;

	for_each_subsys(ss, i)
		if (ss->cancel_fork)
			ss->cancel_fork(child);
}

/**
 * cgroup_post_fork - called on a new task after adding it to the task list
 * @child: the task in question
 *
 * Adds the task to the list running through its css_set if necessary and
 * call the subsystem fork() callbacks.  Has to be after the task is
 * visible on the task list in case we race with the first call to
 * cgroup_task_iter_start() - to guarantee that the new task ends up on its
 * list.
 */
void cgroup_post_fork(struct task_struct *child)
{
	struct cgroup_subsys *ss;
	int i;

	/*
	 * This may race against cgroup_enable_task_cg_lists().  As that
	 * function sets use_task_css_set_links before grabbing
	 * tasklist_lock and we just went through tasklist_lock to add
	 * @child, it's guaranteed that either we see the set
	 * use_task_css_set_links or cgroup_enable_task_cg_lists() sees
	 * @child during its iteration.
	 *
	 * If we won the race, @child is associated with %current's
	 * css_set.  Grabbing css_set_lock guarantees both that the
	 * association is stable, and, on completion of the parent's
	 * migration, @child is visible in the source of migration or
	 * already in the destination cgroup.
	 */
	if (use_task_css_set_links) {
		struct css_set *cset;

		spin_lock_irq(&css_set_lock);
		cset = task_css_set(current);
		if (list_empty(&child->cg_list)) {
			get_css_set(cset);
			cset->nr_tasks++;
			css_set_move_task(child, NULL, cset, false);
		}

		/*
		 * If the cgroup has to be frozen, the new task has too.
		 * Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get
		 * the task into the frozen state.
		 */
		if (unlikely(cgroup_task_freeze(child))) {
			spin_lock(&child->sighand->siglock);
			WARN_ON_ONCE(child->frozen);
			child->jobctl |= JOBCTL_TRAP_FREEZE;
			spin_unlock(&child->sighand->siglock);

			/*
			 * Calling cgroup_update_frozen() isn't required here,
			 * because it will be called anyway a bit later from
			 * do_freezer_trap().  So we avoid cgroup's transient
			 * switch from the frozen state and back.
			 */
		}

		spin_unlock_irq(&css_set_lock);
	}

	/*
	 * Call ss->fork().  This must happen after @child is linked on
	 * css_set; otherwise, @child might change state between ->fork()
	 * and addition to css_set.
	 */
	do_each_subsys_mask(ss, i, have_fork_callback) {
		ss->fork(child);
	} while_each_subsys_mask();
}

/**
 * cgroup_exit - detach cgroup from exiting task
 * @tsk: pointer to task_struct of exiting process
 *
 * Description: Detach cgroup from @tsk and release it.
 *
 * We call cgroup_exit() while the task is still competent to handle
 * notify_on_release(), then leave the task attached to the root cgroup in
 * each hierarchy for the remainder of its exit.  No need to bother with
 * init_css_set refcnting.  init_css_set never goes away and we can't race
 * with the migration path - PF_EXITING is visible to it.
 */
void cgroup_exit(struct task_struct *tsk)
{
	struct cgroup_subsys *ss;
	struct css_set *cset;
	int i;

	/*
	 * Unlink @tsk from its css_set.  As the migration path can't race
	 * with us, we can check css_set and cg_list without synchronization.
	 */
	cset = task_css_set(tsk);

	if (!list_empty(&tsk->cg_list)) {
		spin_lock_irq(&css_set_lock);
		css_set_move_task(tsk, cset, NULL, false);
		list_add_tail(&tsk->cg_list, &cset->dying_tasks);
		cset->nr_tasks--;

		WARN_ON_ONCE(cgroup_task_frozen(tsk));
		if (unlikely(cgroup_task_freeze(tsk)))
			cgroup_update_frozen(task_dfl_cgroup(tsk));

		spin_unlock_irq(&css_set_lock);
	} else {
		get_css_set(cset);
	}

	/* see cgroup_post_fork() for details */
	do_each_subsys_mask(ss, i, have_exit_callback) {
		ss->exit(tsk);
	} while_each_subsys_mask();
}

void cgroup_release(struct task_struct *task)
{
	struct cgroup_subsys *ss;
	int ssid;

	do_each_subsys_mask(ss, ssid, have_release_callback) {
		ss->release(task);
	} while_each_subsys_mask();

	if (use_task_css_set_links) {
		spin_lock_irq(&css_set_lock);
		css_set_skip_task_iters(task_css_set(task), task);
		list_del_init(&task->cg_list);
		spin_unlock_irq(&css_set_lock);
	}
}

void cgroup_free(struct task_struct *task)
{
	struct css_set *cset = task_css_set(task);

	put_css_set(cset);
}

static int __init cgroup_disable(char *str)
{
	struct cgroup_subsys *ss;
	char *token;
	int i;

	while ((token = strsep(&str, ",")) != NULL) {
		if (!*token)
			continue;

		for_each_subsys(ss, i) {
			if (strcmp(token, ss->name) &&
			    strcmp(token, ss->legacy_name))
				continue;
			cgroup_disable_mask |= 1 << i;
		}
	}
	return 1;
}
__setup("cgroup_disable=", cgroup_disable);
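
/*
 * Editor's note -- illustrative, not part of the original source: booting
 * with "cgroup_disable=memory" sets the corresponding bit in
 * cgroup_disable_mask; cgroup_init() above then disables the controller's
 * static key and skips registering it on any hierarchy.
 */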

void __init __weak enable_debug_cgroup(void) { }

static int __init enable_cgroup_debug(char *str)
{
	cgroup_debug = true;
	enable_debug_cgroup();
	return 1;
}
__setup("cgroup_debug", enable_cgroup_debug);

/**
 * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
 * @dentry: directory dentry of interest
 * @ss: subsystem of interest
 *
 * If @dentry is a directory for a cgroup which has @ss enabled on it, try
 * to get the corresponding css and return it.  If such css doesn't exist
 * or can't be pinned, an ERR_PTR value is returned.
 */
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
						       struct cgroup_subsys *ss)
{
	struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
	struct file_system_type *s_type = dentry->d_sb->s_type;
	struct cgroup_subsys_state *css = NULL;
	struct cgroup *cgrp;

	/* is @dentry a cgroup dir? */
	if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
	    !kn || kernfs_type(kn) != KERNFS_DIR)
		return ERR_PTR(-EBADF);

	rcu_read_lock();

	/*
	 * This path doesn't originate from kernfs and @kn could already
	 * have been or be removed at any point.  @kn->priv is RCU
	 * protected for this access.  See css_release_work_fn() for details.
	 */
	cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
	if (cgrp)
		css = cgroup_css(cgrp, ss);

	if (!css || !css_tryget_online(css))
		css = ERR_PTR(-ENOENT);

	rcu_read_unlock();
	return css;
}

/**
 * css_from_id - lookup css by id
 * @id: the cgroup id
 * @ss: cgroup subsys to be looked into
 *
 * Returns the css if there's a valid one with @id, otherwise returns NULL.
 * Should be called under rcu_read_lock().
 */
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return idr_find(&ss->css_idr, id);
}
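
/*
 * Editor's note -- usage sketch, not part of the original source.  The
 * returned css is only RCU protected, so a caller that wants to use it
 * beyond the read-side section must pin it first:
 *
 *	rcu_read_lock();
 *	css = css_from_id(id, ss);
 *	if (css && !css_tryget_online(css))
 *		css = NULL;
 *	rcu_read_unlock();
 */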

/**
 * cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path
 * @path: path on the default hierarchy
 *
 * Find the cgroup at @path on the default hierarchy, increment its
 * reference count and return it.  Returns ERR_PTR(-ENOENT) if @path
 * doesn't exist and ERR_PTR(-ENOTDIR) if @path points to a non-directory.
 */
struct cgroup *cgroup_get_from_path(const char *path)
{
	struct kernfs_node *kn;
	struct cgroup *cgrp;

	mutex_lock(&cgroup_mutex);

	kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
	if (kn) {
		if (kernfs_type(kn) == KERNFS_DIR) {
			cgrp = kn->priv;
			cgroup_get_live(cgrp);
		} else {
			cgrp = ERR_PTR(-ENOTDIR);
		}
		kernfs_put(kn);
	} else {
		cgrp = ERR_PTR(-ENOENT);
	}

	mutex_unlock(&cgroup_mutex);
	return cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_get_from_path);
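
/*
 * Editor's note -- usage sketch, not part of the original source.  @path
 * is relative to the cgroup2 mount point and the acquired reference must
 * be dropped with cgroup_put():
 *
 *	struct cgroup *cgrp;
 *
 *	cgrp = cgroup_get_from_path("/user.slice");
 *	if (IS_ERR(cgrp))
 *		return PTR_ERR(cgrp);
 *	// ... use cgrp ...
 *	cgroup_put(cgrp);
 */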

/**
 * cgroup_get_from_fd - get a cgroup pointer from a fd
 * @fd: fd obtained by open(cgroup2 dir)
 *
 * Find the cgroup from a fd which should be obtained by opening a cgroup
 * directory.  Returns a pointer to the cgroup on success.  ERR_PTR is
 * returned if the cgroup cannot be found.
 */
struct cgroup *cgroup_get_from_fd(int fd)
{
	struct cgroup_subsys_state *css;
	struct cgroup *cgrp;
	struct file *f;

	f = fget_raw(fd);
	if (!f)
		return ERR_PTR(-EBADF);

	css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
	fput(f);
	if (IS_ERR(css))
		return ERR_CAST(css);

	cgrp = css->cgroup;
	if (!cgroup_on_dfl(cgrp)) {
		cgroup_put(cgrp);
		return ERR_PTR(-EBADF);
	}

	return cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
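
/*
 * Editor's note -- illustrative, not part of the original source: this is
 * the helper behind kernel interfaces that accept a cgroup2 directory fd
 * from userspace (e.g. the bpf(2) BPF_PROG_ATTACH target_fd).  An fd on a
 * v1 hierarchy is rejected with -EBADF by the cgroup_on_dfl() check above.
 */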

static u64 power_of_ten(int power)
{
	u64 v = 1;
	while (power--)
		v *= 10;
	return v;
}

/**
 * cgroup_parse_float - parse a floating number
 * @input: input string
 * @dec_shift: number of decimal digits to shift
 * @v: output
 *
 * Parse a decimal floating point number in @input and store the result in
 * @v with decimal point right shifted @dec_shift times.  For example, if
 * @input is "12.3456" and @dec_shift is 3, *@v will be set to 12346 (the
 * fractional part is rounded to the closest value).  Returns 0 on success,
 * -errno otherwise.
 *
 * There's nothing cgroup specific about this function except that it's
 * currently the only user.
 */
int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
{
	s64 whole, frac = 0;
	int fstart = 0, fend = 0, flen;

	if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend))
		return -EINVAL;
	if (frac < 0)
		return -EINVAL;

	flen = fend > fstart ? fend - fstart : 0;
	if (flen < dec_shift)
		frac *= power_of_ten(dec_shift - flen);
	else
		frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift));

	*v = whole * power_of_ten(dec_shift) + frac;
	return 0;
}
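
/*
 * Editor's note -- worked examples, not part of the original source, with
 * dec_shift == 2:
 *
 *	"12.3"  -> whole 12, frac 3,   flen 1 < 2, frac *= 10 -> *v == 1230
 *	"0.125" -> whole 0,  frac 125, flen 3 > 2, frac -> 13 -> *v == 13
 *
 *	s64 v;
 *
 *	if (!cgroup_parse_float("12.3", 2, &v))
 *		pr_debug("parsed: %lld\n", v);	// prints 1230
 */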

/*
 * sock->sk_cgrp_data handling.  For more info, see sock_cgroup_data
 * definition in cgroup-defs.h.
 */
#ifdef CONFIG_SOCK_CGROUP_DATA

#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)

DEFINE_SPINLOCK(cgroup_sk_update_lock);
static bool cgroup_sk_alloc_disabled __read_mostly;

void cgroup_sk_alloc_disable(void)
{
	if (cgroup_sk_alloc_disabled)
		return;
	pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
	cgroup_sk_alloc_disabled = true;
}

#else

#define cgroup_sk_alloc_disabled	false

#endif

void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
{
	if (cgroup_sk_alloc_disabled)
		return;

	/* Socket clone path */
	if (skcd->val) {
		/*
		 * We might be cloning a socket which is left in an empty
		 * cgroup and the cgroup might have already been rmdir'd.
		 * Don't use cgroup_get_live().
		 */
		cgroup_get(sock_cgroup_ptr(skcd));
		cgroup_bpf_get(sock_cgroup_ptr(skcd));
		return;
	}

	rcu_read_lock();

	while (true) {
		struct css_set *cset;

		cset = task_css_set(current);
		if (likely(cgroup_tryget(cset->dfl_cgrp))) {
			skcd->val = (unsigned long)cset->dfl_cgrp;
			cgroup_bpf_get(cset->dfl_cgrp);
			break;
		}
		cpu_relax();
	}

	rcu_read_unlock();
}

void cgroup_sk_free(struct sock_cgroup_data *skcd)
{
	struct cgroup *cgrp = sock_cgroup_ptr(skcd);

	cgroup_bpf_put(cgrp);
	cgroup_put(cgrp);
}

#endif	/* CONFIG_SOCK_CGROUP_DATA */

#ifdef CONFIG_CGROUP_BPF
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
		      enum bpf_attach_type type, u32 flags)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
	mutex_unlock(&cgroup_mutex);
	return ret;
}

int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
		      enum bpf_attach_type type, u32 flags)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	ret = __cgroup_bpf_detach(cgrp, prog, type);
	mutex_unlock(&cgroup_mutex);
	return ret;
}

int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
		     union bpf_attr __user *uattr)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	ret = __cgroup_bpf_query(cgrp, attr, uattr);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
#endif	/* CONFIG_CGROUP_BPF */
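
/*
 * Editor's note -- illustrative, not part of the original source: these
 * wrappers exist so the bpf(2) syscall commands (BPF_PROG_ATTACH,
 * BPF_PROG_DETACH, BPF_PROG_QUERY) can manipulate a cgroup's program
 * arrays while holding cgroup_mutex, serializing against cgroup creation,
 * destruction and hierarchy changes.
 */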

#ifdef CONFIG_SYSFS
static ssize_t show_delegatable_files(struct cftype *files, char *buf,
				      ssize_t size, const char *prefix)
{
	struct cftype *cft;
	ssize_t ret = 0;

	for (cft = files; cft && cft->name[0] != '\0'; cft++) {
		if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
			continue;

		if (prefix)
			ret += snprintf(buf + ret, size - ret, "%s.", prefix);

		ret += snprintf(buf + ret, size - ret, "%s\n", cft->name);

		if (WARN_ON(ret >= size))
			break;
	}

	return ret;
}

static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr,
			     char *buf)
{
	struct cgroup_subsys *ss;
	int ssid;
	ssize_t ret = 0;

	ret = show_delegatable_files(cgroup_base_files, buf, PAGE_SIZE - ret,
				     NULL);

	for_each_subsys(ss, ssid)
		ret += show_delegatable_files(ss->dfl_cftypes, buf + ret,
					      PAGE_SIZE - ret,
					      cgroup_subsys_name[ssid]);

	return ret;
}
static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate);

static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
			     char *buf)
{
	return snprintf(buf, PAGE_SIZE, "nsdelegate\nmemory_localevents\n");
}
static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);

static struct attribute *cgroup_sysfs_attrs[] = {
	&cgroup_delegate_attr.attr,
	&cgroup_features_attr.attr,
	NULL,
};

static const struct attribute_group cgroup_sysfs_attr_group = {
	.attrs = cgroup_sysfs_attrs,
	.name = "cgroup",
};

static int __init cgroup_sysfs_init(void)
{
	return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
}
subsys_initcall(cgroup_sysfs_init);
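
/*
 * Editor's note -- illustrative, not part of the original source: the
 * attribute group registered above appears under /sys/kernel/cgroup, e.g.:
 *
 *	$ cat /sys/kernel/cgroup/features
 *	nsdelegate
 *	memory_localevents
 */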

#endif	/* CONFIG_SYSFS */