#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "cgroup-internal.h"

#include <linux/cred.h>
#include <linux/errno.h>
#include <linux/init_task.h>
#include <linux/kernel.h>
#include <linux/magic.h>
#include <linux/mutex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/percpu-rwsem.h>
#include <linux/string.h>
#include <linux/hashtable.h>
#include <linux/idr.h>
#include <linux/kthread.h>
#include <linux/atomic.h>
#include <linux/cpuset.h>
#include <linux/proc_ns.h>
#include <linux/nsproxy.h>
#include <linux/file.h>
#include <linux/fs_parser.h>
#include <linux/sched/cputime.h>
#include <linux/psi.h>
#include <net/sock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/cgroup.h>

#define CGROUP_FILE_NAME_MAX	(MAX_CGROUP_TYPE_NAMELEN +	\
				 MAX_CFTYPE_NAME + 2)

/* let's not notify more than 100 times per second */
#define CGROUP_FILE_NOTIFY_MIN_INTV	DIV_ROUND_UP(HZ, 100)

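/*
 * cgroup_mutex is the master lock.  Any modification to cgroup or its
 * hierarchy must be performed while holding it.
 *
 * css_set_lock protects task->cgroups pointer, the list of css_set
 * objects, and the chain of tasks off each css_set.
 *
 * These locks are exported if CONFIG_PROVE_RCU so that accessors in
 * cgroup.h can use them for lockdep annotations.
 */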
DEFINE_MUTEX(cgroup_mutex);
DEFINE_SPINLOCK(css_set_lock);

#ifdef CONFIG_PROVE_RCU
EXPORT_SYMBOL_GPL(cgroup_mutex);
EXPORT_SYMBOL_GPL(css_set_lock);
#endif

DEFINE_SPINLOCK(trace_cgroup_path_lock);
char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
bool cgroup_debug __read_mostly;

static DEFINE_SPINLOCK(cgroup_idr_lock);

static DEFINE_SPINLOCK(cgroup_file_kn_lock);

struct percpu_rw_semaphore cgroup_threadgroup_rwsem;

#define cgroup_assert_mutex_or_rcu_locked()				\
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
			   !lockdep_is_held(&cgroup_mutex),		\
			   "cgroup_mutex or RCU read lock required");

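/*
 * cgroup destruction makes heavy use of work items and there can be a lot
 * of concurrent destructions.  Use a separate workqueue so that cgroup
 * destruction work items don't end up filling up max_active of system_wq
 * which may lead to deadlock.
 */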
static struct workqueue_struct *cgroup_destroy_wq;

/* generate an array of cgroup subsystem pointers */
#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
struct cgroup_subsys *cgroup_subsys[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/* array of cgroup subsystem names */
#define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
static const char *cgroup_subsys_name[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

/* array of static_keys for cgroup_subsys_enabled() and cgroup_subsys_on_dfl() */
#define SUBSYS(_x)							\
	DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key);	\
	DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key);		\
	EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key);		\
	EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
#include <linux/cgroup_subsys.h>
#undef SUBSYS

#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
static struct static_key_true *cgroup_subsys_enabled_key[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

#define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
#include <linux/cgroup_subsys.h>
};
#undef SUBSYS

static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);

/* the default hierarchy */
struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
EXPORT_SYMBOL_GPL(cgrp_dfl_root);

/*
 * The default hierarchy always exists but is hidden until mounted for the
 * first time.  This is for backward compatibility.
 */
static bool cgrp_dfl_visible;

/* some controllers are not supported in the default hierarchy */
static u16 cgrp_dfl_inhibit_ss_mask;

/* some controllers are implicitly enabled on the default hierarchy */
static u16 cgrp_dfl_implicit_ss_mask;

/* some controllers can be threaded on the default hierarchy */
static u16 cgrp_dfl_threaded_ss_mask;

/* The list of hierarchy roots */
LIST_HEAD(cgroup_roots);
static int cgroup_root_count;

/* hierarchy ID allocation and mapping, protected by cgroup_mutex */
static DEFINE_IDR(cgroup_hierarchy_idr);

/*
 * Assign a monotonically increasing serial number to csses.  It guarantees
 * cgroups with bigger numbers are newer than those with smaller numbers.
 * Also, as csses are always appended to the parent's ->children list, it
 * guarantees that sibling csses are always sorted in the ascending serial
 * number order on the list.  Protected by cgroup_mutex.
 */
static u64 css_serial_nr_next = 1;

/*
 * These bitmasks identify subsystems with specific features to avoid
 * having to do iterative checks repeatedly.
 */
static u16 have_fork_callback __read_mostly;
static u16 have_exit_callback __read_mostly;
static u16 have_release_callback __read_mostly;
static u16 have_canfork_callback __read_mostly;

/* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = {
	.count		= REFCOUNT_INIT(2),
	.user_ns	= &init_user_ns,
	.ns.ops		= &cgroupns_operations,
	.ns.inum	= PROC_CGROUP_INIT_INO,
	.root_cset	= &init_css_set,
};

static struct file_system_type cgroup2_fs_type;
static struct cftype cgroup_base_files[];

static int cgroup_apply_control(struct cgroup *cgrp);
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
static void css_task_iter_skip(struct css_task_iter *it,
			       struct task_struct *task);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
					      struct cgroup_subsys *ss);
static void css_release(struct percpu_ref *ref);
static void kill_css(struct cgroup_subsys_state *css);
static int cgroup_addrm_files(struct cgroup_subsys_state *css,
			      struct cgroup *cgrp, struct cftype cfts[],
			      bool is_add);

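/**
 * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
 * @ssid: subsys ID of interest
 *
 * cgroup_ssid_enabled() can only be used once a subsystem's enabled state
 * has been settled, i.e. after cgroup_init().
 */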
bool cgroup_ssid_enabled(int ssid)
{
	if (CGROUP_SUBSYS_COUNT == 0)
		return false;

	return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
}

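/**
 * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
 * @cgrp: the cgroup of interest
 *
 * The default hierarchy (cgroup v2) differs from the legacy hierarchies in
 * several behaviors; see Documentation/admin-guide/cgroup-v2.rst for the
 * full list of differences.
 */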
bool cgroup_on_dfl(const struct cgroup *cgrp)
{
	return cgrp->root == &cgrp_dfl_root;
}

/* IDR wrappers which synchronize using cgroup_idr_lock */
static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
			    gfp_t gfp_mask)
{
	int ret;

	idr_preload(gfp_mask);
	spin_lock_bh(&cgroup_idr_lock);
	ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
	spin_unlock_bh(&cgroup_idr_lock);
	idr_preload_end();
	return ret;
}

static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
{
	void *ret;

	spin_lock_bh(&cgroup_idr_lock);
	ret = idr_replace(idr, ptr, id);
	spin_unlock_bh(&cgroup_idr_lock);
	return ret;
}

static void cgroup_idr_remove(struct idr *idr, int id)
{
	spin_lock_bh(&cgroup_idr_lock);
	idr_remove(idr, id);
	spin_unlock_bh(&cgroup_idr_lock);
}

static bool cgroup_has_tasks(struct cgroup *cgrp)
{
	return cgrp->nr_populated_csets;
}

bool cgroup_is_threaded(struct cgroup *cgrp)
{
	return cgrp->dom_cgrp != cgrp;
}

/* can @cgrp host both domain and threaded children? */
static bool cgroup_is_mixable(struct cgroup *cgrp)
{
	/*
	 * Root isn't under domain level resource control exempting it from
	 * the no-internal-process constraint, so it can serve as a thread
	 * root and a parent of resource domains at the same time.
	 */
	return !cgroup_parent(cgrp);
}

/* can @cgrp become a thread root? Should always be true for a thread root */
static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
{
	/* mixables don't care */
	if (cgroup_is_mixable(cgrp))
		return true;

	/* domain roots can't be nested under threaded */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* can only have either domain or threaded children */
	if (cgrp->nr_populated_domain_children)
		return false;

	/* and no domain controllers can be enabled */
	if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
		return false;

	return true;
}

/* is @cgrp root of a threaded subtree? */
bool cgroup_is_thread_root(struct cgroup *cgrp)
{
	/* thread root should be a domain */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* a domain w/ threaded children is a thread root */
	if (cgrp->nr_threaded_children)
		return true;

	/*
	 * A domain which has tasks and explicit threaded controllers
	 * enabled is a thread root.
	 */
	if (cgroup_has_tasks(cgrp) &&
	    (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask))
		return true;

	return false;
}

/* a domain which isn't connected to the root w/o brekage can't be used */
static bool cgroup_is_valid_domain(struct cgroup *cgrp)
{
	/* the cgroup itself can be a thread root */
	if (cgroup_is_threaded(cgrp))
		return false;

	/* but the ancestors can't be unless mixable */
	while ((cgrp = cgroup_parent(cgrp))) {
		if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp))
			return false;
		if (cgroup_is_threaded(cgrp))
			return false;
	}

	return true;
}

/* subsystems visibly enabled on a cgroup */
static u16 cgroup_control(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);
	u16 root_ss_mask = cgrp->root->subsys_mask;

	if (parent) {
		u16 ss_mask = parent->subtree_control;

		/* threaded cgroups can only have threaded controllers */
		if (cgroup_is_threaded(cgrp))
			ss_mask &= cgrp_dfl_threaded_ss_mask;
		return ss_mask;
	}

	if (cgroup_on_dfl(cgrp))
		root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
				  cgrp_dfl_implicit_ss_mask);
	return root_ss_mask;
}

/* subsystems enabled on a cgroup */
static u16 cgroup_ss_mask(struct cgroup *cgrp)
{
	struct cgroup *parent = cgroup_parent(cgrp);

	if (parent) {
		u16 ss_mask = parent->subtree_ss_mask;

		/* threaded cgroups can only have threaded controllers */
		if (cgroup_is_threaded(cgrp))
			ss_mask &= cgrp_dfl_threaded_ss_mask;
		return ss_mask;
	}

	return cgrp->root->subsys_mask;
}

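/**
 * cgroup_css - obtain a cgroup's css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
 *
 * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
 * function must be called either under cgroup_mutex or rcu_read_lock() and
 * the caller is responsible for pinning the returned css if it wants to
 * keep accessing it outside the said locks.  This function may return
 * %NULL if @cgrp doesn't have @ss enabled.
 */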
static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
					      struct cgroup_subsys *ss)
{
	if (ss)
		return rcu_dereference_check(cgrp->subsys[ss->id],
					lockdep_is_held(&cgroup_mutex));
	else
		return &cgrp->self;
}

/**
 * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get @cgrp's css associated with @ss.  If the css doesn't exist
 * or is offline, %NULL is returned.
 */
static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
						     struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();
	css = cgroup_css(cgrp, ss);
	if (!css || !css_tryget_online(css))
		css = NULL;
	rcu_read_unlock();

	return css;
}

/**
 * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
 *
 * Similar to cgroup_css() but returns the effective css, which is defined
 * as the matching css of the nearest ancestor including self which has @ss
 * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
 * function is guaranteed to return non-NULL css.
 */
static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
							struct cgroup_subsys *ss)
{
	lockdep_assert_held(&cgroup_mutex);

	if (!ss)
		return &cgrp->self;

	/*
	 * This function is used while updating css associations and thus
	 * can't test the csses directly.  Test ss_mask.
	 */
	while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
		cgrp = cgroup_parent(cgrp);
		if (!cgrp)
			return NULL;
	}

	return cgroup_css(cgrp, ss);
}

/**
 * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get the effective css of @cgrp for @ss.  The effective css is
 * defined as the matching css of the nearest ancestor including self which
 * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
 * the root css is returned, so this function always returns a valid css.
 *
 * The returned css is not guaranteed to be online, and therefore it is the
 * callers responsibility to try get a reference for it.
 */
struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
					 struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	do {
		css = cgroup_css(cgrp, ss);

		if (css)
			return css;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);

	return init_css_set.subsys[ss->id];
}

/**
 * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
 * @cgrp: the cgroup of interest
 * @ss: the subsystem of interest
 *
 * Find and get the effective css of @cgrp for @ss.  The effective css is
 * defined as the matching css of the nearest ancestor including self which
 * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
 * the root css is returned, so this function always returns a valid css.
 * The returned css must be put using css_put().
 */
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
					     struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	rcu_read_lock();

	do {
		css = cgroup_css(cgrp, ss);

		if (css && css_tryget_online(css))
			goto out_unlock;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);

	css = init_css_set.subsys[ss->id];
	css_get(css);
out_unlock:
	rcu_read_unlock();
	return css;
}

static void cgroup_get_live(struct cgroup *cgrp)
{
	WARN_ON_ONCE(cgroup_is_dead(cgrp));
	css_get(&cgrp->self);
}

/**
 * __cgroup_task_count - count the number of tasks in a cgroup. The caller
 * is responsible for taking the css_set_lock.
 * @cgrp: the cgroup in question
 */
int __cgroup_task_count(const struct cgroup *cgrp)
{
	int count = 0;
	struct cgrp_cset_link *link;

	lockdep_assert_held(&css_set_lock);

	list_for_each_entry(link, &cgrp->cset_links, cset_link)
		count += link->cset->nr_tasks;

	return count;
}

/**
 * cgroup_task_count - count the number of tasks in a cgroup.
 * @cgrp: the cgroup in question
 */
int cgroup_task_count(const struct cgroup *cgrp)
{
	int count;

	spin_lock_irq(&css_set_lock);
	count = __cgroup_task_count(cgrp);
	spin_unlock_irq(&css_set_lock);

	return count;
}

struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
{
	struct cgroup *cgrp = of->kn->parent->priv;
	struct cftype *cft = of_cft(of);

	/*
	 * This is open and unprotected implementation of cgroup_css().
	 * seq_css() is only called from a kernfs file operation which takes
	 * an active reference on the file.  Because all the subsystem
	 * files are drained before a css is disassociated with a cgroup,
	 * the matching css from the cgroup's subsys table is guaranteed to
	 * be and stay valid until the enclosing operation is complete.
	 */
	if (cft->ss)
		return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
	else
		return &cgrp->self;
}
EXPORT_SYMBOL_GPL(of_css);

/**
 * for_each_css - iterate all css's of a cgroup
 * @css: the iteration cursor
 * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
 * @cgrp: the target cgroup to iterate css's of
 *
 * Should be called under cgroup_[tree_]mutex.
 */
#define for_each_css(css, ssid, cgrp)					\
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
		if (!((css) = rcu_dereference_check(			\
				(cgrp)->subsys[(ssid)],			\
				lockdep_is_held(&cgroup_mutex)))) { }	\
		else

/**
 * for_each_e_css - iterate all effective css's of a cgroup
 * @css: the iteration cursor
 * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
 * @cgrp: the target cgroup to iterate css's of
 *
 * Should be called under cgroup_[tree_]mutex.
 */
#define for_each_e_css(css, ssid, cgrp)					    \
	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	    \
		if (!((css) = cgroup_e_css_by_mask(cgrp,		    \
						   cgroup_subsys[(ssid)]))) \
			;						    \
		else

/**
 * do_each_subsys_mask - filter for_each_subsys with a bitmask
 * @ss: the iteration cursor
 * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
 * @ss_mask: the bitmask
 *
 * The block will only run for cases where the ssid-th bit (1 << ssid) of
 * @ss_mask is set.
 */
#define do_each_subsys_mask(ss, ssid, ss_mask) do {			\
	unsigned long __ss_mask = (ss_mask);				\
	if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */	\
		(ssid) = 0;						\
		break;							\
	}								\
	for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) {	\
		(ss) = cgroup_subsys[ssid];				\
		{

#define while_each_subsys_mask()					\
		}							\
	}								\
} while (false)

/* iterate over child cgrps, lock should be held throughout iteration */
#define cgroup_for_each_live_child(child, cgrp)				\
	list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       cgroup_is_dead(child); }))			\
			;						\
		else

/* walk live descendants in preorder */
#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)		\
	css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))	\
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       (dsct) = (d_css)->cgroup;			\
		       cgroup_is_dead(dsct); }))			\
			;						\
		else

/* walk live descendants in postorder */
#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp)	\
	css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
		if (({ lockdep_assert_held(&cgroup_mutex);		\
		       (dsct) = (d_css)->cgroup;			\
		       cgroup_is_dead(dsct); }))			\
			;						\
		else

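/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted.  It contains a pointer to the root state
 * for each subsystem.  Also used to anchor the list of css_sets.  Not
 * reference-counted, to improve performance when child cgroups
 * haven't been created.
 */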
struct css_set init_css_set = {
	.refcount		= REFCOUNT_INIT(1),
	.dom_cset		= &init_css_set,
	.tasks			= LIST_HEAD_INIT(init_css_set.tasks),
	.mg_tasks		= LIST_HEAD_INIT(init_css_set.mg_tasks),
	.dying_tasks		= LIST_HEAD_INIT(init_css_set.dying_tasks),
	.task_iters		= LIST_HEAD_INIT(init_css_set.task_iters),
	.threaded_csets		= LIST_HEAD_INIT(init_css_set.threaded_csets),
	.cgrp_links		= LIST_HEAD_INIT(init_css_set.cgrp_links),
	.mg_preload_node	= LIST_HEAD_INIT(init_css_set.mg_preload_node),
	.mg_node		= LIST_HEAD_INIT(init_css_set.mg_node),

	/*
	 * The following field is re-initialized when this cset gets linked
	 * in cgroup_init().  However, let's initialize the field
	 * statically too so that the default cgroup can be accessed safely
	 * early during boot.
	 */
	.dfl_cgrp		= &cgrp_dfl_root.cgrp,
};

static int css_set_count	= 1;	/* 1 for init_css_set */

static bool css_set_threaded(struct css_set *cset)
{
	return cset->dom_cset != cset;
}

/**
 * css_set_populated - does a css_set contain any tasks?
 * @cset: target css_set
 *
 * css_set_populated() should be the same as !!cset->nr_tasks at steady
 * state.  However, css_set_populated() can be called while a task is being
 * added to or removed from the linked list before the nr_tasks is properly
 * updated.  Hence, it can't be used reliably there.
 */
static bool css_set_populated(struct css_set *cset)
{
	lockdep_assert_held(&css_set_lock);

	return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks);
}

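/**
 * cgroup_update_populated - update the populated count of a cgroup
 * @cgrp: the target cgroup
 * @populated: inc or dec populated count
 *
 * One of the css_sets associated with @cgrp is either getting its first
 * task or losing the last.  Update @cgrp->nr_populated_* accordingly.  The
 * count is propagated towards root so that a given cgroup's
 * nr_populated_children is zero iff none of its descendants contain any
 * tasks.
 *
 * @cgrp's interface file "cgroup.events" is notified whenever the populated
 * state of @cgrp changes as a result.
 */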
static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
{
	struct cgroup *child = NULL;
	int adj = populated ? 1 : -1;

	lockdep_assert_held(&css_set_lock);

	do {
		bool was_populated = cgroup_is_populated(cgrp);

		if (!child) {
			cgrp->nr_populated_csets += adj;
		} else {
			if (cgroup_is_threaded(child))
				cgrp->nr_populated_threaded_children += adj;
			else
				cgrp->nr_populated_domain_children += adj;
		}

		if (was_populated == cgroup_is_populated(cgrp))
			break;

		cgroup1_check_for_release(cgrp);
		TRACE_CGROUP_PATH(notify_populated, cgrp,
				  cgroup_is_populated(cgrp));
		cgroup_file_notify(&cgrp->events_file);

		child = cgrp;
		cgrp = cgroup_parent(cgrp);
	} while (cgrp);
}

/**
 * css_set_update_populated - update populated state of a css_set
 * @cset: target css_set
 * @populated: whether @cset is populated or depopulated
 *
 * @cset is either getting the first task or losing the last.  Update the
 * populated counters of all associated cgroups accordingly.
 */
static void css_set_update_populated(struct css_set *cset, bool populated)
{
	struct cgrp_cset_link *link;

	lockdep_assert_held(&css_set_lock);

	list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
		cgroup_update_populated(link->cgrp, populated);
}

/*
 * @task is leaving, advance task iterators which are pointing to it so
 * that they can resume at the next position.  Advancing an iterator might
 * remove it from the list, use safe walk.  See css_task_iter_skip() for
 * details.
 */
static void css_set_skip_task_iters(struct css_set *cset,
				    struct task_struct *task)
{
	struct css_task_iter *it, *pos;

	list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
		css_task_iter_skip(it, task);
}

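/**
 * css_set_move_task - move a task from one css_set to another
 * @task: task being moved
 * @from_cset: css_set @task currently belongs to (may be NULL)
 * @to_cset: new css_set @task is being moved to (may be NULL)
 * @use_mg_tasks: move to @to_cset->mg_tasks instead of ->tasks
 *
 * Move @task from @from_cset to @to_cset.  If @task didn't belong to any
 * css_set, @from_cset can be NULL.  If @task is being disassociated
 * instead of moved, @to_cset can be NULL.
 *
 * This function automatically handles populated counter updates and
 * css_task_iter adjustments but the caller is responsible for managing
 * @from_cset and @to_cset's reference counts.
 */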
static void css_set_move_task(struct task_struct *task,
			      struct css_set *from_cset, struct css_set *to_cset,
			      bool use_mg_tasks)
{
	lockdep_assert_held(&css_set_lock);

	if (to_cset && !css_set_populated(to_cset))
		css_set_update_populated(to_cset, true);

	if (from_cset) {
		WARN_ON_ONCE(list_empty(&task->cg_list));

		css_set_skip_task_iters(from_cset, task);
		list_del_init(&task->cg_list);
		if (!css_set_populated(from_cset))
			css_set_update_populated(from_cset, false);
	} else {
		WARN_ON_ONCE(!list_empty(&task->cg_list));
	}

	if (to_cset) {
		/*
		 * We are synchronized through cgroup_threadgroup_rwsem
		 * against PF_EXITING setting such that we can't race
		 * against cgroup_exit() changing the css_set to
		 * init_css_set and dropping the old one.
		 */
		WARN_ON_ONCE(task->flags & PF_EXITING);

		cgroup_move_task(task, to_cset);
		list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
							     &to_cset->tasks);
	}
}

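/*
 * hash table for cgroup groups.  This improves the performance to find
 * an existing css_set.  This hash doesn't (currently) take into
 * account cgroups in empty hierarchies.
 */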
#define CSS_SET_HASH_BITS	7
static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);

static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
{
	unsigned long key = 0UL;
	struct cgroup_subsys *ss;
	int i;

	for_each_subsys(ss, i)
		key += (unsigned long)css[i];
	key = (key >> 16) ^ key;

	return key;
}

void put_css_set_locked(struct css_set *cset)
{
	struct cgrp_cset_link *link, *tmp_link;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&css_set_lock);

	if (!refcount_dec_and_test(&cset->refcount))
		return;

	WARN_ON_ONCE(!list_empty(&cset->threaded_csets));

	/* This css_set is dead. unlink it and release cgroup and css refs */
	for_each_subsys(ss, ssid) {
		list_del(&cset->e_cset_node[ssid]);
		css_put(cset->subsys[ssid]);
	}
	hash_del(&cset->hlist);
	css_set_count--;

	list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		if (cgroup_parent(link->cgrp))
			cgroup_put(link->cgrp);
		kfree(link);
	}

	if (css_set_threaded(cset)) {
		list_del(&cset->threaded_csets_node);
		put_css_set_locked(cset->dom_cset);
	}

	kfree_rcu(cset, rcu_head);
}

/**
 * compare_css_sets - helper function for find_existing_css_set().
 * @cset: candidate css_set being tested
 * @old_cset: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if "cset" matches "old_cset" except for the hierarchy
 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
 */
static bool compare_css_sets(struct css_set *cset,
			     struct css_set *old_cset,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct cgroup *new_dfl_cgrp;
	struct list_head *l1, *l2;

	/*
	 * On the default hierarchy, there can be csets which are
	 * associated with the same set of cgroups but different csses.
	 * Let's first ensure that csses match.
	 */
	if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
		return false;

	/* @cset's domain should match the default cgroup's */
	if (cgroup_on_dfl(new_cgrp))
		new_dfl_cgrp = new_cgrp;
	else
		new_dfl_cgrp = old_cset->dfl_cgrp;

	if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp)
		return false;

	/*
	 * Compare cgroup pointers in order to distinguish between
	 * different cgroups in hierarchies.  As different cgroups may
	 * share the same effective css, this comparison is always
	 * necessary.
	 */
	l1 = &cset->cgrp_links;
	l2 = &old_cset->cgrp_links;
	while (1) {
		struct cgrp_cset_link *link1, *link2;
		struct cgroup *cgrp1, *cgrp2;

		l1 = l1->next;
		l2 = l2->next;
		/* See if we reached the end - both lists are equal length. */
		if (l1 == &cset->cgrp_links) {
			BUG_ON(l2 != &old_cset->cgrp_links);
			break;
		} else {
			BUG_ON(l2 == &old_cset->cgrp_links);
		}
		/* Locate the cgroups associated with these links. */
		link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
		link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
		cgrp1 = link1->cgrp;
		cgrp2 = link2->cgrp;
		/* Hierarchies should be linked in the same order. */
		BUG_ON(cgrp1->root != cgrp2->root);

		/*
		 * If this hierarchy is the hierarchy of the cgroup
		 * that's changing, then we need to check that this
		 * css_set points to the new cgroup; if it's any other
		 * hierarchy, then this css_set should point to the
		 * same cgroup as the old css_set.
		 */
		if (cgrp1->root == new_cgrp->root) {
			if (cgrp1 != new_cgrp)
				return false;
		} else {
			if (cgrp1 != cgrp2)
				return false;
		}
	}
	return true;
}

/**
 * find_existing_css_set - init css array and find the matching css_set
 * @old_cset: the css_set that we're using before the cgroup transition
 * @cgrp: the cgroup that we're moving into
 * @template: out param for the new set of csses, should be clear on entry
 */
static struct css_set *find_existing_css_set(struct css_set *old_cset,
					struct cgroup *cgrp,
					struct cgroup_subsys_state *template[])
{
	struct cgroup_root *root = cgrp->root;
	struct cgroup_subsys *ss;
	struct css_set *cset;
	unsigned long key;
	int i;

	/*
	 * Build the set of subsystem state objects that we want to see in the
	 * new css_set.  While subsystems can change globally, the entries here
	 * won't change, so no need for locking.
	 */
	for_each_subsys(ss, i) {
		if (root->subsys_mask & (1UL << i)) {
			/*
			 * @ss is in this hierarchy, so we want the
			 * effective css from @cgrp.
			 */
			template[i] = cgroup_e_css_by_mask(cgrp, ss);
		} else {
			/*
			 * @ss is not in this hierarchy, so we don't want
			 * to change the css.
			 */
			template[i] = old_cset->subsys[i];
		}
	}

	key = css_set_hash(template);
	hash_for_each_possible(css_set_table, cset, hlist, key) {
		if (!compare_css_sets(cset, old_cset, cgrp, template))
			continue;

		/* This css_set matches what we need */
		return cset;
	}

	/* No existing cgroup group matched */
	return NULL;
}

static void free_cgrp_cset_links(struct list_head *links_to_free)
{
	struct cgrp_cset_link *link, *tmp_link;

	list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
		list_del(&link->cset_link);
		kfree(link);
	}
}

/**
 * allocate_cgrp_cset_links - allocate cgrp_cset_links
 * @count: the number of links to allocate
 * @tmp_links: list_head the allocated links are put on
 *
 * Allocate @count cgrp_cset_link structures and chain them on @tmp_links
 * through ->cset_link.  Returns 0 on success or -errno.
 */
static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
{
	struct cgrp_cset_link *link;
	int i;

	INIT_LIST_HEAD(tmp_links);

	for (i = 0; i < count; i++) {
		link = kzalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			free_cgrp_cset_links(tmp_links);
			return -ENOMEM;
		}
		list_add(&link->cset_link, tmp_links);
	}
	return 0;
}

/**
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
 * @cset: the css_set to be linked
 * @cgrp: the destination cgroup
 */
static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
			 struct cgroup *cgrp)
{
	struct cgrp_cset_link *link;

	BUG_ON(list_empty(tmp_links));

	if (cgroup_on_dfl(cgrp))
		cset->dfl_cgrp = cgrp;

	link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
	link->cset = cset;
	link->cgrp = cgrp;

	/*
	 * Always add links to the tail of the lists so that the lists are
	 * in chronological order.
	 */
	list_move_tail(&link->cset_link, &cgrp->cset_links);
	list_add_tail(&link->cgrp_link, &cset->cgrp_links);

	if (cgroup_parent(cgrp))
		cgroup_get_live(cgrp);
}

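/**
 * find_css_set - return a new css_set with one cgroup updated
 * @old_cset: the old css_set that we're using before the cgroup transition
 * @cgrp: the cgroup that we're moving into
 *
 * Return a new css_set that's equivalent to @old_cset, but with @cgrp
 * substituted into the appropriate hierarchy.
 */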
static struct css_set *find_css_set(struct css_set *old_cset,
				    struct cgroup *cgrp)
{
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
	struct css_set *cset;
	struct list_head tmp_links;
	struct cgrp_cset_link *link;
	struct cgroup_subsys *ss;
	unsigned long key;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	/* First see if we already have a cgroup group that matches
	 * the desired set */
	spin_lock_irq(&css_set_lock);
	cset = find_existing_css_set(old_cset, cgrp, template);
	if (cset)
		get_css_set(cset);
	spin_unlock_irq(&css_set_lock);

	if (cset)
		return cset;

	cset = kzalloc(sizeof(*cset), GFP_KERNEL);
	if (!cset)
		return NULL;

	/* Allocate all the cgrp_cset_link objects that we'll need */
	if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
		kfree(cset);
		return NULL;
	}

	refcount_set(&cset->refcount, 1);
	cset->dom_cset = cset;
	INIT_LIST_HEAD(&cset->tasks);
	INIT_LIST_HEAD(&cset->mg_tasks);
	INIT_LIST_HEAD(&cset->dying_tasks);
	INIT_LIST_HEAD(&cset->task_iters);
	INIT_LIST_HEAD(&cset->threaded_csets);
	INIT_HLIST_NODE(&cset->hlist);
	INIT_LIST_HEAD(&cset->cgrp_links);
	INIT_LIST_HEAD(&cset->mg_preload_node);
	INIT_LIST_HEAD(&cset->mg_node);

	/* Copy the set of subsystem state objects generated in
	 * find_existing_css_set() */
	memcpy(cset->subsys, template, sizeof(cset->subsys));

	spin_lock_irq(&css_set_lock);
	/* Add reference counts and links from the new css_set. */
	list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
		struct cgroup *c = link->cgrp;

		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_links, cset, c);
	}

	BUG_ON(!list_empty(&tmp_links));

	css_set_count++;

	/* Add @cset to the hash table */
	key = css_set_hash(cset->subsys);
	hash_add(css_set_table, &cset->hlist, key);

	for_each_subsys(ss, ssid) {
		struct cgroup_subsys_state *css = cset->subsys[ssid];

		list_add_tail(&cset->e_cset_node[ssid],
			      &css->cgroup->e_csets[ssid]);
		css_get(css);
	}

	spin_unlock_irq(&css_set_lock);

	/*
	 * If @cset should be threaded, look up the matching dom_cset and
	 * link them up.  We first fully initialize @cset then look for the
	 * dom_cset.  It's simpler this way and safe as @cset is guaranteed
	 * to stay empty until we return.
	 */
	if (cgroup_is_threaded(cset->dfl_cgrp)) {
		struct css_set *dcset;

		dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp);
		if (!dcset) {
			put_css_set(cset);
			return NULL;
		}

		spin_lock_irq(&css_set_lock);
		cset->dom_cset = dcset;
		list_add_tail(&cset->threaded_csets_node,
			      &dcset->threaded_csets);
		spin_unlock_irq(&css_set_lock);
	}

	return cset;
}

struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
{
	struct cgroup *root_cgrp = kf_root->kn->priv;

	return root_cgrp->root;
}

static int cgroup_init_root_id(struct cgroup_root *root)
{
	int id;

	lockdep_assert_held(&cgroup_mutex);

	id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
	if (id < 0)
		return id;

	root->hierarchy_id = id;
	return 0;
}

static void cgroup_exit_root_id(struct cgroup_root *root)
{
	lockdep_assert_held(&cgroup_mutex);

	idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
}

void cgroup_free_root(struct cgroup_root *root)
{
	if (root) {
		idr_destroy(&root->cgroup_idr);
		kfree(root);
	}
}

static void cgroup_destroy_root(struct cgroup_root *root)
{
	struct cgroup *cgrp = &root->cgrp;
	struct cgrp_cset_link *link, *tmp_link;

	trace_cgroup_destroy_root(root);

	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

	BUG_ON(atomic_read(&root->nr_cgrps));
	BUG_ON(!list_empty(&cgrp->self.children));

	/* Rebind all subsystems back to the default hierarchy */
	WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));

	/*
	 * Release all the links from cset_links to this hierarchy's
	 * root cgroup
	 */
	spin_lock_irq(&css_set_lock);

	list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
		list_del(&link->cset_link);
		list_del(&link->cgrp_link);
		kfree(link);
	}

	spin_unlock_irq(&css_set_lock);

	if (!list_empty(&root->root_list)) {
		list_del(&root->root_list);
		cgroup_root_count--;
	}

	cgroup_exit_root_id(root);

	mutex_unlock(&cgroup_mutex);

	kernfs_destroy_root(root->kf_root);
	cgroup_free_root(root);
}

/*
 * look up cgroup associated with current task's cgroup namespace on the
 * specified hierarchy
 */
static struct cgroup *
current_cgns_cgroup_from_root(struct cgroup_root *root)
{
	struct cgroup *res = NULL;
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	rcu_read_lock();

	cset = current->nsproxy->cgroup_ns->root_cset;
	if (cset == &init_css_set) {
		res = &root->cgrp;
	} else {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}
	rcu_read_unlock();

	BUG_ON(!res);
	return res;
}

/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
					    struct cgroup_root *root)
{
	struct cgroup *res = NULL;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&css_set_lock);

	if (cset == &init_css_set) {
		res = &root->cgrp;
	} else if (root == &cgrp_dfl_root) {
		res = cset->dfl_cgrp;
	} else {
		struct cgrp_cset_link *link;

		list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
			struct cgroup *c = link->cgrp;

			if (c->root == root) {
				res = c;
				break;
			}
		}
	}

	BUG_ON(!res);
	return res;
}

/*
 * Return the cgroup for "task" from the given hierarchy.  Must be
 * called with cgroup_mutex and css_set_lock held.
 */
struct cgroup *task_cgroup_from_root(struct task_struct *task,
				     struct cgroup_root *root)
{
	/*
	 * No need to lock the task - since we hold both cgroup_mutex and
	 * css_set_lock the task can't change groups.
	 */
	return cset_cgroup_from_root(task_css_set(task), root);
}

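/*
 * A task must hold cgroup_mutex to modify cgroups.
 *
 * Any task can increment and decrement the count field without lock.
 * So in general, code holding cgroup_mutex can't rely on the count
 * field not changing.  However, if the count goes to zero, then only
 * cgroup_attach_task() can increment it again.  Because a count of zero
 * means that no tasks are currently attached, therefore there is no
 * way a task attached to that cgroup can fork (the other way to
 * increment the count).  So code holding cgroup_mutex can safely
 * assume that if the count is zero, it will stay zero.  Similarly, if
 * a task holds cgroup_mutex on a cgroup with zero count, it
 * knows that the cgroup won't be removed, as cgroup_rmdir()
 * needs that mutex.
 *
 * A cgroup can only be deleted if both its 'count' of using tasks
 * is zero, and its list of 'children' cgroups is empty.  Since all
 * tasks in the system use _some_ cgroup, and since there is always at
 * least one task in the system (init, pid == 1), therefore, root cgroup
 * always has either children cgroups and/or using tasks.  So we don't
 * need a special hack to ensure that root cgroup cannot be deleted.
 *
 * P.S.  One more locking exception.  RCU is used to guard the
 * update of a tasks cgroup pointer by cgroup_attach_task()
 */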
static struct kernfs_syscall_ops cgroup_kf_syscall_ops;

static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
			      char *buf)
{
	struct cgroup_subsys *ss = cft->ss;

	if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
	    !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
		const char *dbg = (cft->flags & CFTYPE_DEBUG) ? ".__DEBUG__." : "";

		snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s",
			 dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
			 cft->name);
	} else {
		strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
	}
	return buf;
}

/**
 * cgroup_file_mode - deduce file mode of a control file
 * @cft: the control file in question
 *
 * S_IRUGO for read, S_IWUSR for write.
 */
static umode_t cgroup_file_mode(const struct cftype *cft)
{
	umode_t mode = 0;

	if (cft->read_u64 || cft->read_s64 || cft->seq_show)
		mode |= S_IRUGO;

	if (cft->write_u64 || cft->write_s64 || cft->write) {
		if (cft->flags & CFTYPE_WORLD_WRITABLE)
			mode |= S_IWUGO;
		else
			mode |= S_IWUSR;
	}

	return mode;
}

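/**
 * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask
 * @subtree_control: the new subtree_control mask to consider
 * @this_ss_mask: available subsystems
 *
 * On the default hierarchy, a subsystem may request other subsystems to be
 * enabled together through its ->depends_on mask.  In such cases, more
 * subsystems than specified in "cgroup.subtree_control" may be enabled.
 *
 * This function calculates which subsystems need to be enabled if
 * @subtree_control is to be applied while restricted to @this_ss_mask.
 */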
static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
{
	u16 cur_ss_mask = subtree_control;
	struct cgroup_subsys *ss;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);

	cur_ss_mask |= cgrp_dfl_implicit_ss_mask;

	while (true) {
		u16 new_ss_mask = cur_ss_mask;

		do_each_subsys_mask(ss, ssid, cur_ss_mask) {
			new_ss_mask |= ss->depends_on;
		} while_each_subsys_mask();

		/*
		 * Mask out subsystems which aren't available.  This can
		 * happen only if some depended-upon subsystems were bound
		 * to non-default hierarchies.
		 */
		new_ss_mask &= this_ss_mask;

		if (new_ss_mask == cur_ss_mask)
			break;
		cur_ss_mask = new_ss_mask;
	}

	return cur_ss_mask;
}

/**
 * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
 * @kn: the kernfs_node being serviced
 *
 * This helper undoes cgroup_kn_lock_live() and should be invoked before
 * the method finishes if locking succeeded.  Note that once this function
 * returns the cgroup returned by cgroup_kn_lock_live() may become
 * inaccessible any time.  If the caller intends to continue to access the
 * cgroup, it should pin it before invoking this function.
 */
void cgroup_kn_unlock(struct kernfs_node *kn)
{
	struct cgroup *cgrp;

	if (kernfs_type(kn) == KERNFS_DIR)
		cgrp = kn->priv;
	else
		cgrp = kn->parent->priv;

	mutex_unlock(&cgroup_mutex);

	kernfs_unbreak_active_protection(kn);
	cgroup_put(cgrp);
}

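/**
 * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
 * @kn: the kernfs_node being serviced
 * @drain_offline: perform offline draining on the cgroup
 *
 * This helper is to be used by a cgroup kernfs method currently servicing
 * @kn.  It breaks the active protection, performs cgroup locking and
 * verifies that the associated cgroup is alive.  Returns the cgroup if
 * alive; otherwise, %NULL.  A successful return should be undone by a
 * matching cgroup_kn_unlock() invocation.  If @drain_offline is %true, the
 * cgroup is drained of offlining csses before return.
 *
 * Any cgroup kernfs method implementation which requires locking the
 * associated cgroup should use this helper.  It avoids nesting cgroup
 * locking under kernfs active protection and allows all kernfs operations
 * including self-removal.
 */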
struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
{
	struct cgroup *cgrp;

	if (kernfs_type(kn) == KERNFS_DIR)
		cgrp = kn->priv;
	else
		cgrp = kn->parent->priv;

	/*
	 * We're gonna grab cgroup_mutex which nests outside kernfs
	 * active_ref.  cgroup liveliness check alone provides enough
	 * protection against removal.  Ensure @cgrp stays accessible and
	 * break the active_ref protection.
	 */
	if (!cgroup_tryget(cgrp))
		return NULL;
	kernfs_break_active_protection(kn);

	if (drain_offline)
		cgroup_lock_and_drain_offline(cgrp);
	else
		mutex_lock(&cgroup_mutex);

	if (!cgroup_is_dead(cgrp))
		return cgrp;

	cgroup_kn_unlock(kn);
	return NULL;
}

static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
{
	char name[CGROUP_FILE_NAME_MAX];

	lockdep_assert_held(&cgroup_mutex);

	if (cft->file_offset) {
		struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
		struct cgroup_file *cfile = (void *)css + cft->file_offset;

		spin_lock_irq(&cgroup_file_kn_lock);
		cfile->kn = NULL;
		spin_unlock_irq(&cgroup_file_kn_lock);

		del_timer_sync(&cfile->notify_timer);
	}

	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
}

/**
 * css_clear_dir - remove subsys files in a cgroup directory
 * @css: target css
 */
static void css_clear_dir(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cftype *cfts;

	if (!(css->flags & CSS_VISIBLE))
		return;

	css->flags &= ~CSS_VISIBLE;

	if (!css->ss) {
		if (cgroup_on_dfl(cgrp))
			cfts = cgroup_base_files;
		else
			cfts = cgroup1_base_files;

		cgroup_addrm_files(css, cgrp, cfts, false);
	} else {
		list_for_each_entry(cfts, &css->ss->cfts, node)
			cgroup_addrm_files(css, cgrp, cfts, false);
	}
}

/**
 * css_populate_dir - create subsys files in a cgroup directory
 * @css: target css
 *
 * On failure, no file is added.
 */
static int css_populate_dir(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cftype *cfts, *failed_cfts;
	int ret;

	if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
		return 0;

	if (!css->ss) {
		if (cgroup_on_dfl(cgrp))
			cfts = cgroup_base_files;
		else
			cfts = cgroup1_base_files;

		ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
		if (ret < 0)
			return ret;
	} else {
		list_for_each_entry(cfts, &css->ss->cfts, node) {
			ret = cgroup_addrm_files(css, cgrp, cfts, true);
			if (ret < 0) {
				failed_cfts = cfts;
				goto err;
			}
		}
	}

	css->flags |= CSS_VISIBLE;

	return 0;
err:
	list_for_each_entry(cfts, &css->ss->cfts, node) {
		if (cfts == failed_cfts)
			break;
		cgroup_addrm_files(css, cgrp, cfts, false);
	}
	return ret;
}

int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
{
	struct cgroup *dcgrp = &dst_root->cgrp;
	struct cgroup_subsys *ss;
	int ssid, i, ret;

	lockdep_assert_held(&cgroup_mutex);

	do_each_subsys_mask(ss, ssid, ss_mask) {
		/*
		 * If @ss has non-root csses attached to it, can't move.
		 * If @ss is an implicit controller, it is exempt from this
		 * rule and can be stolen.
		 */
		if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
		    !ss->implicit_on_dfl)
			return -EBUSY;

		/* can't move between two non-dummy roots either */
		if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
			return -EBUSY;
	} while_each_subsys_mask();

	do_each_subsys_mask(ss, ssid, ss_mask) {
		struct cgroup_root *src_root = ss->root;
		struct cgroup *scgrp = &src_root->cgrp;
		struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
		struct css_set *cset;

		WARN_ON(!css || cgroup_css(dcgrp, ss));

		/* disable from the source */
		src_root->subsys_mask &= ~(1 << ssid);
		WARN_ON(cgroup_apply_control(scgrp));
		cgroup_finalize_control(scgrp, 0);

		/* rebind */
		RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
		rcu_assign_pointer(dcgrp->subsys[ssid], css);
		ss->root = dst_root;
		css->cgroup = dcgrp;

		spin_lock_irq(&css_set_lock);
		hash_for_each(css_set_table, i, cset, hlist)
			list_move_tail(&cset->e_cset_node[ss->id],
				       &dcgrp->e_csets[ss->id]);
		spin_unlock_irq(&css_set_lock);

		/* default hierarchy doesn't enable controllers by default */
		dst_root->subsys_mask |= 1 << ssid;
		if (dst_root == &cgrp_dfl_root) {
			static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
		} else {
			dcgrp->subtree_control |= 1 << ssid;
			static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
		}

		ret = cgroup_apply_control(dcgrp);
		if (ret)
			pr_warn("partial failure to rebind %s controller (err=%d)\n",
				ss->name, ret);

		if (ss->bind)
			ss->bind(css);
	} while_each_subsys_mask();

	kernfs_activate(dcgrp->kn);
	return 0;
}

int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
		     struct kernfs_root *kf_root)
{
	int len = 0;
	char *buf = NULL;
	struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
	struct cgroup *ns_cgroup;

	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	spin_lock_irq(&css_set_lock);
	ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
	len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
	spin_unlock_irq(&css_set_lock);

	if (len >= PATH_MAX)
		len = -ERANGE;
	else if (len > 0) {
		seq_escape(sf, buf, " \t\n\\");
		len = 0;
	}
	kfree(buf);
	return len;
}

enum cgroup2_param {
	Opt_nsdelegate,
	Opt_memory_localevents,
	nr__cgroup2_params
};

static const struct fs_parameter_spec cgroup2_param_specs[] = {
	fsparam_flag("nsdelegate",		Opt_nsdelegate),
	fsparam_flag("memory_localevents",	Opt_memory_localevents),
	{}
};

static const struct fs_parameter_description cgroup2_fs_parameters = {
	.name		= "cgroup2",
	.specs		= cgroup2_param_specs,
};

static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	struct fs_parse_result result;
	int opt;

	opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_nsdelegate:
		ctx->flags |= CGRP_ROOT_NS_DELEGATE;
		return 0;
	case Opt_memory_localevents:
		ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
		return 0;
	}
	return -EINVAL;
}

static void apply_cgroup_root_flags(unsigned int root_flags)
{
	if (current->nsproxy->cgroup_ns == &init_cgroup_ns) {
		if (root_flags & CGRP_ROOT_NS_DELEGATE)
			cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
		else
			cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;

		if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
			cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
		else
			cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_LOCAL_EVENTS;
	}
}

static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
{
	if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
		seq_puts(seq, ",nsdelegate");
	if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
		seq_puts(seq, ",memory_localevents");
	return 0;
}

static int cgroup_reconfigure(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);

	apply_cgroup_root_flags(ctx->flags);
	return 0;
}

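/*
 * To reduce the fork() overhead for systems that are not actually going to
 * use their cgroups capability, we don't maintain the lists running through
 * each css_set to its tasks until we see the list actually used - in other
 * words after the first mount.
 */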
static bool use_task_css_set_links __read_mostly;

static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;

	/*
	 * We need tasklist_lock because RCU is not safe against
	 * while_each_thread(). Besides, a forking task that has passed
	 * cgroup_post_fork() without seeing use_task_css_set_links = 1
	 * is not guaranteed to have its child immediately visible in the
	 * tasklist if we walk through it with RCU.
	 */
	read_lock(&tasklist_lock);
	spin_lock_irq(&css_set_lock);

	if (use_task_css_set_links)
		goto out_unlock;

	use_task_css_set_links = true;

	do_each_thread(g, p) {
		WARN_ON_ONCE(!list_empty(&p->cg_list) ||
			     task_css_set(p) != &init_css_set);

		/*
		 * We should check if the process is exiting, otherwise
		 * we will race with cgroup_exit() in that the list_head
		 * entry won't be deleted though the process has exited.
		 * Do it while holding siglock so that we don't end up
		 * racing against cgroup_exit().
		 *
		 * Interrupts were already disabled while acquiring
		 * the css_set_lock, so we do not need to disable it
		 * again when acquiring the sighand->siglock here.
		 */
		spin_lock(&p->sighand->siglock);
		if (!(p->flags & PF_EXITING)) {
			struct css_set *cset = task_css_set(p);

			if (!css_set_populated(cset))
				css_set_update_populated(cset, true);
			list_add_tail(&p->cg_list, &cset->tasks);
			get_css_set(cset);
			cset->nr_tasks++;
		}
		spin_unlock(&p->sighand->siglock);
	} while_each_thread(g, p);
out_unlock:
	spin_unlock_irq(&css_set_lock);
	read_unlock(&tasklist_lock);
}

static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	int ssid;

	INIT_LIST_HEAD(&cgrp->self.sibling);
	INIT_LIST_HEAD(&cgrp->self.children);
	INIT_LIST_HEAD(&cgrp->cset_links);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	cgrp->self.cgroup = cgrp;
	cgrp->self.flags |= CSS_ONLINE;
	cgrp->dom_cgrp = cgrp;
	cgrp->max_descendants = INT_MAX;
	cgrp->max_depth = INT_MAX;
	INIT_LIST_HEAD(&cgrp->rstat_css_list);
	prev_cputime_init(&cgrp->prev_cputime);

	for_each_subsys(ss, ssid)
		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);

	init_waitqueue_head(&cgrp->offline_waitq);
	INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
}

void init_cgroup_root(struct cgroup_fs_context *ctx)
{
	struct cgroup_root *root = ctx->root;
	struct cgroup *cgrp = &root->cgrp;

	INIT_LIST_HEAD(&root->root_list);
	atomic_set(&root->nr_cgrps, 1);
	cgrp->root = root;
	init_cgroup_housekeeping(cgrp);
	idr_init(&root->cgroup_idr);

	root->flags = ctx->flags;
	if (ctx->release_agent)
		strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
	if (ctx->name)
		strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
	if (ctx->cpuset_clone_children)
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
}

int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
{
	LIST_HEAD(tmp_links);
	struct cgroup *root_cgrp = &root->cgrp;
	struct kernfs_syscall_ops *kf_sops;
	struct css_set *cset;
	int i, ret;

	lockdep_assert_held(&cgroup_mutex);

	ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
	if (ret < 0)
		goto out;
	root_cgrp->id = ret;
	root_cgrp->ancestor_ids[0] = ret;

	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
			      0, GFP_KERNEL);
	if (ret)
		goto out;

	/*
	 * We're accessing css_set_count without locking css_set_lock here,
	 * but that's OK - it can only be increased by someone holding
	 * cgroup_lock, and that's us.  Later rebinding may disable
	 * controllers on the default hierarchy and thus create new csets,
	 * which can't be more than the existing ones.  Allocate 2x.
	 */
	ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
	if (ret)
		goto cancel_ref;

	ret = cgroup_init_root_id(root);
	if (ret)
		goto cancel_ref;

	kf_sops = root == &cgrp_dfl_root ?
		&cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops;

	root->kf_root = kernfs_create_root(kf_sops,
					   KERNFS_ROOT_CREATE_DEACTIVATED |
					   KERNFS_ROOT_SUPPORT_EXPORTOP,
					   root_cgrp);
	if (IS_ERR(root->kf_root)) {
		ret = PTR_ERR(root->kf_root);
		goto exit_root_id;
	}
	root_cgrp->kn = root->kf_root->kn;

	ret = css_populate_dir(&root_cgrp->self);
	if (ret)
		goto destroy_root;

	ret = rebind_subsystems(root, ss_mask);
	if (ret)
		goto destroy_root;

	ret = cgroup_bpf_inherit(root_cgrp);
	WARN_ON_ONCE(ret);

	trace_cgroup_setup_root(root);

	/*
	 * There must be no failure case after here, since rebinding takes
	 * care of subsystems' refcounts, which are explicitly dropped in
	 * the failure exit path.
	 */
	list_add(&root->root_list, &cgroup_roots);
	cgroup_root_count++;

	/*
	 * Link the root cgroup in this hierarchy into all the css_set
	 * objects.
	 */
	spin_lock_irq(&css_set_lock);
	hash_for_each(css_set_table, i, cset, hlist) {
		link_css_set(&tmp_links, cset, root_cgrp);
		if (css_set_populated(cset))
			cgroup_update_populated(root_cgrp, true);
	}
	spin_unlock_irq(&css_set_lock);

	BUG_ON(!list_empty(&root_cgrp->self.children));
	BUG_ON(atomic_read(&root->nr_cgrps) != 1);

	kernfs_activate(root_cgrp->kn);
	ret = 0;
	goto out;

destroy_root:
	kernfs_destroy_root(root->kf_root);
	root->kf_root = NULL;
exit_root_id:
	cgroup_exit_root_id(root);
cancel_ref:
	percpu_ref_exit(&root_cgrp->self.refcnt);
out:
	free_cgrp_cset_links(&tmp_links);
	return ret;
}

int cgroup_do_get_tree(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	int ret;

	ctx->kfc.root = ctx->root->kf_root;
	if (fc->fs_type == &cgroup2_fs_type)
		ctx->kfc.magic = CGROUP2_SUPER_MAGIC;
	else
		ctx->kfc.magic = CGROUP_SUPER_MAGIC;
	ret = kernfs_get_tree(fc);

	/*
	 * In non-init cgroup namespace, instead of root cgroup's dentry,
	 * we return the dentry corresponding to the cgroupns->root_cgrp.
	 */
	if (!ret && ctx->ns != &init_cgroup_ns) {
		struct dentry *nsdentry;
		struct super_block *sb = fc->root->d_sb;
		struct cgroup *cgrp;

		mutex_lock(&cgroup_mutex);
		spin_lock_irq(&css_set_lock);

		cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);

		spin_unlock_irq(&css_set_lock);
		mutex_unlock(&cgroup_mutex);

		nsdentry = kernfs_node_dentry(cgrp->kn, sb);
		dput(fc->root);
		fc->root = nsdentry;
		if (IS_ERR(nsdentry)) {
			ret = PTR_ERR(nsdentry);
			deactivate_locked_super(sb);
		}
	}

	if (!ctx->kfc.new_sb_created)
		cgroup_put(&ctx->root->cgrp);

	return ret;
}

/*
 * Destroy a cgroup filesystem context.
 */
static void cgroup_fs_context_free(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);

	kfree(ctx->name);
	kfree(ctx->release_agent);
	put_cgroup_ns(ctx->ns);
	kernfs_free_fs_context(fc);
	kfree(ctx);
}

static int cgroup_get_tree(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	int ret;

	cgrp_dfl_visible = true;
	cgroup_get_live(&cgrp_dfl_root.cgrp);
	ctx->root = &cgrp_dfl_root;

	ret = cgroup_do_get_tree(fc);
	if (!ret)
		apply_cgroup_root_flags(ctx->flags);
	return ret;
}

static const struct fs_context_operations cgroup_fs_context_ops = {
	.free		= cgroup_fs_context_free,
	.parse_param	= cgroup2_parse_param,
	.get_tree	= cgroup_get_tree,
	.reconfigure	= cgroup_reconfigure,
};

static const struct fs_context_operations cgroup1_fs_context_ops = {
	.free		= cgroup_fs_context_free,
	.parse_param	= cgroup1_parse_param,
	.get_tree	= cgroup1_get_tree,
	.reconfigure	= cgroup1_reconfigure,
};

/*
 * Initialise the cgroup filesystem creation/reconfiguration context.  Notably,
 * we select the namespace we're going to use.
 */
static int cgroup_init_fs_context(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx;

	ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	/*
	 * The first time anyone tries to mount a cgroup, enable the list
	 * linking each css_set to its tasks and fix up all existing tasks.
	 */
	if (!use_task_css_set_links)
		cgroup_enable_task_cg_lists();

	ctx->ns = current->nsproxy->cgroup_ns;
	get_cgroup_ns(ctx->ns);
	fc->fs_private = &ctx->kfc;
	if (fc->fs_type == &cgroup2_fs_type)
		fc->ops = &cgroup_fs_context_ops;
	else
		fc->ops = &cgroup1_fs_context_ops;
	if (fc->user_ns)
		put_user_ns(fc->user_ns);
	fc->user_ns = get_user_ns(ctx->ns->user_ns);
	fc->global = true;
	return 0;
}

static void cgroup_kill_sb(struct super_block *sb)
{
	struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
	struct cgroup_root *root = cgroup_root_from_kf(kf_root);

	/*
	 * If @root doesn't have any children, start killing it.
	 * This prevents new mounts by disabling percpu_ref_tryget_live().
	 *
	 * And don't kill the default root.
	 */
	if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
	    !percpu_ref_is_dying(&root->cgrp.self.refcnt))
		percpu_ref_kill(&root->cgrp.self.refcnt);
	cgroup_put(&root->cgrp);
	kernfs_kill_sb(sb);
}

struct file_system_type cgroup_fs_type = {
	.name			= "cgroup",
	.init_fs_context	= cgroup_init_fs_context,
	.parameters		= &cgroup1_fs_parameters,
	.kill_sb		= cgroup_kill_sb,
	.fs_flags		= FS_USERNS_MOUNT,
};

static struct file_system_type cgroup2_fs_type = {
	.name			= "cgroup2",
	.init_fs_context	= cgroup_init_fs_context,
	.parameters		= &cgroup2_fs_parameters,
	.kill_sb		= cgroup_kill_sb,
	.fs_flags		= FS_USERNS_MOUNT,
};

int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
			  struct cgroup_namespace *ns)
{
	struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);

	return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
}

int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
		   struct cgroup_namespace *ns)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(cgroup_path_ns);

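/**
 * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
 * @task: target task
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
 * Determine @task's cgroup on the first (the one with the lowest non-zero
 * hierarchy_id) cgroup hierarchy and copy its path into @buf.  This
 * function grabs cgroup_mutex and shouldn't be used inside locks used by
 * cgroup controller callbacks.
 *
 * Return value is the same as kernfs_path().
 */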
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
	struct cgroup_root *root;
	struct cgroup *cgrp;
	int hierarchy_id = 1;
	int ret;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);

	if (root) {
		cgrp = task_cgroup_from_root(task, root);
		ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
	} else {
		/* if no hierarchy exists, everyone is in "/" */
		ret = strlcpy(buf, "/", buflen);
	}

	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);

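/**
 * cgroup_migrate_add_task - add a migration target task to a migration context
 * @task: target task
 * @mgctx: target migration context
 *
 * Add @task, which is a migration target, to @mgctx->tset.  This function
 * becomes noop if @task doesn't need to be migrated.  @task's css_set
 * should have been added as a migration source and @task->cg_list will be
 * moved from the css_set's tasks list to mg_tasks one.
 */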
static void cgroup_migrate_add_task(struct task_struct *task,
				    struct cgroup_mgctx *mgctx)
{
	struct css_set *cset;

	lockdep_assert_held(&css_set_lock);

	/* @task either already exited or can't exit until the end */
	if (task->flags & PF_EXITING)
		return;

	/* leave @task alone if post_fork() hasn't linked it yet */
	if (list_empty(&task->cg_list))
		return;

	cset = task_css_set(task);
	if (!cset->mg_src_cgrp)
		return;

	mgctx->tset.nr_tasks++;

	list_move_tail(&task->cg_list, &cset->mg_tasks);
	if (list_empty(&cset->mg_node))
		list_add_tail(&cset->mg_node,
			      &mgctx->tset.src_csets);
	if (list_empty(&cset->mg_dst_cset->mg_node))
		list_add_tail(&cset->mg_dst_cset->mg_node,
			      &mgctx->tset.dst_csets);
}

/**
 * cgroup_taskset_first - reset taskset and return the first task
 * @tset: taskset of interest
 * @dst_cssp: output variable for the destination css
 *
 * @tset iteration is initialized and the first task is returned.
 */
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
					 struct cgroup_subsys_state **dst_cssp)
{
	tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
	tset->cur_task = NULL;

	return cgroup_taskset_next(tset, dst_cssp);
}

/**
 * cgroup_taskset_next - iterate to the next task in taskset
 * @tset: taskset of interest
 * @dst_cssp: output variable for the destination css
 *
 * Return the next task in @tset.  Iteration must have been initialized
 * with cgroup_taskset_first().
 */
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
					struct cgroup_subsys_state **dst_cssp)
{
	struct css_set *cset = tset->cur_cset;
	struct task_struct *task = tset->cur_task;

	while (&cset->mg_node != tset->csets) {
		if (!task)
			task = list_first_entry(&cset->mg_tasks,
						struct task_struct, cg_list);
		else
			task = list_next_entry(task, cg_list);

		if (&task->cg_list != &cset->mg_tasks) {
			tset->cur_cset = cset;
			tset->cur_task = task;

			/*
			 * This function may be called both before and
			 * after cgroup_migrate_execute().  The two cases
			 * can be distinguished by looking at whether @cset
			 * has its ->mg_dst_cset set.
			 */
			if (cset->mg_dst_cset)
				*dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
			else
				*dst_cssp = cset->subsys[tset->ssid];

			return task;
		}

		cset = list_next_entry(cset, mg_node);
		task = NULL;
	}

	return NULL;
}

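/**
 * cgroup_migrate_execute - migrate a taskset
 * @mgctx: migration context
 *
 * Migrate tasks in @mgctx as setup by migration preparation functions.
 * This function fails iff one of the ->can_attach callbacks fails and
 * guarantees that either all or none of the tasks in @mgctx are migrated.
 * @mgctx is consumed regardless of success or failure.
 */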
static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
{
	struct cgroup_taskset *tset = &mgctx->tset;
	struct cgroup_subsys *ss;
	struct task_struct *task, *tmp_task;
	struct css_set *cset, *tmp_cset;
	int ssid, failed_ssid, ret;

	/* check that we can legitimately attach to the cgroup */
	if (tset->nr_tasks) {
		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
			if (ss->can_attach) {
				tset->ssid = ssid;
				ret = ss->can_attach(tset);
				if (ret) {
					failed_ssid = ssid;
					goto out_cancel_attach;
				}
			}
		} while_each_subsys_mask();
	}

	/*
	 * Now that we're guaranteed success, proceed to move all tasks to
	 * the new cgroup.  There are no failure cases after here, so this
	 * is the commit point.
	 */
	spin_lock_irq(&css_set_lock);
	list_for_each_entry(cset, &tset->src_csets, mg_node) {
		list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
			struct css_set *from_cset = task_css_set(task);
			struct css_set *to_cset = cset->mg_dst_cset;

			get_css_set(to_cset);
			to_cset->nr_tasks++;
			css_set_move_task(task, from_cset, to_cset, true);
			from_cset->nr_tasks--;
			/*
			 * If the source or destination cgroup is frozen,
			 * the task might require to change its state.
			 */
			cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
						    to_cset->dfl_cgrp);
			put_css_set_locked(from_cset);
		}
	}
	spin_unlock_irq(&css_set_lock);

	/*
	 * Migration is committed, all target tasks are now on dst_csets.
	 * Nothing is sensitive to fork() after this point.  Notify
	 * controllers that migration is complete.
	 */
	tset->csets = &tset->dst_csets;

	if (tset->nr_tasks) {
		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
			if (ss->attach) {
				tset->ssid = ssid;
				ss->attach(tset);
			}
		} while_each_subsys_mask();
	}

	ret = 0;
	goto out_release_tset;

out_cancel_attach:
	if (tset->nr_tasks) {
		do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
			if (ssid == failed_ssid)
				break;
			if (ss->cancel_attach) {
				tset->ssid = ssid;
				ss->cancel_attach(tset);
			}
		} while_each_subsys_mask();
	}
out_release_tset:
	spin_lock_irq(&css_set_lock);
	list_splice_init(&tset->dst_csets, &tset->src_csets);
	list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
		list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
		list_del_init(&cset->mg_node);
	}
	spin_unlock_irq(&css_set_lock);

	/*
	 * Re-initialize the cgroup_taskset structure in case it is reused
	 * again in another cgroup_migrate_add_task()/cgroup_migrate_execute()
	 * iteration.
	 */
	tset->nr_tasks = 0;
	tset->csets = &tset->src_csets;
	return ret;
}

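/**
 * cgroup_migrate_vet_dst - verify whether a cgroup can be migration destination
 * @dst_cgrp: destination cgroup to test
 *
 * On the default hierarchy, except for the mixable, (possible) thread root
 * and threaded cgroups, subtree_control must be zero for migration
 * destination cgroups with tasks so that child cgroups don't compete
 * against tasks.
 */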
int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
{
	/* v1 doesn't have any restriction */
	if (!cgroup_on_dfl(dst_cgrp))
		return 0;

	/* verify @dst_cgrp can host resources */
	if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
		return -EOPNOTSUPP;

	/* mixables don't care */
	if (cgroup_is_mixable(dst_cgrp))
		return 0;

	/*
	 * If @dst_cgrp is already or can become a thread root or is
	 * threaded, it doesn't matter.
	 */
	if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp))
		return 0;

	/* apply no-internal-process constraint */
	if (dst_cgrp->subtree_control)
		return -EBUSY;

	return 0;
}

/**
 * cgroup_migrate_finish - cleanup after attach
 * @mgctx: migration context
 *
 * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst().  See
 * those functions for details.
 */
void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
{
	LIST_HEAD(preloaded);
	struct css_set *cset, *tmp_cset;

	lockdep_assert_held(&cgroup_mutex);

	spin_lock_irq(&css_set_lock);

	list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
	list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);

	list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
		cset->mg_src_cgrp = NULL;
		cset->mg_dst_cgrp = NULL;
		cset->mg_dst_cset = NULL;
		list_del_init(&cset->mg_preload_node);
		put_css_set_locked(cset);
	}

	spin_unlock_irq(&css_set_lock);
}

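/**
 * cgroup_migrate_add_src - add a migration source css_set
 * @src_cset: the source css_set to add
 * @dst_cgrp: the destination cgroup
 * @mgctx: migration context
 *
 * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp.  Pin
 * @src_cset and add it to @mgctx->preloaded_src_csets, which should later
 * be put into effect by cgroup_migrate().
 *
 * This function may be called without holding cgroup_threadgroup_rwsem
 * even if the target is a process.  Threads may be created and destroyed
 * but as long as cgroup_mutex is not dropped, no new css_set can be put
 * into play and the preloaded css_sets are guaranteed to cover all
 * migrations.
 */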
void cgroup_migrate_add_src(struct css_set *src_cset,
			    struct cgroup *dst_cgrp,
			    struct cgroup_mgctx *mgctx)
{
	struct cgroup *src_cgrp;

	lockdep_assert_held(&cgroup_mutex);
	lockdep_assert_held(&css_set_lock);

	/*
	 * If ->dead, @src_set is associated with one or more dead cgroups
	 * and doesn't contain any migratable tasks.  Ignore it early so
	 * that the rest of migration path doesn't get confused by it.
	 */
	if (src_cset->dead)
		return;

	src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);

	if (!list_empty(&src_cset->mg_preload_node))
		return;

	WARN_ON(src_cset->mg_src_cgrp);
	WARN_ON(src_cset->mg_dst_cgrp);
	WARN_ON(!list_empty(&src_cset->mg_tasks));
	WARN_ON(!list_empty(&src_cset->mg_node));

	src_cset->mg_src_cgrp = src_cgrp;
	src_cset->mg_dst_cgrp = dst_cgrp;
	get_css_set(src_cset);
	list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
}

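/**
 * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
 * @mgctx: migration context
 *
 * Tasks are about to be moved and all the source css_sets have been
 * preloaded to @mgctx->preloaded_src_csets.  This function looks up and
 * pins all destination css_sets, links each to its source, and appends
 * them to @mgctx->preloaded_dst_csets.
 *
 * This function must be called after cgroup_migrate_add_src() has been
 * called on each migration source css_set.  After migration is performed
 * using cgroup_migrate(), cgroup_migrate_finish() must be called on
 * @mgctx.
 */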
int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
{
	struct css_set *src_cset, *tmp_cset;

	lockdep_assert_held(&cgroup_mutex);

	/* look up the dst cset for each src cset and link it to src */
	list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
				 mg_preload_node) {
		struct css_set *dst_cset;
		struct cgroup_subsys *ss;
		int ssid;

		dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
		if (!dst_cset)
			return -ENOMEM;

		WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);

		/*
		 * If src cset equals dst, it's noop.  Drop the src.
		 * cgroup_migrate() will skip the cset too.  Note that we
		 * can't handle src == dst as some nodes are used by both.
		 */
		if (src_cset == dst_cset) {
			src_cset->mg_src_cgrp = NULL;
			src_cset->mg_dst_cgrp = NULL;
			list_del_init(&src_cset->mg_preload_node);
			put_css_set(src_cset);
			put_css_set(dst_cset);
			continue;
		}

		src_cset->mg_dst_cset = dst_cset;

		if (list_empty(&dst_cset->mg_preload_node))
			list_add_tail(&dst_cset->mg_preload_node,
				      &mgctx->preloaded_dst_csets);
		else
			put_css_set(dst_cset);

		for_each_subsys(ss, ssid)
			if (src_cset->subsys[ssid] != dst_cset->subsys[ssid])
				mgctx->ss_mask |= 1 << ssid;
	}

	return 0;
}

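/**
 * cgroup_migrate - migrate a process or task to a cgroup
 * @leader: the leader of the process or the task to migrate
 * @threadgroup: whether @leader points to the whole process or a single task
 * @mgctx: migration context
 *
 * Migrate a process or task denoted by @leader.  If migrating a process,
 * the caller must be holding cgroup_threadgroup_rwsem.  The caller is also
 * responsible for invoking cgroup_migrate_add_src() and
 * cgroup_migrate_prepare_dst() on the targets before invoking this
 * function and following up with cgroup_migrate_finish().
 *
 * As long as a controller's ->can_attach() doesn't fail, this function is
 * guaranteed to succeed.  This means that, excluding ->can_attach()
 * failure, when migrating multiple targets, the success or failure can be
 * decided for all targets by invoking cgroup_migrate_prepare_dst() before
 * actually starting migrating.
 */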
int cgroup_migrate(struct task_struct *leader, bool threadgroup,
		   struct cgroup_mgctx *mgctx)
{
	struct task_struct *task;

	/*
	 * Prevent freeing of tasks while we take a snapshot. Tasks that are
	 * already PF_EXITING could be freed from underneath us unless we
	 * take an rcu_read_lock.
	 */
	spin_lock_irq(&css_set_lock);
	rcu_read_lock();
	task = leader;
	do {
		cgroup_migrate_add_task(task, mgctx);
		if (!threadgroup)
			break;
	} while_each_thread(leader, task);
	rcu_read_unlock();
	spin_unlock_irq(&css_set_lock);

	return cgroup_migrate_execute(mgctx);
}

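/**
 * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
 * @dst_cgrp: the cgroup to attach to
 * @leader: the task or the leader of the threadgroup to be attached
 * @threadgroup: attach the whole threadgroup?
 *
 * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
 */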
2748int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
2749 bool threadgroup)
2750{
2751 DEFINE_CGROUP_MGCTX(mgctx);
2752 struct task_struct *task;
2753 int ret;
2754
2755 ret = cgroup_migrate_vet_dst(dst_cgrp);
2756 if (ret)
2757 return ret;
2758
2759
2760 spin_lock_irq(&css_set_lock);
2761 rcu_read_lock();
2762 task = leader;
2763 do {
2764 cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
2765 if (!threadgroup)
2766 break;
2767 } while_each_thread(leader, task);
2768 rcu_read_unlock();
2769 spin_unlock_irq(&css_set_lock);
2770
2771
2772 ret = cgroup_migrate_prepare_dst(&mgctx);
2773 if (!ret)
2774 ret = cgroup_migrate(leader, threadgroup, &mgctx);
2775
2776 cgroup_migrate_finish(&mgctx);
2777
2778 if (!ret)
2779 TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
2780
2781 return ret;
2782}
2783
2784struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
2785 __acquires(&cgroup_threadgroup_rwsem)
2786{
2787 struct task_struct *tsk;
2788 pid_t pid;
2789
2790 if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
2791 return ERR_PTR(-EINVAL);
2792
2793 percpu_down_write(&cgroup_threadgroup_rwsem);
2794
2795 rcu_read_lock();
2796 if (pid) {
2797 tsk = find_task_by_vpid(pid);
2798 if (!tsk) {
2799 tsk = ERR_PTR(-ESRCH);
2800 goto out_unlock_threadgroup;
2801 }
2802 } else {
2803 tsk = current;
2804 }
2805
2806 if (threadgroup)
2807 tsk = tsk->group_leader;
2808
2809
2810
2811
2812
2813
2814
2815 if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
2816 tsk = ERR_PTR(-EINVAL);
2817 goto out_unlock_threadgroup;
2818 }
2819
2820 get_task_struct(tsk);
2821 goto out_unlock_rcu;
2822
2823out_unlock_threadgroup:
2824 percpu_up_write(&cgroup_threadgroup_rwsem);
2825out_unlock_rcu:
2826 rcu_read_unlock();
2827 return tsk;
2828}
2829
2830void cgroup_procs_write_finish(struct task_struct *task)
2831 __releases(&cgroup_threadgroup_rwsem)
2832{
2833 struct cgroup_subsys *ss;
2834 int ssid;
2835
2836
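 /* release reference from cgroup_procs_write_start() */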
2837 put_task_struct(task);
2838
2839 percpu_up_write(&cgroup_threadgroup_rwsem);
2840 for_each_subsys(ss, ssid)
2841 if (ss->post_attach)
2842 ss->post_attach();
2843}
2844
2845static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
2846{
2847 struct cgroup_subsys *ss;
2848 bool printed = false;
2849 int ssid;
2850
2851 do_each_subsys_mask(ss, ssid, ss_mask) {
2852 if (printed)
2853 seq_putc(seq, ' ');
2854 seq_printf(seq, "%s", ss->name);
2855 printed = true;
2856 } while_each_subsys_mask();
2857 if (printed)
2858 seq_putc(seq, '\n');
2859}
2860
2861
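/* show controllers which are enabled from the parent */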
2862static int cgroup_controllers_show(struct seq_file *seq, void *v)
2863{
2864 struct cgroup *cgrp = seq_css(seq)->cgroup;
2865
2866 cgroup_print_ss_mask(seq, cgroup_control(cgrp));
2867 return 0;
2868}
2869
2870
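/* show controllers which are enabled for a given cgroup's children */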
2871static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
2872{
2873 struct cgroup *cgrp = seq_css(seq)->cgroup;
2874
2875 cgroup_print_ss_mask(seq, cgrp->subtree_control);
2876 return 0;
2877}
2878
/**
 * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
 * @cgrp: root of the subtree to update csses for
 *
 * @cgrp's control masks have changed and its subtree's css associations
 * need to be updated accordingly.  This function looks up all css_sets
 * which are attached to the subtree, creates the matching updated css_sets
 * and migrates the tasks to the new ones.
 */
2888static int cgroup_update_dfl_csses(struct cgroup *cgrp)
2889{
2890 DEFINE_CGROUP_MGCTX(mgctx);
2891 struct cgroup_subsys_state *d_css;
2892 struct cgroup *dsct;
2893 struct css_set *src_cset;
2894 int ret;
2895
2896 lockdep_assert_held(&cgroup_mutex);
2897
2898 percpu_down_write(&cgroup_threadgroup_rwsem);
2899
2900
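 /* look up all csses currently attached to @cgrp's subtree */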
2901 spin_lock_irq(&css_set_lock);
2902 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
2903 struct cgrp_cset_link *link;
2904
2905 list_for_each_entry(link, &dsct->cset_links, cset_link)
2906 cgroup_migrate_add_src(link->cset, dsct, &mgctx);
2907 }
2908 spin_unlock_irq(&css_set_lock);
2909
2910
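 /* NULL dst indicates self on default hierarchy */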
2911 ret = cgroup_migrate_prepare_dst(&mgctx);
2912 if (ret)
2913 goto out_finish;
2914
2915 spin_lock_irq(&css_set_lock);
2916 list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
2917 struct task_struct *task, *ntask;
2918
2919
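 /* all tasks in src_csets need to be migrated */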
2920 list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
2921 cgroup_migrate_add_task(task, &mgctx);
2922 }
2923 spin_unlock_irq(&css_set_lock);
2924
2925 ret = cgroup_migrate_execute(&mgctx);
2926out_finish:
2927 cgroup_migrate_finish(&mgctx);
2928 percpu_up_write(&cgroup_threadgroup_rwsem);
2929 return ret;
2930}
2931
/**
 * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
 * @cgrp: root of the target subtree
 *
 * Because css offlining is asynchronous, userland may try to re-enable a
 * controller while the previous css is still around.  This function grabs
 * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
 */
2940void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
2941 __acquires(&cgroup_mutex)
2942{
2943 struct cgroup *dsct;
2944 struct cgroup_subsys_state *d_css;
2945 struct cgroup_subsys *ss;
2946 int ssid;
2947
2948restart:
2949 mutex_lock(&cgroup_mutex);
2950
2951 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
2952 for_each_subsys(ss, ssid) {
2953 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
2954 DEFINE_WAIT(wait);
2955
2956 if (!css || !percpu_ref_is_dying(&css->refcnt))
2957 continue;
2958
2959 cgroup_get_live(dsct);
2960 prepare_to_wait(&dsct->offline_waitq, &wait,
2961 TASK_UNINTERRUPTIBLE);
2962
2963 mutex_unlock(&cgroup_mutex);
2964 schedule();
2965 finish_wait(&dsct->offline_waitq, &wait);
2966
2967 cgroup_put(dsct);
2968 goto restart;
2969 }
2970 }
2971}
2972
/**
 * cgroup_save_control - save control masks and dom_cgrp of a subtree
 * @cgrp: root of the target subtree
 *
 * Save ->subtree_control, ->subtree_ss_mask and ->dom_cgrp to the
 * respective old_ prefixed fields for @cgrp's subtree including @cgrp
 * itself.
 */
2981static void cgroup_save_control(struct cgroup *cgrp)
2982{
2983 struct cgroup *dsct;
2984 struct cgroup_subsys_state *d_css;
2985
2986 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
2987 dsct->old_subtree_control = dsct->subtree_control;
2988 dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
2989 dsct->old_dom_cgrp = dsct->dom_cgrp;
2990 }
2991}
2992
/**
 * cgroup_propagate_control - refresh control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * For @cgrp and its subtree, ensure ->subtree_ss_mask matches
 * ->subtree_control and propagate controller availability through the
 * subtree so that descendants don't have unavailable controllers enabled.
 */
3001static void cgroup_propagate_control(struct cgroup *cgrp)
3002{
3003 struct cgroup *dsct;
3004 struct cgroup_subsys_state *d_css;
3005
3006 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3007 dsct->subtree_control &= cgroup_control(dsct);
3008 dsct->subtree_ss_mask =
3009 cgroup_calc_subtree_ss_mask(dsct->subtree_control,
3010 cgroup_ss_mask(dsct));
3011 }
3012}
3013
/**
 * cgroup_restore_control - restore control masks and dom_cgrp of a subtree
 * @cgrp: root of the target subtree
 *
 * Restore ->subtree_control, ->subtree_ss_mask and ->dom_cgrp from the
 * respective old_ prefixed fields for @cgrp's subtree including @cgrp
 * itself.
 */
3022static void cgroup_restore_control(struct cgroup *cgrp)
3023{
3024 struct cgroup *dsct;
3025 struct cgroup_subsys_state *d_css;
3026
3027 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3028 dsct->subtree_control = dsct->old_subtree_control;
3029 dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
3030 dsct->dom_cgrp = dsct->old_dom_cgrp;
3031 }
3032}
3033
3034static bool css_visible(struct cgroup_subsys_state *css)
3035{
3036 struct cgroup_subsys *ss = css->ss;
3037 struct cgroup *cgrp = css->cgroup;
3038
3039 if (cgroup_control(cgrp) & (1 << ss->id))
3040 return true;
3041 if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
3042 return false;
3043 return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
3044}
3045
/**
 * cgroup_apply_control_enable - enable or show csses according to control
 * @cgrp: root of the target subtree
 *
 * Walk @cgrp's subtree and create new csses or make the existing ones
 * visible.  A css is created invisible if it's being implicitly enabled
 * through dependency.  An invisible css is made visible when the userland
 * explicitly enables it.
 *
 * Returns 0 on success, -errno on failure.  On failure, csses which have
 * been processed already aren't cleaned up.  The caller is responsible for
 * cleaning up with cgroup_apply_control_disable().
 */
3059static int cgroup_apply_control_enable(struct cgroup *cgrp)
3060{
3061 struct cgroup *dsct;
3062 struct cgroup_subsys_state *d_css;
3063 struct cgroup_subsys *ss;
3064 int ssid, ret;
3065
3066 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
3067 for_each_subsys(ss, ssid) {
3068 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3069
3070 WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
3071
3072 if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
3073 continue;
3074
3075 if (!css) {
3076 css = css_create(dsct, ss);
3077 if (IS_ERR(css))
3078 return PTR_ERR(css);
3079 }
3080
3081 if (css_visible(css)) {
3082 ret = css_populate_dir(css);
3083 if (ret)
3084 return ret;
3085 }
3086 }
3087 }
3088
3089 return 0;
3090}
3091
/**
 * cgroup_apply_control_disable - kill or hide csses according to control
 * @cgrp: root of the target subtree
 *
 * Walk @cgrp's subtree and kill and hide csses so that they match
 * cgroup_ss_mask() and cgroup_visible_mask().
 *
 * A css is hidden when the userland requests it to be disabled while other
 * subsystems are still depending on it.  The css must not actively control
 * resources and be in the vanilla state if it's made visible again later.
 * Controllers which may be depended upon should provide ->css_reset() for
 * this purpose.
 */
3105static void cgroup_apply_control_disable(struct cgroup *cgrp)
3106{
3107 struct cgroup *dsct;
3108 struct cgroup_subsys_state *d_css;
3109 struct cgroup_subsys *ss;
3110 int ssid;
3111
3112 cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
3113 for_each_subsys(ss, ssid) {
3114 struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
3115
3116 WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
3117
3118 if (!css)
3119 continue;
3120
3121 if (css->parent &&
3122 !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
3123 kill_css(css);
3124 } else if (!css_visible(css)) {
3125 css_clear_dir(css);
3126 if (ss->css_reset)
3127 ss->css_reset(css);
3128 }
3129 }
3130 }
3131}
3132
/**
 * cgroup_apply_control - apply control mask updates to the subtree
 * @cgrp: root of the target subtree
 *
 * subsystems can be enabled and disabled in a subtree using the following
 * steps.
 *
 * 1. Call cgroup_save_control() to stash the current state.
 * 2. Update ->subtree_control masks in the subtree as desired.
 * 3. Call cgroup_apply_control() to apply the changes.
 * 4. Optionally perform other related operations.
 * 5. Call cgroup_finalize_control() to finish up.
 *
 * This function implements step 3 and propagates the mask changes
 * throughout @cgrp's subtree, updates csses accordingly and performs
 * process migrations.
 */
3150static int cgroup_apply_control(struct cgroup *cgrp)
3151{
3152 int ret;
3153
3154 cgroup_propagate_control(cgrp);
3155
3156 ret = cgroup_apply_control_enable(cgrp);
3157 if (ret)
3158 return ret;
3159
 /*
  * At this point, cgroup_e_css() results reflect the new csses
  * making the following cgroup_update_dfl_csses() properly update
  * css associations of all tasks in the subtree.
  */
3165 ret = cgroup_update_dfl_csses(cgrp);
3166 if (ret)
3167 return ret;
3168
3169 return 0;
3170}
3171
/**
 * cgroup_finalize_control - finalize control mask update
 * @cgrp: root of the target subtree
 * @ret: the result of the update
 *
 * Finalize control mask update.  See cgroup_apply_control() for more info.
 */
3179static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
3180{
3181 if (ret) {
3182 cgroup_restore_control(cgrp);
3183 cgroup_propagate_control(cgrp);
3184 }
3185
3186 cgroup_apply_control_disable(cgrp);
3187}
3188
3189static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable)
3190{
3191 u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask;
3192
3193
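 /* if nothing is getting enabled, nothing to worry about */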
3194 if (!enable)
3195 return 0;
3196
3197
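 /* can @cgrp host any resources? */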
3198 if (!cgroup_is_valid_domain(cgrp->dom_cgrp))
3199 return -EOPNOTSUPP;
3200
3201
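 /* mixables don't care */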
3202 if (cgroup_is_mixable(cgrp))
3203 return 0;
3204
3205 if (domain_enable) {
3206
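 /* can't enable domain controllers inside a thread subtree */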
3207 if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp))
3208 return -EOPNOTSUPP;
3209 } else {
 /*
  * Threaded controllers can handle internal competitions
  * and are always allowed inside a (prospective) thread
  * subtree.
  */
3215 if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp))
3216 return 0;
3217 }
3218
 /*
  * Controllers can't be enabled for a cgroup with tasks to avoid
  * child cgroups competing against tasks.
  */
3223 if (cgroup_has_tasks(cgrp))
3224 return -EBUSY;
3225
3226 return 0;
3227}
3228
3229
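/*
 * Change the enabled child controllers for a cgroup in the default
 * hierarchy.  Accepts a space separated list of controller names, each
 * prefixed with '+' or '-', e.g. "+memory -pids".
 */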
3230static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
3231 char *buf, size_t nbytes,
3232 loff_t off)
3233{
3234 u16 enable = 0, disable = 0;
3235 struct cgroup *cgrp, *child;
3236 struct cgroup_subsys *ss;
3237 char *tok;
3238 int ssid, ret;
3239
 /*
  * Parse input - space separated list of subsystem names prefixed
  * with either + or -.
  */
3244 buf = strstrip(buf);
3245 while ((tok = strsep(&buf, " "))) {
3246 if (tok[0] == '\0')
3247 continue;
3248 do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
3249 if (!cgroup_ssid_enabled(ssid) ||
3250 strcmp(tok + 1, ss->name))
3251 continue;
3252
3253 if (*tok == '+') {
3254 enable |= 1 << ssid;
3255 disable &= ~(1 << ssid);
3256 } else if (*tok == '-') {
3257 disable |= 1 << ssid;
3258 enable &= ~(1 << ssid);
3259 } else {
3260 return -EINVAL;
3261 }
3262 break;
3263 } while_each_subsys_mask();
3264 if (ssid == CGROUP_SUBSYS_COUNT)
3265 return -EINVAL;
3266 }
3267
3268 cgrp = cgroup_kn_lock_live(of->kn, true);
3269 if (!cgrp)
3270 return -ENODEV;
3271
3272 for_each_subsys(ss, ssid) {
3273 if (enable & (1 << ssid)) {
3274 if (cgrp->subtree_control & (1 << ssid)) {
3275 enable &= ~(1 << ssid);
3276 continue;
3277 }
3278
3279 if (!(cgroup_control(cgrp) & (1 << ssid))) {
3280 ret = -ENOENT;
3281 goto out_unlock;
3282 }
3283 } else if (disable & (1 << ssid)) {
3284 if (!(cgrp->subtree_control & (1 << ssid))) {
3285 disable &= ~(1 << ssid);
3286 continue;
3287 }
3288
3289
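 /* a child has it enabled? */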
3290 cgroup_for_each_live_child(child, cgrp) {
3291 if (child->subtree_control & (1 << ssid)) {
3292 ret = -EBUSY;
3293 goto out_unlock;
3294 }
3295 }
3296 }
3297 }
3298
3299 if (!enable && !disable) {
3300 ret = 0;
3301 goto out_unlock;
3302 }
3303
3304 ret = cgroup_vet_subtree_control_enable(cgrp, enable);
3305 if (ret)
3306 goto out_unlock;
3307
3308
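 /* save and update control masks and prepare csses */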
3309 cgroup_save_control(cgrp);
3310
3311 cgrp->subtree_control |= enable;
3312 cgrp->subtree_control &= ~disable;
3313
3314 ret = cgroup_apply_control(cgrp);
3315 cgroup_finalize_control(cgrp, ret);
3316 if (ret)
3317 goto out_unlock;
3318
3319 kernfs_activate(cgrp->kn);
3320out_unlock:
3321 cgroup_kn_unlock(of->kn);
3322 return ret ?: nbytes;
3323}
3324
/**
 * cgroup_enable_threaded - make @cgrp threaded
 * @cgrp: the target cgroup
 *
 * Called when "threaded" is written to the cgroup.type interface file and
 * tries to make @cgrp threaded and join the parent's resource domain.
 * This function is never called on the root cgroup as cgroup.type doesn't
 * exist on it.
 */
3334static int cgroup_enable_threaded(struct cgroup *cgrp)
3335{
3336 struct cgroup *parent = cgroup_parent(cgrp);
3337 struct cgroup *dom_cgrp = parent->dom_cgrp;
3338 struct cgroup *dsct;
3339 struct cgroup_subsys_state *d_css;
3340 int ret;
3341
3342 lockdep_assert_held(&cgroup_mutex);
3343
3344
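 /* noop if already threaded */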
3345 if (cgroup_is_threaded(cgrp))
3346 return 0;
3347
 /*
  * If @cgroup is populated or has domain controllers enabled, it
  * can't be switched.  While the below cgroup_can_be_thread_root()
  * test can catch the same conditions, that's only when @parent is
  * not mixable, so let's check it explicitly.
  */
3354 if (cgroup_is_populated(cgrp) ||
3355 cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
3356 return -EOPNOTSUPP;
3357
3358
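 /* @dom_cgrp must be a valid domain which can serve as a thread root */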
3359 if (!cgroup_is_valid_domain(dom_cgrp) ||
3360 !cgroup_can_be_thread_root(dom_cgrp))
3361 return -EOPNOTSUPP;
3362
 /*
  * The following shouldn't cause actual migrations and should
  * always succeed.
  */
3367 cgroup_save_control(cgrp);
3368
3369 cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)
3370 if (dsct == cgrp || cgroup_is_threaded(dsct))
3371 dsct->dom_cgrp = dom_cgrp;
3372
3373 ret = cgroup_apply_control(cgrp);
3374 if (!ret)
3375 parent->nr_threaded_children++;
3376
3377 cgroup_finalize_control(cgrp, ret);
3378 return ret;
3379}
3380
3381static int cgroup_type_show(struct seq_file *seq, void *v)
3382{
3383 struct cgroup *cgrp = seq_css(seq)->cgroup;
3384
3385 if (cgroup_is_threaded(cgrp))
3386 seq_puts(seq, "threaded\n");
3387 else if (!cgroup_is_valid_domain(cgrp))
3388 seq_puts(seq, "domain invalid\n");
3389 else if (cgroup_is_thread_root(cgrp))
3390 seq_puts(seq, "domain threaded\n");
3391 else
3392 seq_puts(seq, "domain\n");
3393
3394 return 0;
3395}
3396
3397static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
3398 size_t nbytes, loff_t off)
3399{
3400 struct cgroup *cgrp;
3401 int ret;
3402
3403
3404 if (strcmp(strstrip(buf), "threaded"))
3405 return -EINVAL;
3406
3407 cgrp = cgroup_kn_lock_live(of->kn, false);
3408 if (!cgrp)
3409 return -ENOENT;
3410
3411
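 /* threaded can only be enabled */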
3412 ret = cgroup_enable_threaded(cgrp);
3413
3414 cgroup_kn_unlock(of->kn);
3415 return ret ?: nbytes;
3416}
3417
3418static int cgroup_max_descendants_show(struct seq_file *seq, void *v)
3419{
3420 struct cgroup *cgrp = seq_css(seq)->cgroup;
3421 int descendants = READ_ONCE(cgrp->max_descendants);
3422
3423 if (descendants == INT_MAX)
3424 seq_puts(seq, "max\n");
3425 else
3426 seq_printf(seq, "%d\n", descendants);
3427
3428 return 0;
3429}
3430
3431static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of,
3432 char *buf, size_t nbytes, loff_t off)
3433{
3434 struct cgroup *cgrp;
3435 int descendants;
3436 ssize_t ret;
3437
3438 buf = strstrip(buf);
3439 if (!strcmp(buf, "max")) {
3440 descendants = INT_MAX;
3441 } else {
3442 ret = kstrtoint(buf, 0, &descendants);
3443 if (ret)
3444 return ret;
3445 }
3446
3447 if (descendants < 0)
3448 return -ERANGE;
3449
3450 cgrp = cgroup_kn_lock_live(of->kn, false);
3451 if (!cgrp)
3452 return -ENOENT;
3453
3454 cgrp->max_descendants = descendants;
3455
3456 cgroup_kn_unlock(of->kn);
3457
3458 return nbytes;
3459}
3460
3461static int cgroup_max_depth_show(struct seq_file *seq, void *v)
3462{
3463 struct cgroup *cgrp = seq_css(seq)->cgroup;
3464 int depth = READ_ONCE(cgrp->max_depth);
3465
3466 if (depth == INT_MAX)
3467 seq_puts(seq, "max\n");
3468 else
3469 seq_printf(seq, "%d\n", depth);
3470
3471 return 0;
3472}
3473
3474static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
3475 char *buf, size_t nbytes, loff_t off)
3476{
3477 struct cgroup *cgrp;
3478 ssize_t ret;
3479 int depth;
3480
3481 buf = strstrip(buf);
3482 if (!strcmp(buf, "max")) {
3483 depth = INT_MAX;
3484 } else {
3485 ret = kstrtoint(buf, 0, &depth);
3486 if (ret)
3487 return ret;
3488 }
3489
3490 if (depth < 0)
3491 return -ERANGE;
3492
3493 cgrp = cgroup_kn_lock_live(of->kn, false);
3494 if (!cgrp)
3495 return -ENOENT;
3496
3497 cgrp->max_depth = depth;
3498
3499 cgroup_kn_unlock(of->kn);
3500
3501 return nbytes;
3502}
3503
3504static int cgroup_events_show(struct seq_file *seq, void *v)
3505{
3506 struct cgroup *cgrp = seq_css(seq)->cgroup;
3507
3508 seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
3509 seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
3510
3511 return 0;
3512}
3513
3514static int cgroup_stat_show(struct seq_file *seq, void *v)
3515{
3516 struct cgroup *cgroup = seq_css(seq)->cgroup;
3517
3518 seq_printf(seq, "nr_descendants %d\n",
3519 cgroup->nr_descendants);
3520 seq_printf(seq, "nr_dying_descendants %d\n",
3521 cgroup->nr_dying_descendants);
3522
3523 return 0;
3524}
3525
3526static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
3527 struct cgroup *cgrp, int ssid)
3528{
3529 struct cgroup_subsys *ss = cgroup_subsys[ssid];
3530 struct cgroup_subsys_state *css;
3531 int ret;
3532
3533 if (!ss->css_extra_stat_show)
3534 return 0;
3535
3536 css = cgroup_tryget_css(cgrp, ss);
3537 if (!css)
3538 return 0;
3539
3540 ret = ss->css_extra_stat_show(seq, css);
3541 css_put(css);
3542 return ret;
3543}
3544
3545static int cpu_stat_show(struct seq_file *seq, void *v)
3546{
3547 struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
3548 int ret = 0;
3549
3550 cgroup_base_stat_cputime_show(seq);
3551#ifdef CONFIG_CGROUP_SCHED
3552 ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
3553#endif
3554 return ret;
3555}
3556
3557#ifdef CONFIG_PSI
3558static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
3559{
3560 struct cgroup *cgroup = seq_css(seq)->cgroup;
3561 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3562
3563 return psi_show(seq, psi, PSI_IO);
3564}
3565static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
3566{
3567 struct cgroup *cgroup = seq_css(seq)->cgroup;
3568 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3569
3570 return psi_show(seq, psi, PSI_MEM);
3571}
3572static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
3573{
3574 struct cgroup *cgroup = seq_css(seq)->cgroup;
3575 struct psi_group *psi = cgroup->id == 1 ? &psi_system : &cgroup->psi;
3576
3577 return psi_show(seq, psi, PSI_CPU);
3578}
3579
3580static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
3581 size_t nbytes, enum psi_res res)
3582{
3583 struct psi_trigger *new;
3584 struct cgroup *cgrp;
3585
3586 cgrp = cgroup_kn_lock_live(of->kn, false);
3587 if (!cgrp)
3588 return -ENODEV;
3589
3590 cgroup_get(cgrp);
3591 cgroup_kn_unlock(of->kn);
3592
3593 new = psi_trigger_create(&cgrp->psi, buf, nbytes, res);
3594 if (IS_ERR(new)) {
3595 cgroup_put(cgrp);
3596 return PTR_ERR(new);
3597 }
3598
3599 psi_trigger_replace(&of->priv, new);
3600
3601 cgroup_put(cgrp);
3602
3603 return nbytes;
3604}
3605
3606static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of,
3607 char *buf, size_t nbytes,
3608 loff_t off)
3609{
3610 return cgroup_pressure_write(of, buf, nbytes, PSI_IO);
3611}
3612
3613static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of,
3614 char *buf, size_t nbytes,
3615 loff_t off)
3616{
3617 return cgroup_pressure_write(of, buf, nbytes, PSI_MEM);
3618}
3619
3620static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
3621 char *buf, size_t nbytes,
3622 loff_t off)
3623{
3624 return cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
3625}
3626
3627static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
3628 poll_table *pt)
3629{
3630 return psi_trigger_poll(&of->priv, of->file, pt);
3631}
3632
3633static void cgroup_pressure_release(struct kernfs_open_file *of)
3634{
3635 psi_trigger_replace(&of->priv, NULL);
3636}
3637#endif
3638
3639static int cgroup_freeze_show(struct seq_file *seq, void *v)
3640{
3641 struct cgroup *cgrp = seq_css(seq)->cgroup;
3642
3643 seq_printf(seq, "%d\n", cgrp->freezer.freeze);
3644
3645 return 0;
3646}
3647
3648static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
3649 char *buf, size_t nbytes, loff_t off)
3650{
3651 struct cgroup *cgrp;
3652 ssize_t ret;
3653 int freeze;
3654
3655 ret = kstrtoint(strstrip(buf), 0, &freeze);
3656 if (ret)
3657 return ret;
3658
3659 if (freeze < 0 || freeze > 1)
3660 return -ERANGE;
3661
3662 cgrp = cgroup_kn_lock_live(of->kn, false);
3663 if (!cgrp)
3664 return -ENOENT;
3665
3666 cgroup_freeze(cgrp, freeze);
3667
3668 cgroup_kn_unlock(of->kn);
3669
3670 return nbytes;
3671}
3672
3673static int cgroup_file_open(struct kernfs_open_file *of)
3674{
3675 struct cftype *cft = of->kn->priv;
3676
3677 if (cft->open)
3678 return cft->open(of);
3679 return 0;
3680}
3681
3682static void cgroup_file_release(struct kernfs_open_file *of)
3683{
3684 struct cftype *cft = of->kn->priv;
3685
3686 if (cft->release)
3687 cft->release(of);
3688}
3689
3690static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
3691 size_t nbytes, loff_t off)
3692{
3693 struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
3694 struct cgroup *cgrp = of->kn->parent->priv;
3695 struct cftype *cft = of->kn->priv;
3696 struct cgroup_subsys_state *css;
3697 int ret;
3698
 /*
  * If namespaces are delegation boundaries, disallow writes to
  * files in an non-init namespace root from inside the namespace
  * except for the files explicitly marked delegatable -
  * "cgroup.procs" and "cgroup.subtree_control".
  */
3705 if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
3706 !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
3707 ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp)
3708 return -EPERM;
3709
3710 if (cft->write)
3711 return cft->write(of, buf, nbytes, off);
3712
 /*
  * kernfs guarantees that a file isn't deleted with operations in
  * flight, which means that the matching css is and stays alive and
  * doesn't need to be pinned.  The RCU locking is not necessary
  * either.  It's just for the convenience of using cgroup_css().
  */
3719 rcu_read_lock();
3720 css = cgroup_css(cgrp, cft->ss);
3721 rcu_read_unlock();
3722
3723 if (cft->write_u64) {
3724 unsigned long long v;
3725 ret = kstrtoull(buf, 0, &v);
3726 if (!ret)
3727 ret = cft->write_u64(css, cft, v);
3728 } else if (cft->write_s64) {
3729 long long v;
3730 ret = kstrtoll(buf, 0, &v);
3731 if (!ret)
3732 ret = cft->write_s64(css, cft, v);
3733 } else {
3734 ret = -EINVAL;
3735 }
3736
3737 return ret ?: nbytes;
3738}
3739
3740static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
3741{
3742 struct cftype *cft = of->kn->priv;
3743
3744 if (cft->poll)
3745 return cft->poll(of, pt);
3746
3747 return kernfs_generic_poll(of, pt);
3748}
3749
3750static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
3751{
3752 return seq_cft(seq)->seq_start(seq, ppos);
3753}
3754
3755static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
3756{
3757 return seq_cft(seq)->seq_next(seq, v, ppos);
3758}
3759
3760static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
3761{
3762 if (seq_cft(seq)->seq_stop)
3763 seq_cft(seq)->seq_stop(seq, v);
3764}
3765
3766static int cgroup_seqfile_show(struct seq_file *m, void *arg)
3767{
3768 struct cftype *cft = seq_cft(m);
3769 struct cgroup_subsys_state *css = seq_css(m);
3770
3771 if (cft->seq_show)
3772 return cft->seq_show(m, arg);
3773
3774 if (cft->read_u64)
3775 seq_printf(m, "%llu\n", cft->read_u64(css, cft));
3776 else if (cft->read_s64)
3777 seq_printf(m, "%lld\n", cft->read_s64(css, cft));
3778 else
3779 return -EINVAL;
3780 return 0;
3781}
3782
3783static struct kernfs_ops cgroup_kf_single_ops = {
3784 .atomic_write_len = PAGE_SIZE,
3785 .open = cgroup_file_open,
3786 .release = cgroup_file_release,
3787 .write = cgroup_file_write,
3788 .poll = cgroup_file_poll,
3789 .seq_show = cgroup_seqfile_show,
3790};
3791
3792static struct kernfs_ops cgroup_kf_ops = {
3793 .atomic_write_len = PAGE_SIZE,
3794 .open = cgroup_file_open,
3795 .release = cgroup_file_release,
3796 .write = cgroup_file_write,
3797 .poll = cgroup_file_poll,
3798 .seq_start = cgroup_seqfile_start,
3799 .seq_next = cgroup_seqfile_next,
3800 .seq_stop = cgroup_seqfile_stop,
3801 .seq_show = cgroup_seqfile_show,
3802};
3803
3804
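/* set uid and gid of cgroup dirs and files to that of the creator */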
3805static int cgroup_kn_set_ugid(struct kernfs_node *kn)
3806{
3807 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
3808 .ia_uid = current_fsuid(),
3809 .ia_gid = current_fsgid(), };
3810
3811 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
3812 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
3813 return 0;
3814
3815 return kernfs_setattr(kn, &iattr);
3816}
3817
3818static void cgroup_file_notify_timer(struct timer_list *timer)
3819{
3820 cgroup_file_notify(container_of(timer, struct cgroup_file,
3821 notify_timer));
3822}
3823
3824static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
3825 struct cftype *cft)
3826{
3827 char name[CGROUP_FILE_NAME_MAX];
3828 struct kernfs_node *kn;
3829 struct lock_class_key *key = NULL;
3830 int ret;
3831
3832#ifdef CONFIG_DEBUG_LOCK_ALLOC
3833 key = &cft->lockdep_key;
3834#endif
3835 kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
3836 cgroup_file_mode(cft),
3837 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
3838 0, cft->kf_ops, cft,
3839 NULL, key);
3840 if (IS_ERR(kn))
3841 return PTR_ERR(kn);
3842
3843 ret = cgroup_kn_set_ugid(kn);
3844 if (ret) {
3845 kernfs_remove(kn);
3846 return ret;
3847 }
3848
3849 if (cft->file_offset) {
3850 struct cgroup_file *cfile = (void *)css + cft->file_offset;
3851
3852 timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
3853
3854 spin_lock_irq(&cgroup_file_kn_lock);
3855 cfile->kn = kn;
3856 spin_unlock_irq(&cgroup_file_kn_lock);
3857 }
3858
3859 return 0;
3860}
3861
/**
 * cgroup_addrm_files - add or remove files to a cgroup directory
 * @css: the target css
 * @cgrp: the target cgroup (usually css->cgroup)
 * @cfts: array of cftypes to be added
 * @is_add: whether to add or remove
 *
 * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
 * For removals, this function never fails.
 */
3872static int cgroup_addrm_files(struct cgroup_subsys_state *css,
3873 struct cgroup *cgrp, struct cftype cfts[],
3874 bool is_add)
3875{
3876 struct cftype *cft, *cft_end = NULL;
3877 int ret = 0;
3878
3879 lockdep_assert_held(&cgroup_mutex);
3880
3881restart:
3882 for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
3883
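 /* does cft->flags tell us to skip this file on @cgrp? */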
3884 if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
3885 continue;
3886 if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
3887 continue;
3888 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
3889 continue;
3890 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
3891 continue;
3892 if ((cft->flags & CFTYPE_DEBUG) && !cgroup_debug)
3893 continue;
3894 if (is_add) {
3895 ret = cgroup_add_file(css, cgrp, cft);
3896 if (ret) {
3897 pr_warn("%s: failed to add %s, err=%d\n",
3898 __func__, cft->name, ret);
3899 cft_end = cft;
3900 is_add = false;
3901 goto restart;
3902 }
3903 } else {
3904 cgroup_rm_file(cgrp, cft);
3905 }
3906 }
3907 return ret;
3908}
3909
3910static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
3911{
3912 struct cgroup_subsys *ss = cfts[0].ss;
3913 struct cgroup *root = &ss->root->cgrp;
3914 struct cgroup_subsys_state *css;
3915 int ret = 0;
3916
3917 lockdep_assert_held(&cgroup_mutex);
3918
3919
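 /* add/rm files for all cgroups created before */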
3920 css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
3921 struct cgroup *cgrp = css->cgroup;
3922
3923 if (!(css->flags & CSS_VISIBLE))
3924 continue;
3925
3926 ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
3927 if (ret)
3928 break;
3929 }
3930
3931 if (is_add && !ret)
3932 kernfs_activate(root->kn);
3933 return ret;
3934}
3935
3936static void cgroup_exit_cftypes(struct cftype *cfts)
3937{
3938 struct cftype *cft;
3939
3940 for (cft = cfts; cft->name[0] != '\0'; cft++) {
3941
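 /* free copy for custom atomic_write_len, see cgroup_init_cftypes() */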
3942 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
3943 kfree(cft->kf_ops);
3944 cft->kf_ops = NULL;
3945 cft->ss = NULL;
3946
3947
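 /* revert flags set by cgroup core while adding @cfts */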
3948 cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
3949 }
3950}
3951
3952static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
3953{
3954 struct cftype *cft;
3955
3956 for (cft = cfts; cft->name[0] != '\0'; cft++) {
3957 struct kernfs_ops *kf_ops;
3958
3959 WARN_ON(cft->ss || cft->kf_ops);
3960
3961 if (cft->seq_start)
3962 kf_ops = &cgroup_kf_ops;
3963 else
3964 kf_ops = &cgroup_kf_single_ops;
3965
 /*
  * Ugh... if @cft wants a custom max_write_len, we need to
  * make a copy of kf_ops to set its atomic_write_len.
  */
3970 if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
3971 kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
3972 if (!kf_ops) {
3973 cgroup_exit_cftypes(cfts);
3974 return -ENOMEM;
3975 }
3976 kf_ops->atomic_write_len = cft->max_write_len;
3977 }
3978
3979 cft->kf_ops = kf_ops;
3980 cft->ss = ss;
3981 }
3982
3983 return 0;
3984}
3985
3986static int cgroup_rm_cftypes_locked(struct cftype *cfts)
3987{
3988 lockdep_assert_held(&cgroup_mutex);
3989
3990 if (!cfts || !cfts[0].ss)
3991 return -ENOENT;
3992
3993 list_del(&cfts->node);
3994 cgroup_apply_cftypes(cfts, false);
3995 cgroup_exit_cftypes(cfts);
3996 return 0;
3997}
3998
/**
 * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Unregister @cfts.  Files described by @cfts are removed from all
 * existing cgroups and all future cgroups won't have them either.  This
 * function can be called anytime whether @cfts' subsys is attached or not.
 *
 * Returns 0 on successful unregistration, -ENOENT if @cfts is not
 * registered.
 */
4010int cgroup_rm_cftypes(struct cftype *cfts)
4011{
4012 int ret;
4013
4014 mutex_lock(&cgroup_mutex);
4015 ret = cgroup_rm_cftypes_locked(cfts);
4016 mutex_unlock(&cgroup_mutex);
4017 return ret;
4018}
4019
/**
 * cgroup_add_cftypes - add an array of cftypes to a subsystem
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Register @cfts to @ss.  Files described by @cfts are created for all
 * existing cgroups to which @ss is attached and all future cgroups will
 * have them too.  This function can be called anytime whether @ss is
 * attached or not.
 *
 * Returns 0 on successful registration, -errno on failure.  Note that this
 * function currently returns 0 as long as @cfts registration is successful
 * even if some file creation attempts on existing cgroups fail.
 */
4034static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4035{
4036 int ret;
4037
4038 if (!cgroup_ssid_enabled(ss->id))
4039 return 0;
4040
4041 if (!cfts || cfts[0].name[0] == '\0')
4042 return 0;
4043
4044 ret = cgroup_init_cftypes(ss, cfts);
4045 if (ret)
4046 return ret;
4047
4048 mutex_lock(&cgroup_mutex);
4049
4050 list_add_tail(&cfts->node, &ss->cfts);
4051 ret = cgroup_apply_cftypes(cfts, true);
4052 if (ret)
4053 cgroup_rm_cftypes_locked(cfts);
4054
4055 mutex_unlock(&cgroup_mutex);
4056 return ret;
4057}
4058
/**
 * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Similar to cgroup_add_cftypes() but the added files are only used for
 * the default hierarchy.
 */
4067int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4068{
4069 struct cftype *cft;
4070
4071 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
4072 cft->flags |= __CFTYPE_ONLY_ON_DFL;
4073 return cgroup_add_cftypes(ss, cfts);
4074}
4075
/**
 * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
 * @ss: target cgroup subsystem
 * @cfts: zero-length name terminated array of cftypes
 *
 * Similar to cgroup_add_cftypes() but the added files are only used for
 * the legacy hierarchies.
 */
4084int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
4085{
4086 struct cftype *cft;
4087
4088 for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
4089 cft->flags |= __CFTYPE_NOT_ON_DFL;
4090 return cgroup_add_cftypes(ss, cfts);
4091}
4092
/**
 * cgroup_file_notify - generate a file modified event for a cgroup_file
 * @cfile: target cgroup_file
 *
 * @cfile must have been obtained by setting cftype->file_offset.
 */
4099void cgroup_file_notify(struct cgroup_file *cfile)
4100{
4101 unsigned long flags;
4102
4103 spin_lock_irqsave(&cgroup_file_kn_lock, flags);
4104 if (cfile->kn) {
4105 unsigned long last = cfile->notified_at;
4106 unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
4107
4108 if (time_in_range(jiffies, last, next)) {
4109 timer_reduce(&cfile->notify_timer, next);
4110 } else {
4111 kernfs_notify(cfile->kn);
4112 cfile->notified_at = jiffies;
4113 }
4114 }
4115 spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
4116}
4117
/**
 * css_next_child - find the next child of a given css
 * @pos: the current position (%NULL to initiate traversal)
 * @parent: css whose children to walk
 *
 * This function returns the next child of @parent and should be called
 * under either cgroup_mutex or RCU read lock.  The only requirement is
 * that @parent and @pos are accessible.  The next sibling is guaranteed to
 * be returned regardless of their states.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 */
4135struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
4136 struct cgroup_subsys_state *parent)
4137{
4138 struct cgroup_subsys_state *next;
4139
4140 cgroup_assert_mutex_or_rcu_locked();
4141
 /*
  * @pos could already have been unlinked from the sibling list.
  * Once a cgroup is removed, its ->sibling.next is no longer
  * updated when its next sibling changes.  CSS_RELEASED is set when
  * @pos is taken off list, at which time its next pointer is valid,
  * and, as releases are serialized, the one pointed to by the next
  * pointer is guaranteed to not have started release yet.  This
  * implies that if we observe !CSS_RELEASED on @pos in this RCU
  * critical section, the one pointed to by its next pointer is
  * guaranteed to not have finished its RCU grace period even if we
  * have dropped rcu_read_lock() in-between iterations.
  *
  * If @pos has CSS_RELEASED set, its next pointer can't be
  * dereferenced; however, as each css is given a monotonically
  * increasing unique serial number and always appended to the
  * sibling list, the next one can be found by walking the parent's
  * children until the first css with higher serial number than
  * @pos's.  While this path can be slower, it happens iff iteration
  * races against release and the race window is very small.
  */
4162 if (!pos) {
4163 next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
4164 } else if (likely(!(pos->flags & CSS_RELEASED))) {
4165 next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
4166 } else {
4167 list_for_each_entry_rcu(next, &parent->children, sibling)
4168 if (next->serial_nr > pos->serial_nr)
4169 break;
4170 }
4171
 /*
  * @next, if not pointing to the head, can be dereferenced and is
  * the next sibling.
  */
4176 if (&next->sibling != &parent->children)
4177 return next;
4178 return NULL;
4179}
4180
/**
 * css_next_descendant_pre - find the next descendant for pre-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * To be used by css_for_each_descendant_pre().  Find the next descendant
 * to visit for pre-order traversal of @root's descendants.  @root is
 * included in the iteration and the first node to be visited.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.  This function will return the correct next descendant as long
 * as both @pos and @root are accessible and @pos is a descendant of @root.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 */
4202struct cgroup_subsys_state *
4203css_next_descendant_pre(struct cgroup_subsys_state *pos,
4204 struct cgroup_subsys_state *root)
4205{
4206 struct cgroup_subsys_state *next;
4207
4208 cgroup_assert_mutex_or_rcu_locked();
4209
4210
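 /* if first iteration, visit @root */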
4211 if (!pos)
4212 return root;
4213
4214
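 /* visit the first child if exists */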
4215 next = css_next_child(NULL, pos);
4216 if (next)
4217 return next;
4218
4219
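 /* no child, visit my or the closest ancestor's next sibling */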
4220 while (pos != root) {
4221 next = css_next_child(pos, pos->parent);
4222 if (next)
4223 return next;
4224 pos = pos->parent;
4225 }
4226
4227 return NULL;
4228}
4229
/**
 * css_rightmost_descendant - return the rightmost descendant of a css
 * @pos: css of interest
 *
 * Return the rightmost descendant of @pos.  If there's no descendant, @pos
 * is returned.  This can be used during pre-order traversal to skip
 * subtree of @pos.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.  This function will return the correct rightmost descendant as
 * long as @pos is accessible.
 */
4243struct cgroup_subsys_state *
4244css_rightmost_descendant(struct cgroup_subsys_state *pos)
4245{
4246 struct cgroup_subsys_state *last, *tmp;
4247
4248 cgroup_assert_mutex_or_rcu_locked();
4249
4250 do {
4251 last = pos;
4252
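 /* ->prev isn't RCU safe, walk ->next till the end */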
4253 pos = NULL;
4254 css_for_each_child(tmp, last)
4255 pos = tmp;
4256 } while (pos);
4257
4258 return last;
4259}
4260
4261static struct cgroup_subsys_state *
4262css_leftmost_descendant(struct cgroup_subsys_state *pos)
4263{
4264 struct cgroup_subsys_state *last;
4265
4266 do {
4267 last = pos;
4268 pos = css_next_child(NULL, pos);
4269 } while (pos);
4270
4271 return last;
4272}
4273
/**
 * css_next_descendant_post - find the next descendant for post-order walk
 * @pos: the current position (%NULL to initiate traversal)
 * @root: css whose descendants to walk
 *
 * To be used by css_for_each_descendant_post().  Find the next descendant
 * to visit for post-order traversal of @root's descendants.  @root is
 * included in the iteration and the last node to be visited.
 *
 * While this function requires cgroup_mutex or RCU read locking, it
 * doesn't require the whole traversal to be contained in a single critical
 * section.  This function will return the correct next descendant as long
 * as both @pos and @root are accessible and @pos is a descendant of @root.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal.  It's each subsystem's
 * responsibility to synchronize against on/offlining.
 */
4296struct cgroup_subsys_state *
4297css_next_descendant_post(struct cgroup_subsys_state *pos,
4298 struct cgroup_subsys_state *root)
4299{
4300 struct cgroup_subsys_state *next;
4301
4302 cgroup_assert_mutex_or_rcu_locked();
4303
4304
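 /* if first iteration, visit leftmost descendant which may be @root */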
4305 if (!pos)
4306 return css_leftmost_descendant(root);
4307
4308
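 /* if we visited @root, we're done */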
4309 if (pos == root)
4310 return NULL;
4311
4312
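 /* if there's an unvisited sibling, visit its leftmost descendant */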
4313 next = css_next_child(pos, pos->parent);
4314 if (next)
4315 return css_leftmost_descendant(next);
4316
4317
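 /* no sibling left, visit parent */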
4318 return pos->parent;
4319}
4320
/**
 * css_has_online_children - does a css have online children
 * @css: the target css
 *
 * Returns %true if @css has any online children; otherwise, %false.  This
 * function can be called from any context but the caller is responsible
 * for synchronizing against on/offlining as necessary.
 */
4329bool css_has_online_children(struct cgroup_subsys_state *css)
4330{
4331 struct cgroup_subsys_state *child;
4332 bool ret = false;
4333
4334 rcu_read_lock();
4335 css_for_each_child(child, css) {
4336 if (child->flags & CSS_ONLINE) {
4337 ret = true;
4338 break;
4339 }
4340 }
4341 rcu_read_unlock();
4342 return ret;
4343}
4344
4345static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
4346{
4347 struct list_head *l;
4348 struct cgrp_cset_link *link;
4349 struct css_set *cset;
4350
4351 lockdep_assert_held(&css_set_lock);
4352
4353
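 /* find the next threaded cset */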
4354 if (it->tcset_pos) {
4355 l = it->tcset_pos->next;
4356
4357 if (l != it->tcset_head) {
4358 it->tcset_pos = l;
4359 return container_of(l, struct css_set,
4360 threaded_csets_node);
4361 }
4362
4363 it->tcset_pos = NULL;
4364 }
4365
4366
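 /* find the next cset */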
4367 l = it->cset_pos;
4368 l = l->next;
4369 if (l == it->cset_head) {
4370 it->cset_pos = NULL;
4371 return NULL;
4372 }
4373
4374 if (it->ss) {
4375 cset = container_of(l, struct css_set, e_cset_node[it->ss->id]);
4376 } else {
4377 link = list_entry(l, struct cgrp_cset_link, cset_link);
4378 cset = link->cset;
4379 }
4380
4381 it->cset_pos = l;
4382
4383
4384 if (it->flags & CSS_TASK_ITER_THREADED) {
4385 if (it->cur_dcset)
4386 put_css_set_locked(it->cur_dcset);
4387 it->cur_dcset = cset;
4388 get_css_set(cset);
4389
4390 it->tcset_head = &cset->threaded_csets;
4391 it->tcset_pos = &cset->threaded_csets;
4392 }
4393
4394 return cset;
4395}
4396
/**
 * css_task_iter_advance_css_set - advance a task iterator to the next css_set
 * @it: the iterator to advance
 *
 * Advance @it to the next css_set to walk.
 */
4403static void css_task_iter_advance_css_set(struct css_task_iter *it)
4404{
4405 struct css_set *cset;
4406
4407 lockdep_assert_held(&css_set_lock);
4408
4409
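 /* Advance to the next non-empty css_set */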
4410 do {
4411 cset = css_task_iter_next_css_set(it);
4412 if (!cset) {
4413 it->task_pos = NULL;
4414 return;
4415 }
4416 } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
4417
4418 if (!list_empty(&cset->tasks))
4419 it->task_pos = cset->tasks.next;
4420 else if (!list_empty(&cset->mg_tasks))
4421 it->task_pos = cset->mg_tasks.next;
4422 else
4423 it->task_pos = cset->dying_tasks.next;
4424
4425 it->tasks_head = &cset->tasks;
4426 it->mg_tasks_head = &cset->mg_tasks;
4427 it->dying_tasks_head = &cset->dying_tasks;
4428
 /*
  * We don't keep css_sets locked across iteration steps and thus
  * need to take steps to ensure that iteration can be resumed after
  * the lock is re-acquired.  Restarting from @it->cur_task is not
  * affected by css_set pointer updates and can be performed as long
  * as @it->cur_cset hasn't been freed.  Pin down @cset and register
  * @it on its ->task_iters list so that task removals can be
  * noticed and @it->task_pos adjusted via css_task_iter_skip().
  */
4444 if (it->cur_cset) {
4445 list_del(&it->iters_node);
4446 put_css_set_locked(it->cur_cset);
4447 }
4448 get_css_set(cset);
4449 it->cur_cset = cset;
4450 list_add(&it->iters_node, &cset->task_iters);
4451}
4452
4453static void css_task_iter_skip(struct css_task_iter *it,
4454 struct task_struct *task)
4455{
4456 lockdep_assert_held(&css_set_lock);
4457
4458 if (it->task_pos == &task->cg_list) {
4459 it->task_pos = it->task_pos->next;
4460 it->flags |= CSS_TASK_ITER_SKIPPED;
4461 }
4462}
4463
4464static void css_task_iter_advance(struct css_task_iter *it)
4465{
4466 struct task_struct *task;
4467
4468 lockdep_assert_held(&css_set_lock);
4469repeat:
4470 if (it->task_pos) {
 /*
  * Advance iterator to find next entry.  cset->tasks is consumed
  * first and then ->mg_tasks and then ->dying_tasks.  If the
  * skipped flag is set, @it->task_pos was already advanced by
  * css_task_iter_skip() and only the flag needs clearing.
  */
4476 if (it->flags & CSS_TASK_ITER_SKIPPED)
4477 it->flags &= ~CSS_TASK_ITER_SKIPPED;
4478 else
4479 it->task_pos = it->task_pos->next;
4480
4481 if (it->task_pos == it->tasks_head)
4482 it->task_pos = it->mg_tasks_head->next;
4483 if (it->task_pos == it->mg_tasks_head)
4484 it->task_pos = it->dying_tasks_head->next;
4485 if (it->task_pos == it->dying_tasks_head)
4486 css_task_iter_advance_css_set(it);
4487 } else {
4488
4489 css_task_iter_advance_css_set(it);
4490 }
4491
4492 if (!it->task_pos)
4493 return;
4494
4495 task = list_entry(it->task_pos, struct task_struct, cg_list);
4496
4497 if (it->flags & CSS_TASK_ITER_PROCS) {
4498
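 /* if PROCS, skip over tasks which aren't group leaders */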
4499 if (!thread_group_leader(task))
4500 goto repeat;
4501
4502
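 /* and dying leaders w/o live member threads */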
4503 if (!atomic_read(&task->signal->live))
4504 goto repeat;
4505 } else {
4506
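 /* skip all dying ones */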
4507 if (task->flags & PF_EXITING)
4508 goto repeat;
4509 }
4510}
4511
/**
 * css_task_iter_start - initiate task iteration
 * @css: the css to walk tasks of
 * @flags: CSS_TASK_ITER_* flags
 * @it: the task iterator to use
 *
 * Initiate iteration through the tasks of @css.  The caller can call
 * css_task_iter_next() to walk through the tasks until the function
 * returns NULL.  On completion of iteration, css_task_iter_end() must be
 * called.
 */
4523void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
4524 struct css_task_iter *it)
4525{
4526
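 /* no one should try to iterate before mounting cgroups */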
4527 WARN_ON_ONCE(!use_task_css_set_links);
4528
4529 memset(it, 0, sizeof(*it));
4530
4531 spin_lock_irq(&css_set_lock);
4532
4533 it->ss = css->ss;
4534 it->flags = flags;
4535
4536 if (it->ss)
4537 it->cset_pos = &css->cgroup->e_csets[css->ss->id];
4538 else
4539 it->cset_pos = &css->cgroup->cset_links;
4540
4541 it->cset_head = it->cset_pos;
4542
4543 css_task_iter_advance(it);
4544
4545 spin_unlock_irq(&css_set_lock);
4546}
4547
/**
 * css_task_iter_next - return the next task for the iterator
 * @it: the task iterator being iterated
 *
 * The "next" function for task iteration.  @it should have been
 * initialized via css_task_iter_start().  Returns NULL when the iteration
 * reaches the end.
 */
4556struct task_struct *css_task_iter_next(struct css_task_iter *it)
4557{
4558 if (it->cur_task) {
4559 put_task_struct(it->cur_task);
4560 it->cur_task = NULL;
4561 }
4562
4563 spin_lock_irq(&css_set_lock);
4564
4565
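 /* @it may be half-advanced by skips, finish advancing */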
4566 if (it->flags & CSS_TASK_ITER_SKIPPED)
4567 css_task_iter_advance(it);
4568
4569 if (it->task_pos) {
4570 it->cur_task = list_entry(it->task_pos, struct task_struct,
4571 cg_list);
4572 get_task_struct(it->cur_task);
4573 css_task_iter_advance(it);
4574 }
4575
4576 spin_unlock_irq(&css_set_lock);
4577
4578 return it->cur_task;
4579}
4580
/**
 * css_task_iter_end - finish task iteration
 * @it: the task iterator to finish
 *
 * Finish task iteration started by css_task_iter_start().
 */
4587void css_task_iter_end(struct css_task_iter *it)
4588{
4589 if (it->cur_cset) {
4590 spin_lock_irq(&css_set_lock);
4591 list_del(&it->iters_node);
4592 put_css_set_locked(it->cur_cset);
4593 spin_unlock_irq(&css_set_lock);
4594 }
4595
4596 if (it->cur_dcset)
4597 put_css_set(it->cur_dcset);
4598
4599 if (it->cur_task)
4600 put_task_struct(it->cur_task);
4601}
4602
4603static void cgroup_procs_release(struct kernfs_open_file *of)
4604{
4605 if (of->priv) {
4606 css_task_iter_end(of->priv);
4607 kfree(of->priv);
4608 }
4609}
4610
4611static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
4612{
4613 struct kernfs_open_file *of = s->private;
4614 struct css_task_iter *it = of->priv;
4615
4616 return css_task_iter_next(it);
4617}
4618
4619static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
4620 unsigned int iter_flags)
4621{
4622 struct kernfs_open_file *of = s->private;
4623 struct cgroup *cgrp = seq_css(s)->cgroup;
4624 struct css_task_iter *it = of->priv;
4625
 /*
  * When a seq_file is seeked, it's always traversed sequentially
  * from position 0, so we can simply keep iterating on !0 *pos.
  */
4630 if (!it) {
4631 if (WARN_ON_ONCE((*pos)++))
4632 return ERR_PTR(-EINVAL);
4633
4634 it = kzalloc(sizeof(*it), GFP_KERNEL);
4635 if (!it)
4636 return ERR_PTR(-ENOMEM);
4637 of->priv = it;
4638 css_task_iter_start(&cgrp->self, iter_flags, it);
4639 } else if (!(*pos)++) {
4640 css_task_iter_end(it);
4641 css_task_iter_start(&cgrp->self, iter_flags, it);
4642 }
4643
4644 return cgroup_procs_next(s, NULL, NULL);
4645}
4646
4647static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
4648{
4649 struct cgroup *cgrp = seq_css(s)->cgroup;
4650
 /*
  * All processes of a threaded subtree belong to the domain cgroup
  * of the subtree.  Only threads can be distributed across the
  * subtree.  Reject reads on cgroup.procs in the subtree proper.
  * They're always empty anyway.
  */
4657 if (cgroup_is_threaded(cgrp))
4658 return ERR_PTR(-EOPNOTSUPP);
4659
4660 return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS |
4661 CSS_TASK_ITER_THREADED);
4662}
4663
4664static int cgroup_procs_show(struct seq_file *s, void *v)
4665{
4666 seq_printf(s, "%d\n", task_pid_vnr(v));
4667 return 0;
4668}
4669
4670static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
4671 struct cgroup *dst_cgrp,
4672 struct super_block *sb)
4673{
4674 struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
4675 struct cgroup *com_cgrp = src_cgrp;
4676 struct inode *inode;
4677 int ret;
4678
4679 lockdep_assert_held(&cgroup_mutex);
4680
4681
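 /* find the common ancestor */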
4682 while (!cgroup_is_descendant(dst_cgrp, com_cgrp))
4683 com_cgrp = cgroup_parent(com_cgrp);
4684
4685
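 /* %current should be authorized to migrate to the common ancestor */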
4686 inode = kernfs_get_inode(sb, com_cgrp->procs_file.kn);
4687 if (!inode)
4688 return -ENOMEM;
4689
4690 ret = inode_permission(inode, MAY_WRITE);
4691 iput(inode);
4692 if (ret)
4693 return ret;
4694
 /*
  * If namespaces are delegation boundaries, %current must be able
  * to see both source and destination cgroups from its namespace.
  */
4699 if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) &&
4700 (!cgroup_is_descendant(src_cgrp, ns->root_cset->dfl_cgrp) ||
4701 !cgroup_is_descendant(dst_cgrp, ns->root_cset->dfl_cgrp)))
4702 return -ENOENT;
4703
4704 return 0;
4705}
4706
4707static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
4708 char *buf, size_t nbytes, loff_t off)
4709{
4710 struct cgroup *src_cgrp, *dst_cgrp;
4711 struct task_struct *task;
4712 ssize_t ret;
4713
4714 dst_cgrp = cgroup_kn_lock_live(of->kn, false);
4715 if (!dst_cgrp)
4716 return -ENODEV;
4717
4718 task = cgroup_procs_write_start(buf, true);
4719 ret = PTR_ERR_OR_ZERO(task);
4720 if (ret)
4721 goto out_unlock;
4722
4723
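 /* find the source cgroup */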
4724 spin_lock_irq(&css_set_lock);
4725 src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
4726 spin_unlock_irq(&css_set_lock);
4727
4728 ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
4729 of->file->f_path.dentry->d_sb);
4730 if (ret)
4731 goto out_finish;
4732
4733 ret = cgroup_attach_task(dst_cgrp, task, true);
4734
4735out_finish:
4736 cgroup_procs_write_finish(task);
4737out_unlock:
4738 cgroup_kn_unlock(of->kn);
4739
4740 return ret ?: nbytes;
4741}
4742
4743static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
4744{
4745 return __cgroup_procs_start(s, pos, 0);
4746}
4747
4748static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
4749 char *buf, size_t nbytes, loff_t off)
4750{
4751 struct cgroup *src_cgrp, *dst_cgrp;
4752 struct task_struct *task;
4753 ssize_t ret;
4754
4755 buf = strstrip(buf);
4756
4757 dst_cgrp = cgroup_kn_lock_live(of->kn, false);
4758 if (!dst_cgrp)
4759 return -ENODEV;
4760
4761 task = cgroup_procs_write_start(buf, false);
4762 ret = PTR_ERR_OR_ZERO(task);
4763 if (ret)
4764 goto out_unlock;
4765
4766
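 /* find the source cgroup */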
4767 spin_lock_irq(&css_set_lock);
4768 src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
4769 spin_unlock_irq(&css_set_lock);
4770
4771
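 /* thread migrations follow the cgroup.procs delegation rule */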
4772 ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp,
4773 of->file->f_path.dentry->d_sb);
4774 if (ret)
4775 goto out_finish;
4776
4777
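 /* and must be contained in the same domain */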
4778 ret = -EOPNOTSUPP;
4779 if (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp)
4780 goto out_finish;
4781
4782 ret = cgroup_attach_task(dst_cgrp, task, false);
4783
4784out_finish:
4785 cgroup_procs_write_finish(task);
4786out_unlock:
4787 cgroup_kn_unlock(of->kn);
4788
4789 return ret ?: nbytes;
4790}
4791
4792
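/* cgroup core interface files for the default hierarchy */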
4793static struct cftype cgroup_base_files[] = {
4794 {
4795 .name = "cgroup.type",
4796 .flags = CFTYPE_NOT_ON_ROOT,
4797 .seq_show = cgroup_type_show,
4798 .write = cgroup_type_write,
4799 },
4800 {
4801 .name = "cgroup.procs",
4802 .flags = CFTYPE_NS_DELEGATABLE,
4803 .file_offset = offsetof(struct cgroup, procs_file),
4804 .release = cgroup_procs_release,
4805 .seq_start = cgroup_procs_start,
4806 .seq_next = cgroup_procs_next,
4807 .seq_show = cgroup_procs_show,
4808 .write = cgroup_procs_write,
4809 },
4810 {
4811 .name = "cgroup.threads",
4812 .flags = CFTYPE_NS_DELEGATABLE,
4813 .release = cgroup_procs_release,
4814 .seq_start = cgroup_threads_start,
4815 .seq_next = cgroup_procs_next,
4816 .seq_show = cgroup_procs_show,
4817 .write = cgroup_threads_write,
4818 },
4819 {
4820 .name = "cgroup.controllers",
4821 .seq_show = cgroup_controllers_show,
4822 },
4823 {
4824 .name = "cgroup.subtree_control",
4825 .flags = CFTYPE_NS_DELEGATABLE,
4826 .seq_show = cgroup_subtree_control_show,
4827 .write = cgroup_subtree_control_write,
4828 },
4829 {
4830 .name = "cgroup.events",
4831 .flags = CFTYPE_NOT_ON_ROOT,
4832 .file_offset = offsetof(struct cgroup, events_file),
4833 .seq_show = cgroup_events_show,
4834 },
4835 {
4836 .name = "cgroup.max.descendants",
4837 .seq_show = cgroup_max_descendants_show,
4838 .write = cgroup_max_descendants_write,
4839 },
4840 {
4841 .name = "cgroup.max.depth",
4842 .seq_show = cgroup_max_depth_show,
4843 .write = cgroup_max_depth_write,
4844 },
4845 {
4846 .name = "cgroup.stat",
4847 .seq_show = cgroup_stat_show,
4848 },
4849 {
4850 .name = "cgroup.freeze",
4851 .flags = CFTYPE_NOT_ON_ROOT,
4852 .seq_show = cgroup_freeze_show,
4853 .write = cgroup_freeze_write,
4854 },
4855 {
4856 .name = "cpu.stat",
4857 .flags = CFTYPE_NOT_ON_ROOT,
4858 .seq_show = cpu_stat_show,
4859 },
4860#ifdef CONFIG_PSI
4861 {
4862 .name = "io.pressure",
4863 .seq_show = cgroup_io_pressure_show,
4864 .write = cgroup_io_pressure_write,
4865 .poll = cgroup_pressure_poll,
4866 .release = cgroup_pressure_release,
4867 },
4868 {
4869 .name = "memory.pressure",
4870 .seq_show = cgroup_memory_pressure_show,
4871 .write = cgroup_memory_pressure_write,
4872 .poll = cgroup_pressure_poll,
4873 .release = cgroup_pressure_release,
4874 },
4875 {
4876 .name = "cpu.pressure",
4877 .seq_show = cgroup_cpu_pressure_show,
4878 .write = cgroup_cpu_pressure_write,
4879 .poll = cgroup_pressure_poll,
4880 .release = cgroup_pressure_release,
4881 },
4882#endif
4883 { }
4884};
4885
/*
 * css destruction is four-stage process.
 *
 * 1. Destruction starts.  Killing of the percpu_ref is initiated.
 *    Destruction waits for the css refs to drop to zero before proceeding.
 *
 * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
 *    and thus css_tryget_online() is guaranteed to fail, the css can be
 *    offlined by invoking offline_css().  After offlining, the base ref is
 *    put.  Implemented in css_killed_work_fn().
 *
 * 3. When the percpu_ref reaches zero, the only possible remaining
 *    accessors are inside RCU read sections.  css_release() schedules the
 *    RCU callback.
 *
 * 4. After the grace period, the css can be freed.  Implemented in
 *    css_free_rwork_fn().
 *
 * It is actually hairier because both step 2 and 4 require process context
 * and thus involve punting to css->destroy_work adding two additional
 * steps to the already complex sequence.
 */
4908static void css_free_rwork_fn(struct work_struct *work)
4909{
4910 struct cgroup_subsys_state *css = container_of(to_rcu_work(work),
4911 struct cgroup_subsys_state, destroy_rwork);
4912 struct cgroup_subsys *ss = css->ss;
4913 struct cgroup *cgrp = css->cgroup;
4914
4915 percpu_ref_exit(&css->refcnt);
4916
4917 if (ss) {
4918
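 /* css free path */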
4919 struct cgroup_subsys_state *parent = css->parent;
4920 int id = css->id;
4921
4922 ss->css_free(css);
4923 cgroup_idr_remove(&ss->css_idr, id);
4924 cgroup_put(cgrp);
4925
4926 if (parent)
4927 css_put(parent);
4928 } else {
4929
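 /* cgroup free path */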
4930 atomic_dec(&cgrp->root->nr_cgrps);
4931 cgroup1_pidlist_destroy_all(cgrp);
4932 cancel_work_sync(&cgrp->release_agent_work);
4933
4934 if (cgroup_parent(cgrp)) {
 /*
  * We get a ref to the parent, and put the ref when this
  * cgroup is being freed, so it's guaranteed that the
  * parent won't be destroyed before its children.
  */
4941 cgroup_put(cgroup_parent(cgrp));
4942 kernfs_put(cgrp->kn);
4943 psi_cgroup_free(cgrp);
4944 if (cgroup_on_dfl(cgrp))
4945 cgroup_rstat_exit(cgrp);
4946 kfree(cgrp);
4947 } else {
 /*
  * This is root cgroup's refcnt reaching zero, which
  * indicates that the root should be released.
  */
4953 cgroup_destroy_root(cgrp->root);
4954 }
4955 }
4956}
4957
4958static void css_release_work_fn(struct work_struct *work)
4959{
4960 struct cgroup_subsys_state *css =
4961 container_of(work, struct cgroup_subsys_state, destroy_work);
4962 struct cgroup_subsys *ss = css->ss;
4963 struct cgroup *cgrp = css->cgroup;
4964
4965 mutex_lock(&cgroup_mutex);
4966
4967 css->flags |= CSS_RELEASED;
4968 list_del_rcu(&css->sibling);
4969
4970 if (ss) {
4971
4972 if (!list_empty(&css->rstat_css_node)) {
4973 cgroup_rstat_flush(cgrp);
4974 list_del_rcu(&css->rstat_css_node);
4975 }
4976
4977 cgroup_idr_replace(&ss->css_idr, NULL, css->id);
4978 if (ss->css_released)
4979 ss->css_released(css);
4980 } else {
4981 struct cgroup *tcgrp;
4982
4983
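 /* cgroup release path */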
4984 TRACE_CGROUP_PATH(release, cgrp);
4985
4986 if (cgroup_on_dfl(cgrp))
4987 cgroup_rstat_flush(cgrp);
4988
4989 spin_lock_irq(&css_set_lock);
4990 for (tcgrp = cgroup_parent(cgrp); tcgrp;
4991 tcgrp = cgroup_parent(tcgrp))
4992 tcgrp->nr_dying_descendants--;
4993 spin_unlock_irq(&css_set_lock);
4994
4995 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
4996 cgrp->id = -1;
4997
 /*
  * There are two control paths which try to determine
  * cgroup from dentry without going through kernfs -
  * cgroupstats_build() and css_tryget_online_from_dir().
  * Those are supported by RCU protecting clearing of
  * cgrp->kn->priv backpointer.
  */
5005 if (cgrp->kn)
5006 RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
5007 NULL);
5008
5009 cgroup_bpf_put(cgrp);
5010 }
5011
5012 mutex_unlock(&cgroup_mutex);
5013
5014 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
5015 queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
5016}
5017
5018static void css_release(struct percpu_ref *ref)
5019{
5020 struct cgroup_subsys_state *css =
5021 container_of(ref, struct cgroup_subsys_state, refcnt);
5022
5023 INIT_WORK(&css->destroy_work, css_release_work_fn);
5024 queue_work(cgroup_destroy_wq, &css->destroy_work);
5025}
5026
5027static void init_and_link_css(struct cgroup_subsys_state *css,
5028 struct cgroup_subsys *ss, struct cgroup *cgrp)
5029{
5030 lockdep_assert_held(&cgroup_mutex);
5031
5032 cgroup_get_live(cgrp);
5033
5034 memset(css, 0, sizeof(*css));
5035 css->cgroup = cgrp;
5036 css->ss = ss;
5037 css->id = -1;
5038 INIT_LIST_HEAD(&css->sibling);
5039 INIT_LIST_HEAD(&css->children);
5040 INIT_LIST_HEAD(&css->rstat_css_node);
5041 css->serial_nr = css_serial_nr_next++;
5042 atomic_set(&css->online_cnt, 0);
5043
5044 if (cgroup_parent(cgrp)) {
5045 css->parent = cgroup_css(cgroup_parent(cgrp), ss);
5046 css_get(css->parent);
5047 }
5048
5049 if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush)
5050 list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
5051
5052 BUG_ON(cgroup_css(cgrp, ss));
5053}
5054
5055
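/* invoke ->css_online() on a new css and mark it online if successful */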
5056static int online_css(struct cgroup_subsys_state *css)
5057{
5058 struct cgroup_subsys *ss = css->ss;
5059 int ret = 0;
5060
5061 lockdep_assert_held(&cgroup_mutex);
5062
5063 if (ss->css_online)
5064 ret = ss->css_online(css);
5065 if (!ret) {
5066 css->flags |= CSS_ONLINE;
5067 rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
5068
5069 atomic_inc(&css->online_cnt);
5070 if (css->parent)
5071 atomic_inc(&css->parent->online_cnt);
5072 }
5073 return ret;
5074}
5075
5076
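/* if the CSS is online, invoke ->css_offline() on it and mark it offline */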
5077static void offline_css(struct cgroup_subsys_state *css)
5078{
5079 struct cgroup_subsys *ss = css->ss;
5080
5081 lockdep_assert_held(&cgroup_mutex);
5082
5083 if (!(css->flags & CSS_ONLINE))
5084 return;
5085
5086 if (ss->css_offline)
5087 ss->css_offline(css);
5088
5089 css->flags &= ~CSS_ONLINE;
5090 RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
5091
5092 wake_up_all(&css->cgroup->offline_waitq);
5093}
5094
/**
 * css_create - create a cgroup_subsys_state
 * @cgrp: the cgroup new css will be associated with
 * @ss: the subsys of new css
 *
 * Create a new css associated with @cgrp - @ss pair.  On success, the new
 * css is online and installed in @cgrp.  This function doesn't create the
 * interface files.  Returns the new css on success, ERR_PTR(-errno) on
 * failure.
 */
5104static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
5105 struct cgroup_subsys *ss)
5106{
5107 struct cgroup *parent = cgroup_parent(cgrp);
5108 struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
5109 struct cgroup_subsys_state *css;
5110 int err;
5111
5112 lockdep_assert_held(&cgroup_mutex);
5113
5114 css = ss->css_alloc(parent_css);
5115 if (!css)
5116 css = ERR_PTR(-ENOMEM);
5117 if (IS_ERR(css))
5118 return css;
5119
5120 init_and_link_css(css, ss, cgrp);
5121
5122 err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
5123 if (err)
5124 goto err_free_css;
5125
5126 err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
5127 if (err < 0)
5128 goto err_free_css;
5129 css->id = err;
5130
5131
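 /* @css is ready to be brought online now, make it visible */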
5132 list_add_tail_rcu(&css->sibling, &parent_css->children);
5133 cgroup_idr_replace(&ss->css_idr, css, css->id);
5134
5135 err = online_css(css);
5136 if (err)
5137 goto err_list_del;
5138
5139 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
5140 cgroup_parent(parent)) {
5141 pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
5142 current->comm, current->pid, ss->name);
5143 if (!strcmp(ss->name, "memory"))
5144 pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n");
5145 ss->warned_broken_hierarchy = true;
5146 }
5147
5148 return css;
5149
5150err_list_del:
5151 list_del_rcu(&css->sibling);
5152err_free_css:
5153 list_del_rcu(&css->rstat_css_node);
5154 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
5155 queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
5156 return ERR_PTR(err);
5157}
5158
/*
 * The returned cgroup is fully initialized including its control mask, but
 * it isn't associated with its kernfs_node and doesn't have the control
 * mask applied.
 */
5164static struct cgroup *cgroup_create(struct cgroup *parent)
5165{
5166 struct cgroup_root *root = parent->root;
5167 struct cgroup *cgrp, *tcgrp;
5168 int level = parent->level + 1;
5169 int ret;
5170
5171
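 /* allocate the cgroup and its ID, 0 is reserved for the root */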
5172 cgrp = kzalloc(struct_size(cgrp, ancestor_ids, (level + 1)),
5173 GFP_KERNEL);
5174 if (!cgrp)
5175 return ERR_PTR(-ENOMEM);
5176
5177 ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
5178 if (ret)
5179 goto out_free_cgrp;
5180
5181 if (cgroup_on_dfl(parent)) {
5182 ret = cgroup_rstat_init(cgrp);
5183 if (ret)
5184 goto out_cancel_ref;
5185 }
5186
 /*
  * Temporarily set the pointer to NULL, so idr_find() won't return
  * a half-baked cgroup.
  */
5191 cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
5192 if (cgrp->id < 0) {
5193 ret = -ENOMEM;
5194 goto out_stat_exit;
5195 }
5196
5197 init_cgroup_housekeeping(cgrp);
5198
5199 cgrp->self.parent = &parent->self;
5200 cgrp->root = root;
5201 cgrp->level = level;
5202
5203 ret = psi_cgroup_alloc(cgrp);
5204 if (ret)
5205 goto out_idr_free;
5206
5207 ret = cgroup_bpf_inherit(cgrp);
5208 if (ret)
5209 goto out_psi_free;
5210
 /*
  * New cgroup inherits effective freeze counter, and
  * if the parent has to be frozen, the child has too.
  */
5215 cgrp->freezer.e_freeze = parent->freezer.e_freeze;
5216 if (cgrp->freezer.e_freeze)
5217 set_bit(CGRP_FROZEN, &cgrp->flags);
5218
5219 spin_lock_irq(&css_set_lock);
5220 for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5221 cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
5222
5223 if (tcgrp != cgrp) {
5224 tcgrp->nr_descendants++;
5225
 /*
  * If the new cgroup is frozen, all ancestor cgroups get a new
  * frozen descendant, but their state can't change because of
  * this.
  */
5231 if (cgrp->freezer.e_freeze)
5232 tcgrp->freezer.nr_frozen_descendants++;
5233 }
5234 }
5235 spin_unlock_irq(&css_set_lock);
5236
5237 if (notify_on_release(parent))
5238 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
5239
5240 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
5241 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
5242
5243 cgrp->self.serial_nr = css_serial_nr_next++;
5244
5245
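 /* allocation complete, commit to creation */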
5246 list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
5247 atomic_inc(&root->nr_cgrps);
5248 cgroup_get_live(parent);
5249
 /*
  * @cgrp is now fully operational.  If something fails after this
  * point, it'll be released via the normal destruction path.
  */
5254 cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
5255
 /*
  * On the default hierarchy, a child doesn't automatically inherit
  * subtree_control from the parent.  Each is configured manually.
  */
5260 if (!cgroup_on_dfl(cgrp))
5261 cgrp->subtree_control = cgroup_control(cgrp);
5262
5263 cgroup_propagate_control(cgrp);
5264
5265 return cgrp;
5266
5267out_psi_free:
5268 psi_cgroup_free(cgrp);
5269out_idr_free:
5270 cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
5271out_stat_exit:
5272 if (cgroup_on_dfl(parent))
5273 cgroup_rstat_exit(cgrp);
5274out_cancel_ref:
5275 percpu_ref_exit(&cgrp->self.refcnt);
5276out_free_cgrp:
5277 kfree(cgrp);
5278 return ERR_PTR(ret);
5279}
5280
5281static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
5282{
5283 struct cgroup *cgroup;
5284 int ret = false;
5285 int level = 1;
5286
5287 lockdep_assert_held(&cgroup_mutex);
5288
5289 for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) {
5290 if (cgroup->nr_descendants >= cgroup->max_descendants)
5291 goto fail;
5292
5293 if (level > cgroup->max_depth)
5294 goto fail;
5295
5296 level++;
5297 }
5298
5299 ret = true;
5300fail:
5301 return ret;
5302}
5303
5304int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
5305{
5306 struct cgroup *parent, *cgrp;
5307 struct kernfs_node *kn;
5308 int ret;
5309
5310
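 /* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */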
5311 if (strchr(name, '\n'))
5312 return -EINVAL;
5313
5314 parent = cgroup_kn_lock_live(parent_kn, false);
5315 if (!parent)
5316 return -ENODEV;
5317
5318 if (!cgroup_check_hierarchy_limits(parent)) {
5319 ret = -EAGAIN;
5320 goto out_unlock;
5321 }
5322
5323 cgrp = cgroup_create(parent);
5324 if (IS_ERR(cgrp)) {
5325 ret = PTR_ERR(cgrp);
5326 goto out_unlock;
5327 }
5328
5329
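 /* create the directory */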
5330 kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
5331 if (IS_ERR(kn)) {
5332 ret = PTR_ERR(kn);
5333 goto out_destroy;
5334 }
5335 cgrp->kn = kn;
5336
 /*
  * This extra ref will be put in css_free_rwork_fn() and guarantees
  * that @cgrp->kn is always accessible.
  */
5341 kernfs_get(kn);
5342
5343 ret = cgroup_kn_set_ugid(kn);
5344 if (ret)
5345 goto out_destroy;
5346
5347 ret = css_populate_dir(&cgrp->self);
5348 if (ret)
5349 goto out_destroy;
5350
5351 ret = cgroup_apply_control_enable(cgrp);
5352 if (ret)
5353 goto out_destroy;
5354
5355 TRACE_CGROUP_PATH(mkdir, cgrp);
5356
5357
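 /* let's create and online css's */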
5358 kernfs_activate(kn);
5359
5360 ret = 0;
5361 goto out_unlock;
5362
5363out_destroy:
5364 cgroup_destroy_locked(cgrp);
5365out_unlock:
5366 cgroup_kn_unlock(parent_kn);
5367 return ret;
5368}
5369
/*
 * This is called when the refcnt of a css is confirmed to be killed.
 * css_tryget_online() is now guaranteed to fail.  Tell the subsystem to
 * initiate destruction and put the css ref from kill_css().
 */
5375static void css_killed_work_fn(struct work_struct *work)
5376{
5377 struct cgroup_subsys_state *css =
5378 container_of(work, struct cgroup_subsys_state, destroy_work);
5379
5380 mutex_lock(&cgroup_mutex);
5381
5382 do {
5383 offline_css(css);
5384 css_put(css);
5385
5386 css = css->parent;
5387 } while (css && atomic_dec_and_test(&css->online_cnt));
5388
5389 mutex_unlock(&cgroup_mutex);
5390}
5391
5392
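/* css kill confirmation processing requires process context, bounce */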
5393static void css_killed_ref_fn(struct percpu_ref *ref)
5394{
5395 struct cgroup_subsys_state *css =
5396 container_of(ref, struct cgroup_subsys_state, refcnt);
5397
5398 if (atomic_dec_and_test(&css->online_cnt)) {
5399 INIT_WORK(&css->destroy_work, css_killed_work_fn);
5400 queue_work(cgroup_destroy_wq, &css->destroy_work);
5401 }
5402}
5403
/**
 * kill_css - destroy a css
 * @css: css to destroy
 *
 * This function initiates destruction of @css by removing cgroup interface
 * files and putting its base reference.  ->css_offline() will be invoked
 * asynchronously once css_tryget_online() is guaranteed to fail and when
 * the reference count reaches zero, @css will be released.
 */
5413static void kill_css(struct cgroup_subsys_state *css)
5414{
5415 lockdep_assert_held(&cgroup_mutex);
5416
5417 if (css->flags & CSS_DYING)
5418 return;
5419
5420 css->flags |= CSS_DYING;
5421
 /*
  * This must happen before css is disassociated with its cgroup.
  * See seq_css() for details.
  */
5426 css_clear_dir(css);
5427
 /*
  * Killing would put the base ref, but we need to keep it alive
  * until after ->css_offline().
  */
5432 css_get(css);
5433
 /*
  * cgroup core guarantees that, by the time ->css_offline() is
  * invoked, no new css reference will be given out via
  * css_tryget_online().  We can't simply call percpu_ref_kill() and
  * proceed to offlining css's because percpu_ref_kill() doesn't
  * guarantee that the ref is seen as killed on all CPUs on return.
  *
  * Use percpu_ref_kill_and_confirm() to get notifications as each
  * css is confirmed to be seen as killed on all CPUs.
  */
5444 percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
5445}
5446
/**
 * cgroup_destroy_locked - the first stage of cgroup destruction
 * @cgrp: cgroup to be destroyed
 *
 * css's make use of percpu refcnts whose killing latency shouldn't be
 * exposed to userland and are RCU protected.  Also, cgroup core needs to
 * guarantee that css_tryget_online() won't succeed by the time
 * ->css_offline() is invoked.  To satisfy all the requirements,
 * destruction is implemented in the following two steps.
 *
 * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
 *     userland visible parts and start killing the percpu refcnts of
 *     css's.  Set up so that the next stage will be kicked off once all
 *     the percpu refcnts are confirmed to be killed.
 *
 * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
 *     rest of destruction.  Once all cgroup references are gone, the
 *     cgroup is RCU-freed.
 *
 * This function implements s1.  After this step, @cgrp is gone as far as
 * the userland is concerned and a new cgroup with the same name may be
 * created.  As cgroup doesn't care about the names internally, this
 * doesn't cause any problem.
 */
5471static int cgroup_destroy_locked(struct cgroup *cgrp)
5472 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
5473{
5474 struct cgroup *tcgrp, *parent = cgroup_parent(cgrp);
5475 struct cgroup_subsys_state *css;
5476 struct cgrp_cset_link *link;
5477 int ssid;
5478
5479 lockdep_assert_held(&cgroup_mutex);
5480
 /*
  * Only migration can raise populated from zero and we're already
  * holding cgroup_mutex.
  */
5485 if (cgroup_is_populated(cgrp))
5486 return -EBUSY;
5487
 /*
  * Make sure there's no live children.  We can't test emptiness of
  * ->self.children as dead children linger on it while being
  * drained; otherwise, "rmdir parent/child parent" may fail.
  */
5493 if (css_has_online_children(&cgrp->self))
5494 return -EBUSY;
5495
 /*
  * Mark @cgrp and the associated csets dead.  The former prevents
  * further task migration and child creation by disabling
  * cgroup_lock_live_group().  The latter makes the csets ignored by
  * the migration path.
  */
5502 cgrp->self.flags &= ~CSS_ONLINE;
5503
5504 spin_lock_irq(&css_set_lock);
5505 list_for_each_entry(link, &cgrp->cset_links, cset_link)
5506 link->cset->dead = true;
5507 spin_unlock_irq(&css_set_lock);
5508
5509
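 /* initiate massacre of all css's */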
5510 for_each_css(css, ssid, cgrp)
5511 kill_css(css);
5512
5513
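 /* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */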
5514 css_clear_dir(&cgrp->self);
5515 kernfs_remove(cgrp->kn);
5516
5517 if (parent && cgroup_is_threaded(cgrp))
5518 parent->nr_threaded_children--;
5519
5520 spin_lock_irq(&css_set_lock);
5521 for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
5522 tcgrp->nr_descendants--;
5523 tcgrp->nr_dying_descendants++;
5524
 /*
  * If the dying cgroup is frozen, decrease frozen descendants
  * counters of ancestor cgroups.
  */
5528 if (test_bit(CGRP_FROZEN, &cgrp->flags))
5529 tcgrp->freezer.nr_frozen_descendants--;
5530 }
5531 spin_unlock_irq(&css_set_lock);
5532
5533 cgroup1_check_for_release(parent);
5534
5535
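 /* put the base reference */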
5536 percpu_ref_kill(&cgrp->self.refcnt);
5537
5538 return 0;
}
5540
int cgroup_rmdir(struct kernfs_node *kn)
{
	struct cgroup *cgrp;
	int ret = 0;

	cgrp = cgroup_kn_lock_live(kn, false);
	if (!cgrp)
		return 0;

	ret = cgroup_destroy_locked(cgrp);
	if (!ret)
		TRACE_CGROUP_PATH(rmdir, cgrp);

	cgroup_kn_unlock(kn);
	return ret;
}

static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
	.show_options		= cgroup_show_options,
	.mkdir			= cgroup_mkdir,
	.rmdir			= cgroup_rmdir,
	.show_path		= cgroup_show_path,
};

static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
{
	struct cgroup_subsys_state *css;

	pr_debug("Initializing cgroup subsys %s\n", ss->name);

	mutex_lock(&cgroup_mutex);

	idr_init(&ss->css_idr);
	INIT_LIST_HEAD(&ss->cfts);

	/* create the root cgroup state for this subsystem */
	ss->root = &cgrp_dfl_root;
	css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
	/* we don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);

	/*
	 * Root csses are never destroyed and we can't initialize
	 * percpu_ref during early init.  Disable refcnting.
	 */
	css->flags |= CSS_NO_REF;

	if (early) {
		/* allocation can't be done safely during early init */
		css->id = 1;
	} else {
		css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
		BUG_ON(css->id < 0);
	}

	/*
	 * Update init_css_set to contain a subsys pointer to this newly
	 * registered subsystem; all tasks are in init_css_set at this
	 * point.
	 */
	init_css_set.subsys[ss->id] = css;

	have_fork_callback |= (bool)ss->fork << ss->id;
	have_exit_callback |= (bool)ss->exit << ss->id;
	have_release_callback |= (bool)ss->release << ss->id;
	have_canfork_callback |= (bool)ss->can_fork << ss->id;

	/*
	 * At system boot, before all subsystems have been registered,
	 * no tasks have been forked, so we don't need to call fork
	 * callbacks here.
	 */
	BUG_ON(!list_empty(&init_task.tasks));

	BUG_ON(online_css(css));

	mutex_unlock(&cgroup_mutex);
}

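/**
 * cgroup_init_early - cgroup initialization at system boot
 *
 * Initialize cgroups at system boot, and initialize any
 * subsystems that request early init.
 */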
int __init cgroup_init_early(void)
{
	static struct cgroup_fs_context __initdata ctx;
	struct cgroup_subsys *ss;
	int i;

	ctx.root = &cgrp_dfl_root;
	init_cgroup_root(&ctx);
	cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;

	RCU_INIT_POINTER(init_task.cgroups, &init_css_set);

	for_each_subsys(ss, i) {
		WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
		     "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
		     i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
		     ss->id, ss->name);
		WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
		     "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);

		ss->id = i;
		ss->name = cgroup_subsys_name[i];
		if (!ss->legacy_name)
			ss->legacy_name = cgroup_subsys_name[i];

		if (ss->early_init)
			cgroup_init_subsys(ss, true);
	}
	return 0;
}

static u16 cgroup_disable_mask __initdata;

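/**
 * cgroup_init - cgroup initialization
 *
 * Register cgroup filesystem and /proc file, and initialize
 * any subsystems that didn't request early init.
 */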
int __init cgroup_init(void)
{
	struct cgroup_subsys *ss;
	int ssid;

	BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
	BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
	BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
	BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));

	cgroup_rstat_boot();

	/*
	 * The latency of synchronize_rcu() is too high for cgroups,
	 * avoid it at the cost of forcing all readers into the slow path.
	 */
	rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);

	get_user_ns(init_cgroup_ns.user_ns);

	mutex_lock(&cgroup_mutex);

	/*
	 * Add init_css_set to the hash table so that dfl_root can link to
	 * it during init.
	 */
	hash_add(css_set_table, &init_css_set.hlist,
		 css_set_hash(init_css_set.subsys));

	BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));

	mutex_unlock(&cgroup_mutex);

	for_each_subsys(ss, ssid) {
		if (ss->early_init) {
			struct cgroup_subsys_state *css =
				init_css_set.subsys[ss->id];

			css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
						   GFP_KERNEL);
			BUG_ON(css->id < 0);
		} else {
			cgroup_init_subsys(ss, false);
		}

		list_add_tail(&init_css_set.e_cset_node[ssid],
			      &cgrp_dfl_root.cgrp.e_csets[ssid]);

		/*
		 * Setting dfl_root subsys_mask needs to consider the
		 * disabled flag and cftype registration needs kmalloc,
		 * both of which aren't available during early_init.
		 */
		if (cgroup_disable_mask & (1 << ssid)) {
			static_branch_disable(cgroup_subsys_enabled_key[ssid]);
			printk(KERN_INFO "Disabling %s control group subsystem\n",
			       ss->name);
			continue;
		}

		if (cgroup1_ssid_disabled(ssid))
			printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
			       ss->name);

		cgrp_dfl_root.subsys_mask |= 1 << ss->id;

		/* implicit controllers must be threaded too */
		WARN_ON(ss->implicit_on_dfl && !ss->threaded);

		if (ss->implicit_on_dfl)
			cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
		else if (!ss->dfl_cftypes)
			cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;

		if (ss->threaded)
			cgrp_dfl_threaded_ss_mask |= 1 << ss->id;

		if (ss->dfl_cftypes == ss->legacy_cftypes) {
			WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
		} else {
			WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
			WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
		}

		if (ss->bind)
			ss->bind(init_css_set.subsys[ssid]);

		mutex_lock(&cgroup_mutex);
		css_populate_dir(init_css_set.subsys[ssid]);
		mutex_unlock(&cgroup_mutex);
	}

	/* init_css_set.subsys[] has been updated, re-hash */
	hash_del(&init_css_set.hlist);
	hash_add(css_set_table, &init_css_set.hlist,
		 css_set_hash(init_css_set.subsys));

	WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
	WARN_ON(register_filesystem(&cgroup_fs_type));
	WARN_ON(register_filesystem(&cgroup2_fs_type));
	WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));

	return 0;
}

static int __init cgroup_wq_init(void)
{
	/*
	 * There isn't much point in executing the destruction path in
	 * parallel; a good chunk is serialized by cgroup_mutex anyway,
	 * so use 1 for @max_active.
	 *
	 * This can't be done in cgroup_init() because that runs before
	 * workqueues are available, so do it from an initcall instead.
	 */
	cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
	BUG_ON(!cgroup_destroy_wq);
	return 0;
}
core_initcall(cgroup_wq_init);

void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
				char *buf, size_t buflen)
{
	struct kernfs_node *kn;

	kn = kernfs_get_node_by_id(cgrp_dfl_root.kf_root, id);
	if (!kn)
		return;
	kernfs_path(kn, buf, buflen);
	kernfs_put(kn);
}

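/*
 * proc_cgroup_show()
 *  - Print task's cgroup paths into seq_file, one line for each hierarchy
 *  - Used for /proc/<pid>/cgroup.
 */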
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
		     struct pid *pid, struct task_struct *tsk)
{
	char *buf;
	int retval;
	struct cgroup_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!buf)
		goto out;

	mutex_lock(&cgroup_mutex);
	spin_lock_irq(&css_set_lock);

	for_each_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		int ssid, count = 0;

		if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
			continue;

		seq_printf(m, "%d:", root->hierarchy_id);
		if (root != &cgrp_dfl_root)
			for_each_subsys(ss, ssid)
				if (root->subsys_mask & (1 << ssid))
					seq_printf(m, "%s%s", count++ ? "," : "",
						   ss->legacy_name);
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		seq_putc(m, ':');

		cgrp = task_cgroup_from_root(tsk, root);

		/*
		 * On traditional hierarchies, all zombie tasks show up as
		 * belonging to the root cgroup.  On the default hierarchy,
		 * while a zombie doesn't show up in "cgroup.procs" and
		 * thus can't be migrated, its /proc/PID/cgroup keeps
		 * reporting the cgroup it belonged to before exiting.  If
		 * the cgroup is removed before the zombie is reaped,
		 * " (deleted)" is appended to the cgroup path.
		 */
		if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
			retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
						current->nsproxy->cgroup_ns);
			if (retval >= PATH_MAX)
				retval = -ENAMETOOLONG;
			if (retval < 0)
				goto out_unlock;

			seq_puts(m, buf);
		} else {
			seq_puts(m, "/");
		}

		if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
			seq_puts(m, " (deleted)\n");
		else
			seq_putc(m, '\n');
	}

	retval = 0;
out_unlock:
	spin_unlock_irq(&css_set_lock);
	mutex_unlock(&cgroup_mutex);
	kfree(buf);
out:
	return retval;
}
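/**
 * cgroup_fork - initialize cgroup related fields during copy_process()
 * @child: pointer to task_struct of forking parent process.
 *
 * A task is associated with the init_css_set until cgroup_post_fork()
 * attaches it to the parent's css_set.  An empty cg_list indicates that
 * @child isn't holding a reference to its css_set.
 */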
void cgroup_fork(struct task_struct *child)
{
	RCU_INIT_POINTER(child->cgroups, &init_css_set);
	INIT_LIST_HEAD(&child->cg_list);
}

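/**
 * cgroup_can_fork - called on a new task before the process is exposed
 * @child: the child task
 *
 * Calls the subsystem can_fork() callbacks.  If any of them returns an
 * error, the already-called callbacks are reverted via cancel_fork() and
 * the fork aborts with that error code.
 */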
int cgroup_can_fork(struct task_struct *child)
{
	struct cgroup_subsys *ss;
	int i, j, ret;

	do_each_subsys_mask(ss, i, have_canfork_callback) {
		ret = ss->can_fork(child);
		if (ret)
			goto out_revert;
	} while_each_subsys_mask();

	return 0;

out_revert:
	for_each_subsys(ss, j) {
		if (j >= i)
			break;
		if (ss->cancel_fork)
			ss->cancel_fork(child);
	}

	return ret;
}

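/**
 * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
 * @child: the child task
 *
 * Calls the cancel_fork() callbacks so that subsystems can undo any
 * state set up in cgroup_can_fork().
 */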
void cgroup_cancel_fork(struct task_struct *child)
{
	struct cgroup_subsys *ss;
	int i;

	for_each_subsys(ss, i)
		if (ss->cancel_fork)
			ss->cancel_fork(child);
}

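/**
 * cgroup_post_fork - called on a new task after adding it to the task list
 * @child: the child task
 *
 * Attaches the task to its parent's css_set if necessary and calls the
 * subsystem fork() callbacks.  Has to run after the task is visible on
 * the task list so that a concurrent cgroup iteration sees the new task.
 */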
void cgroup_post_fork(struct task_struct *child)
{
	struct cgroup_subsys *ss;
	int i;

	/*
	 * If we won the race against cgroup_enable_task_cg_lists(),
	 * @child is associated with %current's css_set and gets linked
	 * below; if we lost, @child stays in init_css_set, which is safe
	 * because enabling cg_lists iterates over all tasks.  Grabbing
	 * css_set_lock guarantees the association is stable while we
	 * link the task.
	 */
	if (use_task_css_set_links) {
		struct css_set *cset;

		spin_lock_irq(&css_set_lock);
		cset = task_css_set(current);
		if (list_empty(&child->cg_list)) {
			get_css_set(cset);
			cset->nr_tasks++;
			css_set_move_task(child, NULL, cset, false);
		}

		/*
		 * If the cgroup has to be frozen, the new task has too.
		 * Set the JOBCTL_TRAP_FREEZE jobctl bit to get the task
		 * into the frozen state.
		 */
		if (unlikely(cgroup_task_freeze(child))) {
			spin_lock(&child->sighand->siglock);
			WARN_ON_ONCE(child->frozen);
			child->jobctl |= JOBCTL_TRAP_FREEZE;
			spin_unlock(&child->sighand->siglock);

			/*
			 * Calling cgroup_update_frozen() isn't required
			 * here, because it will be called anyway a bit
			 * later from do_freezer_trap(), avoiding a
			 * transient switch from the frozen state and back.
			 */
		}

		spin_unlock_irq(&css_set_lock);
	}

	/*
	 * Call ss->fork().  This must happen after @child is linked on
	 * the css_set; otherwise, @child might change state between
	 * ->fork() and addition to the css_set.
	 */
	do_each_subsys_mask(ss, i, have_fork_callback) {
		ss->fork(child);
	} while_each_subsys_mask();
}
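/**
 * cgroup_exit - detach cgroup from exiting task
 * @tsk: pointer to task_struct of exiting process
 *
 * Unlinks @tsk from its css_set and calls the subsystem exit()
 * callbacks.  A reference to the css_set is kept until cgroup_free() so
 * that cgroup-related state stays accessible while the task is reaped.
 */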
void cgroup_exit(struct task_struct *tsk)
{
	struct cgroup_subsys *ss;
	struct css_set *cset;
	int i;

	/*
	 * Unlink @tsk from its css_set.  As the migration path can't
	 * race with us, we can check css_set and cg_list without
	 * synchronization.
	 */
	cset = task_css_set(tsk);

	if (!list_empty(&tsk->cg_list)) {
		spin_lock_irq(&css_set_lock);
		css_set_move_task(tsk, cset, NULL, false);
		list_add_tail(&tsk->cg_list, &cset->dying_tasks);
		cset->nr_tasks--;

		WARN_ON_ONCE(cgroup_task_frozen(tsk));
		if (unlikely(cgroup_task_freeze(tsk)))
			cgroup_update_frozen(task_dfl_cgroup(tsk));

		spin_unlock_irq(&css_set_lock);
	} else {
		get_css_set(cset);
	}

	/* see cgroup_post_fork() for details */
	do_each_subsys_mask(ss, i, have_exit_callback) {
		ss->exit(tsk);
	} while_each_subsys_mask();
}

void cgroup_release(struct task_struct *task)
{
	struct cgroup_subsys *ss;
	int ssid;

	do_each_subsys_mask(ss, ssid, have_release_callback) {
		ss->release(task);
	} while_each_subsys_mask();

	if (use_task_css_set_links) {
		spin_lock_irq(&css_set_lock);
		css_set_skip_task_iters(task_css_set(task), task);
		list_del_init(&task->cg_list);
		spin_unlock_irq(&css_set_lock);
	}
}

void cgroup_free(struct task_struct *task)
{
	struct css_set *cset = task_css_set(task);
	put_css_set(cset);
}

static int __init cgroup_disable(char *str)
{
	struct cgroup_subsys *ss;
	char *token;
	int i;

	while ((token = strsep(&str, ",")) != NULL) {
		if (!*token)
			continue;

		for_each_subsys(ss, i) {
			if (strcmp(token, ss->name) &&
			    strcmp(token, ss->legacy_name))
				continue;
			cgroup_disable_mask |= 1 << i;
		}
	}
	return 1;
}
__setup("cgroup_disable=", cgroup_disable);

void __init __weak enable_debug_cgroup(void) { }

static int __init enable_cgroup_debug(char *str)
{
	cgroup_debug = true;
	enable_debug_cgroup();
	return 1;
}
__setup("cgroup_debug", enable_cgroup_debug);

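/**
 * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
 * @dentry: directory dentry of interest
 * @ss: subsystem of interest
 *
 * If @dentry is a directory for a cgroup which has @ss enabled on it, try
 * to get the corresponding css and return it.  If such css doesn't exist
 * or can't be pinned, an ERR_PTR value is returned.
 */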
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
						       struct cgroup_subsys *ss)
{
	struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
	struct file_system_type *s_type = dentry->d_sb->s_type;
	struct cgroup_subsys_state *css = NULL;
	struct cgroup *cgrp;

	/* is @dentry a cgroup dir? */
	if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
	    !kn || kernfs_type(kn) != KERNFS_DIR)
		return ERR_PTR(-EBADF);

	rcu_read_lock();

	/*
	 * This path doesn't originate from kernfs and @kn could already
	 * have been or be removed at any point.  @kn->priv is RCU
	 * protected for this access.
	 */
	cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
	if (cgrp)
		css = cgroup_css(cgrp, ss);

	if (!css || !css_tryget_online(css))
		css = ERR_PTR(-ENOENT);

	rcu_read_unlock();
	return css;
}
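/**
 * css_from_id - lookup css by id
 * @id: the css id
 * @ss: cgroup subsys to be looked into
 *
 * Returns the css if there's a valid one with @id, otherwise returns
 * NULL.  Should be called under rcu_read_lock().
 */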
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return idr_find(&ss->css_idr, id);
}
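/**
 * cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path
 * @path: path on the default hierarchy
 *
 * Find the cgroup at @path on the default hierarchy, increment its
 * reference count and return it.  Returns ERR_PTR(-ENOENT) if @path
 * doesn't exist and ERR_PTR(-ENOTDIR) if @path points to a non-directory.
 */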
struct cgroup *cgroup_get_from_path(const char *path)
{
	struct kernfs_node *kn;
	struct cgroup *cgrp;

	mutex_lock(&cgroup_mutex);

	kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
	if (kn) {
		if (kernfs_type(kn) == KERNFS_DIR) {
			cgrp = kn->priv;
			cgroup_get_live(cgrp);
		} else {
			cgrp = ERR_PTR(-ENOTDIR);
		}
		kernfs_put(kn);
	} else {
		cgrp = ERR_PTR(-ENOENT);
	}

	mutex_unlock(&cgroup_mutex);
	return cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_get_from_path);
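/**
 * cgroup_get_from_fd - get a cgroup pointer from a fd
 * @fd: fd obtained by opening a cgroup directory
 *
 * Find the cgroup from a fd which should be obtained by opening a
 * cgroup directory.  Only cgroups on the default hierarchy are
 * accepted; an ERR_PTR value is returned on failure.
 */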
struct cgroup *cgroup_get_from_fd(int fd)
{
	struct cgroup_subsys_state *css;
	struct cgroup *cgrp;
	struct file *f;

	f = fget_raw(fd);
	if (!f)
		return ERR_PTR(-EBADF);

	css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
	fput(f);
	if (IS_ERR(css))
		return ERR_CAST(css);

	cgrp = css->cgroup;
	if (!cgroup_on_dfl(cgrp)) {
		cgroup_put(cgrp);
		return ERR_PTR(-EBADF);
	}

	return cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
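/*
 * sock->sk_cgrp_data handling.  For more info, see sock_cgroup_data
 * definition in linux/cgroup-defs.h.
 */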
6248#ifdef CONFIG_SOCK_CGROUP_DATA
6249
6250#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
6251
6252DEFINE_SPINLOCK(cgroup_sk_update_lock);
6253static bool cgroup_sk_alloc_disabled __read_mostly;
6254
6255void cgroup_sk_alloc_disable(void)
6256{
6257 if (cgroup_sk_alloc_disabled)
6258 return;
6259 pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
6260 cgroup_sk_alloc_disabled = true;
6261}
6262
6263#else
6264
6265#define cgroup_sk_alloc_disabled false
6266
6267#endif
6268
6269void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
6270{
6271 if (cgroup_sk_alloc_disabled)
6272 return;
6273
6274
6275 if (skcd->val) {
6276
6277
6278
6279
6280
6281 cgroup_get(sock_cgroup_ptr(skcd));
6282 return;
6283 }
6284
6285 rcu_read_lock();
6286
6287 while (true) {
6288 struct css_set *cset;
6289
6290 cset = task_css_set(current);
6291 if (likely(cgroup_tryget(cset->dfl_cgrp))) {
6292 skcd->val = (unsigned long)cset->dfl_cgrp;
6293 break;
6294 }
6295 cpu_relax();
6296 }
6297
6298 rcu_read_unlock();
6299}
6300
6301void cgroup_sk_free(struct sock_cgroup_data *skcd)
6302{
6303 cgroup_put(sock_cgroup_ptr(skcd));
6304}
6305
6306#endif

#ifdef CONFIG_CGROUP_BPF
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
		      enum bpf_attach_type type, u32 flags)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
	mutex_unlock(&cgroup_mutex);
	return ret;
}

int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
		      enum bpf_attach_type type, u32 flags)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	ret = __cgroup_bpf_detach(cgrp, prog, type);
	mutex_unlock(&cgroup_mutex);
	return ret;
}

int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
		     union bpf_attr __user *uattr)
{
	int ret;

	mutex_lock(&cgroup_mutex);
	ret = __cgroup_bpf_query(cgrp, attr, uattr);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
#endif	/* CONFIG_CGROUP_BPF */

#ifdef CONFIG_SYSFS
static ssize_t show_delegatable_files(struct cftype *files, char *buf,
				      ssize_t size, const char *prefix)
{
	struct cftype *cft;
	ssize_t ret = 0;

	for (cft = files; cft && cft->name[0] != '\0'; cft++) {
		if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
			continue;

		if (prefix)
			ret += snprintf(buf + ret, size - ret, "%s.", prefix);

		ret += snprintf(buf + ret, size - ret, "%s\n", cft->name);

		if (WARN_ON(ret >= size))
			break;
	}

	return ret;
}

static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr,
			     char *buf)
{
	struct cgroup_subsys *ss;
	int ssid;
	ssize_t ret = 0;

	ret = show_delegatable_files(cgroup_base_files, buf, PAGE_SIZE - ret,
				     NULL);

	for_each_subsys(ss, ssid)
		ret += show_delegatable_files(ss->dfl_cftypes, buf + ret,
					      PAGE_SIZE - ret,
					      cgroup_subsys_name[ssid]);

	return ret;
}
static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate);

static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
			     char *buf)
{
	return snprintf(buf, PAGE_SIZE, "nsdelegate\nmemory_localevents\n");
}
static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);

static struct attribute *cgroup_sysfs_attrs[] = {
	&cgroup_delegate_attr.attr,
	&cgroup_features_attr.attr,
	NULL,
};

static const struct attribute_group cgroup_sysfs_attr_group = {
	.attrs = cgroup_sysfs_attrs,
	.name = "cgroup",
};

static int __init cgroup_sysfs_init(void)
{
	return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
}
subsys_initcall(cgroup_sysfs_init);
#endif	/* CONFIG_SYSFS */