1
2#include "cgroup-internal.h"
3
4#include <linux/ctype.h>
5#include <linux/kmod.h>
6#include <linux/sort.h>
7#include <linux/delay.h>
8#include <linux/mm.h>
9#include <linux/sched/signal.h>
10#include <linux/sched/task.h>
11#include <linux/magic.h>
12#include <linux/slab.h>
13#include <linux/vmalloc.h>
14#include <linux/delayacct.h>
15#include <linux/pid_namespace.h>
16#include <linux/cgroupstats.h>
17#include <linux/fs_parser.h>
18
19#include <trace/events/cgroup.h>
20
21
22
23
24
25
26
/* pidlists linger the following amount before being destroyed on the last put */
#define CGROUP_PIDLIST_DESTROY_DELAY HZ

/* controllers that are disabled on the v1 hierarchies ("cgroup_no_v1=" boot arg) */
static u16 cgroup_no_v1_mask;

/* disables newly-created named v1 hierarchies ("cgroup_no_v1=named") */
static bool cgroup_no_v1_named;

/*
 * pidlist destructions need to be flushed on cgroup destruction.  Use a
 * separate workqueue as flush domain.
 */
static struct workqueue_struct *cgroup_pidlist_destroy_wq;

/* protects cgroup_root->release_agent_path against concurrent read/write */
static DEFINE_SPINLOCK(release_agent_path_lock);
43
44bool cgroup1_ssid_disabled(int ssid)
45{
46 return cgroup_no_v1_mask & (1 << ssid);
47}
48
49
50
51
52
53
54
55
/**
 * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
 * @from: attach to all cgroups of a given task
 * @tsk: the task to be attached
 *
 * Walks every v1 root and moves @tsk into the cgroup that @from occupies
 * on that root.  Stops at the first failure and returns that error.
 */
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
	struct cgroup_root *root;
	int retval = 0;

	mutex_lock(&cgroup_mutex);
	percpu_down_write(&cgroup_threadgroup_rwsem);
	for_each_root(root) {
		struct cgroup *from_cgrp;

		/* the default (v2) hierarchy is skipped here */
		if (root == &cgrp_dfl_root)
			continue;

		/* css_set_lock protects the task -> cgroup lookup */
		spin_lock_irq(&css_set_lock);
		from_cgrp = task_cgroup_from_root(from, root);
		spin_unlock_irq(&css_set_lock);

		retval = cgroup_attach_task(from_cgrp, tsk, false);
		if (retval)
			break;
	}
	percpu_up_write(&cgroup_threadgroup_rwsem);
	mutex_unlock(&cgroup_mutex);

	return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/**
 * cgroup_transfer_tasks - move tasks from one cgroup to another
 * @to: cgroup to which the tasks will be moved
 * @from: cgroup in which the tasks currently reside
 *
 * Migrates every non-exiting task found in @from into @to, one task at a
 * time.  Only valid on v1 hierarchies (@to must not be on the default
 * hierarchy).  Returns 0 on success or a negative errno.
 */
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
{
	DEFINE_CGROUP_MGCTX(mgctx);
	struct cgrp_cset_link *link;
	struct css_task_iter it;
	struct task_struct *task;
	int ret;

	if (cgroup_on_dfl(to))
		return -EINVAL;

	ret = cgroup_migrate_vet_dst(to);
	if (ret)
		return ret;

	mutex_lock(&cgroup_mutex);

	percpu_down_write(&cgroup_threadgroup_rwsem);

	/* all tasks in @from are being moved, all csets are source */
	spin_lock_irq(&css_set_lock);
	list_for_each_entry(link, &from->cset_links, cset_link)
		cgroup_migrate_add_src(link->cset, to, &mgctx);
	spin_unlock_irq(&css_set_lock);

	ret = cgroup_migrate_prepare_dst(&mgctx);
	if (ret)
		goto out_err;

	/*
	 * Migrate tasks one-by-one until @from is empty.  This fails iff
	 * ->can_attach() fails.
	 */
	do {
		css_task_iter_start(&from->self, 0, &it);

		/* skip over tasks that are already exiting */
		do {
			task = css_task_iter_next(&it);
		} while (task && (task->flags & PF_EXITING));

		/* pin the task so it survives past css_task_iter_end() */
		if (task)
			get_task_struct(task);
		css_task_iter_end(&it);

		if (task) {
			ret = cgroup_migrate(task, false, &mgctx);
			if (!ret)
				TRACE_CGROUP_PATH(transfer_tasks, to, task, false);
			put_task_struct(task);
		}
	} while (task && !ret);
out_err:
	cgroup_migrate_finish(&mgctx);
	percpu_up_write(&cgroup_threadgroup_rwsem);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
154
155
156
157
158
159
160
161
162
163
164
165
/*
 * Which pidlist file is being read: "cgroup.procs" (thread group leaders
 * only) or "tasks" (every thread).  Stored in cftype->private.
 */
enum cgroup_filetype {
	CGROUP_FILE_PROCS,
	CGROUP_FILE_TASKS,
};
170
171
172
173
174
175
176
/*
 * A pidlist is a list of pids that virtually represents the contents of one
 * of the cgroup files ("procs" or "tasks").  We keep a list of such pidlists,
 * a pair (one each for procs, tasks) for each pid namespace that's relevant
 * to the cgroup.
 */
struct cgroup_pidlist {
	/*
	 * used to find which pidlist is wanted; doesn't change as long as
	 * this particular list stays in the list.
	 */
	struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
	/* array of pids (or tgids for the procs file) */
	pid_t *list;
	/* how many elements the above list has */
	int length;
	/* each of these stored in a list by its cgroup */
	struct list_head links;
	/* pointer to the cgroup we belong to, for list removal purposes */
	struct cgroup *owner;
	/* for delayed destruction */
	struct delayed_work destroy_dwork;
};
194
195
196
197
198
/*
 * Destroy all of @cgrp's lingering pidlists immediately.  Called on cgroup
 * destruction; none may be left afterwards.
 */
void cgroup1_pidlist_destroy_all(struct cgroup *cgrp)
{
	struct cgroup_pidlist *l, *tmp_l;

	mutex_lock(&cgrp->pidlist_mutex);
	/* collapse each pending delayed destruction to "run now" */
	list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links)
		mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0);
	mutex_unlock(&cgrp->pidlist_mutex);

	flush_workqueue(cgroup_pidlist_destroy_wq);
	BUG_ON(!list_empty(&cgrp->pidlists));
}
211
/* delayed-work callback which frees a pidlist unless it was re-queued */
static void cgroup_pidlist_destroy_work_fn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct cgroup_pidlist *l = container_of(dwork, struct cgroup_pidlist,
						destroy_dwork);
	struct cgroup_pidlist *tofree = NULL;

	mutex_lock(&l->owner->pidlist_mutex);

	/*
	 * Destroy iff we didn't get queued again.  The state won't change
	 * once we're off the pending list as queueing is done while holding
	 * pidlist_mutex.
	 */
	if (!delayed_work_pending(dwork)) {
		list_del(&l->links);
		kvfree(l->list);
		put_pid_ns(l->key.ns);
		tofree = l;
	}

	mutex_unlock(&l->owner->pidlist_mutex);
	/* freed outside the mutex; NULL if the work was re-queued */
	kfree(tofree);
}
235
236
237
238
239
/*
 * pidlist_uniq - strip duplicate entries from a sorted pid array in place
 * @list: sorted array of pids
 * @length: number of entries in @list
 *
 * Returns the number of unique entries; the first that many slots of
 * @list hold the deduplicated values afterwards.
 */
static int pidlist_uniq(pid_t *list, int length)
{
	int src, dest = 1;

	/*
	 * We presume the list is sorted already.  We start at the second
	 * element and scan forward, copying each first-of-a-run value
	 * down into the next output slot.
	 */
	if (length == 0 || length == 1)
		return length;

	for (src = 1; src < length; src++) {
		/* skip over a run of repeated values */
		while (list[src] == list[src - 1]) {
			src++;
			if (src == length)
				goto done;
		}
		/* found a fresh value - compact it into place */
		list[dest] = list[src];
		dest++;
	}
done:
	return dest;
}
265
266
267
268
269
270
271
272
273
274
/*
 * Comparator for sort() on an array of pid_t.
 *
 * Use the (a > b) - (a < b) idiom instead of returning a - b: the
 * subtraction can overflow the signed result type when the operands have
 * opposite signs, which is undefined behavior and can produce a wrong
 * ordering.  Current pids are small positive ints so the old form happened
 * to work, but the branch-free comparison is safe for any values.
 */
static int cmppid(const void *a, const void *b)
{
	pid_t pa = *(const pid_t *)a;
	pid_t pb = *(const pid_t *)b;

	return (pa > pb) - (pa < pb);
}
279
/*
 * Find the pidlist of @type for @cgrp keyed by the caller's active pid
 * namespace, or NULL if none exists yet.  Caller must hold pidlist_mutex.
 */
static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
						  enum cgroup_filetype type)
{
	struct cgroup_pidlist *l;

	/* pidlists are per pid-namespace so readers see only their own pids */
	struct pid_namespace *ns = task_active_pid_ns(current);

	lockdep_assert_held(&cgrp->pidlist_mutex);

	list_for_each_entry(l, &cgrp->pidlists, links)
		if (l->key.type == type && l->key.ns == ns)
			return l;
	return NULL;
}
294
295
296
297
298
299
300
/*
 * Find the appropriate pidlist for our purpose (given procs vs tasks)
 * returning a pre-existing one if it exists, and creating it otherwise.
 * Returns NULL on allocation failure.  Caller must hold pidlist_mutex.
 */
static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
						enum cgroup_filetype type)
{
	struct cgroup_pidlist *l;

	lockdep_assert_held(&cgrp->pidlist_mutex);

	l = cgroup_pidlist_find(cgrp, type);
	if (l)
		return l;

	/* entry not found; create a new one */
	l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
	if (!l)
		return l;

	INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn);
	l->key.type = type;
	/* don't need task_nsproxy() if we're looking at ourself */
	l->key.ns = get_pid_ns(task_active_pid_ns(current));
	l->owner = cgrp;
	list_add(&l->links, &cgrp->pidlists);
	return l;
}
325
326
327
328
/*
 * Load a cgroup's pidarray with either procs' tgids or tasks' pids,
 * sorted (and deduplicated for the procs file), and store the result in
 * *@lp.  Returns 0 on success, -ENOMEM on allocation failure.
 */
static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
			      struct cgroup_pidlist **lp)
{
	pid_t *array;
	int length;
	int pid, n = 0;
	struct css_task_iter it;
	struct task_struct *tsk;
	struct cgroup_pidlist *l;

	lockdep_assert_held(&cgrp->pidlist_mutex);

	/*
	 * If cgroup gets more users after we read count, we won't have
	 * enough space - tough.  This race is indistinguishable to the
	 * caller from the case that the additional cgroup users didn't
	 * show up until sometime later on.
	 */
	length = cgroup_task_count(cgrp);
	array = kvmalloc_array(length, sizeof(pid_t), GFP_KERNEL);
	if (!array)
		return -ENOMEM;

	/* now, populate the array */
	css_task_iter_start(&cgrp->self, 0, &it);
	while ((tsk = css_task_iter_next(&it))) {
		/* the snapshot is full; drop any tasks that raced in */
		if (unlikely(n == length))
			break;
		/* get tgid or pid for procs or tasks file respectively */
		if (type == CGROUP_FILE_PROCS)
			pid = task_tgid_vnr(tsk);
		else
			pid = task_pid_vnr(tsk);
		/* only record pids visible in the reader's pid namespace */
		if (pid > 0)
			array[n++] = pid;
	}
	css_task_iter_end(&it);
	length = n;

	/* now sort & (if procs) strip out duplicate tgids */
	sort(array, length, sizeof(pid_t), cmppid, NULL);
	if (type == CGROUP_FILE_PROCS)
		length = pidlist_uniq(array, length);

	l = cgroup_pidlist_find_create(cgrp, type);
	if (!l) {
		kvfree(array);
		return -ENOMEM;
	}

	/* store array, freeing old if necessary */
	kvfree(l->list);
	l->list = array;
	l->length = length;
	*lp = l;
	return 0;
}
384
385
386
387
388
389
390
/*
 * seq_file ->start() for the tasks/procs files.  The seq_file position is
 * the next pid to display; the iterator is a pointer into the pidlist
 * array.  NOTE: the pidlist_mutex taken here is released by
 * cgroup_pidlist_stop(), which seq_file calls even when start() fails.
 */
static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
{
	/*
	 * Initially we receive a position value that corresponds to
	 * one more than the last pid shown (or 0 on the first call or
	 * after a seek to the start).  Use a binary-search to find the
	 * next pid to display, if any.
	 */
	struct kernfs_open_file *of = s->private;
	struct cgroup *cgrp = seq_css(s)->cgroup;
	struct cgroup_pidlist *l;
	enum cgroup_filetype type = seq_cft(s)->private;
	int index = 0, pid = *pos;
	int *iter, ret;

	mutex_lock(&cgrp->pidlist_mutex);

	/*
	 * !NULL @of->priv indicates that this isn't the first start()
	 * after open.  If the matching pidlist is around, we can use that.
	 * Look for it.  Note that @of->priv can't be used directly.  It
	 * could already have been destroyed.
	 */
	if (of->priv)
		of->priv = cgroup_pidlist_find(cgrp, type);

	/*
	 * Either this is the first start() after open or the matching
	 * pidlist has been destroyed inbetween.  Create a new one.
	 */
	if (!of->priv) {
		ret = pidlist_array_load(cgrp, type,
					 (struct cgroup_pidlist **)&of->priv);
		if (ret)
			return ERR_PTR(ret);
	}
	l = of->priv;

	if (pid) {
		int end = l->length;

		while (index < end) {
			int mid = (index + end) / 2;
			if (l->list[mid] == pid) {
				index = mid;
				break;
			} else if (l->list[mid] <= pid)
				index = mid + 1;
			else
				end = mid;
		}
	}
	/* If we're off the end of the array, we're done */
	if (index >= l->length)
		return NULL;
	/* Update the abstract position to be the actual pid that we found */
	iter = l->list + index;
	*pos = *iter;
	return iter;
}
451
/*
 * seq_file ->stop(): schedule delayed destruction of the pidlist and drop
 * the pidlist_mutex acquired in cgroup_pidlist_start().
 */
static void cgroup_pidlist_stop(struct seq_file *s, void *v)
{
	struct kernfs_open_file *of = s->private;
	struct cgroup_pidlist *l = of->priv;

	/* keep the pidlist cached for a while in case the file is re-read */
	if (l)
		mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
				 CGROUP_PIDLIST_DESTROY_DELAY);
	mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex);
}
462
/* seq_file ->next(): advance to the following pid in the array, if any */
static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
{
	struct kernfs_open_file *of = s->private;
	struct cgroup_pidlist *l = of->priv;
	pid_t *p = v;
	pid_t *end = l->list + l->length;

	/*
	 * Advance to the next pid in the array.  If this goes off the
	 * end, we're done.
	 */
	p++;
	if (p >= end) {
		/* bump @pos so a subsequent start() sees we finished */
		(*pos)++;
		return NULL;
	} else {
		*pos = *p;
		return p;
	}
}
482
/* seq_file ->show(): emit one pid per line */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	int pid = *(int *)v;

	seq_printf(s, "%d\n", pid);
	return 0;
}
489
/*
 * Common write handler for "cgroup.procs" (@threadgroup == true) and
 * "tasks" (@threadgroup == false): move the written pid into @of's cgroup.
 * Returns @nbytes on success or a negative errno.
 */
static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
				     char *buf, size_t nbytes, loff_t off,
				     bool threadgroup)
{
	struct cgroup *cgrp;
	struct task_struct *task;
	const struct cred *cred, *tcred;
	ssize_t ret;
	bool locked;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENODEV;

	task = cgroup_procs_write_start(buf, threadgroup, &locked);
	ret = PTR_ERR_OR_ZERO(task);
	if (ret)
		goto out_unlock;

	/*
	 * Even if we're attaching all tasks in the thread group, we only
	 * need to check permissions on one of them.  The writer must be
	 * root or share the target's (saved) uid.
	 */
	cred = current_cred();
	tcred = get_task_cred(task);
	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
	    !uid_eq(cred->euid, tcred->uid) &&
	    !uid_eq(cred->euid, tcred->suid))
		ret = -EACCES;
	put_cred(tcred);
	if (ret)
		goto out_finish;

	ret = cgroup_attach_task(cgrp, task, threadgroup);

out_finish:
	cgroup_procs_write_finish(task, locked);
out_unlock:
	cgroup_kn_unlock(of->kn);

	return ret ?: nbytes;
}
532
533static ssize_t cgroup1_procs_write(struct kernfs_open_file *of,
534 char *buf, size_t nbytes, loff_t off)
535{
536 return __cgroup1_procs_write(of, buf, nbytes, off, true);
537}
538
539static ssize_t cgroup1_tasks_write(struct kernfs_open_file *of,
540 char *buf, size_t nbytes, loff_t off)
541{
542 return __cgroup1_procs_write(of, buf, nbytes, off, false);
543}
544
/* "release_agent" write: update the hierarchy's release agent path */
static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
					  char *buf, size_t nbytes, loff_t off)
{
	struct cgroup *cgrp;

	/* the destination buffer must be able to hold any legal path */
	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENODEV;
	/* path is read concurrently by show/release paths; serialize */
	spin_lock(&release_agent_path_lock);
	strlcpy(cgrp->root->release_agent_path, strstrip(buf),
		sizeof(cgrp->root->release_agent_path));
	spin_unlock(&release_agent_path_lock);
	cgroup_kn_unlock(of->kn);
	return nbytes;
}
562
563static int cgroup_release_agent_show(struct seq_file *seq, void *v)
564{
565 struct cgroup *cgrp = seq_css(seq)->cgroup;
566
567 spin_lock(&release_agent_path_lock);
568 seq_puts(seq, cgrp->root->release_agent_path);
569 spin_unlock(&release_agent_path_lock);
570 seq_putc(seq, '\n');
571 return 0;
572}
573
/* "cgroup.sane_behavior" is a v2 leftover; always reads as 0 on v1 */
static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
{
	seq_puts(seq, "0\n");
	return 0;
}
579
580static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
581 struct cftype *cft)
582{
583 return notify_on_release(css->cgroup);
584}
585
586static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
587 struct cftype *cft, u64 val)
588{
589 if (val)
590 set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
591 else
592 clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
593 return 0;
594}
595
596static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
597 struct cftype *cft)
598{
599 return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
600}
601
602static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
603 struct cftype *cft, u64 val)
604{
605 if (val)
606 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
607 else
608 clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
609 return 0;
610}
611
612
/* cgroup core interface files for the legacy hierarchies */
struct cftype cgroup1_base_files[] = {
	{
		.name = "cgroup.procs",
		.seq_start = cgroup_pidlist_start,
		.seq_next = cgroup_pidlist_next,
		.seq_stop = cgroup_pidlist_stop,
		.seq_show = cgroup_pidlist_show,
		.private = CGROUP_FILE_PROCS,
		.write = cgroup1_procs_write,
	},
	{
		.name = "cgroup.clone_children",
		.read_u64 = cgroup_clone_children_read,
		.write_u64 = cgroup_clone_children_write,
	},
	{
		.name = "cgroup.sane_behavior",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.seq_show = cgroup_sane_behavior_show,
	},
	{
		.name = "tasks",
		.seq_start = cgroup_pidlist_start,
		.seq_next = cgroup_pidlist_next,
		.seq_stop = cgroup_pidlist_stop,
		.seq_show = cgroup_pidlist_show,
		.private = CGROUP_FILE_TASKS,
		.write = cgroup1_tasks_write,
	},
	{
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
	},
	{
		.name = "release_agent",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.seq_show = cgroup_release_agent_show,
		.write = cgroup_release_agent_write,
		.max_write_len = PATH_MAX - 1,
	},
	{ }	/* terminate */
};
656
657
/* Display information about each subsystem and each hierarchy */
int proc_cgroupstats_show(struct seq_file *m, void *v)
{
	struct cgroup_subsys *ss;
	int i;

	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
	/*
	 * ideally we don't want subsystems moving around while we do this.
	 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
	 * subsys/hierarchy state.
	 */
	mutex_lock(&cgroup_mutex);

	for_each_subsys(ss, i)
		seq_printf(m, "%s\t%d\t%d\t%d\n",
			   ss->legacy_name, ss->root->hierarchy_id,
			   atomic_read(&ss->root->nr_cgrps),
			   cgroup_ssid_enabled(i));

	mutex_unlock(&cgroup_mutex);
	return 0;
}
680
681
682
683
684
685
686
687
688
689
690
691
/**
 * cgroupstats_build - build and fill cgroupstats
 * @stats: cgroupstats to fill information into
 * @dentry: A dentry entry belonging to the cgroup for which stats have
 * been requested.
 *
 * Build and fill cgroupstats so that taskstats can export it to user
 * space.  Counts the tasks in the cgroup by scheduler state.
 */
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
{
	struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
	struct cgroup *cgrp;
	struct css_task_iter it;
	struct task_struct *tsk;

	/* it should be kernfs_node belonging to cgroupfs and is a directory */
	if (dentry->d_sb->s_type != &cgroup_fs_type || !kn ||
	    kernfs_type(kn) != KERNFS_DIR)
		return -EINVAL;

	mutex_lock(&cgroup_mutex);

	/*
	 * We aren't being called from kernfs and there's no guarantee on
	 * @kn->priv's validity.  For this and css_tryget_online_from_dir(),
	 * @kn->priv is RCU safe.  Let's do the RCU dancing.
	 */
	rcu_read_lock();
	cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
	if (!cgrp || cgroup_is_dead(cgrp)) {
		rcu_read_unlock();
		mutex_unlock(&cgroup_mutex);
		return -ENOENT;
	}
	rcu_read_unlock();

	css_task_iter_start(&cgrp->self, 0, &it);
	while ((tsk = css_task_iter_next(&it))) {
		switch (READ_ONCE(tsk->__state)) {
		case TASK_RUNNING:
			stats->nr_running++;
			break;
		case TASK_INTERRUPTIBLE:
			stats->nr_sleeping++;
			break;
		case TASK_UNINTERRUPTIBLE:
			stats->nr_uninterruptible++;
			break;
		case TASK_STOPPED:
			stats->nr_stopped++;
			break;
		default:
			/* anything else is counted only if waiting on I/O */
			if (tsk->in_iowait)
				stats->nr_io_wait++;
			break;
		}
	}
	css_task_iter_end(&it);

	mutex_unlock(&cgroup_mutex);
	return 0;
}
746
/*
 * Schedule the release-agent notification if @cgrp has notify_on_release
 * set and has become completely unused (no tasks, no live children).
 */
void cgroup1_check_for_release(struct cgroup *cgrp)
{
	if (notify_on_release(cgrp) && !cgroup_is_populated(cgrp) &&
	    !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
		schedule_work(&cgrp->release_agent_work);
}
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
/*
 * Notify userspace when a cgroup is released, by running the
 * configured release agent with the name of the cgroup (path
 * relative to the root of cgroup file system) as the argument.
 *
 * Most likely, this user command will try to rmdir this cgroup.
 *
 * This races with the possibility that some other task will be
 * attached to this cgroup before it is removed, or that some other
 * user task will 'mkdir' a child cgroup of this cgroup.  That's ok.
 * The presumed 'rmdir' will fail quietly if this cgroup is no longer
 * unused, and this cgroup will be reprieved from its death sentence,
 * to continue to serve a useful existence.  Next time it's released,
 * we will get notified again, if it still has 'notify_on_release' set.
 *
 * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
 * means only wait until the task is successfully execve()'d.  The
 * agent task is forked by call_usermodehelper(); we don't wait for
 * it because the caller has no use for its exit status.
 */
void cgroup1_release_agent(struct work_struct *work)
{
	struct cgroup *cgrp =
		container_of(work, struct cgroup, release_agent_work);
	char *pathbuf, *agentbuf;
	char *argv[3], *envp[3];
	int ret;

	/* snoop agent path and exit early if empty */
	if (!cgrp->root->release_agent_path[0])
		return;

	/* prepare argument buffers */
	pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
	agentbuf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!pathbuf || !agentbuf)
		goto out_free;

	/* re-read the path under the lock; it may have been rewritten */
	spin_lock(&release_agent_path_lock);
	strlcpy(agentbuf, cgrp->root->release_agent_path, PATH_MAX);
	spin_unlock(&release_agent_path_lock);
	if (!agentbuf[0])
		goto out_free;

	ret = cgroup_path_ns(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
	if (ret < 0 || ret >= PATH_MAX)
		goto out_free;

	argv[0] = agentbuf;
	argv[1] = pathbuf;
	argv[2] = NULL;

	/* minimal command environment */
	envp[0] = "HOME=/";
	envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
	envp[2] = NULL;

	call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
out_free:
	kfree(agentbuf);
	kfree(pathbuf);
}
819
820
821
822
/*
 * cgroup_rename - Only allow simple rename of directories in place.
 */
static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
			  const char *new_name_str)
{
	struct cgroup *cgrp = kn->priv;
	int ret;

	/* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
	if (strchr(new_name_str, '\n'))
		return -EINVAL;

	if (kernfs_type(kn) != KERNFS_DIR)
		return -ENOTDIR;
	/* moving to a different parent is not a rename */
	if (kn->parent != new_parent)
		return -EIO;

	/*
	 * We're gonna grab cgroup_mutex which nests outside kernfs
	 * active_ref.  kernfs_rename() doesn't require active_ref
	 * protection.  Break them before grabbing cgroup_mutex.
	 */
	kernfs_break_active_protection(new_parent);
	kernfs_break_active_protection(kn);

	mutex_lock(&cgroup_mutex);

	ret = kernfs_rename(kn, new_parent, new_name_str);
	if (!ret)
		TRACE_CGROUP_PATH(rename, cgrp);

	mutex_unlock(&cgroup_mutex);

	kernfs_unbreak_active_protection(kn);
	kernfs_unbreak_active_protection(new_parent);
	return ret;
}
858
/* show mount options for a v1 hierarchy in /proc/mounts etc. */
static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
{
	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
	struct cgroup_subsys *ss;
	int ssid;

	/* one option per bound controller */
	for_each_subsys(ss, ssid)
		if (root->subsys_mask & (1 << ssid))
			seq_show_option(seq, ss->legacy_name, NULL);
	if (root->flags & CGRP_ROOT_NOPREFIX)
		seq_puts(seq, ",noprefix");
	if (root->flags & CGRP_ROOT_XATTR)
		seq_puts(seq, ",xattr");
	if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
		seq_puts(seq, ",cpuset_v2_mode");

	/* release_agent_path may be rewritten concurrently */
	spin_lock(&release_agent_path_lock);
	if (strlen(root->release_agent_path))
		seq_show_option(seq, "release_agent",
				root->release_agent_path);
	spin_unlock(&release_agent_path_lock);

	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
		seq_puts(seq, ",clone_children");
	if (strlen(root->name))
		seq_show_option(seq, "name", root->name);
	return 0;
}
887
/* token ids for the v1 mount options parsed by cgroup1_parse_param() */
enum cgroup1_param {
	Opt_all,
	Opt_clone_children,
	Opt_cpuset_v2_mode,
	Opt_name,
	Opt_none,
	Opt_noprefix,
	Opt_release_agent,
	Opt_xattr,
};
898
/* fs_context parameter table for the v1 cgroup filesystem */
const struct fs_parameter_spec cgroup1_fs_parameters[] = {
	fsparam_flag  ("all",		Opt_all),
	fsparam_flag  ("clone_children", Opt_clone_children),
	fsparam_flag  ("cpuset_v2_mode", Opt_cpuset_v2_mode),
	fsparam_string("name",		Opt_name),
	fsparam_flag  ("none",		Opt_none),
	fsparam_flag  ("noprefix",	Opt_noprefix),
	fsparam_string("release_agent",	Opt_release_agent),
	fsparam_flag  ("xattr",		Opt_xattr),
	{}
};
910
/* parse one v1 mount option into the cgroup fs_context */
int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	struct cgroup_subsys *ss;
	struct fs_parse_result result;
	int opt, i;

	opt = fs_parse(fc, cgroup1_fs_parameters, param, &result);
	if (opt == -ENOPARAM) {
		int ret;

		ret = vfs_parse_fs_param_source(fc, param);
		if (ret != -ENOPARAM)
			return ret;
		/* an unrecognized key may be a bare controller name */
		for_each_subsys(ss, i) {
			if (strcmp(param->key, ss->legacy_name))
				continue;
			if (!cgroup_ssid_enabled(i) || cgroup1_ssid_disabled(i))
				return invalfc(fc, "Disabled controller '%s'",
					       param->key);
			ctx->subsys_mask |= (1 << i);
			return 0;
		}
		return invalfc(fc, "Unknown subsys name '%s'", param->key);
	}
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_none:
		/* Explicitly have no subsystems */
		ctx->none = true;
		break;
	case Opt_all:
		ctx->all_ss = true;
		break;
	case Opt_noprefix:
		ctx->flags |= CGRP_ROOT_NOPREFIX;
		break;
	case Opt_clone_children:
		ctx->cpuset_clone_children = true;
		break;
	case Opt_cpuset_v2_mode:
		ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE;
		break;
	case Opt_xattr:
		ctx->flags |= CGRP_ROOT_XATTR;
		break;
	case Opt_release_agent:
		/* Specifying two release agents is forbidden */
		if (ctx->release_agent)
			return invalfc(fc, "release_agent respecified");
		/* take ownership of the parsed string */
		ctx->release_agent = param->string;
		param->string = NULL;
		break;
	case Opt_name:
		/* blocked by boot param "cgroup_no_v1=named"? */
		if (cgroup_no_v1_named)
			return -ENOENT;
		/* Can't specify an empty name */
		if (!param->size)
			return invalfc(fc, "Empty name");
		if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1)
			return invalfc(fc, "Name too long");
		/* Must match [\w.-]+ */
		for (i = 0; i < param->size; i++) {
			char c = param->string[i];
			if (isalnum(c))
				continue;
			if ((c == '.') || (c == '-') || (c == '_'))
				continue;
			return invalfc(fc, "Invalid name");
		}
		/* Specifying two names is forbidden */
		if (ctx->name)
			return invalfc(fc, "name respecified");
		/* take ownership of the parsed string */
		ctx->name = param->string;
		param->string = NULL;
		break;
	}
	return 0;
}
993
/* validate the accumulated v1 mount options against each other */
static int check_cgroupfs_options(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	u16 mask = U16_MAX;
	u16 enabled = 0;
	struct cgroup_subsys *ss;
	int i;

#ifdef CONFIG_CPUSETS
	/* noprefix is permitted only together with cpuset (see below) */
	mask = ~((u16)1 << cpuset_cgrp_id);
#endif
	for_each_subsys(ss, i)
		if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i))
			enabled |= 1 << i;

	ctx->subsys_mask &= enabled;

	/*
	 * In absence of 'none', 'name=' and subsystem name options,
	 * let's default to 'all'.
	 */
	if (!ctx->subsys_mask && !ctx->none && !ctx->name)
		ctx->all_ss = true;

	if (ctx->all_ss) {
		/* Mutually exclusive option 'all' + subsystem name */
		if (ctx->subsys_mask)
			return invalfc(fc, "subsys name conflicts with all");
		/* 'all' => select all the subsystems */
		ctx->subsys_mask = enabled;
	}

	/*
	 * We either have to specify by name or by subsystems. (So all
	 * empty hierarchies must have a name).
	 */
	if (!ctx->subsys_mask && !ctx->name)
		return invalfc(fc, "Need name or subsystem set");

	/*
	 * Option noprefix was introduced just for backward compatibility
	 * with the old cpuset, so we allow noprefix only if mounting just
	 * the cpuset subsystem.
	 */
	if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask))
		return invalfc(fc, "noprefix used incorrectly");

	/* Can't specify "none" and some subsystems */
	if (ctx->subsys_mask && ctx->none)
		return invalfc(fc, "none used incorrectly");

	return 0;
}
1047
/* handle remount of an existing v1 hierarchy */
int cgroup1_reconfigure(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb);
	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
	int ret = 0;
	u16 added_mask, removed_mask;

	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

	/* See what subsystems are wanted */
	ret = check_cgroupfs_options(fc);
	if (ret)
		goto out_unlock;

	if (ctx->subsys_mask != root->subsys_mask || ctx->release_agent)
		pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
			task_tgid_nr(current), current->comm);

	added_mask = ctx->subsys_mask & ~root->subsys_mask;
	removed_mask = root->subsys_mask & ~ctx->subsys_mask;

	/* Don't allow flags or name to change at remount */
	if ((ctx->flags ^ root->flags) ||
	    (ctx->name && strcmp(ctx->name, root->name))) {
		errorfc(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"",
		       ctx->flags, ctx->name ?: "", root->flags, root->name);
		ret = -EINVAL;
		goto out_unlock;
	}

	/* remounting is not allowed for populated hierarchies */
	if (!list_empty(&root->cgrp.self.children)) {
		ret = -EBUSY;
		goto out_unlock;
	}

	ret = rebind_subsystems(root, added_mask);
	if (ret)
		goto out_unlock;

	WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));

	if (ctx->release_agent) {
		spin_lock(&release_agent_path_lock);
		strcpy(root->release_agent_path, ctx->release_agent);
		spin_unlock(&release_agent_path_lock);
	}

	trace_cgroup_remount(root);

 out_unlock:
	mutex_unlock(&cgroup_mutex);
	return ret;
}
1103
/* kernfs syscall hooks for v1 hierarchies */
struct kernfs_syscall_ops cgroup1_kf_syscall_ops = {
	.rename			= cgroup1_rename,
	.show_options		= cgroup1_show_options,
	.mkdir			= cgroup_mkdir,
	.rmdir			= cgroup_rmdir,
	.show_path		= cgroup_show_path,
};
1111
1112
1113
1114
1115
1116
1117
1118
1119
/*
 * Find or create a cgroup_root to use for the requested mount and store it
 * in ctx->root.  Returns 0 on success, a positive value if the caller
 * should retry (a root is mid-destruction), or a negative errno.
 */
static int cgroup1_root_to_use(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	struct cgroup_root *root;
	struct cgroup_subsys *ss;
	int i, ret;

	/* First find the desired set of subsystems */
	ret = check_cgroupfs_options(fc);
	if (ret)
		return ret;

	/*
	 * Destruction of cgroup root is asynchronous, so subsystems may
	 * still be dying after the previous unmount.  Let's drain the
	 * dying subsystems.  We just need to ensure that the ones
	 * unmounted previously finish dying and don't care about new ones
	 * starting.  Testing ref liveliness is good enough.
	 */
	for_each_subsys(ss, i) {
		if (!(ctx->subsys_mask & (1 << i)) ||
		    ss->root == &cgrp_dfl_root)
			continue;

		/* root is dying; tell the caller to retry */
		if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt))
			return 1;	/* restart */
		cgroup_put(&ss->root->cgrp);
	}

	for_each_root(root) {
		bool name_match = false;

		if (root == &cgrp_dfl_root)
			continue;

		/*
		 * If we asked for a name then it must match.  Also, if
		 * name matches but sybsys_mask doesn't, we should fail.
		 * Remember whether name matched.
		 */
		if (ctx->name) {
			if (strcmp(ctx->name, root->name))
				continue;
			name_match = true;
		}

		/*
		 * If we asked for subsystems (or explicitly for no
		 * subsystems) then they must match.
		 */
		if ((ctx->subsys_mask || ctx->none) &&
		    (ctx->subsys_mask != root->subsys_mask)) {
			if (!name_match)
				continue;
			return -EBUSY;
		}

		if (root->flags ^ ctx->flags)
			pr_warn("new mount options do not match the existing superblock, will be ignored\n");

		ctx->root = root;
		return 0;
	}

	/*
	 * No such thing, create a new one.  name= matching without subsys
	 * specification is allowed for already existing hierarchies but we
	 * can't create new one without subsys specification.
	 */
	if (!ctx->subsys_mask && !ctx->none)
		return invalfc(fc, "No subsys list or none specified");

	/* Hierarchies may only be created in the initial cgroup namespace. */
	if (ctx->ns != &init_cgroup_ns)
		return -EPERM;

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return -ENOMEM;

	ctx->root = root;
	init_cgroup_root(ctx);

	ret = cgroup_setup_root(root, ctx->subsys_mask);
	if (ret)
		cgroup_free_root(root);
	return ret;
}
1208
/* fs_context ->get_tree() for v1 cgroup mounts */
int cgroup1_get_tree(struct fs_context *fc)
{
	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
	int ret;

	/* Check if the caller has permission to mount. */
	if (!ns_capable(ctx->ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);

	ret = cgroup1_root_to_use(fc);
	if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt))
		ret = 1;	/* restart */

	mutex_unlock(&cgroup_mutex);

	if (!ret)
		ret = cgroup_do_get_tree(fc);

	/*
	 * There's a race window after we release cgroup_mutex and before
	 * allocating a superblock.  Make sure a concurrent process won't
	 * be able to re-use the root during this window by delaying the
	 * initialization of root refcnt.
	 */
	if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) {
		fc_drop_locked(fc);
		ret = 1;
	}

	/* positive ret means "back off and retry the whole syscall" */
	if (unlikely(ret > 0)) {
		msleep(10);
		return restart_syscall();
	}
	return ret;
}
1240
static int __init cgroup1_wq_init(void)
{
	/*
	 * Used to destroy pidlists and separate to serve as flush domain.
	 * Cap @max_active to 1 too.
	 */
	cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
						    0, 1);
	BUG_ON(!cgroup_pidlist_destroy_wq);
	return 0;
}
core_initcall(cgroup1_wq_init);
1253
/*
 * Parse the "cgroup_no_v1=" kernel command line parameter: a comma
 * separated list of controller names, "all", and/or "named".
 */
static int __init cgroup_no_v1(char *str)
{
	struct cgroup_subsys *ss;
	char *token;
	int i;

	while ((token = strsep(&str, ",")) != NULL) {
		if (!*token)
			continue;

		if (!strcmp(token, "all")) {
			cgroup_no_v1_mask = U16_MAX;
			continue;
		}

		if (!strcmp(token, "named")) {
			cgroup_no_v1_named = true;
			continue;
		}

		/* match either the current or the legacy controller name */
		for_each_subsys(ss, i) {
			if (strcmp(token, ss->name) &&
			    strcmp(token, ss->legacy_name))
				continue;

			cgroup_no_v1_mask |= 1 << i;
		}
	}
	return 1;
}
__setup("cgroup_no_v1=", cgroup_no_v1);
1285