1
2#include "cgroup-internal.h"
3
4#include <linux/ctype.h>
5#include <linux/kmod.h>
6#include <linux/sort.h>
7#include <linux/delay.h>
8#include <linux/mm.h>
9#include <linux/sched/signal.h>
10#include <linux/sched/task.h>
11#include <linux/magic.h>
12#include <linux/slab.h>
13#include <linux/vmalloc.h>
14#include <linux/delayacct.h>
15#include <linux/pid_namespace.h>
16#include <linux/cgroupstats.h>
17#include <linux/fs_parser.h>
18
19#include <trace/events/cgroup.h>
20
/*
 * Thin wrapper around invalf(): forwards the fs_context, the format string
 * and any variadic arguments unchanged.
 */
#define cg_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__)
22
23
24
25
26
27
28
/*
 * Delay (HZ jiffies) handed to mod_delayed_work() by cgroup_pidlist_stop()
 * before a cached pidlist is destroyed.
 */
#define CGROUP_PIDLIST_DESTROY_DELAY HZ

/*
 * Bitmask of subsystem IDs that the "cgroup_no_v1=" boot parameter marked as
 * unavailable; queried through cgroup1_ssid_disabled().
 */
static u16 cgroup_no_v1_mask;

/*
 * Set by "cgroup_no_v1=named"; when true the "name=" mount option is refused
 * with -ENOENT in cgroup1_parse_param().
 */
static bool cgroup_no_v1_named;

/*
 * Workqueue on which the pidlist destroy_dwork items are queued; allocated in
 * cgroup1_wq_init() and flushed by cgroup1_pidlist_destroy_all().
 */
static struct workqueue_struct *cgroup_pidlist_destroy_wq;

/*
 * Taken around accesses to cgroup_root->release_agent_path by the
 * release_agent show/write handlers, show_options and remount.
 */
static DEFINE_SPINLOCK(release_agent_path_lock);
48
49bool cgroup1_ssid_disabled(int ssid)
50{
51 return cgroup_no_v1_mask & (1 << ssid);
52}
53
54
55
56
57
58
59int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
60{
61 struct cgroup_root *root;
62 int retval = 0;
63
64 mutex_lock(&cgroup_mutex);
65 percpu_down_write(&cgroup_threadgroup_rwsem);
66 for_each_root(root) {
67 struct cgroup *from_cgrp;
68
69 if (root == &cgrp_dfl_root)
70 continue;
71
72 spin_lock_irq(&css_set_lock);
73 from_cgrp = task_cgroup_from_root(from, root);
74 spin_unlock_irq(&css_set_lock);
75
76 retval = cgroup_attach_task(from_cgrp, tsk, false);
77 if (retval)
78 break;
79 }
80 percpu_up_write(&cgroup_threadgroup_rwsem);
81 mutex_unlock(&cgroup_mutex);
82
83 return retval;
84}
85EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
86
87
88
89
90
91
92
93
94
95
96
97
98int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
99{
100 DEFINE_CGROUP_MGCTX(mgctx);
101 struct cgrp_cset_link *link;
102 struct css_task_iter it;
103 struct task_struct *task;
104 int ret;
105
106 if (cgroup_on_dfl(to))
107 return -EINVAL;
108
109 ret = cgroup_migrate_vet_dst(to);
110 if (ret)
111 return ret;
112
113 mutex_lock(&cgroup_mutex);
114
115 percpu_down_write(&cgroup_threadgroup_rwsem);
116
117
118 spin_lock_irq(&css_set_lock);
119 list_for_each_entry(link, &from->cset_links, cset_link)
120 cgroup_migrate_add_src(link->cset, to, &mgctx);
121 spin_unlock_irq(&css_set_lock);
122
123 ret = cgroup_migrate_prepare_dst(&mgctx);
124 if (ret)
125 goto out_err;
126
127
128
129
130
131 do {
132 css_task_iter_start(&from->self, 0, &it);
133
134 do {
135 task = css_task_iter_next(&it);
136 } while (task && (task->flags & PF_EXITING));
137
138 if (task)
139 get_task_struct(task);
140 css_task_iter_end(&it);
141
142 if (task) {
143 ret = cgroup_migrate(task, false, &mgctx);
144 if (!ret)
145 TRACE_CGROUP_PATH(transfer_tasks, to, task, false);
146 put_task_struct(task);
147 }
148 } while (task && !ret);
149out_err:
150 cgroup_migrate_finish(&mgctx);
151 percpu_up_write(&cgroup_threadgroup_rwsem);
152 mutex_unlock(&cgroup_mutex);
153 return ret;
154}
155
156
157
158
159
160
161
162
163
164
165
166
/*
 * Which pid listing a pidlist holds; stored in cftype->private for the
 * "cgroup.procs" and "tasks" files.
 */
enum cgroup_filetype {
	/* list is filled with task_tgid_vnr() values and de-duplicated */
	CGROUP_FILE_PROCS,
	/* list is filled with task_pid_vnr() values */
	CGROUP_FILE_TASKS,
};
171
172
173
174
175
176
177
/*
 * A cached, sorted array of pids for one cgroup, one file type and one pid
 * namespace.  Instances live on cgroup->pidlists and are torn down from
 * delayed work.
 */
struct cgroup_pidlist {
	/*
	 * Lookup key: cgroup_pidlist_find() matches on both the file type and
	 * the pid namespace.
	 */
	struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
	/* array of pids, allocated via pidlist_allocate() */
	pid_t *list;
	/* number of valid entries in @list */
	int length;
	/* node in the owning cgroup's ->pidlists list */
	struct list_head links;
	/* cgroup this list belongs to; its pidlist_mutex protects the list */
	struct cgroup *owner;
	/* delayed work running cgroup_pidlist_destroy_work_fn() */
	struct delayed_work destroy_dwork;
};
195
196
197
198
199
200
201#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
202static void *pidlist_allocate(int count)
203{
204 if (PIDLIST_TOO_LARGE(count))
205 return vmalloc(array_size(count, sizeof(pid_t)));
206 else
207 return kmalloc_array(count, sizeof(pid_t), GFP_KERNEL);
208}
209
210static void pidlist_free(void *p)
211{
212 kvfree(p);
213}
214
215
216
217
218
219void cgroup1_pidlist_destroy_all(struct cgroup *cgrp)
220{
221 struct cgroup_pidlist *l, *tmp_l;
222
223 mutex_lock(&cgrp->pidlist_mutex);
224 list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links)
225 mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0);
226 mutex_unlock(&cgrp->pidlist_mutex);
227
228 flush_workqueue(cgroup_pidlist_destroy_wq);
229 BUG_ON(!list_empty(&cgrp->pidlists));
230}
231
232static void cgroup_pidlist_destroy_work_fn(struct work_struct *work)
233{
234 struct delayed_work *dwork = to_delayed_work(work);
235 struct cgroup_pidlist *l = container_of(dwork, struct cgroup_pidlist,
236 destroy_dwork);
237 struct cgroup_pidlist *tofree = NULL;
238
239 mutex_lock(&l->owner->pidlist_mutex);
240
241
242
243
244
245 if (!delayed_work_pending(dwork)) {
246 list_del(&l->links);
247 pidlist_free(l->list);
248 put_pid_ns(l->key.ns);
249 tofree = l;
250 }
251
252 mutex_unlock(&l->owner->pidlist_mutex);
253 kfree(tofree);
254}
255
256
257
258
259
/*
 * pidlist_uniq - compact runs of equal adjacent entries in place
 * @list: array of pids (callers pass it sorted)
 * @length: number of entries in @list
 *
 * Every entry that differs from its predecessor is kept; entries equal to
 * their predecessor are dropped.  Returns the number of entries kept, which
 * occupy the front of @list.
 */
static int pidlist_uniq(pid_t *list, int length)
{
	int rd, wr;

	/* nothing to compact for empty or single-element arrays */
	if (length == 0 || length == 1)
		return length;

	wr = 1;
	for (rd = 1; rd < length; rd++) {
		if (list[rd] == list[rd - 1])
			continue;
		/* wr never runs ahead of rd, so unread entries stay intact */
		list[wr++] = list[rd];
	}

	return wr;
}
285
286
287
288
289
290
291
292
293
294
/*
 * sort() comparison callback for pid_t arrays: negative, zero or positive
 * when *a is smaller than, equal to or greater than *b.
 */
static int cmppid(const void *a, const void *b)
{
	const pid_t *lhs = a;
	const pid_t *rhs = b;

	return *lhs - *rhs;
}
299
300static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
301 enum cgroup_filetype type)
302{
303 struct cgroup_pidlist *l;
304
305 struct pid_namespace *ns = task_active_pid_ns(current);
306
307 lockdep_assert_held(&cgrp->pidlist_mutex);
308
309 list_for_each_entry(l, &cgrp->pidlists, links)
310 if (l->key.type == type && l->key.ns == ns)
311 return l;
312 return NULL;
313}
314
315
316
317
318
319
320
321static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
322 enum cgroup_filetype type)
323{
324 struct cgroup_pidlist *l;
325
326 lockdep_assert_held(&cgrp->pidlist_mutex);
327
328 l = cgroup_pidlist_find(cgrp, type);
329 if (l)
330 return l;
331
332
333 l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
334 if (!l)
335 return l;
336
337 INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn);
338 l->key.type = type;
339
340 l->key.ns = get_pid_ns(task_active_pid_ns(current));
341 l->owner = cgrp;
342 list_add(&l->links, &cgrp->pidlists);
343 return l;
344}
345
346
347
348
349static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
350 struct cgroup_pidlist **lp)
351{
352 pid_t *array;
353 int length;
354 int pid, n = 0;
355 struct css_task_iter it;
356 struct task_struct *tsk;
357 struct cgroup_pidlist *l;
358
359 lockdep_assert_held(&cgrp->pidlist_mutex);
360
361
362
363
364
365
366
367 length = cgroup_task_count(cgrp);
368 array = pidlist_allocate(length);
369 if (!array)
370 return -ENOMEM;
371
372 css_task_iter_start(&cgrp->self, 0, &it);
373 while ((tsk = css_task_iter_next(&it))) {
374 if (unlikely(n == length))
375 break;
376
377 if (type == CGROUP_FILE_PROCS)
378 pid = task_tgid_vnr(tsk);
379 else
380 pid = task_pid_vnr(tsk);
381 if (pid > 0)
382 array[n++] = pid;
383 }
384 css_task_iter_end(&it);
385 length = n;
386
387 sort(array, length, sizeof(pid_t), cmppid, NULL);
388 if (type == CGROUP_FILE_PROCS)
389 length = pidlist_uniq(array, length);
390
391 l = cgroup_pidlist_find_create(cgrp, type);
392 if (!l) {
393 pidlist_free(array);
394 return -ENOMEM;
395 }
396
397
398 pidlist_free(l->list);
399 l->list = array;
400 l->length = length;
401 *lp = l;
402 return 0;
403}
404
405
406
407
408
409
410
411static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
412{
413
414
415
416
417
418
419 struct kernfs_open_file *of = s->private;
420 struct cgroup *cgrp = seq_css(s)->cgroup;
421 struct cgroup_pidlist *l;
422 enum cgroup_filetype type = seq_cft(s)->private;
423 int index = 0, pid = *pos;
424 int *iter, ret;
425
426 mutex_lock(&cgrp->pidlist_mutex);
427
428
429
430
431
432
433
434 if (of->priv)
435 of->priv = cgroup_pidlist_find(cgrp, type);
436
437
438
439
440
441 if (!of->priv) {
442 ret = pidlist_array_load(cgrp, type,
443 (struct cgroup_pidlist **)&of->priv);
444 if (ret)
445 return ERR_PTR(ret);
446 }
447 l = of->priv;
448
449 if (pid) {
450 int end = l->length;
451
452 while (index < end) {
453 int mid = (index + end) / 2;
454 if (l->list[mid] == pid) {
455 index = mid;
456 break;
457 } else if (l->list[mid] <= pid)
458 index = mid + 1;
459 else
460 end = mid;
461 }
462 }
463
464 if (index >= l->length)
465 return NULL;
466
467 iter = l->list + index;
468 *pos = *iter;
469 return iter;
470}
471
472static void cgroup_pidlist_stop(struct seq_file *s, void *v)
473{
474 struct kernfs_open_file *of = s->private;
475 struct cgroup_pidlist *l = of->priv;
476
477 if (l)
478 mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
479 CGROUP_PIDLIST_DESTROY_DELAY);
480 mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex);
481}
482
483static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
484{
485 struct kernfs_open_file *of = s->private;
486 struct cgroup_pidlist *l = of->priv;
487 pid_t *p = v;
488 pid_t *end = l->list + l->length;
489
490
491
492
493 p++;
494 if (p >= end) {
495 return NULL;
496 } else {
497 *pos = *p;
498 return p;
499 }
500}
501
/* seq_file ->show: print the pid that @v points at, one per line. */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	const int *pid = v;

	seq_printf(s, "%d\n", *pid);
	return 0;
}
508
509static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
510 char *buf, size_t nbytes, loff_t off,
511 bool threadgroup)
512{
513 struct cgroup *cgrp;
514 struct task_struct *task;
515 const struct cred *cred, *tcred;
516 ssize_t ret;
517
518 cgrp = cgroup_kn_lock_live(of->kn, false);
519 if (!cgrp)
520 return -ENODEV;
521
522 task = cgroup_procs_write_start(buf, threadgroup);
523 ret = PTR_ERR_OR_ZERO(task);
524 if (ret)
525 goto out_unlock;
526
527
528
529
530
531 cred = current_cred();
532 tcred = get_task_cred(task);
533 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
534 !uid_eq(cred->euid, tcred->uid) &&
535 !uid_eq(cred->euid, tcred->suid))
536 ret = -EACCES;
537 put_cred(tcred);
538 if (ret)
539 goto out_finish;
540
541 ret = cgroup_attach_task(cgrp, task, threadgroup);
542
543out_finish:
544 cgroup_procs_write_finish(task);
545out_unlock:
546 cgroup_kn_unlock(of->kn);
547
548 return ret ?: nbytes;
549}
550
551static ssize_t cgroup1_procs_write(struct kernfs_open_file *of,
552 char *buf, size_t nbytes, loff_t off)
553{
554 return __cgroup1_procs_write(of, buf, nbytes, off, true);
555}
556
557static ssize_t cgroup1_tasks_write(struct kernfs_open_file *of,
558 char *buf, size_t nbytes, loff_t off)
559{
560 return __cgroup1_procs_write(of, buf, nbytes, off, false);
561}
562
563static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
564 char *buf, size_t nbytes, loff_t off)
565{
566 struct cgroup *cgrp;
567
568 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
569
570 cgrp = cgroup_kn_lock_live(of->kn, false);
571 if (!cgrp)
572 return -ENODEV;
573 spin_lock(&release_agent_path_lock);
574 strlcpy(cgrp->root->release_agent_path, strstrip(buf),
575 sizeof(cgrp->root->release_agent_path));
576 spin_unlock(&release_agent_path_lock);
577 cgroup_kn_unlock(of->kn);
578 return nbytes;
579}
580
581static int cgroup_release_agent_show(struct seq_file *seq, void *v)
582{
583 struct cgroup *cgrp = seq_css(seq)->cgroup;
584
585 spin_lock(&release_agent_path_lock);
586 seq_puts(seq, cgrp->root->release_agent_path);
587 spin_unlock(&release_agent_path_lock);
588 seq_putc(seq, '\n');
589 return 0;
590}
591
/* "cgroup.sane_behavior" read handler: always reports "0". */
static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
{
	static const char text[] = "0\n";

	seq_puts(seq, text);
	return 0;
}
597
598static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
599 struct cftype *cft)
600{
601 return notify_on_release(css->cgroup);
602}
603
604static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
605 struct cftype *cft, u64 val)
606{
607 if (val)
608 set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
609 else
610 clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
611 return 0;
612}
613
614static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
615 struct cftype *cft)
616{
617 return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
618}
619
620static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
621 struct cftype *cft, u64 val)
622{
623 if (val)
624 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
625 else
626 clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
627 return 0;
628}
629
630
/*
 * Control files defined by this file for cgroup v1 hierarchies.  The array
 * is terminated by an empty entry.
 */
struct cftype cgroup1_base_files[] = {
	{
		/* pid listing using CGROUP_FILE_PROCS; writes move a thread group */
		.name = "cgroup.procs",
		.seq_start = cgroup_pidlist_start,
		.seq_next = cgroup_pidlist_next,
		.seq_stop = cgroup_pidlist_stop,
		.seq_show = cgroup_pidlist_show,
		.private = CGROUP_FILE_PROCS,
		.write = cgroup1_procs_write,
	},
	{
		/* reads/writes the CGRP_CPUSET_CLONE_CHILDREN flag */
		.name = "cgroup.clone_children",
		.read_u64 = cgroup_clone_children_read,
		.write_u64 = cgroup_clone_children_write,
	},
	{
		/* flagged CFTYPE_ONLY_ON_ROOT; always reads "0" */
		.name = "cgroup.sane_behavior",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.seq_show = cgroup_sane_behavior_show,
	},
	{
		/* pid listing using CGROUP_FILE_TASKS; writes move a single thread */
		.name = "tasks",
		.seq_start = cgroup_pidlist_start,
		.seq_next = cgroup_pidlist_next,
		.seq_stop = cgroup_pidlist_stop,
		.seq_show = cgroup_pidlist_show,
		.private = CGROUP_FILE_TASKS,
		.write = cgroup1_tasks_write,
	},
	{
		/* reads/writes the CGRP_NOTIFY_ON_RELEASE flag */
		.name = "notify_on_release",
		.read_u64 = cgroup_read_notify_on_release,
		.write_u64 = cgroup_write_notify_on_release,
	},
	{
		/* flagged CFTYPE_ONLY_ON_ROOT; release agent path, writes capped below PATH_MAX */
		.name = "release_agent",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.seq_show = cgroup_release_agent_show,
		.write = cgroup_release_agent_write,
		.max_write_len = PATH_MAX - 1,
	},
	{ }	/* terminate */
};
674
675
676int proc_cgroupstats_show(struct seq_file *m, void *v)
677{
678 struct cgroup_subsys *ss;
679 int i;
680
681 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
682
683
684
685
686
687 mutex_lock(&cgroup_mutex);
688
689 for_each_subsys(ss, i)
690 seq_printf(m, "%s\t%d\t%d\t%d\n",
691 ss->legacy_name, ss->root->hierarchy_id,
692 atomic_read(&ss->root->nr_cgrps),
693 cgroup_ssid_enabled(i));
694
695 mutex_unlock(&cgroup_mutex);
696 return 0;
697}
698
699
700
701
702
703
704
705
706
707
708int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
709{
710 struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
711 struct cgroup *cgrp;
712 struct css_task_iter it;
713 struct task_struct *tsk;
714
715
716 if (dentry->d_sb->s_type != &cgroup_fs_type || !kn ||
717 kernfs_type(kn) != KERNFS_DIR)
718 return -EINVAL;
719
720 mutex_lock(&cgroup_mutex);
721
722
723
724
725
726
727 rcu_read_lock();
728 cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
729 if (!cgrp || cgroup_is_dead(cgrp)) {
730 rcu_read_unlock();
731 mutex_unlock(&cgroup_mutex);
732 return -ENOENT;
733 }
734 rcu_read_unlock();
735
736 css_task_iter_start(&cgrp->self, 0, &it);
737 while ((tsk = css_task_iter_next(&it))) {
738 switch (tsk->state) {
739 case TASK_RUNNING:
740 stats->nr_running++;
741 break;
742 case TASK_INTERRUPTIBLE:
743 stats->nr_sleeping++;
744 break;
745 case TASK_UNINTERRUPTIBLE:
746 stats->nr_uninterruptible++;
747 break;
748 case TASK_STOPPED:
749 stats->nr_stopped++;
750 break;
751 default:
752 if (delayacct_is_task_waiting_on_io(tsk))
753 stats->nr_io_wait++;
754 break;
755 }
756 }
757 css_task_iter_end(&it);
758
759 mutex_unlock(&cgroup_mutex);
760 return 0;
761}
762
763void cgroup1_check_for_release(struct cgroup *cgrp)
764{
765 if (notify_on_release(cgrp) && !cgroup_is_populated(cgrp) &&
766 !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
767 schedule_work(&cgrp->release_agent_work);
768}
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793void cgroup1_release_agent(struct work_struct *work)
794{
795 struct cgroup *cgrp =
796 container_of(work, struct cgroup, release_agent_work);
797 char *pathbuf = NULL, *agentbuf = NULL;
798 char *argv[3], *envp[3];
799 int ret;
800
801 mutex_lock(&cgroup_mutex);
802
803 pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
804 agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
805 if (!pathbuf || !agentbuf)
806 goto out;
807
808 spin_lock_irq(&css_set_lock);
809 ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
810 spin_unlock_irq(&css_set_lock);
811 if (ret < 0 || ret >= PATH_MAX)
812 goto out;
813
814 argv[0] = agentbuf;
815 argv[1] = pathbuf;
816 argv[2] = NULL;
817
818
819 envp[0] = "HOME=/";
820 envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
821 envp[2] = NULL;
822
823 mutex_unlock(&cgroup_mutex);
824 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
825 goto out_free;
826out:
827 mutex_unlock(&cgroup_mutex);
828out_free:
829 kfree(agentbuf);
830 kfree(pathbuf);
831}
832
833
834
835
836static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
837 const char *new_name_str)
838{
839 struct cgroup *cgrp = kn->priv;
840 int ret;
841
842 if (kernfs_type(kn) != KERNFS_DIR)
843 return -ENOTDIR;
844 if (kn->parent != new_parent)
845 return -EIO;
846
847
848
849
850
851
852 kernfs_break_active_protection(new_parent);
853 kernfs_break_active_protection(kn);
854
855 mutex_lock(&cgroup_mutex);
856
857 ret = kernfs_rename(kn, new_parent, new_name_str);
858 if (!ret)
859 TRACE_CGROUP_PATH(rename, cgrp);
860
861 mutex_unlock(&cgroup_mutex);
862
863 kernfs_unbreak_active_protection(kn);
864 kernfs_unbreak_active_protection(new_parent);
865 return ret;
866}
867
868static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
869{
870 struct cgroup_root *root = cgroup_root_from_kf(kf_root);
871 struct cgroup_subsys *ss;
872 int ssid;
873
874 for_each_subsys(ss, ssid)
875 if (root->subsys_mask & (1 << ssid))
876 seq_show_option(seq, ss->legacy_name, NULL);
877 if (root->flags & CGRP_ROOT_NOPREFIX)
878 seq_puts(seq, ",noprefix");
879 if (root->flags & CGRP_ROOT_XATTR)
880 seq_puts(seq, ",xattr");
881 if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
882 seq_puts(seq, ",cpuset_v2_mode");
883
884 spin_lock(&release_agent_path_lock);
885 if (strlen(root->release_agent_path))
886 seq_show_option(seq, "release_agent",
887 root->release_agent_path);
888 spin_unlock(&release_agent_path_lock);
889
890 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
891 seq_puts(seq, ",clone_children");
892 if (strlen(root->name))
893 seq_show_option(seq, "name", root->name);
894 return 0;
895}
896
/*
 * Option identifiers produced by fs_parse() for the v1 mount parameters
 * listed in cgroup1_param_specs; consumed by cgroup1_parse_param().
 */
enum cgroup1_param {
	Opt_all,		/* "all" */
	Opt_clone_children,	/* "clone_children" */
	Opt_cpuset_v2_mode,	/* "cpuset_v2_mode" */
	Opt_name,		/* "name=<string>" */
	Opt_none,		/* "none" */
	Opt_noprefix,		/* "noprefix" */
	Opt_release_agent,	/* "release_agent=<string>" */
	Opt_xattr,		/* "xattr" */
};
907
/*
 * Parameter table for v1 mounts: flag options take no value, "name" and
 * "release_agent" take a string.  Subsystem names are not listed here;
 * cgroup1_parse_param() handles them when fs_parse() reports -ENOPARAM.
 */
static const struct fs_parameter_spec cgroup1_param_specs[] = {
	fsparam_flag ("all", Opt_all),
	fsparam_flag ("clone_children", Opt_clone_children),
	fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode),
	fsparam_string("name", Opt_name),
	fsparam_flag ("none", Opt_none),
	fsparam_flag ("noprefix", Opt_noprefix),
	fsparam_string("release_agent", Opt_release_agent),
	fsparam_flag ("xattr", Opt_xattr),
	{}
};

/* Parameter description handed to fs_parse() for the "cgroup1" filesystem. */
const struct fs_parameter_description cgroup1_fs_parameters = {
	.name = "cgroup1",
	.specs = cgroup1_param_specs,
};
924
925int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
926{
927 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
928 struct cgroup_subsys *ss;
929 struct fs_parse_result result;
930 int opt, i;
931
932 opt = fs_parse(fc, &cgroup1_fs_parameters, param, &result);
933 if (opt == -ENOPARAM) {
934 if (strcmp(param->key, "source") == 0) {
935 fc->source = param->string;
936 param->string = NULL;
937 return 0;
938 }
939 for_each_subsys(ss, i) {
940 if (strcmp(param->key, ss->legacy_name))
941 continue;
942 ctx->subsys_mask |= (1 << i);
943 return 0;
944 }
945 return cg_invalf(fc, "cgroup1: Unknown subsys name '%s'", param->key);
946 }
947 if (opt < 0)
948 return opt;
949
950 switch (opt) {
951 case Opt_none:
952
953 ctx->none = true;
954 break;
955 case Opt_all:
956 ctx->all_ss = true;
957 break;
958 case Opt_noprefix:
959 ctx->flags |= CGRP_ROOT_NOPREFIX;
960 break;
961 case Opt_clone_children:
962 ctx->cpuset_clone_children = true;
963 break;
964 case Opt_cpuset_v2_mode:
965 ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE;
966 break;
967 case Opt_xattr:
968 ctx->flags |= CGRP_ROOT_XATTR;
969 break;
970 case Opt_release_agent:
971
972 if (ctx->release_agent)
973 return cg_invalf(fc, "cgroup1: release_agent respecified");
974 ctx->release_agent = param->string;
975 param->string = NULL;
976 break;
977 case Opt_name:
978
979 if (cgroup_no_v1_named)
980 return -ENOENT;
981
982 if (!param->size)
983 return cg_invalf(fc, "cgroup1: Empty name");
984 if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1)
985 return cg_invalf(fc, "cgroup1: Name too long");
986
987 for (i = 0; i < param->size; i++) {
988 char c = param->string[i];
989 if (isalnum(c))
990 continue;
991 if ((c == '.') || (c == '-') || (c == '_'))
992 continue;
993 return cg_invalf(fc, "cgroup1: Invalid name");
994 }
995
996 if (ctx->name)
997 return cg_invalf(fc, "cgroup1: name respecified");
998 ctx->name = param->string;
999 param->string = NULL;
1000 break;
1001 }
1002 return 0;
1003}
1004
1005static int check_cgroupfs_options(struct fs_context *fc)
1006{
1007 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1008 u16 mask = U16_MAX;
1009 u16 enabled = 0;
1010 struct cgroup_subsys *ss;
1011 int i;
1012
1013#ifdef CONFIG_CPUSETS
1014 mask = ~((u16)1 << cpuset_cgrp_id);
1015#endif
1016 for_each_subsys(ss, i)
1017 if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i))
1018 enabled |= 1 << i;
1019
1020 ctx->subsys_mask &= enabled;
1021
1022
1023
1024
1025
1026 if (!ctx->subsys_mask && !ctx->none && !ctx->name)
1027 ctx->all_ss = true;
1028
1029 if (ctx->all_ss) {
1030
1031 if (ctx->subsys_mask)
1032 return cg_invalf(fc, "cgroup1: subsys name conflicts with all");
1033
1034 ctx->subsys_mask = enabled;
1035 }
1036
1037
1038
1039
1040
1041 if (!ctx->subsys_mask && !ctx->name)
1042 return cg_invalf(fc, "cgroup1: Need name or subsystem set");
1043
1044
1045
1046
1047
1048
1049 if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask))
1050 return cg_invalf(fc, "cgroup1: noprefix used incorrectly");
1051
1052
1053 if (ctx->subsys_mask && ctx->none)
1054 return cg_invalf(fc, "cgroup1: none used incorrectly");
1055
1056 return 0;
1057}
1058
1059int cgroup1_reconfigure(struct fs_context *fc)
1060{
1061 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1062 struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb);
1063 struct cgroup_root *root = cgroup_root_from_kf(kf_root);
1064 int ret = 0;
1065 u16 added_mask, removed_mask;
1066
1067 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1068
1069
1070 ret = check_cgroupfs_options(fc);
1071 if (ret)
1072 goto out_unlock;
1073
1074 if (ctx->subsys_mask != root->subsys_mask || ctx->release_agent)
1075 pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
1076 task_tgid_nr(current), current->comm);
1077
1078 added_mask = ctx->subsys_mask & ~root->subsys_mask;
1079 removed_mask = root->subsys_mask & ~ctx->subsys_mask;
1080
1081
1082 if ((ctx->flags ^ root->flags) ||
1083 (ctx->name && strcmp(ctx->name, root->name))) {
1084 cg_invalf(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"",
1085 ctx->flags, ctx->name ?: "", root->flags, root->name);
1086 ret = -EINVAL;
1087 goto out_unlock;
1088 }
1089
1090
1091 if (!list_empty(&root->cgrp.self.children)) {
1092 ret = -EBUSY;
1093 goto out_unlock;
1094 }
1095
1096 ret = rebind_subsystems(root, added_mask);
1097 if (ret)
1098 goto out_unlock;
1099
1100 WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));
1101
1102 if (ctx->release_agent) {
1103 spin_lock(&release_agent_path_lock);
1104 strcpy(root->release_agent_path, ctx->release_agent);
1105 spin_unlock(&release_agent_path_lock);
1106 }
1107
1108 trace_cgroup_remount(root);
1109
1110 out_unlock:
1111 mutex_unlock(&cgroup_mutex);
1112 return ret;
1113}
1114
/*
 * kernfs syscall callbacks for v1 hierarchies.  rename and show_options are
 * implemented in this file; mkdir, rmdir and show_path are defined elsewhere.
 */
struct kernfs_syscall_ops cgroup1_kf_syscall_ops = {
	.rename = cgroup1_rename,
	.show_options = cgroup1_show_options,
	.mkdir = cgroup_mkdir,
	.rmdir = cgroup_rmdir,
	.show_path = cgroup_show_path,
};
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131static int cgroup1_root_to_use(struct fs_context *fc)
1132{
1133 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1134 struct cgroup_root *root;
1135 struct cgroup_subsys *ss;
1136 int i, ret;
1137
1138
1139 ret = check_cgroupfs_options(fc);
1140 if (ret)
1141 return ret;
1142
1143
1144
1145
1146
1147
1148
1149
1150 for_each_subsys(ss, i) {
1151 if (!(ctx->subsys_mask & (1 << i)) ||
1152 ss->root == &cgrp_dfl_root)
1153 continue;
1154
1155 if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt))
1156 return 1;
1157 cgroup_put(&ss->root->cgrp);
1158 }
1159
1160 for_each_root(root) {
1161 bool name_match = false;
1162
1163 if (root == &cgrp_dfl_root)
1164 continue;
1165
1166
1167
1168
1169
1170
1171 if (ctx->name) {
1172 if (strcmp(ctx->name, root->name))
1173 continue;
1174 name_match = true;
1175 }
1176
1177
1178
1179
1180
1181 if ((ctx->subsys_mask || ctx->none) &&
1182 (ctx->subsys_mask != root->subsys_mask)) {
1183 if (!name_match)
1184 continue;
1185 return -EBUSY;
1186 }
1187
1188 if (root->flags ^ ctx->flags)
1189 pr_warn("new mount options do not match the existing superblock, will be ignored\n");
1190
1191 ctx->root = root;
1192 return 0;
1193 }
1194
1195
1196
1197
1198
1199
1200 if (!ctx->subsys_mask && !ctx->none)
1201 return cg_invalf(fc, "cgroup1: No subsys list or none specified");
1202
1203
1204 if (ctx->ns != &init_cgroup_ns)
1205 return -EPERM;
1206
1207 root = kzalloc(sizeof(*root), GFP_KERNEL);
1208 if (!root)
1209 return -ENOMEM;
1210
1211 ctx->root = root;
1212 init_cgroup_root(ctx);
1213
1214 ret = cgroup_setup_root(root, ctx->subsys_mask);
1215 if (ret)
1216 cgroup_free_root(root);
1217 return ret;
1218}
1219
1220int cgroup1_get_tree(struct fs_context *fc)
1221{
1222 struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1223 int ret;
1224
1225
1226 if (!ns_capable(ctx->ns->user_ns, CAP_SYS_ADMIN))
1227 return -EPERM;
1228
1229 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1230
1231 ret = cgroup1_root_to_use(fc);
1232 if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt))
1233 ret = 1;
1234
1235 mutex_unlock(&cgroup_mutex);
1236
1237 if (!ret)
1238 ret = cgroup_do_get_tree(fc);
1239
1240 if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) {
1241 struct super_block *sb = fc->root->d_sb;
1242 dput(fc->root);
1243 deactivate_locked_super(sb);
1244 ret = 1;
1245 }
1246
1247 if (unlikely(ret > 0)) {
1248 msleep(10);
1249 return restart_syscall();
1250 }
1251 return ret;
1252}
1253
1254static int __init cgroup1_wq_init(void)
1255{
1256
1257
1258
1259
1260 cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
1261 0, 1);
1262 BUG_ON(!cgroup_pidlist_destroy_wq);
1263 return 0;
1264}
1265core_initcall(cgroup1_wq_init);
1266
1267static int __init cgroup_no_v1(char *str)
1268{
1269 struct cgroup_subsys *ss;
1270 char *token;
1271 int i;
1272
1273 while ((token = strsep(&str, ",")) != NULL) {
1274 if (!*token)
1275 continue;
1276
1277 if (!strcmp(token, "all")) {
1278 cgroup_no_v1_mask = U16_MAX;
1279 continue;
1280 }
1281
1282 if (!strcmp(token, "named")) {
1283 cgroup_no_v1_named = true;
1284 continue;
1285 }
1286
1287 for_each_subsys(ss, i) {
1288 if (strcmp(token, ss->name) &&
1289 strcmp(token, ss->legacy_name))
1290 continue;
1291
1292 cgroup_no_v1_mask |= 1 << i;
1293 }
1294 }
1295 return 1;
1296}
1297__setup("cgroup_no_v1=", cgroup_no_v1);
1298