#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/fs_struct.h>
#include <linux/fsnotify.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/task_work.h>
#include <linux/sched/task.h>
#include <uapi/linux/mount.h>
#include <linux/fs_context.h>
#include <linux/shmem_fs.h>

#include "pnode.h"
#include "internal.h"

/* Maximum number of mounts in a mount namespace */
unsigned int sysctl_mount_max __read_mostly = 100000;

static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
static unsigned int mp_hash_mask __read_mostly;
static unsigned int mp_hash_shift __read_mostly;

static __initdata unsigned long mhash_entries;
static int __init set_mhash_entries(char *str)
{
	if (!str)
		return 0;
	mhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mhash_entries=", set_mhash_entries);

static __initdata unsigned long mphash_entries;
static int __init set_mphash_entries(char *str)
{
	if (!str)
		return 0;
	mphash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mphash_entries=", set_mphash_entries);

static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);

static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted);		/* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints);	/* protected by namespace_sem */

struct mount_kattr {
	unsigned int attr_set;
	unsigned int attr_clr;
	unsigned int propagation;
	unsigned int lookup_flags;
	bool recurse;
	struct user_namespace *mnt_userns;
};

/* /sys/fs */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);

/*
 * mount_lock is a seqlock: writers take it (via lock_mount_hash()) to
 * modify the mount hash table and the mount tree, while lockless readers
 * sample the sequence count and retry if it changed underneath them.
 */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);

static inline void lock_mount_hash(void)
{
	write_seqlock(&mount_lock);
}

static inline void unlock_mount_hash(void)
{
	write_sequnlock(&mount_lock);
}

static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> m_hash_shift);
	return &mount_hashtable[tmp & m_hash_mask];
}

static inline struct hlist_head *mp_hash(struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> mp_hash_shift);
	return &mountpoint_hashtable[tmp & mp_hash_mask];
}

static int mnt_alloc_id(struct mount *mnt)
{
	int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);

	if (res < 0)
		return res;
	mnt->mnt_id = res;
	return 0;
}

static void mnt_free_id(struct mount *mnt)
{
	ida_free(&mnt_id_ida, mnt->mnt_id);
}

/*
 * Allocate a new peer group ID.
 */
static int mnt_alloc_group_id(struct mount *mnt)
{
	int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL);

	if (res < 0)
		return res;
	mnt->mnt_group_id = res;
	return 0;
}

/*
 * Release a peer group ID.
 */
void mnt_release_group_id(struct mount *mnt)
{
	ida_free(&mnt_group_ida, mnt->mnt_group_id);
	mnt->mnt_group_id = 0;
}

/*
 * Adjust the reference count of @mnt by @n.
 */
static inline void mnt_add_count(struct mount *mnt, int n)
{
#ifdef CONFIG_SMP
	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
#else
	preempt_disable();
	mnt->mnt_count += n;
	preempt_enable();
#endif
}

/*
 * Sum up the per-cpu reference counts of @mnt.
 */
int mnt_get_count(struct mount *mnt)
{
#ifdef CONFIG_SMP
	int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
	}

	return count;
#else
	return mnt->mnt_count;
#endif
}

static struct mount *alloc_vfsmnt(const char *name)
{
	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup_const(name,
							 GFP_KERNEL_ACCOUNT);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

#ifdef CONFIG_SMP
		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
		if (!mnt->mnt_pcp)
			goto out_free_devname;

		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else
		mnt->mnt_count = 1;
		mnt->mnt_writers = 0;
#endif

		INIT_HLIST_NODE(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
		INIT_HLIST_NODE(&mnt->mnt_mp_list);
		INIT_LIST_HEAD(&mnt->mnt_umounting);
		INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
		mnt->mnt.mnt_userns = &init_user_ns;
	}
	return mnt;

#ifdef CONFIG_SMP
out_free_devname:
	kfree_const(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}

/*
 * Most r/o checks on a filesystem are for operations that take discrete
 * amounts of time, like a write() or unlink().  The per-mount write count
 * below tracks when such operations start and end, so the VFS can tell
 * when it is safe to transition a mount to read-only.
 */

/*
 * __mnt_is_readonly - check whether a mount is read-only, either because
 * the mount itself has MNT_READONLY set or because its superblock is
 * read-only.
 */
bool __mnt_is_readonly(struct vfsmount *mnt)
{
	return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

static inline void mnt_inc_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers++;
#endif
}

static inline void mnt_dec_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers--;
#endif
}

static unsigned int mnt_get_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
	}

	return count;
#else
	return mnt->mnt_writers;
#endif
}

static int mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_sb->s_readonly_remount)
		return 1;
	/* Pairs with the smp_wmb() after setting s_readonly_remount. */
	smp_rmb();
	return __mnt_is_readonly(mnt);
}

/*
 * __mnt_want_write - get write access to a mount without freeze protection
 * @m: the mount on which to take a write
 *
 * Tell the low-level filesystem that a write is about to be performed on
 * @m and make sure writes are currently allowed (the mount is read-write).
 * Unlike mnt_want_write(), this does not take superblock freeze
 * protection.  When the write is finished, __mnt_drop_write() must be
 * called.  This is effectively a per-mount write refcount.
 */
int __mnt_want_write(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int ret = 0;

	preempt_disable();
	mnt_inc_writers(mnt);
	/*
	 * Make our increment of the writer count visible before we test
	 * MNT_WRITE_HOLD, then spin while a remount-to-readonly is holding
	 * writers off (see mnt_hold_writers()).
	 */
	smp_mb();
	while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
		cpu_relax();
	/*
	 * Once the holder has cleared MNT_WRITE_HOLD, make sure we also
	 * observe any MNT_READONLY it may have set; pairs with the
	 * barriers in mnt_hold_writers()/mnt_unhold_writers().
	 */
	smp_rmb();
	if (mnt_is_readonly(m)) {
		mnt_dec_writers(mnt);
		ret = -EROFS;
	}
	preempt_enable();

	return ret;
}

/*
 * mnt_want_write - get write access to a mount
 * @m: the mount on which to take a write
 *
 * Tell the low-level filesystem that a write is about to be performed on
 * @m and make sure writes are allowed.  This also takes superblock freeze
 * protection, so the write cannot race with a filesystem freeze.  When the
 * write is finished, mnt_drop_write() must be called.
 */
372int mnt_want_write(struct vfsmount *m)
373{
374 int ret;
375
376 sb_start_write(m->mnt_sb);
377 ret = __mnt_want_write(m);
378 if (ret)
379 sb_end_write(m->mnt_sb);
380 return ret;
381}
382EXPORT_SYMBOL_GPL(mnt_want_write);
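
/*
 * Illustrative usage sketch (not code from this file): a caller that is
 * about to dirty a filesystem brackets the modification like so, pairing
 * mnt_want_write() with mnt_drop_write():
 *
 *	err = mnt_want_write(path->mnt);
 *	if (err)
 *		return err;
 *	err = do_the_modification(path);	// hypothetical helper
 *	mnt_drop_write(path->mnt);
 *	return err;
 *
 * mnt_want_write() takes sb_start_write() freeze protection and the
 * per-mount write count; mnt_drop_write() releases both in reverse order.
 */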

/*
 * __mnt_want_write_file - get write access to a file's mount
 * @file: the file whose mount to write to
 *
 * If @file was opened for writing (FMODE_WRITER), write access to its
 * mount was already taken at open time, so only re-check that the mount
 * has not since become read-only; otherwise fall back to
 * __mnt_want_write().
 */
393int __mnt_want_write_file(struct file *file)
394{
395 if (file->f_mode & FMODE_WRITER) {
396
397
398
399
400 if (__mnt_is_readonly(file->f_path.mnt))
401 return -EROFS;
402 return 0;
403 }
404 return __mnt_want_write(file->f_path.mnt);
405}

/*
 * mnt_want_write_file - get write access to a file's mount
 * @file: the file whose mount to write to
 *
 * Like mnt_want_write(), but takes freeze protection on the file's
 * superblock and then calls __mnt_want_write_file().  Must be balanced
 * with mnt_drop_write_file().
 */
416int mnt_want_write_file(struct file *file)
417{
418 int ret;
419
420 sb_start_write(file_inode(file)->i_sb);
421 ret = __mnt_want_write_file(file);
422 if (ret)
423 sb_end_write(file_inode(file)->i_sb);
424 return ret;
425}
426EXPORT_SYMBOL_GPL(mnt_want_write_file);

/*
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tell the low-level filesystem that we are done performing writes to it.
 * Must be matched with a __mnt_want_write() call above.
 */
436void __mnt_drop_write(struct vfsmount *mnt)
437{
438 preempt_disable();
439 mnt_dec_writers(real_mount(mnt));
440 preempt_enable();
441}

/*
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Drops the per-mount write count and the superblock freeze protection
 * taken by mnt_want_write().
 */
451void mnt_drop_write(struct vfsmount *mnt)
452{
453 __mnt_drop_write(mnt);
454 sb_end_write(mnt->mnt_sb);
455}
456EXPORT_SYMBOL_GPL(mnt_drop_write);
457
458void __mnt_drop_write_file(struct file *file)
459{
460 if (!(file->f_mode & FMODE_WRITER))
461 __mnt_drop_write(file->f_path.mnt);
462}
463
464void mnt_drop_write_file(struct file *file)
465{
466 __mnt_drop_write_file(file);
467 sb_end_write(file_inode(file)->i_sb);
468}
469EXPORT_SYMBOL(mnt_drop_write_file);
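
/*
 * Note on the file-based variants above: when a file was opened for
 * writing (FMODE_WRITER), the open path already accounted a writer on the
 * mount, so __mnt_want_write_file() only re-checks for a read-only mount
 * and __mnt_drop_write_file() correspondingly skips the decrement.
 */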

static inline int mnt_hold_writers(struct mount *mnt)
{
	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
	/*
	 * Make MNT_WRITE_HOLD visible before we sample the writer count;
	 * pairs with the smp_mb() in __mnt_want_write() so that either we
	 * see their increment or they see MNT_WRITE_HOLD and spin.
	 */
	smp_mb();

	/*
	 * With MNT_WRITE_HOLD set, new writers are held off, so a writer
	 * count of zero means the mount can safely be switched to
	 * read-only.  Callers clear MNT_WRITE_HOLD again with
	 * mnt_unhold_writers() whether or not this succeeds.
	 */
	if (mnt_get_writers(mnt) > 0)
		return -EBUSY;

	return 0;
}

static inline void mnt_unhold_writers(struct mount *mnt)
{
	/*
	 * MNT_READONLY must become visible before we clear MNT_WRITE_HOLD,
	 * so that writers spinning in __mnt_want_write() see it once they
	 * are released.
	 */
	smp_wmb();
	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
}
511
512static int mnt_make_readonly(struct mount *mnt)
513{
514 int ret;
515
516 ret = mnt_hold_writers(mnt);
517 if (!ret)
518 mnt->mnt.mnt_flags |= MNT_READONLY;
519 mnt_unhold_writers(mnt);
520 return ret;
521}
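
/*
 * The read-only transition above relies on a small handshake with
 * __mnt_want_write(): mnt_hold_writers() sets MNT_WRITE_HOLD and then
 * samples the writer count, while would-be writers first bump their count
 * and then spin as long as MNT_WRITE_HOLD is set.  The smp_mb() on each
 * side guarantees that at least one of the two parties observes the
 * other, so a mount is never flipped to MNT_READONLY while a write is
 * still in flight.
 */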
522
523int sb_prepare_remount_readonly(struct super_block *sb)
524{
525 struct mount *mnt;
526 int err = 0;
527
528
529 if (atomic_long_read(&sb->s_remove_count))
530 return -EBUSY;
531
532 lock_mount_hash();
533 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
534 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
535 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
536 smp_mb();
537 if (mnt_get_writers(mnt) > 0) {
538 err = -EBUSY;
539 break;
540 }
541 }
542 }
543 if (!err && atomic_long_read(&sb->s_remove_count))
544 err = -EBUSY;
545
546 if (!err) {
547 sb->s_readonly_remount = 1;
548 smp_wmb();
549 }
550 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
551 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
552 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
553 }
554 unlock_mount_hash();
555
556 return err;
557}
558
559static void free_vfsmnt(struct mount *mnt)
560{
561 struct user_namespace *mnt_userns;
562
563 mnt_userns = mnt_user_ns(&mnt->mnt);
564 if (mnt_userns != &init_user_ns)
565 put_user_ns(mnt_userns);
566 kfree_const(mnt->mnt_devname);
567#ifdef CONFIG_SMP
568 free_percpu(mnt->mnt_pcp);
569#endif
570 kmem_cache_free(mnt_cache, mnt);
571}
572
573static void delayed_free_vfsmnt(struct rcu_head *head)
574{
575 free_vfsmnt(container_of(head, struct mount, mnt_rcu));
576}
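
/*
 * Frees are deferred through delayed_free_vfsmnt()/call_rcu() (see
 * cleanup_mnt() below), so that lockless path walkers still holding an
 * RCU reference to the vfsmount never see it freed under them.
 */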

/* call under rcu_read_lock */
579int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
580{
581 struct mount *mnt;
582 if (read_seqretry(&mount_lock, seq))
583 return 1;
584 if (bastard == NULL)
585 return 0;
586 mnt = real_mount(bastard);
587 mnt_add_count(mnt, 1);
588 smp_mb();
589 if (likely(!read_seqretry(&mount_lock, seq)))
590 return 0;
591 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
592 mnt_add_count(mnt, -1);
593 return 1;
594 }
595 lock_mount_hash();
596 if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
597 mnt_add_count(mnt, -1);
598 unlock_mount_hash();
599 return 1;
600 }
601 unlock_mount_hash();
602
603 return -1;
604}
605
606
607bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
608{
609 int res = __legitimize_mnt(bastard, seq);
610 if (likely(!res))
611 return true;
612 if (unlikely(res < 0)) {
613 rcu_read_unlock();
614 mntput(bastard);
615 rcu_read_lock();
616 }
617 return false;
618}
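
/*
 * Return-value protocol of __legitimize_mnt(), as used by legitimize_mnt()
 * and the RCU path walk: 0 means the mount is valid and, if non-NULL, now
 * holds an extra reference; 1 means the caller should simply retry the
 * lookup (no reference is held); -1 means a reference was taken on a
 * mount that is going away, and the caller must drop out of RCU mode and
 * mntput() it, which is exactly what legitimize_mnt() does above.
 */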

/*
 * Find the mount hashed on (@mnt, @dentry).  Called under rcu_read_lock()
 * or with mount_lock held.
 */
624struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
625{
626 struct hlist_head *head = m_hash(mnt, dentry);
627 struct mount *p;
628
629 hlist_for_each_entry_rcu(p, head, mnt_hash)
630 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
631 return p;
632 return NULL;
633}

/*
 * lookup_mnt - Return the child mount mounted at @path
 *
 * Looks up, under RCU and the mount_lock seqcount, the mount hashed on
 * the (parent mount, mountpoint dentry) pair given by @path.  A reference
 * is taken on the returned vfsmount; NULL is returned if nothing is
 * mounted there.
 */
651struct vfsmount *lookup_mnt(const struct path *path)
652{
653 struct mount *child_mnt;
654 struct vfsmount *m;
655 unsigned seq;
656
657 rcu_read_lock();
658 do {
659 seq = read_seqbegin(&mount_lock);
660 child_mnt = __lookup_mnt(path->mnt, path->dentry);
661 m = child_mnt ? &child_mnt->mnt : NULL;
662 } while (!legitimize_mnt(m, seq));
663 rcu_read_unlock();
664 return m;
665}
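
/*
 * The lockless lookup above follows the seqlock retry pattern used
 * throughout this file (illustrative skeleton):
 *
 *	rcu_read_lock();
 *	do {
 *		seq = read_seqbegin(&mount_lock);
 *		... read the mount hash ...
 *	} while (!legitimize_mnt(m, seq));
 *	rcu_read_unlock();
 *
 * legitimize_mnt() both validates the sequence number and takes a
 * reference, so the returned mount is safe to use outside RCU.
 */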
666
667static inline void lock_ns_list(struct mnt_namespace *ns)
668{
669 spin_lock(&ns->ns_lock);
670}
671
672static inline void unlock_ns_list(struct mnt_namespace *ns)
673{
674 spin_unlock(&ns->ns_lock);
675}
676
677static inline bool mnt_is_cursor(struct mount *mnt)
678{
679 return mnt->mnt.mnt_flags & MNT_CURSOR;
680}

/*
 * __is_local_mountpoint - check whether @dentry is used as a mountpoint
 * anywhere in the caller's mount namespace.  The inline wrapper handles
 * the common case of a dentry that is not a mountpoint at all; this slow
 * path walks every mount in the namespace under namespace_sem.
 */
697bool __is_local_mountpoint(struct dentry *dentry)
698{
699 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
700 struct mount *mnt;
701 bool is_covered = false;
702
703 down_read(&namespace_sem);
704 lock_ns_list(ns);
705 list_for_each_entry(mnt, &ns->list, mnt_list) {
706 if (mnt_is_cursor(mnt))
707 continue;
708 is_covered = (mnt->mnt_mountpoint == dentry);
709 if (is_covered)
710 break;
711 }
712 unlock_ns_list(ns);
713 up_read(&namespace_sem);
714
715 return is_covered;
716}
717
718static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
719{
720 struct hlist_head *chain = mp_hash(dentry);
721 struct mountpoint *mp;
722
723 hlist_for_each_entry(mp, chain, m_hash) {
724 if (mp->m_dentry == dentry) {
725 mp->m_count++;
726 return mp;
727 }
728 }
729 return NULL;
730}
731
732static struct mountpoint *get_mountpoint(struct dentry *dentry)
733{
734 struct mountpoint *mp, *new = NULL;
735 int ret;
736
737 if (d_mountpoint(dentry)) {
738
739 if (d_unlinked(dentry))
740 return ERR_PTR(-ENOENT);
741mountpoint:
742 read_seqlock_excl(&mount_lock);
743 mp = lookup_mountpoint(dentry);
744 read_sequnlock_excl(&mount_lock);
745 if (mp)
746 goto done;
747 }
748
749 if (!new)
750 new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
751 if (!new)
752 return ERR_PTR(-ENOMEM);
753
754
755
756 ret = d_set_mounted(dentry);
757
758
759 if (ret == -EBUSY)
760 goto mountpoint;
761
762
763 mp = ERR_PTR(ret);
764 if (ret)
765 goto done;
766
767
768 read_seqlock_excl(&mount_lock);
769 new->m_dentry = dget(dentry);
770 new->m_count = 1;
771 hlist_add_head(&new->m_hash, mp_hash(dentry));
772 INIT_HLIST_HEAD(&new->m_list);
773 read_sequnlock_excl(&mount_lock);
774
775 mp = new;
776 new = NULL;
777done:
778 kfree(new);
779 return mp;
780}
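
/*
 * get_mountpoint() above works in two steps: if the dentry is already a
 * mountpoint, look up (and take a reference on) the existing struct
 * mountpoint under mount_lock; otherwise allocate a new one, mark the
 * dentry with d_set_mounted() (retrying the lookup if that races and
 * returns -EBUSY), and hash the new mountpoint.
 */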
781
782
783
784
785
786static void __put_mountpoint(struct mountpoint *mp, struct list_head *list)
787{
788 if (!--mp->m_count) {
789 struct dentry *dentry = mp->m_dentry;
790 BUG_ON(!hlist_empty(&mp->m_list));
791 spin_lock(&dentry->d_lock);
792 dentry->d_flags &= ~DCACHE_MOUNTED;
793 spin_unlock(&dentry->d_lock);
794 dput_to_list(dentry, list);
795 hlist_del(&mp->m_hash);
796 kfree(mp);
797 }
798}
799
800
801static void put_mountpoint(struct mountpoint *mp)
802{
803 __put_mountpoint(mp, &ex_mountpoints);
804}
805
806static inline int check_mnt(struct mount *mnt)
807{
808 return mnt->mnt_ns == current->nsproxy->mnt_ns;
809}
810
811
812
813
814static void touch_mnt_namespace(struct mnt_namespace *ns)
815{
816 if (ns) {
817 ns->event = ++event;
818 wake_up_interruptible(&ns->poll);
819 }
820}
821
822
823
824
825static void __touch_mnt_namespace(struct mnt_namespace *ns)
826{
827 if (ns && ns->event != event) {
828 ns->event = event;
829 wake_up_interruptible(&ns->poll);
830 }
831}
832
833
834
835
836static struct mountpoint *unhash_mnt(struct mount *mnt)
837{
838 struct mountpoint *mp;
839 mnt->mnt_parent = mnt;
840 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
841 list_del_init(&mnt->mnt_child);
842 hlist_del_init_rcu(&mnt->mnt_hash);
843 hlist_del_init(&mnt->mnt_mp_list);
844 mp = mnt->mnt_mp;
845 mnt->mnt_mp = NULL;
846 return mp;
847}
848
849
850
851
852static void umount_mnt(struct mount *mnt)
853{
854 put_mountpoint(unhash_mnt(mnt));
855}
856
857
858
859
860void mnt_set_mountpoint(struct mount *mnt,
861 struct mountpoint *mp,
862 struct mount *child_mnt)
863{
864 mp->m_count++;
865 mnt_add_count(mnt, 1);
866 child_mnt->mnt_mountpoint = mp->m_dentry;
867 child_mnt->mnt_parent = mnt;
868 child_mnt->mnt_mp = mp;
869 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
870}
871
872static void __attach_mnt(struct mount *mnt, struct mount *parent)
873{
874 hlist_add_head_rcu(&mnt->mnt_hash,
875 m_hash(&parent->mnt, mnt->mnt_mountpoint));
876 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
877}
878
879
880
881
882static void attach_mnt(struct mount *mnt,
883 struct mount *parent,
884 struct mountpoint *mp)
885{
886 mnt_set_mountpoint(parent, mp, mnt);
887 __attach_mnt(mnt, parent);
888}
889
890void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
891{
892 struct mountpoint *old_mp = mnt->mnt_mp;
893 struct mount *old_parent = mnt->mnt_parent;
894
895 list_del_init(&mnt->mnt_child);
896 hlist_del_init(&mnt->mnt_mp_list);
897 hlist_del_init_rcu(&mnt->mnt_hash);
898
899 attach_mnt(mnt, parent, mp);
900
901 put_mountpoint(old_mp);
902 mnt_add_count(old_parent, -1);
903}
904
905
906
907
908static void commit_tree(struct mount *mnt)
909{
910 struct mount *parent = mnt->mnt_parent;
911 struct mount *m;
912 LIST_HEAD(head);
913 struct mnt_namespace *n = parent->mnt_ns;
914
915 BUG_ON(parent == mnt);
916
917 list_add_tail(&head, &mnt->mnt_list);
918 list_for_each_entry(m, &head, mnt_list)
919 m->mnt_ns = n;
920
921 list_splice(&head, n->list.prev);
922
923 n->mounts += n->pending_mounts;
924 n->pending_mounts = 0;
925
926 __attach_mnt(mnt, parent);
927 touch_mnt_namespace(n);
928}
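
/*
 * commit_tree() makes a freshly cloned tree visible: every mount in the
 * tree is assigned to the parent's namespace, the list is spliced into
 * the namespace's mount list, pending mount accounting is folded in, and
 * the root of the tree is hashed under its parent before pollers of
 * /proc/mounts are woken via touch_mnt_namespace().
 */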
929
930static struct mount *next_mnt(struct mount *p, struct mount *root)
931{
932 struct list_head *next = p->mnt_mounts.next;
933 if (next == &p->mnt_mounts) {
934 while (1) {
935 if (p == root)
936 return NULL;
937 next = p->mnt_child.next;
938 if (next != &p->mnt_parent->mnt_mounts)
939 break;
940 p = p->mnt_parent;
941 }
942 }
943 return list_entry(next, struct mount, mnt_child);
944}
945
946static struct mount *skip_mnt_tree(struct mount *p)
947{
948 struct list_head *prev = p->mnt_mounts.prev;
949 while (prev != &p->mnt_mounts) {
950 p = list_entry(prev, struct mount, mnt_child);
951 prev = p->mnt_mounts.prev;
952 }
953 return p;
954}

/*
 * vfs_create_mount - Create a mount for a configured superblock
 * @fc: The configuration context with the superblock attached
 *
 * Create a mount to an already configured superblock.  If necessary, the
 * caller should invoke vfs_get_tree() before calling this.
 *
 * Note that this does not attach the mount to anything.
 */
965struct vfsmount *vfs_create_mount(struct fs_context *fc)
966{
967 struct mount *mnt;
968
969 if (!fc->root)
970 return ERR_PTR(-EINVAL);
971
972 mnt = alloc_vfsmnt(fc->source ?: "none");
973 if (!mnt)
974 return ERR_PTR(-ENOMEM);
975
976 if (fc->sb_flags & SB_KERNMOUNT)
977 mnt->mnt.mnt_flags = MNT_INTERNAL;
978
979 atomic_inc(&fc->root->d_sb->s_active);
980 mnt->mnt.mnt_sb = fc->root->d_sb;
981 mnt->mnt.mnt_root = dget(fc->root);
982 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
983 mnt->mnt_parent = mnt;
984
985 lock_mount_hash();
986 list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
987 unlock_mount_hash();
988 return &mnt->mnt;
989}
990EXPORT_SYMBOL(vfs_create_mount);
991
992struct vfsmount *fc_mount(struct fs_context *fc)
993{
994 int err = vfs_get_tree(fc);
995 if (!err) {
996 up_write(&fc->root->d_sb->s_umount);
997 return vfs_create_mount(fc);
998 }
999 return ERR_PTR(err);
1000}
1001EXPORT_SYMBOL(fc_mount);
1002
1003struct vfsmount *vfs_kern_mount(struct file_system_type *type,
1004 int flags, const char *name,
1005 void *data)
1006{
1007 struct fs_context *fc;
1008 struct vfsmount *mnt;
1009 int ret = 0;
1010
1011 if (!type)
1012 return ERR_PTR(-EINVAL);
1013
1014 fc = fs_context_for_mount(type, flags);
1015 if (IS_ERR(fc))
1016 return ERR_CAST(fc);
1017
1018 if (name)
1019 ret = vfs_parse_fs_string(fc, "source",
1020 name, strlen(name));
1021 if (!ret)
1022 ret = parse_monolithic_mount_data(fc, data);
1023 if (!ret)
1024 mnt = fc_mount(fc);
1025 else
1026 mnt = ERR_PTR(ret);
1027
1028 put_fs_context(fc);
1029 return mnt;
1030}
1031EXPORT_SYMBOL_GPL(vfs_kern_mount);
1032
1033struct vfsmount *
1034vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
1035 const char *name, void *data)
1036{
1037
1038
1039
1040
1041 if (mountpoint->d_sb->s_user_ns != &init_user_ns)
1042 return ERR_PTR(-EPERM);
1043
1044 return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
1045}
1046EXPORT_SYMBOL_GPL(vfs_submount);
1047
1048static struct mount *clone_mnt(struct mount *old, struct dentry *root,
1049 int flag)
1050{
1051 struct super_block *sb = old->mnt.mnt_sb;
1052 struct mount *mnt;
1053 int err;
1054
1055 mnt = alloc_vfsmnt(old->mnt_devname);
1056 if (!mnt)
1057 return ERR_PTR(-ENOMEM);
1058
1059 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
1060 mnt->mnt_group_id = 0;
1061 else
1062 mnt->mnt_group_id = old->mnt_group_id;
1063
1064 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
1065 err = mnt_alloc_group_id(mnt);
1066 if (err)
1067 goto out_free;
1068 }
1069
1070 mnt->mnt.mnt_flags = old->mnt.mnt_flags;
1071 mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
1072
1073 atomic_inc(&sb->s_active);
1074 mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt);
1075 if (mnt->mnt.mnt_userns != &init_user_ns)
1076 mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns);
1077 mnt->mnt.mnt_sb = sb;
1078 mnt->mnt.mnt_root = dget(root);
1079 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
1080 mnt->mnt_parent = mnt;
1081 lock_mount_hash();
1082 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
1083 unlock_mount_hash();
1084
1085 if ((flag & CL_SLAVE) ||
1086 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
1087 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
1088 mnt->mnt_master = old;
1089 CLEAR_MNT_SHARED(mnt);
1090 } else if (!(flag & CL_PRIVATE)) {
1091 if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
1092 list_add(&mnt->mnt_share, &old->mnt_share);
1093 if (IS_MNT_SLAVE(old))
1094 list_add(&mnt->mnt_slave, &old->mnt_slave);
1095 mnt->mnt_master = old->mnt_master;
1096 } else {
1097 CLEAR_MNT_SHARED(mnt);
1098 }
1099 if (flag & CL_MAKE_SHARED)
1100 set_mnt_shared(mnt);
1101
1102
1103
1104 if (flag & CL_EXPIRE) {
1105 if (!list_empty(&old->mnt_expire))
1106 list_add(&mnt->mnt_expire, &old->mnt_expire);
1107 }
1108
1109 return mnt;
1110
1111 out_free:
1112 mnt_free_id(mnt);
1113 free_vfsmnt(mnt);
1114 return ERR_PTR(err);
1115}
1116
1117static void cleanup_mnt(struct mount *mnt)
1118{
1119 struct hlist_node *p;
1120 struct mount *m;
1121
1122
1123
1124
1125
1126
1127
1128 WARN_ON(mnt_get_writers(mnt));
1129 if (unlikely(mnt->mnt_pins.first))
1130 mnt_pin_kill(mnt);
1131 hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
1132 hlist_del(&m->mnt_umount);
1133 mntput(&m->mnt);
1134 }
1135 fsnotify_vfsmount_delete(&mnt->mnt);
1136 dput(mnt->mnt.mnt_root);
1137 deactivate_super(mnt->mnt.mnt_sb);
1138 mnt_free_id(mnt);
1139 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
1140}
1141
1142static void __cleanup_mnt(struct rcu_head *head)
1143{
1144 cleanup_mnt(container_of(head, struct mount, mnt_rcu));
1145}
1146
1147static LLIST_HEAD(delayed_mntput_list);
1148static void delayed_mntput(struct work_struct *unused)
1149{
1150 struct llist_node *node = llist_del_all(&delayed_mntput_list);
1151 struct mount *m, *t;
1152
1153 llist_for_each_entry_safe(m, t, node, mnt_llist)
1154 cleanup_mnt(m);
1155}
1156static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
1157
1158static void mntput_no_expire(struct mount *mnt)
1159{
1160 LIST_HEAD(list);
1161 int count;
1162
1163 rcu_read_lock();
1164 if (likely(READ_ONCE(mnt->mnt_ns))) {
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174 mnt_add_count(mnt, -1);
1175 rcu_read_unlock();
1176 return;
1177 }
1178 lock_mount_hash();
1179
1180
1181
1182
1183 smp_mb();
1184 mnt_add_count(mnt, -1);
1185 count = mnt_get_count(mnt);
1186 if (count != 0) {
1187 WARN_ON(count < 0);
1188 rcu_read_unlock();
1189 unlock_mount_hash();
1190 return;
1191 }
1192 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
1193 rcu_read_unlock();
1194 unlock_mount_hash();
1195 return;
1196 }
1197 mnt->mnt.mnt_flags |= MNT_DOOMED;
1198 rcu_read_unlock();
1199
1200 list_del(&mnt->mnt_instance);
1201
1202 if (unlikely(!list_empty(&mnt->mnt_mounts))) {
1203 struct mount *p, *tmp;
1204 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1205 __put_mountpoint(unhash_mnt(p), &list);
1206 hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children);
1207 }
1208 }
1209 unlock_mount_hash();
1210 shrink_dentry_list(&list);
1211
1212 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
1213 struct task_struct *task = current;
1214 if (likely(!(task->flags & PF_KTHREAD))) {
1215 init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
1216 if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME))
1217 return;
1218 }
1219 if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
1220 schedule_delayed_work(&delayed_mntput_work, 1);
1221 return;
1222 }
1223 cleanup_mnt(mnt);
1224}
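
/*
 * Final-mntput path: once the count drops to zero and the mount is not in
 * any namespace, mntput_no_expire() marks it MNT_DOOMED under mount_lock,
 * unlinks it from the superblock's s_mounts list, detaches any children
 * still stuck on it, and defers the actual cleanup: through task_work for
 * normal tasks, or through the delayed_mntput workqueue for kernel
 * threads, so cleanup_mnt() does not run in the caller's context.
 * MNT_INTERNAL mounts are cleaned up synchronously.
 */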
1225
1226void mntput(struct vfsmount *mnt)
1227{
1228 if (mnt) {
1229 struct mount *m = real_mount(mnt);
1230
1231 if (unlikely(m->mnt_expiry_mark))
1232 m->mnt_expiry_mark = 0;
1233 mntput_no_expire(m);
1234 }
1235}
1236EXPORT_SYMBOL(mntput);
1237
1238struct vfsmount *mntget(struct vfsmount *mnt)
1239{
1240 if (mnt)
1241 mnt_add_count(real_mount(mnt), 1);
1242 return mnt;
1243}
1244EXPORT_SYMBOL(mntget);

/*
 * path_is_mountpoint() - Check whether @path is a mountpoint in the
 * current namespace, i.e. whether something is mounted on it.  The check
 * is done locklessly under RCU and retried if the mount hash changed in
 * the meantime.
 */
1257bool path_is_mountpoint(const struct path *path)
1258{
1259 unsigned seq;
1260 bool res;
1261
1262 if (!d_mountpoint(path->dentry))
1263 return false;
1264
1265 rcu_read_lock();
1266 do {
1267 seq = read_seqbegin(&mount_lock);
1268 res = __path_is_mountpoint(path);
1269 } while (read_seqretry(&mount_lock, seq));
1270 rcu_read_unlock();
1271
1272 return res;
1273}
1274EXPORT_SYMBOL(path_is_mountpoint);
1275
1276struct vfsmount *mnt_clone_internal(const struct path *path)
1277{
1278 struct mount *p;
1279 p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
1280 if (IS_ERR(p))
1281 return ERR_CAST(p);
1282 p->mnt.mnt_flags |= MNT_INTERNAL;
1283 return &p->mnt;
1284}
1285
1286#ifdef CONFIG_PROC_FS
1287static struct mount *mnt_list_next(struct mnt_namespace *ns,
1288 struct list_head *p)
1289{
1290 struct mount *mnt, *ret = NULL;
1291
1292 lock_ns_list(ns);
1293 list_for_each_continue(p, &ns->list) {
1294 mnt = list_entry(p, typeof(*mnt), mnt_list);
1295 if (!mnt_is_cursor(mnt)) {
1296 ret = mnt;
1297 break;
1298 }
1299 }
1300 unlock_ns_list(ns);
1301
1302 return ret;
1303}
1304
1305
1306static void *m_start(struct seq_file *m, loff_t *pos)
1307{
1308 struct proc_mounts *p = m->private;
1309 struct list_head *prev;
1310
1311 down_read(&namespace_sem);
1312 if (!*pos) {
1313 prev = &p->ns->list;
1314 } else {
1315 prev = &p->cursor.mnt_list;
1316
1317
1318 if (list_empty(prev))
1319 return NULL;
1320 }
1321
1322 return mnt_list_next(p->ns, prev);
1323}
1324
1325static void *m_next(struct seq_file *m, void *v, loff_t *pos)
1326{
1327 struct proc_mounts *p = m->private;
1328 struct mount *mnt = v;
1329
1330 ++*pos;
1331 return mnt_list_next(p->ns, &mnt->mnt_list);
1332}
1333
1334static void m_stop(struct seq_file *m, void *v)
1335{
1336 struct proc_mounts *p = m->private;
1337 struct mount *mnt = v;
1338
1339 lock_ns_list(p->ns);
1340 if (mnt)
1341 list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
1342 else
1343 list_del_init(&p->cursor.mnt_list);
1344 unlock_ns_list(p->ns);
1345 up_read(&namespace_sem);
1346}
1347
1348static int m_show(struct seq_file *m, void *v)
1349{
1350 struct proc_mounts *p = m->private;
1351 struct mount *r = v;
1352 return p->show(m, &r->mnt);
1353}
1354
1355const struct seq_operations mounts_op = {
1356 .start = m_start,
1357 .next = m_next,
1358 .stop = m_stop,
1359 .show = m_show,
1360};
1361
1362void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
1363{
1364 down_read(&namespace_sem);
1365 lock_ns_list(ns);
1366 list_del(&cursor->mnt_list);
1367 unlock_ns_list(ns);
1368 up_read(&namespace_sem);
1369}
1370#endif

/*
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * Returns 1 if no mount in the tree has references beyond the two held by
 * the tree itself (i.e. no open files, pwds or similar pins), 0 otherwise.
 */
1380int may_umount_tree(struct vfsmount *m)
1381{
1382 struct mount *mnt = real_mount(m);
1383 int actual_refs = 0;
1384 int minimum_refs = 0;
1385 struct mount *p;
1386 BUG_ON(!m);
1387
1388
1389 lock_mount_hash();
1390 for (p = mnt; p; p = next_mnt(p, mnt)) {
1391 actual_refs += mnt_get_count(p);
1392 minimum_refs += 2;
1393 }
1394 unlock_mount_hash();
1395
1396 if (actual_refs > minimum_refs)
1397 return 0;
1398
1399 return 1;
1400}
1401
1402EXPORT_SYMBOL(may_umount_tree);

/*
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * Returns non-zero if the mount (together with anything the unmount would
 * propagate to) is not busy, i.e. could be unmounted without -EBUSY.
 * This is only a snapshot; the authoritative check is repeated under lock
 * in do_umount().
 */
1417int may_umount(struct vfsmount *mnt)
1418{
1419 int ret = 1;
1420 down_read(&namespace_sem);
1421 lock_mount_hash();
1422 if (propagate_mount_busy(real_mount(mnt), 2))
1423 ret = 0;
1424 unlock_mount_hash();
1425 up_read(&namespace_sem);
1426 return ret;
1427}
1428
1429EXPORT_SYMBOL(may_umount);
1430
1431static void namespace_unlock(void)
1432{
1433 struct hlist_head head;
1434 struct hlist_node *p;
1435 struct mount *m;
1436 LIST_HEAD(list);
1437
1438 hlist_move_list(&unmounted, &head);
1439 list_splice_init(&ex_mountpoints, &list);
1440
1441 up_write(&namespace_sem);
1442
1443 shrink_dentry_list(&list);
1444
1445 if (likely(hlist_empty(&head)))
1446 return;
1447
1448 synchronize_rcu_expedited();
1449
1450 hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
1451 hlist_del(&m->mnt_umount);
1452 mntput(&m->mnt);
1453 }
1454}
1455
1456static inline void namespace_lock(void)
1457{
1458 down_write(&namespace_sem);
1459}
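
/*
 * namespace_unlock() above is where lazily unmounted mounts finally die:
 * the global "unmounted" hlist and "ex_mountpoints" list are drained
 * after dropping namespace_sem, with a synchronize_rcu_expedited() in
 * between so that any lockless path walk still looking at the victims has
 * finished before they are mntput() for the last time.
 */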
1460
1461enum umount_tree_flags {
1462 UMOUNT_SYNC = 1,
1463 UMOUNT_PROPAGATE = 2,
1464 UMOUNT_CONNECTED = 4,
1465};
1466
1467static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
1468{
1469
1470 if (how & UMOUNT_SYNC)
1471 return true;
1472
1473
1474 if (!mnt_has_parent(mnt))
1475 return true;
1476
1477
1478
1479
1480
1481 if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
1482 return true;
1483
1484
1485 if (how & UMOUNT_CONNECTED)
1486 return false;
1487
1488
1489 if (IS_MNT_LOCKED(mnt))
1490 return false;
1491
1492
1493 return true;
1494}

/*
 * mount_lock must be held for write; namespace_sem must be held for write.
 */
1500static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1501{
1502 LIST_HEAD(tmp_list);
1503 struct mount *p;
1504
1505 if (how & UMOUNT_PROPAGATE)
1506 propagate_mount_unlock(mnt);
1507
1508
1509 for (p = mnt; p; p = next_mnt(p, mnt)) {
1510 p->mnt.mnt_flags |= MNT_UMOUNT;
1511 list_move(&p->mnt_list, &tmp_list);
1512 }
1513
1514
1515 list_for_each_entry(p, &tmp_list, mnt_list) {
1516 list_del_init(&p->mnt_child);
1517 }
1518
1519
1520 if (how & UMOUNT_PROPAGATE)
1521 propagate_umount(&tmp_list);
1522
1523 while (!list_empty(&tmp_list)) {
1524 struct mnt_namespace *ns;
1525 bool disconnect;
1526 p = list_first_entry(&tmp_list, struct mount, mnt_list);
1527 list_del_init(&p->mnt_expire);
1528 list_del_init(&p->mnt_list);
1529 ns = p->mnt_ns;
1530 if (ns) {
1531 ns->mounts--;
1532 __touch_mnt_namespace(ns);
1533 }
1534 p->mnt_ns = NULL;
1535 if (how & UMOUNT_SYNC)
1536 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
1537
1538 disconnect = disconnect_mount(p, how);
1539 if (mnt_has_parent(p)) {
1540 mnt_add_count(p->mnt_parent, -1);
1541 if (!disconnect) {
1542
1543 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
1544 } else {
1545 umount_mnt(p);
1546 }
1547 }
1548 change_mnt_propagation(p, MS_PRIVATE);
1549 if (disconnect)
1550 hlist_add_head(&p->mnt_umount, &unmounted);
1551 }
1552}
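
/*
 * umount_tree() proceeds in phases: optionally unlock and propagate the
 * unmount to peers (UMOUNT_PROPAGATE), mark every mount in the subtree
 * MNT_UMOUNT and pull it off its namespace's list, then walk the victims
 * one by one, fixing up parent refcounts and either keeping them hanging
 * off their (also dying) parent or fully disconnecting them and queueing
 * them on the "unmounted" list for namespace_unlock() to release.
 */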
1553
1554static void shrink_submounts(struct mount *mnt);
1555
1556static int do_umount_root(struct super_block *sb)
1557{
1558 int ret = 0;
1559
1560 down_write(&sb->s_umount);
1561 if (!sb_rdonly(sb)) {
1562 struct fs_context *fc;
1563
1564 fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY,
1565 SB_RDONLY);
1566 if (IS_ERR(fc)) {
1567 ret = PTR_ERR(fc);
1568 } else {
1569 ret = parse_monolithic_mount_data(fc, NULL);
1570 if (!ret)
1571 ret = reconfigure_super(fc);
1572 put_fs_context(fc);
1573 }
1574 }
1575 up_write(&sb->s_umount);
1576 return ret;
1577}
1578
1579static int do_umount(struct mount *mnt, int flags)
1580{
1581 struct super_block *sb = mnt->mnt.mnt_sb;
1582 int retval;
1583
1584 retval = security_sb_umount(&mnt->mnt, flags);
1585 if (retval)
1586 return retval;
1587
1588
1589
1590
1591
1592
1593
1594 if (flags & MNT_EXPIRE) {
1595 if (&mnt->mnt == current->fs->root.mnt ||
1596 flags & (MNT_FORCE | MNT_DETACH))
1597 return -EINVAL;
1598
1599
1600
1601
1602
1603 lock_mount_hash();
1604 if (mnt_get_count(mnt) != 2) {
1605 unlock_mount_hash();
1606 return -EBUSY;
1607 }
1608 unlock_mount_hash();
1609
1610 if (!xchg(&mnt->mnt_expiry_mark, 1))
1611 return -EAGAIN;
1612 }
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624 if (flags & MNT_FORCE && sb->s_op->umount_begin) {
1625 sb->s_op->umount_begin(sb);
1626 }
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
1638
1639
1640
1641
1642 if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
1643 return -EPERM;
1644 return do_umount_root(sb);
1645 }
1646
1647 namespace_lock();
1648 lock_mount_hash();
1649
1650
1651 retval = -EINVAL;
1652 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1653 goto out;
1654
1655 event++;
1656 if (flags & MNT_DETACH) {
1657 if (!list_empty(&mnt->mnt_list))
1658 umount_tree(mnt, UMOUNT_PROPAGATE);
1659 retval = 0;
1660 } else {
1661 shrink_submounts(mnt);
1662 retval = -EBUSY;
1663 if (!propagate_mount_busy(mnt, 2)) {
1664 if (!list_empty(&mnt->mnt_list))
1665 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
1666 retval = 0;
1667 }
1668 }
1669out:
1670 unlock_mount_hash();
1671 namespace_unlock();
1672 return retval;
1673}

/*
 * __detach_mounts - lazily unmount all mounts on the specified dentry
 *
 * During unlink, rmdir and d_drop it is possible to lose the path to an
 * existing mountpoint and end up leaking the mounts on it; this detaches
 * (or, where disconnecting is not allowed, lazily unmounts) everything
 * mounted on @dentry instead.
 */
1685void __detach_mounts(struct dentry *dentry)
1686{
1687 struct mountpoint *mp;
1688 struct mount *mnt;
1689
1690 namespace_lock();
1691 lock_mount_hash();
1692 mp = lookup_mountpoint(dentry);
1693 if (!mp)
1694 goto out_unlock;
1695
1696 event++;
1697 while (!hlist_empty(&mp->m_list)) {
1698 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
1699 if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
1700 umount_mnt(mnt);
1701 hlist_add_head(&mnt->mnt_umount, &unmounted);
1702 }
1703 else umount_tree(mnt, UMOUNT_CONNECTED);
1704 }
1705 put_mountpoint(mp);
1706out_unlock:
1707 unlock_mount_hash();
1708 namespace_unlock();
1709}

/*
 * Is the caller allowed to modify his namespace?
 */
1714static inline bool may_mount(void)
1715{
1716 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
1717}
1718
static void warn_mandlock(void)
{
	pr_warn_once("=======================================================\n"
		     "WARNING: The mand mount option has been deprecated and\n"
		     "         is ignored by this kernel. Remove the mand\n"
		     "         option from the mount to silence this warning.\n"
		     "=======================================================\n");
}
1727
1728static int can_umount(const struct path *path, int flags)
1729{
1730 struct mount *mnt = real_mount(path->mnt);
1731
1732 if (!may_mount())
1733 return -EPERM;
1734 if (path->dentry != path->mnt->mnt_root)
1735 return -EINVAL;
1736 if (!check_mnt(mnt))
1737 return -EINVAL;
1738 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1739 return -EINVAL;
1740 if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
1741 return -EPERM;
1742 return 0;
1743}
1744
1745
1746int path_umount(struct path *path, int flags)
1747{
1748 struct mount *mnt = real_mount(path->mnt);
1749 int ret;
1750
1751 ret = can_umount(path, flags);
1752 if (!ret)
1753 ret = do_umount(mnt, flags);
1754
1755
1756 dput(path->dentry);
1757 mntput_no_expire(mnt);
1758 return ret;
1759}
1760
1761static int ksys_umount(char __user *name, int flags)
1762{
1763 int lookup_flags = LOOKUP_MOUNTPOINT;
1764 struct path path;
1765 int ret;
1766
1767
1768 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1769 return -EINVAL;
1770
1771 if (!(flags & UMOUNT_NOFOLLOW))
1772 lookup_flags |= LOOKUP_FOLLOW;
1773 ret = user_path_at(AT_FDCWD, name, lookup_flags, &path);
1774 if (ret)
1775 return ret;
1776 return path_umount(&path, flags);
1777}
1778
1779SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1780{
1781 return ksys_umount(name, flags);
1782}
1783
1784#ifdef __ARCH_WANT_SYS_OLDUMOUNT
1785
1786
1787
1788
1789SYSCALL_DEFINE1(oldumount, char __user *, name)
1790{
1791 return ksys_umount(name, 0);
1792}
1793
1794#endif
1795
1796static bool is_mnt_ns_file(struct dentry *dentry)
1797{
1798
1799 return dentry->d_op == &ns_dentry_operations &&
1800 dentry->d_fsdata == &mntns_operations;
1801}
1802
1803static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
1804{
1805 return container_of(ns, struct mnt_namespace, ns);
1806}
1807
1808struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
1809{
1810 return &mnt->ns;
1811}
1812
1813static bool mnt_ns_loop(struct dentry *dentry)
1814{
1815
1816
1817
1818 struct mnt_namespace *mnt_ns;
1819 if (!is_mnt_ns_file(dentry))
1820 return false;
1821
1822 mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
1823 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
1824}
1825
1826struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1827 int flag)
1828{
1829 struct mount *res, *p, *q, *r, *parent;
1830
1831 if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
1832 return ERR_PTR(-EINVAL);
1833
1834 if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
1835 return ERR_PTR(-EINVAL);
1836
1837 res = q = clone_mnt(mnt, dentry, flag);
1838 if (IS_ERR(q))
1839 return q;
1840
1841 q->mnt_mountpoint = mnt->mnt_mountpoint;
1842
1843 p = mnt;
1844 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1845 struct mount *s;
1846 if (!is_subdir(r->mnt_mountpoint, dentry))
1847 continue;
1848
1849 for (s = r; s; s = next_mnt(s, r)) {
1850 if (!(flag & CL_COPY_UNBINDABLE) &&
1851 IS_MNT_UNBINDABLE(s)) {
1852 if (s->mnt.mnt_flags & MNT_LOCKED) {
1853
1854 q = ERR_PTR(-EPERM);
1855 goto out;
1856 } else {
1857 s = skip_mnt_tree(s);
1858 continue;
1859 }
1860 }
1861 if (!(flag & CL_COPY_MNT_NS_FILE) &&
1862 is_mnt_ns_file(s->mnt.mnt_root)) {
1863 s = skip_mnt_tree(s);
1864 continue;
1865 }
1866 while (p != s->mnt_parent) {
1867 p = p->mnt_parent;
1868 q = q->mnt_parent;
1869 }
1870 p = s;
1871 parent = q;
1872 q = clone_mnt(p, p->mnt.mnt_root, flag);
1873 if (IS_ERR(q))
1874 goto out;
1875 lock_mount_hash();
1876 list_add_tail(&q->mnt_list, &res->mnt_list);
1877 attach_mnt(q, parent, p->mnt_mp);
1878 unlock_mount_hash();
1879 }
1880 }
1881 return res;
1882out:
1883 if (res) {
1884 lock_mount_hash();
1885 umount_tree(res, UMOUNT_SYNC);
1886 unlock_mount_hash();
1887 }
1888 return q;
1889}

/* Caller should check returned pointer for errors */
1893struct vfsmount *collect_mounts(const struct path *path)
1894{
1895 struct mount *tree;
1896 namespace_lock();
1897 if (!check_mnt(real_mount(path->mnt)))
1898 tree = ERR_PTR(-EINVAL);
1899 else
1900 tree = copy_tree(real_mount(path->mnt), path->dentry,
1901 CL_COPY_ALL | CL_PRIVATE);
1902 namespace_unlock();
1903 if (IS_ERR(tree))
1904 return ERR_CAST(tree);
1905 return &tree->mnt;
1906}
1907
1908static void free_mnt_ns(struct mnt_namespace *);
1909static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);
1910
1911void dissolve_on_fput(struct vfsmount *mnt)
1912{
1913 struct mnt_namespace *ns;
1914 namespace_lock();
1915 lock_mount_hash();
1916 ns = real_mount(mnt)->mnt_ns;
1917 if (ns) {
1918 if (is_anon_ns(ns))
1919 umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
1920 else
1921 ns = NULL;
1922 }
1923 unlock_mount_hash();
1924 namespace_unlock();
1925 if (ns)
1926 free_mnt_ns(ns);
1927}
1928
1929void drop_collected_mounts(struct vfsmount *mnt)
1930{
1931 namespace_lock();
1932 lock_mount_hash();
1933 umount_tree(real_mount(mnt), 0);
1934 unlock_mount_hash();
1935 namespace_unlock();
1936}
1937
1938static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
1939{
1940 struct mount *child;
1941
1942 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
1943 if (!is_subdir(child->mnt_mountpoint, dentry))
1944 continue;
1945
1946 if (child->mnt.mnt_flags & MNT_LOCKED)
1947 return true;
1948 }
1949 return false;
1950}

/*
 * clone_private_mount - create a private clone of a path
 * @path: path to clone
 *
 * This creates a new vfsmount, which will be a clone of @path.  The new
 * mount will not be attached anywhere and will not expose anything to
 * userspace for inspection or tampering.
 *
 * Release with mntput().
 */
1962struct vfsmount *clone_private_mount(const struct path *path)
1963{
1964 struct mount *old_mnt = real_mount(path->mnt);
1965 struct mount *new_mnt;
1966
1967 down_read(&namespace_sem);
1968 if (IS_MNT_UNBINDABLE(old_mnt))
1969 goto invalid;
1970
1971 if (!check_mnt(old_mnt))
1972 goto invalid;
1973
1974 if (has_locked_children(old_mnt, path->dentry))
1975 goto invalid;
1976
1977 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
1978 up_read(&namespace_sem);
1979
1980 if (IS_ERR(new_mnt))
1981 return ERR_CAST(new_mnt);
1982
1983
1984 new_mnt->mnt_ns = MNT_NS_INTERNAL;
1985
1986 return &new_mnt->mnt;
1987
1988invalid:
1989 up_read(&namespace_sem);
1990 return ERR_PTR(-EINVAL);
1991}
1992EXPORT_SYMBOL_GPL(clone_private_mount);
1993
1994int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1995 struct vfsmount *root)
1996{
1997 struct mount *mnt;
1998 int res = f(root, arg);
1999 if (res)
2000 return res;
2001 list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
2002 res = f(&mnt->mnt, arg);
2003 if (res)
2004 return res;
2005 }
2006 return 0;
2007}
2008
2009static void lock_mnt_tree(struct mount *mnt)
2010{
2011 struct mount *p;
2012
2013 for (p = mnt; p; p = next_mnt(p, mnt)) {
2014 int flags = p->mnt.mnt_flags;
2015
2016 flags |= MNT_LOCK_ATIME;
2017
2018 if (flags & MNT_READONLY)
2019 flags |= MNT_LOCK_READONLY;
2020
2021 if (flags & MNT_NODEV)
2022 flags |= MNT_LOCK_NODEV;
2023
2024 if (flags & MNT_NOSUID)
2025 flags |= MNT_LOCK_NOSUID;
2026
2027 if (flags & MNT_NOEXEC)
2028 flags |= MNT_LOCK_NOEXEC;
2029
2030 if (list_empty(&p->mnt_expire))
2031 flags |= MNT_LOCKED;
2032 p->mnt.mnt_flags = flags;
2033 }
2034}
2035
2036static void cleanup_group_ids(struct mount *mnt, struct mount *end)
2037{
2038 struct mount *p;
2039
2040 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
2041 if (p->mnt_group_id && !IS_MNT_SHARED(p))
2042 mnt_release_group_id(p);
2043 }
2044}
2045
2046static int invent_group_ids(struct mount *mnt, bool recurse)
2047{
2048 struct mount *p;
2049
2050 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
2051 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
2052 int err = mnt_alloc_group_id(p);
2053 if (err) {
2054 cleanup_group_ids(mnt, p);
2055 return err;
2056 }
2057 }
2058 }
2059
2060 return 0;
2061}
2062
2063int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
2064{
2065 unsigned int max = READ_ONCE(sysctl_mount_max);
2066 unsigned int mounts = 0, old, pending, sum;
2067 struct mount *p;
2068
2069 for (p = mnt; p; p = next_mnt(p, mnt))
2070 mounts++;
2071
2072 old = ns->mounts;
2073 pending = ns->pending_mounts;
2074 sum = old + pending;
2075 if ((old > sum) ||
2076 (pending > sum) ||
2077 (max < sum) ||
2078 (mounts > (max - sum)))
2079 return -ENOSPC;
2080
2081 ns->pending_mounts = pending + mounts;
2082 return 0;
2083}

/*
 * attach_recursive_mnt - attach @source_mnt (and its subtree) at @dest_mp
 * on @dest_mnt, and propagate the new mounts to every mount that receives
 * propagation from @dest_mnt.  When @moving is true the tree is being
 * moved within the namespace rather than newly added.  See
 * Documentation/filesystems/sharedsubtree.rst for the propagation
 * semantics of shared, slave, private and unbindable destinations.
 */
2148static int attach_recursive_mnt(struct mount *source_mnt,
2149 struct mount *dest_mnt,
2150 struct mountpoint *dest_mp,
2151 bool moving)
2152{
2153 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
2154 HLIST_HEAD(tree_list);
2155 struct mnt_namespace *ns = dest_mnt->mnt_ns;
2156 struct mountpoint *smp;
2157 struct mount *child, *p;
2158 struct hlist_node *n;
2159 int err;
2160
2161
2162
2163
2164 smp = get_mountpoint(source_mnt->mnt.mnt_root);
2165 if (IS_ERR(smp))
2166 return PTR_ERR(smp);
2167
2168
2169 if (!moving) {
2170 err = count_mounts(ns, source_mnt);
2171 if (err)
2172 goto out;
2173 }
2174
2175 if (IS_MNT_SHARED(dest_mnt)) {
2176 err = invent_group_ids(source_mnt, true);
2177 if (err)
2178 goto out;
2179 err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
2180 lock_mount_hash();
2181 if (err)
2182 goto out_cleanup_ids;
2183 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
2184 set_mnt_shared(p);
2185 } else {
2186 lock_mount_hash();
2187 }
2188 if (moving) {
2189 unhash_mnt(source_mnt);
2190 attach_mnt(source_mnt, dest_mnt, dest_mp);
2191 touch_mnt_namespace(source_mnt->mnt_ns);
2192 } else {
2193 if (source_mnt->mnt_ns) {
2194
2195 list_del_init(&source_mnt->mnt_ns->list);
2196 }
2197 mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
2198 commit_tree(source_mnt);
2199 }
2200
2201 hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
2202 struct mount *q;
2203 hlist_del_init(&child->mnt_hash);
2204 q = __lookup_mnt(&child->mnt_parent->mnt,
2205 child->mnt_mountpoint);
2206 if (q)
2207 mnt_change_mountpoint(child, smp, q);
2208
2209 if (child->mnt_parent->mnt_ns->user_ns != user_ns)
2210 lock_mnt_tree(child);
2211 child->mnt.mnt_flags &= ~MNT_LOCKED;
2212 commit_tree(child);
2213 }
2214 put_mountpoint(smp);
2215 unlock_mount_hash();
2216
2217 return 0;
2218
2219 out_cleanup_ids:
2220 while (!hlist_empty(&tree_list)) {
2221 child = hlist_entry(tree_list.first, struct mount, mnt_hash);
2222 child->mnt_parent->mnt_ns->pending_mounts = 0;
2223 umount_tree(child, UMOUNT_SYNC);
2224 }
2225 unlock_mount_hash();
2226 cleanup_group_ids(source_mnt, NULL);
2227 out:
2228 ns->pending_mounts = 0;
2229
2230 read_seqlock_excl(&mount_lock);
2231 put_mountpoint(smp);
2232 read_sequnlock_excl(&mount_lock);
2233
2234 return err;
2235}
2236
2237static struct mountpoint *lock_mount(struct path *path)
2238{
2239 struct vfsmount *mnt;
2240 struct dentry *dentry = path->dentry;
2241retry:
2242 inode_lock(dentry->d_inode);
2243 if (unlikely(cant_mount(dentry))) {
2244 inode_unlock(dentry->d_inode);
2245 return ERR_PTR(-ENOENT);
2246 }
2247 namespace_lock();
2248 mnt = lookup_mnt(path);
2249 if (likely(!mnt)) {
2250 struct mountpoint *mp = get_mountpoint(dentry);
2251 if (IS_ERR(mp)) {
2252 namespace_unlock();
2253 inode_unlock(dentry->d_inode);
2254 return mp;
2255 }
2256 return mp;
2257 }
2258 namespace_unlock();
2259 inode_unlock(path->dentry->d_inode);
2260 path_put(path);
2261 path->mnt = mnt;
2262 dentry = path->dentry = dget(mnt->mnt_root);
2263 goto retry;
2264}
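
/*
 * lock_mount() pins a mountpoint for a subsequent attach: it takes the
 * inode lock on the target dentry and namespace_sem, and if something is
 * already mounted on that dentry it drops both, steps up onto the root of
 * the covering mount and retries, so the new mount always ends up on top
 * of the current mount stack.
 */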
2265
2266static void unlock_mount(struct mountpoint *where)
2267{
2268 struct dentry *dentry = where->m_dentry;
2269
2270 read_seqlock_excl(&mount_lock);
2271 put_mountpoint(where);
2272 read_sequnlock_excl(&mount_lock);
2273
2274 namespace_unlock();
2275 inode_unlock(dentry->d_inode);
2276}
2277
2278static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
2279{
2280 if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
2281 return -EINVAL;
2282
2283 if (d_is_dir(mp->m_dentry) !=
2284 d_is_dir(mnt->mnt.mnt_root))
2285 return -ENOTDIR;
2286
2287 return attach_recursive_mnt(mnt, p, mp, false);
2288}
2289
2290
2291
2292
2293
2294static int flags_to_propagation_type(int ms_flags)
2295{
2296 int type = ms_flags & ~(MS_REC | MS_SILENT);
2297
2298
2299 if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2300 return 0;
2301
2302 if (!is_power_of_2(type))
2303 return 0;
2304 return type;
2305}
2306
2307
2308
2309
2310static int do_change_type(struct path *path, int ms_flags)
2311{
2312 struct mount *m;
2313 struct mount *mnt = real_mount(path->mnt);
2314 int recurse = ms_flags & MS_REC;
2315 int type;
2316 int err = 0;
2317
2318 if (path->dentry != path->mnt->mnt_root)
2319 return -EINVAL;
2320
2321 type = flags_to_propagation_type(ms_flags);
2322 if (!type)
2323 return -EINVAL;
2324
2325 namespace_lock();
2326 if (type == MS_SHARED) {
2327 err = invent_group_ids(mnt, recurse);
2328 if (err)
2329 goto out_unlock;
2330 }
2331
2332 lock_mount_hash();
2333 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
2334 change_mnt_propagation(m, type);
2335 unlock_mount_hash();
2336
2337 out_unlock:
2338 namespace_unlock();
2339 return err;
2340}
2341
2342static struct mount *__do_loopback(struct path *old_path, int recurse)
2343{
2344 struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
2345
2346 if (IS_MNT_UNBINDABLE(old))
2347 return mnt;
2348
2349 if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
2350 return mnt;
2351
2352 if (!recurse && has_locked_children(old, old_path->dentry))
2353 return mnt;
2354
2355 if (recurse)
2356 mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
2357 else
2358 mnt = clone_mnt(old, old_path->dentry, 0);
2359
2360 if (!IS_ERR(mnt))
2361 mnt->mnt.mnt_flags &= ~MNT_LOCKED;
2362
2363 return mnt;
2364}
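
/*
 * __do_loopback() implements both bind and rbind: a non-recursive bind
 * clones just the one mount (and refuses if that would strip locked child
 * mounts from view), while a recursive bind copies the whole subtree via
 * copy_tree().  Either way the clone starts out with MNT_LOCKED cleared,
 * since the caller created it explicitly.
 */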

/*
 * Do loopback (bind) mount.
 */
2369static int do_loopback(struct path *path, const char *old_name,
2370 int recurse)
2371{
2372 struct path old_path;
2373 struct mount *mnt = NULL, *parent;
2374 struct mountpoint *mp;
2375 int err;
2376 if (!old_name || !*old_name)
2377 return -EINVAL;
2378 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
2379 if (err)
2380 return err;
2381
2382 err = -EINVAL;
2383 if (mnt_ns_loop(old_path.dentry))
2384 goto out;
2385
2386 mp = lock_mount(path);
2387 if (IS_ERR(mp)) {
2388 err = PTR_ERR(mp);
2389 goto out;
2390 }
2391
2392 parent = real_mount(path->mnt);
2393 if (!check_mnt(parent))
2394 goto out2;
2395
2396 mnt = __do_loopback(&old_path, recurse);
2397 if (IS_ERR(mnt)) {
2398 err = PTR_ERR(mnt);
2399 goto out2;
2400 }
2401
2402 err = graft_tree(mnt, parent, mp);
2403 if (err) {
2404 lock_mount_hash();
2405 umount_tree(mnt, UMOUNT_SYNC);
2406 unlock_mount_hash();
2407 }
2408out2:
2409 unlock_mount(mp);
2410out:
2411 path_put(&old_path);
2412 return err;
2413}
2414
2415static struct file *open_detached_copy(struct path *path, bool recursive)
2416{
2417 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
2418 struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
2419 struct mount *mnt, *p;
2420 struct file *file;
2421
2422 if (IS_ERR(ns))
2423 return ERR_CAST(ns);
2424
2425 namespace_lock();
2426 mnt = __do_loopback(path, recursive);
2427 if (IS_ERR(mnt)) {
2428 namespace_unlock();
2429 free_mnt_ns(ns);
2430 return ERR_CAST(mnt);
2431 }
2432
2433 lock_mount_hash();
2434 for (p = mnt; p; p = next_mnt(p, mnt)) {
2435 p->mnt_ns = ns;
2436 ns->mounts++;
2437 }
2438 ns->root = mnt;
2439 list_add_tail(&ns->list, &mnt->mnt_list);
2440 mntget(&mnt->mnt);
2441 unlock_mount_hash();
2442 namespace_unlock();
2443
2444 mntput(path->mnt);
2445 path->mnt = &mnt->mnt;
2446 file = dentry_open(path, O_PATH, current_cred());
2447 if (IS_ERR(file))
2448 dissolve_on_fput(path->mnt);
2449 else
2450 file->f_mode |= FMODE_NEED_UNMOUNT;
2451 return file;
2452}
2453
2454SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
2455{
2456 struct file *file;
2457 struct path path;
2458 int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
2459 bool detached = flags & OPEN_TREE_CLONE;
2460 int error;
2461 int fd;
2462
2463 BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
2464
2465 if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
2466 AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
2467 OPEN_TREE_CLOEXEC))
2468 return -EINVAL;
2469
2470 if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
2471 return -EINVAL;
2472
2473 if (flags & AT_NO_AUTOMOUNT)
2474 lookup_flags &= ~LOOKUP_AUTOMOUNT;
2475 if (flags & AT_SYMLINK_NOFOLLOW)
2476 lookup_flags &= ~LOOKUP_FOLLOW;
2477 if (flags & AT_EMPTY_PATH)
2478 lookup_flags |= LOOKUP_EMPTY;
2479
2480 if (detached && !may_mount())
2481 return -EPERM;
2482
2483 fd = get_unused_fd_flags(flags & O_CLOEXEC);
2484 if (fd < 0)
2485 return fd;
2486
2487 error = user_path_at(dfd, filename, lookup_flags, &path);
2488 if (unlikely(error)) {
2489 file = ERR_PTR(error);
2490 } else {
2491 if (detached)
2492 file = open_detached_copy(&path, flags & AT_RECURSIVE);
2493 else
2494 file = dentry_open(&path, O_PATH, current_cred());
2495 path_put(&path);
2496 }
2497 if (IS_ERR(file)) {
2498 put_unused_fd(fd);
2499 return PTR_ERR(file);
2500 }
2501 fd_install(fd, file);
2502 return fd;
2503}
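
/*
 * Illustrative userspace usage of the syscall above (assuming a syscall
 * wrapper for open_tree() and move_mount() is available):
 *
 *	int fd = open_tree(AT_FDCWD, "/mnt/src",
 *			   OPEN_TREE_CLONE | AT_RECURSIVE | OPEN_TREE_CLOEXEC);
 *	// fd now refers to a detached private copy of the subtree; it can
 *	// later be attached elsewhere with
 *	//	move_mount(fd, "", AT_FDCWD, "/mnt/dst", MOVE_MOUNT_F_EMPTY_PATH);
 *	// or simply closed, which dissolves the copy (FMODE_NEED_UNMOUNT).
 */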

/*
 * Don't allow an unprivileged remount to clear mount attributes that were
 * locked in place when the mount was copied into a less privileged mount
 * namespace (read-only, nodev, nosuid, noexec and the atime mode).
 */
2511static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
2512{
2513 unsigned int fl = mnt->mnt.mnt_flags;
2514
2515 if ((fl & MNT_LOCK_READONLY) &&
2516 !(mnt_flags & MNT_READONLY))
2517 return false;
2518
2519 if ((fl & MNT_LOCK_NODEV) &&
2520 !(mnt_flags & MNT_NODEV))
2521 return false;
2522
2523 if ((fl & MNT_LOCK_NOSUID) &&
2524 !(mnt_flags & MNT_NOSUID))
2525 return false;
2526
2527 if ((fl & MNT_LOCK_NOEXEC) &&
2528 !(mnt_flags & MNT_NOEXEC))
2529 return false;
2530
2531 if ((fl & MNT_LOCK_ATIME) &&
2532 ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
2533 return false;
2534
2535 return true;
2536}
2537
2538static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
2539{
2540 bool readonly_request = (mnt_flags & MNT_READONLY);
2541
2542 if (readonly_request == __mnt_is_readonly(&mnt->mnt))
2543 return 0;
2544
2545 if (readonly_request)
2546 return mnt_make_readonly(mnt);
2547
2548 mnt->mnt.mnt_flags &= ~MNT_READONLY;
2549 return 0;
2550}
2551
2552static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
2553{
2554 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
2555 mnt->mnt.mnt_flags = mnt_flags;
2556 touch_mnt_namespace(mnt->mnt_ns);
2557}
2558
2559static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
2560{
2561 struct super_block *sb = mnt->mnt_sb;
2562
2563 if (!__mnt_is_readonly(mnt) &&
2564 (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
2565 char *buf = (char *)__get_free_page(GFP_KERNEL);
2566 char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);
2567 struct tm tm;
2568
2569 time64_to_tm(sb->s_time_max, 0, &tm);
2570
2571 pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n",
2572 sb->s_type->name,
2573 is_mounted(mnt) ? "remounted" : "mounted",
2574 mntpath,
2575 tm.tm_year+1900, (unsigned long long)sb->s_time_max);
2576
2577 free_page((unsigned long)buf);
2578 }
2579}

/*
 * Handle reconfiguration of the mountpoint only, without altering the
 * superblock it refers to (MS_REMOUNT | MS_BIND).
 */
2586static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
2587{
2588 struct super_block *sb = path->mnt->mnt_sb;
2589 struct mount *mnt = real_mount(path->mnt);
2590 int ret;
2591
2592 if (!check_mnt(mnt))
2593 return -EINVAL;
2594
2595 if (path->dentry != mnt->mnt.mnt_root)
2596 return -EINVAL;
2597
2598 if (!can_change_locked_flags(mnt, mnt_flags))
2599 return -EPERM;
2600
2601
2602
2603
2604
2605 down_read(&sb->s_umount);
2606 lock_mount_hash();
2607 ret = change_mount_ro_state(mnt, mnt_flags);
2608 if (ret == 0)
2609 set_mount_attributes(mnt, mnt_flags);
2610 unlock_mount_hash();
2611 up_read(&sb->s_umount);
2612
2613 mnt_warn_timestamp_expiry(path, &mnt->mnt);
2614
2615 return ret;
2616}

/*
 * Change filesystem flags.  @path must be the root dentry of the mount;
 * remounting a bind mount of a subdirectory is not supported.
 */
2623static int do_remount(struct path *path, int ms_flags, int sb_flags,
2624 int mnt_flags, void *data)
2625{
2626 int err;
2627 struct super_block *sb = path->mnt->mnt_sb;
2628 struct mount *mnt = real_mount(path->mnt);
2629 struct fs_context *fc;
2630
2631 if (!check_mnt(mnt))
2632 return -EINVAL;
2633
2634 if (path->dentry != path->mnt->mnt_root)
2635 return -EINVAL;
2636
2637 if (!can_change_locked_flags(mnt, mnt_flags))
2638 return -EPERM;
2639
2640 fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK);
2641 if (IS_ERR(fc))
2642 return PTR_ERR(fc);
2643
2644 fc->oldapi = true;
2645 err = parse_monolithic_mount_data(fc, data);
2646 if (!err) {
2647 down_write(&sb->s_umount);
2648 err = -EPERM;
2649 if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
2650 err = reconfigure_super(fc);
2651 if (!err) {
2652 lock_mount_hash();
2653 set_mount_attributes(mnt, mnt_flags);
2654 unlock_mount_hash();
2655 }
2656 }
2657 up_write(&sb->s_umount);
2658 }
2659
2660 mnt_warn_timestamp_expiry(path, &mnt->mnt);
2661
2662 put_fs_context(fc);
2663 return err;
2664}
2665
2666static inline int tree_contains_unbindable(struct mount *mnt)
2667{
2668 struct mount *p;
2669 for (p = mnt; p; p = next_mnt(p, mnt)) {
2670 if (IS_MNT_UNBINDABLE(p))
2671 return 1;
2672 }
2673 return 0;
2674}

/*
 * Check that there are no references to earlier (or the same) mount
 * namespaces anywhere in the subtree: such nsfs mounts could otherwise be
 * used to construct mount namespace reference cycles.
 */
2682static bool check_for_nsfs_mounts(struct mount *subtree)
2683{
2684 struct mount *p;
2685 bool ret = false;
2686
2687 lock_mount_hash();
2688 for (p = subtree; p; p = next_mnt(p, subtree))
2689 if (mnt_ns_loop(p->mnt.mnt_root))
2690 goto out;
2691
2692 ret = true;
2693out:
2694 unlock_mount_hash();
2695 return ret;
2696}
2697
2698static int do_set_group(struct path *from_path, struct path *to_path)
2699{
2700 struct mount *from, *to;
2701 int err;
2702
2703 from = real_mount(from_path->mnt);
2704 to = real_mount(to_path->mnt);
2705
2706 namespace_lock();
2707
2708 err = -EINVAL;
2709
2710 if (!is_mounted(&from->mnt))
2711 goto out;
2712 if (!is_mounted(&to->mnt))
2713 goto out;
2714
2715 err = -EPERM;
2716
2717 if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
2718 goto out;
2719 if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
2720 goto out;
2721
2722 err = -EINVAL;
2723
2724 if (from_path->dentry != from_path->mnt->mnt_root)
2725 goto out;
2726 if (to_path->dentry != to_path->mnt->mnt_root)
2727 goto out;
2728
2729
2730 if (from->mnt.mnt_sb != to->mnt.mnt_sb)
2731 goto out;
2732
2733
2734 if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
2735 goto out;
2736
2737
2738 if (has_locked_children(from, to->mnt.mnt_root))
2739 goto out;
2740
2741
2742 if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
2743 goto out;
2744
2745
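 /* From mount should be either shared or slave */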
2746 if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
2747 goto out;
2748
2749 if (IS_MNT_SLAVE(from)) {
2750 struct mount *m = from->mnt_master;
2751
2752 list_add(&to->mnt_slave, &m->mnt_slave_list);
2753 to->mnt_master = m;
2754 }
2755
2756 if (IS_MNT_SHARED(from)) {
2757 to->mnt_group_id = from->mnt_group_id;
2758 list_add(&to->mnt_share, &from->mnt_share);
2759 lock_mount_hash();
2760 set_mnt_shared(to);
2761 unlock_mount_hash();
2762 }
2763
2764 err = 0;
2765out:
2766 namespace_unlock();
2767 return err;
2768}
2769
2770static int do_move_mount(struct path *old_path, struct path *new_path)
2771{
2772 struct mnt_namespace *ns;
2773 struct mount *p;
2774 struct mount *old;
2775 struct mount *parent;
2776 struct mountpoint *mp, *old_mp;
2777 int err;
2778 bool attached;
2779
2780 mp = lock_mount(new_path);
2781 if (IS_ERR(mp))
2782 return PTR_ERR(mp);
2783
2784 old = real_mount(old_path->mnt);
2785 p = real_mount(new_path->mnt);
2786 parent = old->mnt_parent;
2787 attached = mnt_has_parent(old);
2788 old_mp = old->mnt_mp;
2789 ns = old->mnt_ns;
2790
2791 err = -EINVAL;
2792
2793 if (!check_mnt(p))
2794 goto out;
2795
2796
2797 if (!is_mounted(&old->mnt))
2798 goto out;
2799
2800
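 /*
  * ... and the mount must either be attached in our namespace or be the
  * root of an anonymous mount namespace.
  */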
2801 if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
2802 goto out;
2803
2804 if (old->mnt.mnt_flags & MNT_LOCKED)
2805 goto out;
2806
2807 if (old_path->dentry != old_path->mnt->mnt_root)
2808 goto out;
2809
2810 if (d_is_dir(new_path->dentry) !=
2811 d_is_dir(old_path->dentry))
2812 goto out;
2813
2814
2815
2816 if (attached && IS_MNT_SHARED(parent))
2817 goto out;
2818
2819
2820
2821
2822 if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
2823 goto out;
2824 err = -ELOOP;
2825 if (!check_for_nsfs_mounts(old))
2826 goto out;
2827 for (; mnt_has_parent(p); p = p->mnt_parent)
2828 if (p == old)
2829 goto out;
2830
2831 err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
2832 attached);
2833 if (err)
2834 goto out;
2835
2836
2837
2838 list_del_init(&old->mnt_expire);
2839 if (attached)
2840 put_mountpoint(old_mp);
2841out:
2842 unlock_mount(mp);
2843 if (!err) {
2844 if (attached)
2845 mntput_no_expire(parent);
2846 else
2847 free_mnt_ns(ns);
2848 }
2849 return err;
2850}
2851
2852static int do_move_mount_old(struct path *path, const char *old_name)
2853{
2854 struct path old_path;
2855 int err;
2856
2857 if (!old_name || !*old_name)
2858 return -EINVAL;
2859
2860 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
2861 if (err)
2862 return err;
2863
2864 err = do_move_mount(&old_path, path);
2865 path_put(&old_path);
2866 return err;
2867}
2868
2869
2870
2871
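/*
 * Add a mount into a namespace's mount tree.
 */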
2872static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
2873 struct path *path, int mnt_flags)
2874{
2875 struct mount *parent = real_mount(path->mnt);
2876
2877 mnt_flags &= ~MNT_INTERNAL_FLAGS;
2878
2879 if (unlikely(!check_mnt(parent))) {
2880
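 /* that's acceptable only for automounts done in private ns */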
2881 if (!(mnt_flags & MNT_SHRINKABLE))
2882 return -EINVAL;
2883
2884 if (!parent->mnt_ns)
2885 return -EINVAL;
2886 }
2887
2888
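 /* Refuse the same filesystem on the same mount point */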
2889 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
2890 path->mnt->mnt_root == path->dentry)
2891 return -EBUSY;
2892
2893 if (d_is_symlink(newmnt->mnt.mnt_root))
2894 return -EINVAL;
2895
2896 newmnt->mnt.mnt_flags = mnt_flags;
2897 return graft_tree(newmnt, parent, mp);
2898}
2899
2900static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
2901
2902
2903
2904
2905
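/*
 * Create a new mount using a superblock configuration and request it
 * be added to the namespace tree.
 */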
2906static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
2907 unsigned int mnt_flags)
2908{
2909 struct vfsmount *mnt;
2910 struct mountpoint *mp;
2911 struct super_block *sb = fc->root->d_sb;
2912 int error;
2913
2914 error = security_sb_kern_mount(sb);
2915 if (!error && mount_too_revealing(sb, &mnt_flags))
2916 error = -EPERM;
2917
2918 if (unlikely(error)) {
2919 fc_drop_locked(fc);
2920 return error;
2921 }
2922
2923 up_write(&sb->s_umount);
2924
2925 mnt = vfs_create_mount(fc);
2926 if (IS_ERR(mnt))
2927 return PTR_ERR(mnt);
2928
2929 mnt_warn_timestamp_expiry(mountpoint, mnt);
2930
2931 mp = lock_mount(mountpoint);
2932 if (IS_ERR(mp)) {
2933 mntput(mnt);
2934 return PTR_ERR(mp);
2935 }
2936 error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags);
2937 unlock_mount(mp);
2938 if (error < 0)
2939 mntput(mnt);
2940 return error;
2941}
2942
2943
2944
2945
2946
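/*
 * Create a new mount for userspace and request it to be added into the
 * namespace's tree.
 */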
2947static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
2948 int mnt_flags, const char *name, void *data)
2949{
2950 struct file_system_type *type;
2951 struct fs_context *fc;
2952 const char *subtype = NULL;
2953 int err = 0;
2954
2955 if (!fstype)
2956 return -EINVAL;
2957
2958 type = get_fs_type(fstype);
2959 if (!type)
2960 return -ENODEV;
2961
2962 if (type->fs_flags & FS_HAS_SUBTYPE) {
2963 subtype = strchr(fstype, '.');
2964 if (subtype) {
2965 subtype++;
2966 if (!*subtype) {
2967 put_filesystem(type);
2968 return -EINVAL;
2969 }
2970 }
2971 }
2972
2973 fc = fs_context_for_mount(type, sb_flags);
2974 put_filesystem(type);
2975 if (IS_ERR(fc))
2976 return PTR_ERR(fc);
2977
2978 if (subtype)
2979 err = vfs_parse_fs_string(fc, "subtype",
2980 subtype, strlen(subtype));
2981 if (!err && name)
2982 err = vfs_parse_fs_string(fc, "source", name, strlen(name));
2983 if (!err)
2984 err = parse_monolithic_mount_data(fc, data);
2985 if (!err && !mount_capable(fc))
2986 err = -EPERM;
2987 if (!err)
2988 err = vfs_get_tree(fc);
2989 if (!err)
2990 err = do_new_mount_fc(fc, path, mnt_flags);
2991
2992 put_fs_context(fc);
2993 return err;
2994}
2995
2996int finish_automount(struct vfsmount *m, struct path *path)
2997{
2998 struct dentry *dentry = path->dentry;
2999 struct mountpoint *mp;
3000 struct mount *mnt;
3001 int err;
3002
3003 if (!m)
3004 return 0;
3005 if (IS_ERR(m))
3006 return PTR_ERR(m);
3007
3008 mnt = real_mount(m);
3009
3010
3011
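 /*
  * The new mount record should have at least 2 refs to prevent it being
  * expired before we get a chance to add it.
  */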
3012 BUG_ON(mnt_get_count(mnt) < 2);
3013
3014 if (m->mnt_sb == path->mnt->mnt_sb &&
3015 m->mnt_root == dentry) {
3016 err = -ELOOP;
3017 goto discard;
3018 }
3019
3020
3021
3022
3023
3024
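 /*
  * We don't want to use lock_mount() - in this case finding something
  * that overmounts our mountpoint means "quietly drop what we've got",
  * not "try to mount it on top".
  */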
3025 inode_lock(dentry->d_inode);
3026 namespace_lock();
3027 if (unlikely(cant_mount(dentry))) {
3028 err = -ENOENT;
3029 goto discard_locked;
3030 }
3031 rcu_read_lock();
3032 if (unlikely(__lookup_mnt(path->mnt, dentry))) {
3033 rcu_read_unlock();
3034 err = 0;
3035 goto discard_locked;
3036 }
3037 rcu_read_unlock();
3038 mp = get_mountpoint(dentry);
3039 if (IS_ERR(mp)) {
3040 err = PTR_ERR(mp);
3041 goto discard_locked;
3042 }
3043
3044 err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
3045 unlock_mount(mp);
3046 if (unlikely(err))
3047 goto discard;
3048 mntput(m);
3049 return 0;
3050
3051discard_locked:
3052 namespace_unlock();
3053 inode_unlock(dentry->d_inode);
3054discard:
3055
3056 if (!list_empty(&mnt->mnt_expire)) {
3057 namespace_lock();
3058 list_del_init(&mnt->mnt_expire);
3059 namespace_unlock();
3060 }
3061 mntput(m);
3062 mntput(m);
3063 return err;
3064}
3065
3066
3067
3068
3069
3070
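/*
 * mnt_set_expiry - Put a mount on an expiration list
 * @mnt: The mount to put on the list.
 * @expiry_list: The list to put the mount on.
 */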
3071void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
3072{
3073 namespace_lock();
3074
3075 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
3076
3077 namespace_unlock();
3078}
3079EXPORT_SYMBOL(mnt_set_expiry);
3080
3081
3082
3083
3084
3085
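/*
 * Process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here.
 */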
3086void mark_mounts_for_expiry(struct list_head *mounts)
3087{
3088 struct mount *mnt, *next;
3089 LIST_HEAD(graveyard);
3090
3091 if (list_empty(mounts))
3092 return;
3093
3094 namespace_lock();
3095 lock_mount_hash();
3096
3097
3098
3099
3100
3101
3102
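 /*
  * Extract from the expiration list every vfsmount that matches the
  * following criteria:
  * - only referenced by its parent vfsmount
  * - still marked for expiry (marked on the last call here; marks are
  *   cleared by mntput())
  */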
3103 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
3104 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
3105 propagate_mount_busy(mnt, 1))
3106 continue;
3107 list_move(&mnt->mnt_expire, &graveyard);
3108 }
3109 while (!list_empty(&graveyard)) {
3110 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
3111 touch_mnt_namespace(mnt->mnt_ns);
3112 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3113 }
3114 unlock_mount_hash();
3115 namespace_unlock();
3116}
3117
3118EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
3119
3120
3121
3122
3123
3124
3125
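/*
 * Search the list of submounts for a given mountpoint, and move any
 * shrinkable submounts to the 'graveyard' list.
 */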
3126static int select_submounts(struct mount *parent, struct list_head *graveyard)
3127{
3128 struct mount *this_parent = parent;
3129 struct list_head *next;
3130 int found = 0;
3131
3132repeat:
3133 next = this_parent->mnt_mounts.next;
3134resume:
3135 while (next != &this_parent->mnt_mounts) {
3136 struct list_head *tmp = next;
3137 struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
3138
3139 next = tmp->next;
3140 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
3141 continue;
3142
3143
3144
3145 if (!list_empty(&mnt->mnt_mounts)) {
3146 this_parent = mnt;
3147 goto repeat;
3148 }
3149
3150 if (!propagate_mount_busy(mnt, 1)) {
3151 list_move_tail(&mnt->mnt_expire, graveyard);
3152 found++;
3153 }
3154 }
3155
3156
3157
3158 if (this_parent != parent) {
3159 next = this_parent->mnt_child.next;
3160 this_parent = this_parent->mnt_parent;
3161 goto resume;
3162 }
3163 return found;
3164}
3165
3166
3167
3168
3169
3170
3171
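/*
 * Process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint.
 *
 * mount_lock must be held for write.
 */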
3172static void shrink_submounts(struct mount *mnt)
3173{
3174 LIST_HEAD(graveyard);
3175 struct mount *m;
3176
3177
3178 while (select_submounts(mnt, &graveyard)) {
3179 while (!list_empty(&graveyard)) {
3180 m = list_first_entry(&graveyard, struct mount,
3181 mnt_expire);
3182 touch_mnt_namespace(m->mnt_ns);
3183 umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3184 }
3185 }
3186}
3187
3188static void *copy_mount_options(const void __user * data)
3189{
3190 char *copy;
3191 unsigned left, offset;
3192
3193 if (!data)
3194 return NULL;
3195
3196 copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
3197 if (!copy)
3198 return ERR_PTR(-ENOMEM);
3199
3200 left = copy_from_user(copy, data, PAGE_SIZE);
3201
3202
3203
3204
3205
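 /*
  * Not all architectures have an exact copy_from_user().  Resort to
  * byte at a time to pick up as much of the user page as possible.
  */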
3206 offset = PAGE_SIZE - left;
3207 while (left) {
3208 char c;
3209 if (get_user(c, (const char __user *)data + offset))
3210 break;
3211 copy[offset] = c;
3212 left--;
3213 offset++;
3214 }
3215
3216 if (left == PAGE_SIZE) {
3217 kfree(copy);
3218 return ERR_PTR(-EFAULT);
3219 }
3220
3221 return copy;
3222}
3223
3224static char *copy_mount_string(const void __user *data)
3225{
3226 return data ? strndup_user(data, PATH_MAX) : NULL;
3227}
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
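/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.  When the flags
 * word was introduced its top half was required to have the magic value
 * 0xC0ED, and this remained so until 2.4.0-test9.  Therefore, if this
 * magic number is present, it carries no information and must be discarded.
 */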
3243int path_mount(const char *dev_name, struct path *path,
3244 const char *type_page, unsigned long flags, void *data_page)
3245{
3246 unsigned int mnt_flags = 0, sb_flags;
3247 int ret;
3248
3249
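 /* Discard magic */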
3250 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
3251 flags &= ~MS_MGC_MSK;
3252
3253
3254 if (data_page)
3255 ((char *)data_page)[PAGE_SIZE - 1] = 0;
3256
3257 if (flags & MS_NOUSER)
3258 return -EINVAL;
3259
3260 ret = security_sb_mount(dev_name, path, type_page, flags, data_page);
3261 if (ret)
3262 return ret;
3263 if (!may_mount())
3264 return -EPERM;
3265 if (flags & SB_MANDLOCK)
3266 warn_mandlock();
3267
3268
3269 if (!(flags & MS_NOATIME))
3270 mnt_flags |= MNT_RELATIME;
3271
3272
3273 if (flags & MS_NOSUID)
3274 mnt_flags |= MNT_NOSUID;
3275 if (flags & MS_NODEV)
3276 mnt_flags |= MNT_NODEV;
3277 if (flags & MS_NOEXEC)
3278 mnt_flags |= MNT_NOEXEC;
3279 if (flags & MS_NOATIME)
3280 mnt_flags |= MNT_NOATIME;
3281 if (flags & MS_NODIRATIME)
3282 mnt_flags |= MNT_NODIRATIME;
3283 if (flags & MS_STRICTATIME)
3284 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
3285 if (flags & MS_RDONLY)
3286 mnt_flags |= MNT_READONLY;
3287 if (flags & MS_NOSYMFOLLOW)
3288 mnt_flags |= MNT_NOSYMFOLLOW;
3289
3290
3291 if ((flags & MS_REMOUNT) &&
3292 ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
3293 MS_STRICTATIME)) == 0)) {
3294 mnt_flags &= ~MNT_ATIME_MASK;
3295 mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK;
3296 }
3297
3298 sb_flags = flags & (SB_RDONLY |
3299 SB_SYNCHRONOUS |
3300 SB_MANDLOCK |
3301 SB_DIRSYNC |
3302 SB_SILENT |
3303 SB_POSIXACL |
3304 SB_LAZYTIME |
3305 SB_I_VERSION);
3306
3307 if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
3308 return do_reconfigure_mnt(path, mnt_flags);
3309 if (flags & MS_REMOUNT)
3310 return do_remount(path, flags, sb_flags, mnt_flags, data_page);
3311 if (flags & MS_BIND)
3312 return do_loopback(path, dev_name, flags & MS_REC);
3313 if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
3314 return do_change_type(path, flags);
3315 if (flags & MS_MOVE)
3316 return do_move_mount_old(path, dev_name);
3317
3318 return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name,
3319 data_page);
3320}
3321
3322long do_mount(const char *dev_name, const char __user *dir_name,
3323 const char *type_page, unsigned long flags, void *data_page)
3324{
3325 struct path path;
3326 int ret;
3327
3328 ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path);
3329 if (ret)
3330 return ret;
3331 ret = path_mount(dev_name, &path, type_page, flags, data_page);
3332 path_put(&path);
3333 return ret;
3334}
3335
3336static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
3337{
3338 return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
3339}
3340
3341static void dec_mnt_namespaces(struct ucounts *ucounts)
3342{
3343 dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
3344}
3345
3346static void free_mnt_ns(struct mnt_namespace *ns)
3347{
3348 if (!is_anon_ns(ns))
3349 ns_free_inum(&ns->ns);
3350 dec_mnt_namespaces(ns->ucounts);
3351 put_user_ns(ns->user_ns);
3352 kfree(ns);
3353}
3354
3355
3356
3357
3358
3359
3360
3361
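/*
 * Assign a sequence number so we can detect when we attempt to bind
 * mount a reference to an older mount namespace into the current
 * mount namespace, preventing reference counting loops.  A 64bit
 * number incrementing at 10Ghz will take 12,427 years to wrap which
 * is effectively never, so we can ignore the possibility.
 */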
3362static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
3363
3364static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
3365{
3366 struct mnt_namespace *new_ns;
3367 struct ucounts *ucounts;
3368 int ret;
3369
3370 ucounts = inc_mnt_namespaces(user_ns);
3371 if (!ucounts)
3372 return ERR_PTR(-ENOSPC);
3373
3374 new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL_ACCOUNT);
3375 if (!new_ns) {
3376 dec_mnt_namespaces(ucounts);
3377 return ERR_PTR(-ENOMEM);
3378 }
3379 if (!anon) {
3380 ret = ns_alloc_inum(&new_ns->ns);
3381 if (ret) {
3382 kfree(new_ns);
3383 dec_mnt_namespaces(ucounts);
3384 return ERR_PTR(ret);
3385 }
3386 }
3387 new_ns->ns.ops = &mntns_operations;
3388 if (!anon)
3389 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
3390 refcount_set(&new_ns->ns.count, 1);
3391 INIT_LIST_HEAD(&new_ns->list);
3392 init_waitqueue_head(&new_ns->poll);
3393 spin_lock_init(&new_ns->ns_lock);
3394 new_ns->user_ns = get_user_ns(user_ns);
3395 new_ns->ucounts = ucounts;
3396 return new_ns;
3397}
3398
3399__latent_entropy
3400struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
3401 struct user_namespace *user_ns, struct fs_struct *new_fs)
3402{
3403 struct mnt_namespace *new_ns;
3404 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
3405 struct mount *p, *q;
3406 struct mount *old;
3407 struct mount *new;
3408 int copy_flags;
3409
3410 BUG_ON(!ns);
3411
3412 if (likely(!(flags & CLONE_NEWNS))) {
3413 get_mnt_ns(ns);
3414 return ns;
3415 }
3416
3417 old = ns->root;
3418
3419 new_ns = alloc_mnt_ns(user_ns, false);
3420 if (IS_ERR(new_ns))
3421 return new_ns;
3422
3423 namespace_lock();
3424
3425 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
3426 if (user_ns != ns->user_ns)
3427 copy_flags |= CL_SHARED_TO_SLAVE;
3428 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
3429 if (IS_ERR(new)) {
3430 namespace_unlock();
3431 free_mnt_ns(new_ns);
3432 return ERR_CAST(new);
3433 }
3434 if (user_ns != ns->user_ns) {
3435 lock_mount_hash();
3436 lock_mnt_tree(new);
3437 unlock_mount_hash();
3438 }
3439 new_ns->root = new;
3440 list_add_tail(&new_ns->list, &new->mnt_list);
3441
3442
3443
3444
3445
3446
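 /*
  * Second pass: switch the tsk->fs->* elements and mark the new vfsmounts
  * as belonging to the new namespace.  We have already acquired a private
  * fs_struct, so tsk->fs->lock is not needed.
  */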
3447 p = old;
3448 q = new;
3449 while (p) {
3450 q->mnt_ns = new_ns;
3451 new_ns->mounts++;
3452 if (new_fs) {
3453 if (&p->mnt == new_fs->root.mnt) {
3454 new_fs->root.mnt = mntget(&q->mnt);
3455 rootmnt = &p->mnt;
3456 }
3457 if (&p->mnt == new_fs->pwd.mnt) {
3458 new_fs->pwd.mnt = mntget(&q->mnt);
3459 pwdmnt = &p->mnt;
3460 }
3461 }
3462 p = next_mnt(p, old);
3463 q = next_mnt(q, new);
3464 if (!q)
3465 break;
3466 while (p->mnt.mnt_root != q->mnt.mnt_root)
3467 p = next_mnt(p, old);
3468 }
3469 namespace_unlock();
3470
3471 if (rootmnt)
3472 mntput(rootmnt);
3473 if (pwdmnt)
3474 mntput(pwdmnt);
3475
3476 return new_ns;
3477}
3478
3479struct dentry *mount_subtree(struct vfsmount *m, const char *name)
3480{
3481 struct mount *mnt = real_mount(m);
3482 struct mnt_namespace *ns;
3483 struct super_block *s;
3484 struct path path;
3485 int err;
3486
3487 ns = alloc_mnt_ns(&init_user_ns, true);
3488 if (IS_ERR(ns)) {
3489 mntput(m);
3490 return ERR_CAST(ns);
3491 }
3492 mnt->mnt_ns = ns;
3493 ns->root = mnt;
3494 ns->mounts++;
3495 list_add(&mnt->mnt_list, &ns->list);
3496
3497 err = vfs_path_lookup(m->mnt_root, m,
3498 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
3499
3500 put_mnt_ns(ns);
3501
3502 if (err)
3503 return ERR_PTR(err);
3504
3505
3506 s = path.mnt->mnt_sb;
3507 atomic_inc(&s->s_active);
3508 mntput(path.mnt);
3509
3510 down_write(&s->s_umount);
3511
3512 return path.dentry;
3513}
3514EXPORT_SYMBOL(mount_subtree);
3515
3516SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
3517 char __user *, type, unsigned long, flags, void __user *, data)
3518{
3519 int ret;
3520 char *kernel_type;
3521 char *kernel_dev;
3522 void *options;
3523
3524 kernel_type = copy_mount_string(type);
3525 ret = PTR_ERR(kernel_type);
3526 if (IS_ERR(kernel_type))
3527 goto out_type;
3528
3529 kernel_dev = copy_mount_string(dev_name);
3530 ret = PTR_ERR(kernel_dev);
3531 if (IS_ERR(kernel_dev))
3532 goto out_dev;
3533
3534 options = copy_mount_options(data);
3535 ret = PTR_ERR(options);
3536 if (IS_ERR(options))
3537 goto out_data;
3538
3539 ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
3540
3541 kfree(options);
3542out_data:
3543 kfree(kernel_dev);
3544out_dev:
3545 kfree(kernel_type);
3546out_type:
3547 return ret;
3548}
3549
3550#define FSMOUNT_VALID_FLAGS \
3551 (MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV | \
3552 MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME | \
3553 MOUNT_ATTR_NOSYMFOLLOW)
3554
3555#define MOUNT_SETATTR_VALID_FLAGS (FSMOUNT_VALID_FLAGS | MOUNT_ATTR_IDMAP)
3556
3557#define MOUNT_SETATTR_PROPAGATION_FLAGS \
3558 (MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED)
3559
3560static unsigned int attr_flags_to_mnt_flags(u64 attr_flags)
3561{
3562 unsigned int mnt_flags = 0;
3563
3564 if (attr_flags & MOUNT_ATTR_RDONLY)
3565 mnt_flags |= MNT_READONLY;
3566 if (attr_flags & MOUNT_ATTR_NOSUID)
3567 mnt_flags |= MNT_NOSUID;
3568 if (attr_flags & MOUNT_ATTR_NODEV)
3569 mnt_flags |= MNT_NODEV;
3570 if (attr_flags & MOUNT_ATTR_NOEXEC)
3571 mnt_flags |= MNT_NOEXEC;
3572 if (attr_flags & MOUNT_ATTR_NODIRATIME)
3573 mnt_flags |= MNT_NODIRATIME;
3574 if (attr_flags & MOUNT_ATTR_NOSYMFOLLOW)
3575 mnt_flags |= MNT_NOSYMFOLLOW;
3576
3577 return mnt_flags;
3578}
3579
3580
3581
3582
3583
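/*
 * Create a kernel mount representation for a new, prepared superblock
 * (specified by fs_fd) and attach to an open_tree-like file descriptor.
 */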
3584SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
3585 unsigned int, attr_flags)
3586{
3587 struct mnt_namespace *ns;
3588 struct fs_context *fc;
3589 struct file *file;
3590 struct path newmount;
3591 struct mount *mnt;
3592 struct fd f;
3593 unsigned int mnt_flags = 0;
3594 long ret;
3595
3596 if (!may_mount())
3597 return -EPERM;
3598
3599 if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
3600 return -EINVAL;
3601
3602 if (attr_flags & ~FSMOUNT_VALID_FLAGS)
3603 return -EINVAL;
3604
3605 mnt_flags = attr_flags_to_mnt_flags(attr_flags);
3606
3607 switch (attr_flags & MOUNT_ATTR__ATIME) {
3608 case MOUNT_ATTR_STRICTATIME:
3609 break;
3610 case MOUNT_ATTR_NOATIME:
3611 mnt_flags |= MNT_NOATIME;
3612 break;
3613 case MOUNT_ATTR_RELATIME:
3614 mnt_flags |= MNT_RELATIME;
3615 break;
3616 default:
3617 return -EINVAL;
3618 }
3619
3620 f = fdget(fs_fd);
3621 if (!f.file)
3622 return -EBADF;
3623
3624 ret = -EINVAL;
3625 if (f.file->f_op != &fscontext_fops)
3626 goto err_fsfd;
3627
3628 fc = f.file->private_data;
3629
3630 ret = mutex_lock_interruptible(&fc->uapi_mutex);
3631 if (ret < 0)
3632 goto err_fsfd;
3633
3634
3635 ret = -EINVAL;
3636 if (!fc->root)
3637 goto err_unlock;
3638
3639 ret = -EPERM;
3640 if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
3641 pr_warn("VFS: Mount too revealing\n");
3642 goto err_unlock;
3643 }
3644
3645 ret = -EBUSY;
3646 if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
3647 goto err_unlock;
3648
3649 if (fc->sb_flags & SB_MANDLOCK)
3650 warn_mandlock();
3651
3652 newmount.mnt = vfs_create_mount(fc);
3653 if (IS_ERR(newmount.mnt)) {
3654 ret = PTR_ERR(newmount.mnt);
3655 goto err_unlock;
3656 }
3657 newmount.dentry = dget(fc->root);
3658 newmount.mnt->mnt_flags = mnt_flags;
3659
3660
3661
3662
3663
3664
3665 vfs_clean_context(fc);
3666
3667 ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
3668 if (IS_ERR(ns)) {
3669 ret = PTR_ERR(ns);
3670 goto err_path;
3671 }
3672 mnt = real_mount(newmount.mnt);
3673 mnt->mnt_ns = ns;
3674 ns->root = mnt;
3675 ns->mounts = 1;
3676 list_add(&mnt->mnt_list, &ns->list);
3677 mntget(newmount.mnt);
3678
3679
3680
3681
3682 file = dentry_open(&newmount, O_PATH, fc->cred);
3683 if (IS_ERR(file)) {
3684 dissolve_on_fput(newmount.mnt);
3685 ret = PTR_ERR(file);
3686 goto err_path;
3687 }
3688 file->f_mode |= FMODE_NEED_UNMOUNT;
3689
3690 ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
3691 if (ret >= 0)
3692 fd_install(ret, file);
3693 else
3694 fput(file);
3695
3696err_path:
3697 path_put(&newmount);
3698err_unlock:
3699 mutex_unlock(&fc->uapi_mutex);
3700err_fsfd:
3701 fdput(f);
3702 return ret;
3703}
3704
3705
3706
3707
3708
3709
3710
3711
3712
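/*
 * Move a mount from one place to another.  In combination with
 * fsopen()/fsmount() this is used to install a new mount and in combination
 * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy
 * a mount subtree.
 *
 * Note the flags value is a combination of MOVE_MOUNT_* flags.
 */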
3713SYSCALL_DEFINE5(move_mount,
3714 int, from_dfd, const char __user *, from_pathname,
3715 int, to_dfd, const char __user *, to_pathname,
3716 unsigned int, flags)
3717{
3718 struct path from_path, to_path;
3719 unsigned int lflags;
3720 int ret = 0;
3721
3722 if (!may_mount())
3723 return -EPERM;
3724
3725 if (flags & ~MOVE_MOUNT__MASK)
3726 return -EINVAL;
3727
3728
3729
3730
3731
3732 lflags = 0;
3733 if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3734 if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3735 if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3736
3737 ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
3738 if (ret < 0)
3739 return ret;
3740
3741 lflags = 0;
3742 if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3743 if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3744 if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3745
3746 ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
3747 if (ret < 0)
3748 goto out_from;
3749
3750 ret = security_move_mount(&from_path, &to_path);
3751 if (ret < 0)
3752 goto out_to;
3753
3754 if (flags & MOVE_MOUNT_SET_GROUP)
3755 ret = do_set_group(&from_path, &to_path);
3756 else
3757 ret = do_move_mount(&from_path, &to_path);
3758
3759out_to:
3760 path_put(&to_path);
3761out_from:
3762 path_put(&from_path);
3763 return ret;
3764}
3765
3766
3767
3768
3769
3770
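/*
 * Return true if path is reachable from root.
 *
 * namespace_sem or mount_lock is held.
 */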
3771bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
3772 const struct path *root)
3773{
3774 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
3775 dentry = mnt->mnt_mountpoint;
3776 mnt = mnt->mnt_parent;
3777 }
3778 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
3779}
3780
3781bool path_is_under(const struct path *path1, const struct path *path2)
3782{
3783 bool res;
3784 read_seqlock_excl(&mount_lock);
3785 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
3786 read_sequnlock_excl(&mount_lock);
3787 return res;
3788}
3789EXPORT_SYMBOL(path_is_under);
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
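/*
 * pivot_root semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root the new root file system of the current process, and moves
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * new_root and put_old must be directories and must not be on the same
 * file system as the current process root.  put_old must be underneath
 * new_root, no other file system may be mounted on put_old, and new_root
 * must be a mount point.
 */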
3816SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3817 const char __user *, put_old)
3818{
3819 struct path new, old, root;
3820 struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
3821 struct mountpoint *old_mp, *root_mp;
3822 int error;
3823
3824 if (!may_mount())
3825 return -EPERM;
3826
3827 error = user_path_at(AT_FDCWD, new_root,
3828 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new);
3829 if (error)
3830 goto out0;
3831
3832 error = user_path_at(AT_FDCWD, put_old,
3833 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old);
3834 if (error)
3835 goto out1;
3836
3837 error = security_sb_pivotroot(&old, &new);
3838 if (error)
3839 goto out2;
3840
3841 get_fs_root(current->fs, &root);
3842 old_mp = lock_mount(&old);
3843 error = PTR_ERR(old_mp);
3844 if (IS_ERR(old_mp))
3845 goto out3;
3846
3847 error = -EINVAL;
3848 new_mnt = real_mount(new.mnt);
3849 root_mnt = real_mount(root.mnt);
3850 old_mnt = real_mount(old.mnt);
3851 ex_parent = new_mnt->mnt_parent;
3852 root_parent = root_mnt->mnt_parent;
3853 if (IS_MNT_SHARED(old_mnt) ||
3854 IS_MNT_SHARED(ex_parent) ||
3855 IS_MNT_SHARED(root_parent))
3856 goto out4;
3857 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
3858 goto out4;
3859 if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
3860 goto out4;
3861 error = -ENOENT;
3862 if (d_unlinked(new.dentry))
3863 goto out4;
3864 error = -EBUSY;
3865 if (new_mnt == root_mnt || old_mnt == root_mnt)
3866 goto out4;
3867 error = -EINVAL;
3868 if (root.mnt->mnt_root != root.dentry)
3869 goto out4;
3870 if (!mnt_has_parent(root_mnt))
3871 goto out4;
3872 if (new.mnt->mnt_root != new.dentry)
3873 goto out4;
3874 if (!mnt_has_parent(new_mnt))
3875 goto out4;
3876
3877 if (!is_path_reachable(old_mnt, old.dentry, &new))
3878 goto out4;
3879
3880 if (!is_path_reachable(new_mnt, new.dentry, &root))
3881 goto out4;
3882 lock_mount_hash();
3883 umount_mnt(new_mnt);
3884 root_mp = unhash_mnt(root_mnt);
3885 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
3886 new_mnt->mnt.mnt_flags |= MNT_LOCKED;
3887 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
3888 }
3889
3890 attach_mnt(root_mnt, old_mnt, old_mp);
3891
3892 attach_mnt(new_mnt, root_parent, root_mp);
3893 mnt_add_count(root_parent, -1);
3894 touch_mnt_namespace(current->nsproxy->mnt_ns);
3895
3896 list_del_init(&new_mnt->mnt_expire);
3897 put_mountpoint(root_mp);
3898 unlock_mount_hash();
3899 chroot_fs_refs(&root, &new);
3900 error = 0;
3901out4:
3902 unlock_mount(old_mp);
3903 if (!error)
3904 mntput_no_expire(ex_parent);
3905out3:
3906 path_put(&root);
3907out2:
3908 path_put(&old);
3909out1:
3910 path_put(&new);
3911out0:
3912 return error;
3913}
3914
3915static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
3916{
3917 unsigned int flags = mnt->mnt.mnt_flags;
3918
3919
3920 flags &= ~kattr->attr_clr;
3921
3922 flags |= kattr->attr_set;
3923
3924 return flags;
3925}
3926
3927static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
3928{
3929 struct vfsmount *m = &mnt->mnt;
3930
3931 if (!kattr->mnt_userns)
3932 return 0;
3933
3934
3935
3936
3937
3938
3939 if (mnt_user_ns(m) != &init_user_ns)
3940 return -EPERM;
3941
3942
3943 if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
3944 return -EINVAL;
3945
3946
3947 if (m->mnt_sb->s_user_ns != &init_user_ns)
3948 return -EINVAL;
3949
3950
3951 if (!capable(CAP_SYS_ADMIN))
3952 return -EPERM;
3953
3954
3955 if (!is_anon_ns(mnt->mnt_ns))
3956 return -EINVAL;
3957
3958 return 0;
3959}
3960
3961static struct mount *mount_setattr_prepare(struct mount_kattr *kattr,
3962 struct mount *mnt, int *err)
3963{
3964 struct mount *m = mnt, *last = NULL;
3965
3966 if (!is_mounted(&m->mnt)) {
3967 *err = -EINVAL;
3968 goto out;
3969 }
3970
3971 if (!(mnt_has_parent(m) ? check_mnt(m) : is_anon_ns(m->mnt_ns))) {
3972 *err = -EINVAL;
3973 goto out;
3974 }
3975
3976 do {
3977 unsigned int flags;
3978
3979 flags = recalc_flags(kattr, m);
3980 if (!can_change_locked_flags(m, flags)) {
3981 *err = -EPERM;
3982 goto out;
3983 }
3984
3985 *err = can_idmap_mount(kattr, m);
3986 if (*err)
3987 goto out;
3988
3989 last = m;
3990
3991 if ((kattr->attr_set & MNT_READONLY) &&
3992 !(m->mnt.mnt_flags & MNT_READONLY)) {
3993 *err = mnt_hold_writers(m);
3994 if (*err)
3995 goto out;
3996 }
3997 } while (kattr->recurse && (m = next_mnt(m, mnt)));
3998
3999out:
4000 return last;
4001}
4002
4003static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
4004{
4005 struct user_namespace *mnt_userns;
4006
4007 if (!kattr->mnt_userns)
4008 return;
4009
4010 mnt_userns = get_user_ns(kattr->mnt_userns);
4011
4012 smp_store_release(&mnt->mnt.mnt_userns, mnt_userns);
4013}
4014
4015static void mount_setattr_commit(struct mount_kattr *kattr,
4016 struct mount *mnt, struct mount *last,
4017 int err)
4018{
4019 struct mount *m = mnt;
4020
4021 do {
4022 if (!err) {
4023 unsigned int flags;
4024
4025 do_idmap_mount(kattr, m);
4026 flags = recalc_flags(kattr, m);
4027 WRITE_ONCE(m->mnt.mnt_flags, flags);
4028 }
4029
4030
4031
4032
4033
4034
4035 if ((kattr->attr_set & MNT_READONLY) &&
4036 (m->mnt.mnt_flags & MNT_WRITE_HOLD))
4037 mnt_unhold_writers(m);
4038
4039 if (!err && kattr->propagation)
4040 change_mnt_propagation(m, kattr->propagation);
4041
4042
4043
4044
4045
4046 if (err && m == last)
4047 break;
4048 } while (kattr->recurse && (m = next_mnt(m, mnt)));
4049
4050 if (!err)
4051 touch_mnt_namespace(mnt->mnt_ns);
4052}
4053
4054static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
4055{
4056 struct mount *mnt = real_mount(path->mnt), *last = NULL;
4057 int err = 0;
4058
4059 if (path->dentry != mnt->mnt.mnt_root)
4060 return -EINVAL;
4061
4062 if (kattr->propagation) {
4063
4064
4065
4066
4067 namespace_lock();
4068 if (kattr->propagation == MS_SHARED) {
4069 err = invent_group_ids(mnt, kattr->recurse);
4070 if (err) {
4071 namespace_unlock();
4072 return err;
4073 }
4074 }
4075 }
4076
4077 lock_mount_hash();
4078
4079
4080
4081
4082
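 /*
  * Get the mount tree into a shape where we can change mount properties
  * without failure.  mount_setattr_commit() then either applies the new
  * flags or, on error, releases any write holds taken during preparation.
  */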
4083 last = mount_setattr_prepare(kattr, mnt, &err);
4084 if (last)
4085 mount_setattr_commit(kattr, mnt, last, err);
4086
4087 unlock_mount_hash();
4088
4089 if (kattr->propagation) {
4090 namespace_unlock();
4091 if (err)
4092 cleanup_group_ids(mnt, NULL);
4093 }
4094
4095 return err;
4096}
4097
4098static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
4099 struct mount_kattr *kattr, unsigned int flags)
4100{
4101 int err = 0;
4102 struct ns_common *ns;
4103 struct user_namespace *mnt_userns;
4104 struct file *file;
4105
4106 if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP))
4107 return 0;
4108
4109
4110
4111
4112
4113
4114 if (attr->attr_clr & MOUNT_ATTR_IDMAP)
4115 return -EINVAL;
4116
4117 if (attr->userns_fd > INT_MAX)
4118 return -EINVAL;
4119
4120 file = fget(attr->userns_fd);
4121 if (!file)
4122 return -EBADF;
4123
4124 if (!proc_ns_file(file)) {
4125 err = -EINVAL;
4126 goto out_fput;
4127 }
4128
4129 ns = get_proc_ns(file_inode(file));
4130 if (ns->ops->type != CLONE_NEWUSER) {
4131 err = -EINVAL;
4132 goto out_fput;
4133 }
4134
4135
4136
4137
4138
4139
4140
4141 mnt_userns = container_of(ns, struct user_namespace, ns);
4142 if (mnt_userns == &init_user_ns) {
4143 err = -EPERM;
4144 goto out_fput;
4145 }
4146 kattr->mnt_userns = get_user_ns(mnt_userns);
4147
4148out_fput:
4149 fput(file);
4150 return err;
4151}
4152
4153static int build_mount_kattr(const struct mount_attr *attr, size_t usize,
4154 struct mount_kattr *kattr, unsigned int flags)
4155{
4156 unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
4157
4158 if (flags & AT_NO_AUTOMOUNT)
4159 lookup_flags &= ~LOOKUP_AUTOMOUNT;
4160 if (flags & AT_SYMLINK_NOFOLLOW)
4161 lookup_flags &= ~LOOKUP_FOLLOW;
4162 if (flags & AT_EMPTY_PATH)
4163 lookup_flags |= LOOKUP_EMPTY;
4164
4165 *kattr = (struct mount_kattr) {
4166 .lookup_flags = lookup_flags,
4167 .recurse = !!(flags & AT_RECURSIVE),
4168 };
4169
4170 if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS)
4171 return -EINVAL;
4172 if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1)
4173 return -EINVAL;
4174 kattr->propagation = attr->propagation;
4175
4176 if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS)
4177 return -EINVAL;
4178
4179 kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set);
4180 kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr);
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191 if (attr->attr_clr & MOUNT_ATTR__ATIME) {
4192 if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME)
4193 return -EINVAL;
4194
4195
4196
4197
4198
4199 kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME;
4200 switch (attr->attr_set & MOUNT_ATTR__ATIME) {
4201 case MOUNT_ATTR_RELATIME:
4202 kattr->attr_set |= MNT_RELATIME;
4203 break;
4204 case MOUNT_ATTR_NOATIME:
4205 kattr->attr_set |= MNT_NOATIME;
4206 break;
4207 case MOUNT_ATTR_STRICTATIME:
4208 break;
4209 default:
4210 return -EINVAL;
4211 }
4212 } else {
4213 if (attr->attr_set & MOUNT_ATTR__ATIME)
4214 return -EINVAL;
4215 }
4216
4217 return build_mount_idmapped(attr, usize, kattr, flags);
4218}
4219
4220static void finish_mount_kattr(struct mount_kattr *kattr)
4221{
4222 put_user_ns(kattr->mnt_userns);
4223 kattr->mnt_userns = NULL;
4224}
4225
4226SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
4227 unsigned int, flags, struct mount_attr __user *, uattr,
4228 size_t, usize)
4229{
4230 int err;
4231 struct path target;
4232 struct mount_attr attr;
4233 struct mount_kattr kattr;
4234
4235 BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0);
4236
4237 if (flags & ~(AT_EMPTY_PATH |
4238 AT_RECURSIVE |
4239 AT_SYMLINK_NOFOLLOW |
4240 AT_NO_AUTOMOUNT))
4241 return -EINVAL;
4242
4243 if (unlikely(usize > PAGE_SIZE))
4244 return -E2BIG;
4245 if (unlikely(usize < MOUNT_ATTR_SIZE_VER0))
4246 return -EINVAL;
4247
4248 if (!may_mount())
4249 return -EPERM;
4250
4251 err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
4252 if (err)
4253 return err;
4254
4255
4256 if (attr.attr_set == 0 &&
4257 attr.attr_clr == 0 &&
4258 attr.propagation == 0)
4259 return 0;
4260
4261 err = build_mount_kattr(&attr, usize, &kattr, flags);
4262 if (err)
4263 return err;
4264
4265 err = user_path_at(dfd, path, kattr.lookup_flags, &target);
4266 if (err)
4267 return err;
4268
4269 err = do_mount_setattr(&target, &kattr);
4270 finish_mount_kattr(&kattr);
4271 path_put(&target);
4272 return err;
4273}
4274
4275static void __init init_mount_tree(void)
4276{
4277 struct vfsmount *mnt;
4278 struct mount *m;
4279 struct mnt_namespace *ns;
4280 struct path root;
4281
4282 mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
4283 if (IS_ERR(mnt))
4284 panic("Can't create rootfs");
4285
4286 ns = alloc_mnt_ns(&init_user_ns, false);
4287 if (IS_ERR(ns))
4288 panic("Can't allocate initial namespace");
4289 m = real_mount(mnt);
4290 m->mnt_ns = ns;
4291 ns->root = m;
4292 ns->mounts = 1;
4293 list_add(&m->mnt_list, &ns->list);
4294 init_task.nsproxy->mnt_ns = ns;
4295 get_mnt_ns(ns);
4296
4297 root.mnt = mnt;
4298 root.dentry = mnt->mnt_root;
4299 mnt->mnt_flags |= MNT_LOCKED;
4300
4301 set_fs_pwd(current->fs, &root);
4302 set_fs_root(current->fs, &root);
4303}
4304
4305void __init mnt_init(void)
4306{
4307 int err;
4308
4309 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
4310 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
4311
4312 mount_hashtable = alloc_large_system_hash("Mount-cache",
4313 sizeof(struct hlist_head),
4314 mhash_entries, 19,
4315 HASH_ZERO,
4316 &m_hash_shift, &m_hash_mask, 0, 0);
4317 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
4318 sizeof(struct hlist_head),
4319 mphash_entries, 19,
4320 HASH_ZERO,
4321 &mp_hash_shift, &mp_hash_mask, 0, 0);
4322
4323 if (!mount_hashtable || !mountpoint_hashtable)
4324 panic("Failed to allocate mount hash table\n");
4325
4326 kernfs_init();
4327
4328 err = sysfs_init();
4329 if (err)
4330 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
4331 __func__, err);
4332 fs_kobj = kobject_create_and_add("fs", NULL);
4333 if (!fs_kobj)
4334 printk(KERN_WARNING "%s: kobj create error\n", __func__);
4335 shmem_init();
4336 init_rootfs();
4337 init_mount_tree();
4338}
4339
4340void put_mnt_ns(struct mnt_namespace *ns)
4341{
4342 if (!refcount_dec_and_test(&ns->ns.count))
4343 return;
4344 drop_collected_mounts(&ns->root->mnt);
4345 free_mnt_ns(ns);
4346}
4347
4348struct vfsmount *kern_mount(struct file_system_type *type)
4349{
4350 struct vfsmount *mnt;
4351 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
4352 if (!IS_ERR(mnt)) {
4353
4354
4355
4356
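 /*
  * This is a long-term internal mount; don't release it until
  * kern_unmount(), after the filesystem is unregistered.
  */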
4357 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
4358 }
4359 return mnt;
4360}
4361EXPORT_SYMBOL_GPL(kern_mount);
4362
4363void kern_unmount(struct vfsmount *mnt)
4364{
4365
4366 if (!IS_ERR_OR_NULL(mnt)) {
4367 real_mount(mnt)->mnt_ns = NULL;
4368 synchronize_rcu();
4369 mntput(mnt);
4370 }
4371}
4372EXPORT_SYMBOL(kern_unmount);
4373
4374void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
4375{
4376 unsigned int i;
4377
4378 for (i = 0; i < num; i++)
4379 if (mnt[i])
4380 real_mount(mnt[i])->mnt_ns = NULL;
4381 synchronize_rcu_expedited();
4382 for (i = 0; i < num; i++)
4383 mntput(mnt[i]);
4384}
4385EXPORT_SYMBOL(kern_unmount_array);
4386
4387bool our_mnt(struct vfsmount *mnt)
4388{
4389 return check_mnt(real_mount(mnt));
4390}
4391
4392bool current_chrooted(void)
4393{
4394
4395 struct path ns_root;
4396 struct path fs_root;
4397 bool chrooted;
4398
4399
4400 ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
4401 ns_root.dentry = ns_root.mnt->mnt_root;
4402 path_get(&ns_root);
4403 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
4404 ;
4405
4406 get_fs_root(current->fs, &fs_root);
4407
4408 chrooted = !path_equal(&fs_root, &ns_root);
4409
4410 path_put(&fs_root);
4411 path_put(&ns_root);
4412
4413 return chrooted;
4414}
4415
4416static bool mnt_already_visible(struct mnt_namespace *ns,
4417 const struct super_block *sb,
4418 int *new_mnt_flags)
4419{
4420 int new_flags = *new_mnt_flags;
4421 struct mount *mnt;
4422 bool visible = false;
4423
4424 down_read(&namespace_sem);
4425 lock_ns_list(ns);
4426 list_for_each_entry(mnt, &ns->list, mnt_list) {
4427 struct mount *child;
4428 int mnt_flags;
4429
4430 if (mnt_is_cursor(mnt))
4431 continue;
4432
4433 if (mnt->mnt.mnt_sb->s_type != sb->s_type)
4434 continue;
4435
4436
4437
4438
4439 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
4440 continue;
4441
4442
4443 mnt_flags = mnt->mnt.mnt_flags;
4444
4445
4446 if (sb_rdonly(mnt->mnt.mnt_sb))
4447 mnt_flags |= MNT_LOCK_READONLY;
4448
4449
4450
4451
4452 if ((mnt_flags & MNT_LOCK_READONLY) &&
4453 !(new_flags & MNT_READONLY))
4454 continue;
4455 if ((mnt_flags & MNT_LOCK_ATIME) &&
4456 ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
4457 continue;
4458
4459
4460
4461
4462
4463 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
4464 struct inode *inode = child->mnt_mountpoint->d_inode;
4465
4466 if (!(child->mnt.mnt_flags & MNT_LOCKED))
4467 continue;
4468
4469 if (!is_empty_dir_inode(inode))
4470 goto next;
4471 }
4472
4473 *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
4474 MNT_LOCK_ATIME);
4475 visible = true;
4476 goto found;
4477 next: ;
4478 }
4479found:
4480 unlock_ns_list(ns);
4481 up_read(&namespace_sem);
4482 return visible;
4483}
4484
4485static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
4486{
4487 const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
4488 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
4489 unsigned long s_iflags;
4490
4491 if (ns->user_ns == &init_user_ns)
4492 return false;
4493
4494
4495 s_iflags = sb->s_iflags;
4496 if (!(s_iflags & SB_I_USERNS_VISIBLE))
4497 return false;
4498
4499 if ((s_iflags & required_iflags) != required_iflags) {
4500 WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
4501 required_iflags);
4502 return true;
4503 }
4504
4505 return !mnt_already_visible(ns, sb, new_mnt_flags);
4506}
4507
4508bool mnt_may_suid(struct vfsmount *mnt)
4509{
4510
4511
4512
4513
4514
4515
4516
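 /*
  * Foreign mounts (accessed via fchdir or through /proc symlinks) are
  * always treated as if they are nosuid.  This prevents namespaces from
  * trusting potentially unsafe suid and sgid binaries visible through
  * some other mount namespace's mounts.
  */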
4517 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
4518 current_in_userns(mnt->mnt_sb->s_user_ns);
4519}
4520
4521static struct ns_common *mntns_get(struct task_struct *task)
4522{
4523 struct ns_common *ns = NULL;
4524 struct nsproxy *nsproxy;
4525
4526 task_lock(task);
4527 nsproxy = task->nsproxy;
4528 if (nsproxy) {
4529 ns = &nsproxy->mnt_ns->ns;
4530 get_mnt_ns(to_mnt_ns(ns));
4531 }
4532 task_unlock(task);
4533
4534 return ns;
4535}
4536
4537static void mntns_put(struct ns_common *ns)
4538{
4539 put_mnt_ns(to_mnt_ns(ns));
4540}
4541
4542static int mntns_install(struct nsset *nsset, struct ns_common *ns)
4543{
4544 struct nsproxy *nsproxy = nsset->nsproxy;
4545 struct fs_struct *fs = nsset->fs;
4546 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
4547 struct user_namespace *user_ns = nsset->cred->user_ns;
4548 struct path root;
4549 int err;
4550
4551 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
4552 !ns_capable(user_ns, CAP_SYS_CHROOT) ||
4553 !ns_capable(user_ns, CAP_SYS_ADMIN))
4554 return -EPERM;
4555
4556 if (is_anon_ns(mnt_ns))
4557 return -EINVAL;
4558
4559 if (fs->users != 1)
4560 return -EINVAL;
4561
4562 get_mnt_ns(mnt_ns);
4563 old_mnt_ns = nsproxy->mnt_ns;
4564 nsproxy->mnt_ns = mnt_ns;
4565
4566
4567 err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
4568 "/", LOOKUP_DOWN, &root);
4569 if (err) {
4570
4571 nsproxy->mnt_ns = old_mnt_ns;
4572 put_mnt_ns(mnt_ns);
4573 return err;
4574 }
4575
4576 put_mnt_ns(old_mnt_ns);
4577
4578
4579 set_fs_pwd(fs, &root);
4580 set_fs_root(fs, &root);
4581
4582 path_put(&root);
4583 return 0;
4584}
4585
4586static struct user_namespace *mntns_owner(struct ns_common *ns)
4587{
4588 return to_mnt_ns(ns)->user_ns;
4589}
4590
4591const struct proc_ns_operations mntns_operations = {
4592 .name = "mnt",
4593 .type = CLONE_NEWNS,
4594 .get = mntns_get,
4595 .put = mntns_put,
4596 .install = mntns_install,
4597 .owner = mntns_owner,
4598};
4599