// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */

#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/fs_struct.h>
#include <linux/fsnotify.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/task_work.h>
#include <linux/sched/task.h>
#include <uapi/linux/mount.h>
#include <linux/fs_context.h>
#include <linux/shmem_fs.h>

#include "pnode.h"
#include "internal.h"

/* Maximum number of mounts in a mount namespace */
unsigned int sysctl_mount_max __read_mostly = 100000;

static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
static unsigned int mp_hash_mask __read_mostly;
static unsigned int mp_hash_shift __read_mostly;

static __initdata unsigned long mhash_entries;
static int __init set_mhash_entries(char *str)
{
	if (!str)
		return 0;
	mhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mhash_entries=", set_mhash_entries);

static __initdata unsigned long mphash_entries;
static int __init set_mphash_entries(char *str)
{
	if (!str)
		return 0;
	mphash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mphash_entries=", set_mphash_entries);

static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);

static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */

struct mount_kattr {
	unsigned int attr_set;
	unsigned int attr_clr;
	unsigned int propagation;
	unsigned int lookup_flags;
	bool recurse;
	struct user_namespace *mnt_userns;
};

/* /sys/fs */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);

/*
 * vfsmount lock may be taken for read to prevent changes to the
 * vfsmount hash, its based on mount_hashtable and the struct mount's
 * mnt_hash and the fields in struct mountpoint.
 *
 * It should be taken for write in all cases where the vfsmount
 * lock should be taken for read.
 */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);

static inline void lock_mount_hash(void)
{
	write_seqlock(&mount_lock);
}

static inline void unlock_mount_hash(void)
{
	write_sequnlock(&mount_lock);
}

static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> m_hash_shift);
	return &mount_hashtable[tmp & m_hash_mask];
}

static inline struct hlist_head *mp_hash(struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> mp_hash_shift);
	return &mountpoint_hashtable[tmp & mp_hash_mask];
}

static int mnt_alloc_id(struct mount *mnt)
{
	int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);

	if (res < 0)
		return res;
	mnt->mnt_id = res;
	return 0;
}

static void mnt_free_id(struct mount *mnt)
{
	ida_free(&mnt_id_ida, mnt->mnt_id);
}

/*
 * Allocate a new peer group ID
 */
static int mnt_alloc_group_id(struct mount *mnt)
{
	int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL);

	if (res < 0)
		return res;
	mnt->mnt_group_id = res;
	return 0;
}

/*
 * Release a peer group ID
 */
void mnt_release_group_id(struct mount *mnt)
{
	ida_free(&mnt_group_ida, mnt->mnt_group_id);
	mnt->mnt_group_id = 0;
}

/*
 * vfsmount lock must be held for read
 */
static inline void mnt_add_count(struct mount *mnt, int n)
{
#ifdef CONFIG_SMP
	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
#else
	preempt_disable();
	mnt->mnt_count += n;
	preempt_enable();
#endif
}

/*
 * vfsmount lock must be held for write
 */
int mnt_get_count(struct mount *mnt)
{
#ifdef CONFIG_SMP
	int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
	}

	return count;
#else
	return mnt->mnt_count;
#endif
}

static struct mount *alloc_vfsmnt(const char *name)
{
	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

#ifdef CONFIG_SMP
		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
		if (!mnt->mnt_pcp)
			goto out_free_devname;

		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else
		mnt->mnt_count = 1;
		mnt->mnt_writers = 0;
#endif

		INIT_HLIST_NODE(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
		INIT_HLIST_NODE(&mnt->mnt_mp_list);
		INIT_LIST_HEAD(&mnt->mnt_umounting);
		INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
		mnt->mnt.mnt_userns = &init_user_ns;
	}
	return mnt;

#ifdef CONFIG_SMP
out_free_devname:
	kfree_const(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}

/*
 * Most r/o checks on a fs are for operations that take discrete
 * amounts of time, like a write() or unlink().  We must keep track
 * of when those operations start (for permission checks) and when
 * they end.  A filesystem brackets each write operation with
 * mnt_want_write() / mnt_drop_write(); while any such writer is
 * active the mount cannot be switched to read-only, and once the
 * mount (or its superblock) has gone read-only, mnt_want_write()
 * fails with -EROFS.
 */

/**
 * __mnt_is_readonly - check whether a mount is read-only
 * @mnt: the mount to check for its write status
 *
 * This is a snapshot only and shouldn't be used directly ahead of a
 * write operation; use mnt_want_write() if a write is actually intended.
 */
bool __mnt_is_readonly(struct vfsmount *mnt)
{
	return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

static inline void mnt_inc_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers++;
#endif
}

static inline void mnt_dec_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers--;
#endif
}

static unsigned int mnt_get_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
	}

	return count;
#else
	return mnt->mnt_writers;
#endif
}

static int mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_sb->s_readonly_remount)
		return 1;
	/* Order wrt setting s_flags/s_readonly_remount in do_remount() */
	smp_rmb();
	return __mnt_is_readonly(mnt);
}

/**
 * __mnt_want_write - get write access to a mount without freeze protection
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed to
 * it, and makes sure that writes are allowed (mnt is read-write) before
 * returning success.  This operation does not protect against the filesystem
 * being frozen.  When the write operation is finished, __mnt_drop_write() must
 * be called.  This is effectively a refcount.
 */
int __mnt_want_write(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int ret = 0;

	preempt_disable();
	mnt_inc_writers(mnt);
	/*
	 * The store to mnt_inc_writers must be visible before we pass
	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
	 * incremented count after it has set MNT_WRITE_HOLD.
	 */
	smp_mb();
	while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
		cpu_relax();
	/*
	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
	 * be set to match its requirements. So we must not load that until
	 * MNT_WRITE_HOLD is cleared.
	 */
	smp_rmb();
	if (mnt_is_readonly(m)) {
		mnt_dec_writers(mnt);
		ret = -EROFS;
	}
	preempt_enable();

	return ret;
}

/**
 * mnt_want_write - get write access to a mount
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed to
 * it, and makes sure that writes are allowed (mount is read-write, filesystem
 * is not frozen) before returning success.  When the write operation is
 * finished, mnt_drop_write() must be called.  This is effectively a refcount.
 */
int mnt_want_write(struct vfsmount *m)
{
	int ret;

	sb_start_write(m->mnt_sb);
	ret = __mnt_want_write(m);
	if (ret)
		sb_end_write(m->mnt_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
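
/*
 * Editor's note, not part of the original file: a minimal usage sketch.
 * A caller about to modify something on a mount brackets the change with
 * this pair; do_modification() is a hypothetical callee used purely for
 * illustration:
 *
 *	err = mnt_want_write(path->mnt);
 *	if (err)
 *		return err;
 *	err = do_modification(path);
 *	mnt_drop_write(path->mnt);
 *	return err;
 */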

/**
 * __mnt_want_write_file - get write access to a file's mount
 * @file: the file whose mount on which to take a write
 *
 * This is like __mnt_want_write, except that if the file is already open for
 * writing it skips incrementing mnt_writers (the open file already holds a
 * reference) and only performs the read-only check.  Must be paired with
 * __mnt_drop_write_file().
 */
int __mnt_want_write_file(struct file *file)
{
	if (file->f_mode & FMODE_WRITER) {
		/*
		 * Superblock may have become readonly while there are still
		 * writable fd's, e.g. due to a fs error with errors=remount-ro
		 */
		if (__mnt_is_readonly(file->f_path.mnt))
			return -EROFS;
		return 0;
	}
	return __mnt_want_write(file->f_path.mnt);
}

/**
 * mnt_want_write_file - get write access to a file's mount
 * @file: the file whose mount on which to take a write
 *
 * This is like mnt_want_write, but if the file is already open for writing it
 * skips incrementing mnt_writers (since the open file already has a reference)
 * and instead only does the freeze protection and the check for emergency r/o
 * remounts.  This must be paired with mnt_drop_write_file.
 */
int mnt_want_write_file(struct file *file)
{
	int ret;

	sb_start_write(file_inode(file)->i_sb);
	ret = __mnt_want_write_file(file);
	if (ret)
		sb_end_write(file_inode(file)->i_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);

/**
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done
 * performing writes to it.  Must be matched with
 * __mnt_want_write() call above.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	preempt_disable();
	mnt_dec_writers(real_mount(mnt));
	preempt_enable();
}

/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it and
 * also allows the filesystem to be frozen again.  Must be matched with
 * mnt_want_write() call above.
 */
void mnt_drop_write(struct vfsmount *mnt)
{
	__mnt_drop_write(mnt);
	sb_end_write(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

void __mnt_drop_write_file(struct file *file)
{
	if (!(file->f_mode & FMODE_WRITER))
		__mnt_drop_write(file->f_path.mnt);
}

void mnt_drop_write_file(struct file *file)
{
	__mnt_drop_write_file(file);
	sb_end_write(file_inode(file)->i_sb);
}
EXPORT_SYMBOL(mnt_drop_write_file);

static inline int mnt_hold_writers(struct mount *mnt)
{
	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
	/*
	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
	 * should be visible before we do.
	 */
	smp_mb();

	/*
	 * With writers on hold, if this value is zero, then there are
	 * definitely no active writers (although held writers may subsequently
	 * increment the count, they'll have to wait, and decrement it after
	 * seeing MNT_READONLY).
	 *
	 * It is OK to have counter incremented on one CPU and decremented on
	 * another: the sum will add up correctly. The danger would be when we
	 * sum up each counter, if we read a counter before it is incremented,
	 * but then read another CPU's count which it has been subsequently
	 * decremented from -- we would see more decrements than we should.
	 * MNT_WRITE_HOLD protects against this scenario, because
	 * mnt_want_write first increments count, then smp_mb, then spins on
	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
	 * we're counting up here.
	 */
	if (mnt_get_writers(mnt) > 0)
		return -EBUSY;

	return 0;
}

static inline void mnt_unhold_writers(struct mount *mnt)
{
	/*
	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
	 * that become unheld will see MNT_READONLY.
	 */
	smp_wmb();
	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
}

static int mnt_make_readonly(struct mount *mnt)
{
	int ret;

	ret = mnt_hold_writers(mnt);
	if (!ret)
		mnt->mnt.mnt_flags |= MNT_READONLY;
	mnt_unhold_writers(mnt);
	return ret;
}

int sb_prepare_remount_readonly(struct super_block *sb)
{
	struct mount *mnt;
	int err = 0;

	/* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
	if (atomic_long_read(&sb->s_remove_count))
		return -EBUSY;

	lock_mount_hash();
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
			mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
			smp_mb();
			if (mnt_get_writers(mnt) > 0) {
				err = -EBUSY;
				break;
			}
		}
	}
	if (!err && atomic_long_read(&sb->s_remove_count))
		err = -EBUSY;

	if (!err) {
		sb->s_readonly_remount = 1;
		smp_wmb();
	}
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	}
	unlock_mount_hash();

	return err;
}

static void free_vfsmnt(struct mount *mnt)
{
	struct user_namespace *mnt_userns;

	mnt_userns = mnt_user_ns(&mnt->mnt);
	if (mnt_userns != &init_user_ns)
		put_user_ns(mnt_userns);
	kfree_const(mnt->mnt_devname);
#ifdef CONFIG_SMP
	free_percpu(mnt->mnt_pcp);
#endif
	kmem_cache_free(mnt_cache, mnt);
}

static void delayed_free_vfsmnt(struct rcu_head *head)
{
	free_vfsmnt(container_of(head, struct mount, mnt_rcu));
}

/* call under rcu_read_lock */
int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
	struct mount *mnt;
	if (read_seqretry(&mount_lock, seq))
		return 1;
	if (bastard == NULL)
		return 0;
	mnt = real_mount(bastard);
	mnt_add_count(mnt, 1);
	smp_mb();			// see mntput_no_expire()
	if (likely(!read_seqretry(&mount_lock, seq)))
		return 0;
	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
		mnt_add_count(mnt, -1);
		return 1;
	}
	lock_mount_hash();
	if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
		mnt_add_count(mnt, -1);
		unlock_mount_hash();
		return 1;
	}
	unlock_mount_hash();
	/* caller will mntput() */
	return -1;
}

/* call under rcu_read_lock */
bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
	int res = __legitimize_mnt(bastard, seq);
	if (likely(!res))
		return true;
	if (unlikely(res < 0)) {
		rcu_read_unlock();
		mntput(bastard);
		rcu_read_lock();
	}
	return false;
}

/*
 * find the first mount at @dentry on vfsmount @mnt.
 * call under rcu_read_lock()
 */
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct hlist_head *head = m_hash(mnt, dentry);
	struct mount *p;

	hlist_for_each_entry_rcu(p, head, mnt_hash)
		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
			return p;
	return NULL;
}

/*
 * lookup_mnt - Return the first child mount mounted at path
 *
 * "First" means first mounted chronologically.  If you create the
 * following mounts:
 *
 * mount /dev/sda1 /mnt
 * mount /dev/sda2 /mnt
 * mount /dev/sda3 /mnt
 *
 * Then lookup_mnt() on the base /mnt dentry in the root mount will
 * return successively the root dentry and vfsmount of /dev/sda1, then
 * /dev/sda2, then /dev/sda3, then NULL.
 *
 * lookup_mnt takes a reference to the found vfsmount.
 */
struct vfsmount *lookup_mnt(const struct path *path)
{
	struct mount *child_mnt;
	struct vfsmount *m;
	unsigned seq;

	rcu_read_lock();
	do {
		seq = read_seqbegin(&mount_lock);
		child_mnt = __lookup_mnt(path->mnt, path->dentry);
		m = child_mnt ? &child_mnt->mnt : NULL;
	} while (!legitimize_mnt(m, seq));
	rcu_read_unlock();
	return m;
}
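
/*
 * Editor's sketch, not part of the original file: a caller crossing a
 * mountpoint during a path walk would use lookup_mnt() like this; note
 * that a non-NULL result carries a reference which must be dropped:
 *
 *	struct vfsmount *child = lookup_mnt(&path);
 *	if (child) {
 *		... step into child->mnt_root ...
 *		mntput(child);
 *	}
 */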

static inline void lock_ns_list(struct mnt_namespace *ns)
{
	spin_lock(&ns->ns_lock);
}

static inline void unlock_ns_list(struct mnt_namespace *ns)
{
	spin_unlock(&ns->ns_lock);
}

static inline bool mnt_is_cursor(struct mount *mnt)
{
	return mnt->mnt.mnt_flags & MNT_CURSOR;
}

/*
 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
 *                         current mount namespace.
 *
 * The common case is dentries are not mountpoints at all and that
 * test is handled inline.  For the slow case when we are actually
 * dealing with a mountpoint of some kind, walk through all of the
 * mounts in the current mount namespace and test to see if the dentry
 * is a mountpoint.
 *
 * The mount_hashtable is not usable in this context because we
 * need to identify all mounts that may be in the current mount
 * namespace, not just a mount that happens to have some specified
 * parent mount.
 */
bool __is_local_mountpoint(struct dentry *dentry)
{
	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
	struct mount *mnt;
	bool is_covered = false;

	down_read(&namespace_sem);
	lock_ns_list(ns);
	list_for_each_entry(mnt, &ns->list, mnt_list) {
		if (mnt_is_cursor(mnt))
			continue;
		is_covered = (mnt->mnt_mountpoint == dentry);
		if (is_covered)
			break;
	}
	unlock_ns_list(ns);
	up_read(&namespace_sem);

	return is_covered;
}

static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
{
	struct hlist_head *chain = mp_hash(dentry);
	struct mountpoint *mp;

	hlist_for_each_entry(mp, chain, m_hash) {
		if (mp->m_dentry == dentry) {
			mp->m_count++;
			return mp;
		}
	}
	return NULL;
}

static struct mountpoint *get_mountpoint(struct dentry *dentry)
{
	struct mountpoint *mp, *new = NULL;
	int ret;

	if (d_mountpoint(dentry)) {
		/* might be worth a WARN_ON() */
		if (d_unlinked(dentry))
			return ERR_PTR(-ENOENT);
mountpoint:
		read_seqlock_excl(&mount_lock);
		mp = lookup_mountpoint(dentry);
		read_sequnlock_excl(&mount_lock);
		if (mp)
			goto done;
	}

	if (!new)
		new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
	if (!new)
		return ERR_PTR(-ENOMEM);


	/* Exactly one process may set d_mounted */
	ret = d_set_mounted(dentry);

	/* Someone else set d_mounted? */
	if (ret == -EBUSY)
		goto mountpoint;

	/* The dentry is not available as a mountpoint? */
	mp = ERR_PTR(ret);
	if (ret)
		goto done;

	/* Add the new mountpoint to the hash table */
	read_seqlock_excl(&mount_lock);
	new->m_dentry = dget(dentry);
	new->m_count = 1;
	hlist_add_head(&new->m_hash, mp_hash(dentry));
	INIT_HLIST_HEAD(&new->m_list);
	read_sequnlock_excl(&mount_lock);

	mp = new;
	new = NULL;
done:
	kfree(new);
	return mp;
}

/*
 * vfsmount lock must be held.  Additionally, the caller is responsible
 * for serializing calls for a given disposal list.
 */
static void __put_mountpoint(struct mountpoint *mp, struct list_head *list)
{
	if (!--mp->m_count) {
		struct dentry *dentry = mp->m_dentry;
		BUG_ON(!hlist_empty(&mp->m_list));
		spin_lock(&dentry->d_lock);
		dentry->d_flags &= ~DCACHE_MOUNTED;
		spin_unlock(&dentry->d_lock);
		dput_to_list(dentry, list);
		hlist_del(&mp->m_hash);
		kfree(mp);
	}
}

/* called with namespace_lock and vfsmount lock */
static void put_mountpoint(struct mountpoint *mp)
{
	__put_mountpoint(mp, &ex_mountpoints);
}

static inline int check_mnt(struct mount *mnt)
{
	return mnt->mnt_ns == current->nsproxy->mnt_ns;
}

/*
 * vfsmount lock must be held for write
 */
static void touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns) {
		ns->event = ++event;
		wake_up_interruptible(&ns->poll);
	}
}

/*
 * vfsmount lock must be held for write
 */
static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns && ns->event != event) {
		ns->event = event;
		wake_up_interruptible(&ns->poll);
	}
}

/*
 * vfsmount lock must be held for write
 */
static struct mountpoint *unhash_mnt(struct mount *mnt)
{
	struct mountpoint *mp;
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	list_del_init(&mnt->mnt_child);
	hlist_del_init_rcu(&mnt->mnt_hash);
	hlist_del_init(&mnt->mnt_mp_list);
	mp = mnt->mnt_mp;
	mnt->mnt_mp = NULL;
	return mp;
}

/*
 * vfsmount lock must be held for write
 */
static void umount_mnt(struct mount *mnt)
{
	put_mountpoint(unhash_mnt(mnt));
}

/*
 * vfsmount lock must be held for write
 */
void mnt_set_mountpoint(struct mount *mnt,
			struct mountpoint *mp,
			struct mount *child_mnt)
{
	mp->m_count++;
	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
	child_mnt->mnt_mountpoint = mp->m_dentry;
	child_mnt->mnt_parent = mnt;
	child_mnt->mnt_mp = mp;
	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}

static void __attach_mnt(struct mount *mnt, struct mount *parent)
{
	hlist_add_head_rcu(&mnt->mnt_hash,
			   m_hash(&parent->mnt, mnt->mnt_mountpoint));
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
}

/*
 * vfsmount lock must be held for write
 */
static void attach_mnt(struct mount *mnt,
			struct mount *parent,
			struct mountpoint *mp)
{
	mnt_set_mountpoint(parent, mp, mnt);
	__attach_mnt(mnt, parent);
}

void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
{
	struct mountpoint *old_mp = mnt->mnt_mp;
	struct mount *old_parent = mnt->mnt_parent;

	list_del_init(&mnt->mnt_child);
	hlist_del_init(&mnt->mnt_mp_list);
	hlist_del_init_rcu(&mnt->mnt_hash);

	attach_mnt(mnt, parent, mp);

	put_mountpoint(old_mp);
	mnt_add_count(old_parent, -1);
}

/*
 * vfsmount lock must be held for write
 */
static void commit_tree(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m;
	LIST_HEAD(head);
	struct mnt_namespace *n = parent->mnt_ns;

	BUG_ON(parent == mnt);

	list_add_tail(&head, &mnt->mnt_list);
	list_for_each_entry(m, &head, mnt_list)
		m->mnt_ns = n;

	list_splice(&head, n->list.prev);

	n->mounts += n->pending_mounts;
	n->pending_mounts = 0;

	__attach_mnt(mnt, parent);
	touch_mnt_namespace(n);
}

static struct mount *next_mnt(struct mount *p, struct mount *root)
{
	struct list_head *next = p->mnt_mounts.next;
	if (next == &p->mnt_mounts) {
		while (1) {
			if (p == root)
				return NULL;
			next = p->mnt_child.next;
			if (next != &p->mnt_parent->mnt_mounts)
				break;
			p = p->mnt_parent;
		}
	}
	return list_entry(next, struct mount, mnt_child);
}
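
/*
 * Editor's note, not part of the original file: next_mnt() yields a
 * pre-order (depth-first) walk of a mount tree, so whole-tree iteration
 * throughout this file follows the idiom:
 *
 *	for (p = mnt; p; p = next_mnt(p, mnt))
 *		... visit p ...
 */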

static struct mount *skip_mnt_tree(struct mount *p)
{
	struct list_head *prev = p->mnt_mounts.prev;
	while (prev != &p->mnt_mounts) {
		p = list_entry(prev, struct mount, mnt_child);
		prev = p->mnt_mounts.prev;
	}
	return p;
}

/**
 * vfs_create_mount - Create a mount for a configured superblock
 * @fc: The configuration context with the superblock attached
 *
 * Create a mount to an already configured superblock.  If necessary, the
 * caller should invoke vfs_get_tree() before calling this.
 *
 * Note that this does not attach the mount to anything.
 */
struct vfsmount *vfs_create_mount(struct fs_context *fc)
{
	struct mount *mnt;

	if (!fc->root)
		return ERR_PTR(-EINVAL);

	mnt = alloc_vfsmnt(fc->source ?: "none");
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (fc->sb_flags & SB_KERNMOUNT)
		mnt->mnt.mnt_flags = MNT_INTERNAL;

	atomic_inc(&fc->root->d_sb->s_active);
	mnt->mnt.mnt_sb = fc->root->d_sb;
	mnt->mnt.mnt_root = dget(fc->root);
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;

	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
	unlock_mount_hash();
	return &mnt->mnt;
}
EXPORT_SYMBOL(vfs_create_mount);

struct vfsmount *fc_mount(struct fs_context *fc)
{
	int err = vfs_get_tree(fc);
	if (!err) {
		up_write(&fc->root->d_sb->s_umount);
		return vfs_create_mount(fc);
	}
	return ERR_PTR(err);
}
EXPORT_SYMBOL(fc_mount);

struct vfsmount *vfs_kern_mount(struct file_system_type *type,
				int flags, const char *name,
				void *data)
{
	struct fs_context *fc;
	struct vfsmount *mnt;
	int ret = 0;

	if (!type)
		return ERR_PTR(-EINVAL);

	fc = fs_context_for_mount(type, flags);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	if (name)
		ret = vfs_parse_fs_string(fc, "source",
					  name, strlen(name));
	if (!ret)
		ret = parse_monolithic_mount_data(fc, data);
	if (!ret)
		mnt = fc_mount(fc);
	else
		mnt = ERR_PTR(ret);

	put_fs_context(fc);
	return mnt;
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);
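
/*
 * Editor's sketch, not part of the original file: an in-kernel user
 * holding a struct file_system_type would typically mount it like this
 * (the "mode=0700" option string is purely illustrative):
 *
 *	struct vfsmount *m = vfs_kern_mount(type, 0, type->name, "mode=0700");
 *	if (IS_ERR(m))
 *		return PTR_ERR(m);
 *	... use m, release with mntput(m) ...
 */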

struct vfsmount *
vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
	     const char *name, void *data)
{
	/* Until it is worked out how to pass the user namespace
	 * through from the parent mount to the submount don't support
	 * unprivileged mounts with submounts.
	 */
	if (mountpoint->d_sb->s_user_ns != &init_user_ns)
		return ERR_PTR(-EPERM);

	return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
}
EXPORT_SYMBOL_GPL(vfs_submount);

static struct mount *clone_mnt(struct mount *old, struct dentry *root,
					int flag)
{
	struct super_block *sb = old->mnt.mnt_sb;
	struct mount *mnt;
	int err;

	mnt = alloc_vfsmnt(old->mnt_devname);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
		mnt->mnt_group_id = 0; /* not a peer of original */
	else
		mnt->mnt_group_id = old->mnt_group_id;

	if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
		err = mnt_alloc_group_id(mnt);
		if (err)
			goto out_free;
	}

	mnt->mnt.mnt_flags = old->mnt.mnt_flags;
	mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);

	atomic_inc(&sb->s_active);
	mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt);
	if (mnt->mnt.mnt_userns != &init_user_ns)
		mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns);
	mnt->mnt.mnt_sb = sb;
	mnt->mnt.mnt_root = dget(root);
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
	unlock_mount_hash();

	if ((flag & CL_SLAVE) ||
	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
		list_add(&mnt->mnt_slave, &old->mnt_slave_list);
		mnt->mnt_master = old;
		CLEAR_MNT_SHARED(mnt);
	} else if (!(flag & CL_PRIVATE)) {
		if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
			list_add(&mnt->mnt_share, &old->mnt_share);
		if (IS_MNT_SLAVE(old))
			list_add(&mnt->mnt_slave, &old->mnt_slave);
		mnt->mnt_master = old->mnt_master;
	} else {
		CLEAR_MNT_SHARED(mnt);
	}
	if (flag & CL_MAKE_SHARED)
		set_mnt_shared(mnt);

	/* stick the duplicate mount on the same expiry list
	 * as the original if that was on one */
	if (flag & CL_EXPIRE) {
		if (!list_empty(&old->mnt_expire))
			list_add(&mnt->mnt_expire, &old->mnt_expire);
	}

	return mnt;

 out_free:
	mnt_free_id(mnt);
	free_vfsmnt(mnt);
	return ERR_PTR(err);
}

static void cleanup_mnt(struct mount *mnt)
{
	struct hlist_node *p;
	struct mount *m;
	/*
	 * The warning here probably indicates that somebody messed
	 * up a mnt_want/drop_write() pair.  If this happens, the
	 * filesystem was probably unable to make r/w->r/o transitions.
	 * The locking used to deal with mnt_count decrement provides barriers,
	 * so mnt_get_writers() below is safe.
	 */
	WARN_ON(mnt_get_writers(mnt));
	if (unlikely(mnt->mnt_pins.first))
		mnt_pin_kill(mnt);
	hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
		hlist_del(&m->mnt_umount);
		mntput(&m->mnt);
	}
	fsnotify_vfsmount_delete(&mnt->mnt);
	dput(mnt->mnt.mnt_root);
	deactivate_super(mnt->mnt.mnt_sb);
	mnt_free_id(mnt);
	call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
}

static void __cleanup_mnt(struct rcu_head *head)
{
	cleanup_mnt(container_of(head, struct mount, mnt_rcu));
}

static LLIST_HEAD(delayed_mntput_list);
static void delayed_mntput(struct work_struct *unused)
{
	struct llist_node *node = llist_del_all(&delayed_mntput_list);
	struct mount *m, *t;

	llist_for_each_entry_safe(m, t, node, mnt_llist)
		cleanup_mnt(m);
}
static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);

static void mntput_no_expire(struct mount *mnt)
{
	LIST_HEAD(list);
	int count;

	rcu_read_lock();
	if (likely(READ_ONCE(mnt->mnt_ns))) {
		/*
		 * Since we don't do lock_mount_hash() here,
		 * ->mnt_ns can change under us.  However, if it's
		 * non-NULL, then there's a reference that won't
		 * be dropped until after an RCU delay done after
		 * turning ->mnt_ns NULL.  So if we observe it
		 * non-NULL under rcu_read_lock(), the reference
		 * we are dropping is not the final one.
		 */
		mnt_add_count(mnt, -1);
		rcu_read_unlock();
		return;
	}
	lock_mount_hash();
	/*
	 * make sure that if __legitimize_mnt() has not seen us grab
	 * mount_lock, we'll see their refcount increment here.
	 */
	smp_mb();
	mnt_add_count(mnt, -1);
	count = mnt_get_count(mnt);
	if (count != 0) {
		WARN_ON(count < 0);
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	mnt->mnt.mnt_flags |= MNT_DOOMED;
	rcu_read_unlock();

	list_del(&mnt->mnt_instance);

	if (unlikely(!list_empty(&mnt->mnt_mounts))) {
		struct mount *p, *tmp;
		list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
			__put_mountpoint(unhash_mnt(p), &list);
			hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children);
		}
	}
	unlock_mount_hash();
	shrink_dentry_list(&list);

	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
		struct task_struct *task = current;
		if (likely(!(task->flags & PF_KTHREAD))) {
			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
			if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME))
				return;
		}
		if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
			schedule_delayed_work(&delayed_mntput_work, 1);
		return;
	}
	cleanup_mnt(mnt);
}

void mntput(struct vfsmount *mnt)
{
	if (mnt) {
		struct mount *m = real_mount(mnt);
		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
		if (unlikely(m->mnt_expiry_mark))
			m->mnt_expiry_mark = 0;
		mntput_no_expire(m);
	}
}
EXPORT_SYMBOL(mntput);

struct vfsmount *mntget(struct vfsmount *mnt)
{
	if (mnt)
		mnt_add_count(real_mount(mnt), 1);
	return mnt;
}
EXPORT_SYMBOL(mntget);
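
/*
 * Editor's note, not part of the original file: mntget()/mntput() form
 * the reference-count pair for vfsmounts.  A sketch of the usual pattern:
 *
 *	struct vfsmount *m = mntget(path->mnt);
 *	... use m while the reference is held ...
 *	mntput(m);
 */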

/**
 * path_is_mountpoint() - Check if path is a mount in the current namespace.
 * @path: path to check
 *
 * d_mountpoint() can only be used reliably to establish if a dentry is
 * not mounted in any namespace and that common case is handled inline.
 * d_mountpoint() isn't aware of the possibility there may be multiple
 * mounts using a given dentry in a different namespace. This function
 * checks if the passed in path is a mountpoint rather than the mount
 * point dentry.
 */
bool path_is_mountpoint(const struct path *path)
{
	unsigned seq;
	bool res;

	if (!d_mountpoint(path->dentry))
		return false;

	rcu_read_lock();
	do {
		seq = read_seqbegin(&mount_lock);
		res = __path_is_mountpoint(path);
	} while (read_seqretry(&mount_lock, seq));
	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL(path_is_mountpoint);

struct vfsmount *mnt_clone_internal(const struct path *path)
{
	struct mount *p;
	p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
	if (IS_ERR(p))
		return ERR_CAST(p);
	p->mnt.mnt_flags |= MNT_INTERNAL;
	return &p->mnt;
}

#ifdef CONFIG_PROC_FS
static struct mount *mnt_list_next(struct mnt_namespace *ns,
				   struct list_head *p)
{
	struct mount *mnt, *ret = NULL;

	lock_ns_list(ns);
	list_for_each_continue(p, &ns->list) {
		mnt = list_entry(p, typeof(*mnt), mnt_list);
		if (!mnt_is_cursor(mnt)) {
			ret = mnt;
			break;
		}
	}
	unlock_ns_list(ns);

	return ret;
}

/* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct proc_mounts *p = m->private;
	struct list_head *prev;

	down_read(&namespace_sem);
	if (!*pos) {
		prev = &p->ns->list;
	} else {
		prev = &p->cursor.mnt_list;

		/* Read after we'd reached the end? */
		if (list_empty(prev))
			return NULL;
	}

	return mnt_list_next(p->ns, prev);
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_mounts *p = m->private;
	struct mount *mnt = v;

	++*pos;
	return mnt_list_next(p->ns, &mnt->mnt_list);
}

static void m_stop(struct seq_file *m, void *v)
{
	struct proc_mounts *p = m->private;
	struct mount *mnt = v;

	lock_ns_list(p->ns);
	if (mnt)
		list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
	else
		list_del_init(&p->cursor.mnt_list);
	unlock_ns_list(p->ns);
	up_read(&namespace_sem);
}

static int m_show(struct seq_file *m, void *v)
{
	struct proc_mounts *p = m->private;
	struct mount *r = v;
	return p->show(m, &r->mnt);
}

const struct seq_operations mounts_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= m_show,
};

void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
{
	down_read(&namespace_sem);
	lock_ns_list(ns);
	list_del(&cursor->mnt_list);
	unlock_ns_list(ns);
	up_read(&namespace_sem);
}
#endif  /* CONFIG_PROC_FS */

/**
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
int may_umount_tree(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int actual_refs = 0;
	int minimum_refs = 0;
	struct mount *p;
	BUG_ON(!m);

	/* write lock needed for mnt_get_count */
	lock_mount_hash();
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		actual_refs += mnt_get_count(p);
		minimum_refs += 2;
	}
	unlock_mount_hash();

	if (actual_refs > minimum_refs)
		return 0;

	return 1;
}

EXPORT_SYMBOL(may_umount_tree);

/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
int may_umount(struct vfsmount *mnt)
{
	int ret = 1;
	down_read(&namespace_sem);
	lock_mount_hash();
	if (propagate_mount_busy(real_mount(mnt), 2))
		ret = 0;
	unlock_mount_hash();
	up_read(&namespace_sem);
	return ret;
}

EXPORT_SYMBOL(may_umount);

static void namespace_unlock(void)
{
	struct hlist_head head;
	struct hlist_node *p;
	struct mount *m;
	LIST_HEAD(list);

	hlist_move_list(&unmounted, &head);
	list_splice_init(&ex_mountpoints, &list);

	up_write(&namespace_sem);

	shrink_dentry_list(&list);

	if (likely(hlist_empty(&head)))
		return;

	synchronize_rcu_expedited();

	hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
		hlist_del(&m->mnt_umount);
		mntput(&m->mnt);
	}
}

static inline void namespace_lock(void)
{
	down_write(&namespace_sem);
}

enum umount_tree_flags {
	UMOUNT_SYNC = 1,
	UMOUNT_PROPAGATE = 2,
	UMOUNT_CONNECTED = 4,
};

static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
{
	/* Leaving mounts connected is only valid for lazy umounts */
	if (how & UMOUNT_SYNC)
		return true;

	/* A mount without a parent has nothing to be connected to */
	if (!mnt_has_parent(mnt))
		return true;

	/* Because the reference counting rules change when mounts are
	 * unmounted and connected, umounted mounts may not be
	 * connected to mounted mounts.
	 */
	if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
		return true;

	/* Has it been requested that the mount remain connected? */
	if (how & UMOUNT_CONNECTED)
		return false;

	/* Is the mount locked such that it needs to remain connected? */
	if (IS_MNT_LOCKED(mnt))
		return false;

	/* By default disconnect the mount */
	return true;
}

/*
 * mount_lock must be held
 * namespace_sem must be held for write
 */
static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
{
	LIST_HEAD(tmp_list);
	struct mount *p;

	if (how & UMOUNT_PROPAGATE)
		propagate_mount_unlock(mnt);

	/* Gather the mounts to umount */
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		p->mnt.mnt_flags |= MNT_UMOUNT;
		list_move(&p->mnt_list, &tmp_list);
	}

	/* Hide the mounts from mnt_mounts */
	list_for_each_entry(p, &tmp_list, mnt_list) {
		list_del_init(&p->mnt_child);
	}

	/* Add propagated mounts to the tmp_list */
	if (how & UMOUNT_PROPAGATE)
		propagate_umount(&tmp_list);

	while (!list_empty(&tmp_list)) {
		struct mnt_namespace *ns;
		bool disconnect;
		p = list_first_entry(&tmp_list, struct mount, mnt_list);
		list_del_init(&p->mnt_expire);
		list_del_init(&p->mnt_list);
		ns = p->mnt_ns;
		if (ns) {
			ns->mounts--;
			__touch_mnt_namespace(ns);
		}
		p->mnt_ns = NULL;
		if (how & UMOUNT_SYNC)
			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;

		disconnect = disconnect_mount(p, how);
		if (mnt_has_parent(p)) {
			mnt_add_count(p->mnt_parent, -1);
			if (!disconnect) {
				/* Don't forget about p */
				list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
			} else {
				umount_mnt(p);
			}
		}
		change_mnt_propagation(p, MS_PRIVATE);
		if (disconnect)
			hlist_add_head(&p->mnt_umount, &unmounted);
	}
}

static void shrink_submounts(struct mount *mnt);

static int do_umount_root(struct super_block *sb)
{
	int ret = 0;

	down_write(&sb->s_umount);
	if (!sb_rdonly(sb)) {
		struct fs_context *fc;

		fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY,
						SB_RDONLY);
		if (IS_ERR(fc)) {
			ret = PTR_ERR(fc);
		} else {
			ret = parse_monolithic_mount_data(fc, NULL);
			if (!ret)
				ret = reconfigure_super(fc);
			put_fs_context(fc);
		}
	}
	up_write(&sb->s_umount);
	return ret;
}

static int do_umount(struct mount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt.mnt_sb;
	int retval;

	retval = security_sb_umount(&mnt->mnt, flags);
	if (retval)
		return retval;

	/*
	 * Allow userspace to request a mountpoint be expired rather than
	 * unmounting unconditionally. Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
	if (flags & MNT_EXPIRE) {
		if (&mnt->mnt == current->fs->root.mnt ||
		    flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		/*
		 * probably don't strictly need the lock here if we examined
		 * all race cases, but it's a slowpath.
		 */
		lock_mount_hash();
		if (mnt_get_count(mnt) != 2) {
			unlock_mount_hash();
			return -EBUSY;
		}
		unlock_mount_hash();

		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things. In the Unix tradition of
	 * 'Gee that's tricky, let's do it in userspace' the umount_begin
	 * might fail to complete on the first run through as other tasks
	 * must return, and the like. That's for the mount program to worry
	 * about for the moment.
	 */

	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
		sb->s_op->umount_begin(sb);
	}

	/*
	 * No sense to grab the lock for this test, but test itself looks
	 * somewhat bogus. Suggestions for better replacement?
	 * Ho-hum... In principle, we might treat that as umount + switch
	 * to rootfs. GC would eventually take care of the old vfsmount.
	 * Actually it makes sense, especially if rootfs would contain a
	 * /reboot - static binary that would close all descriptors and
	 * call reboot(9). Then init(8) could umount root and exec /reboot.
	 */
	if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
		if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
			return -EPERM;
		return do_umount_root(sb);
	}

	namespace_lock();
	lock_mount_hash();

	/* Recheck MNT_LOCKED with the locks held */
	retval = -EINVAL;
	if (mnt->mnt.mnt_flags & MNT_LOCKED)
		goto out;

	event++;
	if (flags & MNT_DETACH) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, UMOUNT_PROPAGATE);
		retval = 0;
	} else {
		shrink_submounts(mnt);
		retval = -EBUSY;
		if (!propagate_mount_busy(mnt, 2)) {
			if (!list_empty(&mnt->mnt_list))
				umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
			retval = 0;
		}
	}
out:
	unlock_mount_hash();
	namespace_unlock();
	return retval;
}

/*
 * __detach_mounts - lazily unmount all mounts on the specified dentry
 *
 * During unlink, rmdir, and d_drop it is possible to lose the path
 * to an existing mountpoint, and wind up leaking the mount.
 * detach_mounts allows lazily unmounting those mounts instead of
 * leaking them.
 *
 * The caller may hold dentry->d_inode->i_mutex.
 */
void __detach_mounts(struct dentry *dentry)
{
	struct mountpoint *mp;
	struct mount *mnt;

	namespace_lock();
	lock_mount_hash();
	mp = lookup_mountpoint(dentry);
	if (!mp)
		goto out_unlock;

	event++;
	while (!hlist_empty(&mp->m_list)) {
		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
		if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
			umount_mnt(mnt);
			hlist_add_head(&mnt->mnt_umount, &unmounted);
		}
		else umount_tree(mnt, UMOUNT_CONNECTED);
	}
	put_mountpoint(mp);
out_unlock:
	unlock_mount_hash();
	namespace_unlock();
}

/*
 * Is the caller allowed to modify his namespace?
 */
static inline bool may_mount(void)
{
	return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}

#ifdef CONFIG_MANDATORY_FILE_LOCKING
static bool may_mandlock(void)
{
	pr_warn_once("======================================================\n"
		     "WARNING: the mand mount option is being deprecated and\n"
		     "         will be removed in v5.15!\n"
		     "======================================================\n");
	return capable(CAP_SYS_ADMIN);
}
#else
static inline bool may_mandlock(void)
{
	pr_warn("VFS: \"mand\" mount option not supported");
	return false;
}
#endif

static int can_umount(const struct path *path, int flags)
{
	struct mount *mnt = real_mount(path->mnt);

	if (!may_mount())
		return -EPERM;
	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;
	if (!check_mnt(mnt))
		return -EINVAL;
	if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
		return -EINVAL;
	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	return 0;
}

// caller is responsible for flags being sane
int path_umount(struct path *path, int flags)
{
	struct mount *mnt = real_mount(path->mnt);
	int ret;

	ret = can_umount(path, flags);
	if (!ret)
		ret = do_umount(mnt, flags);

	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
	dput(path->dentry);
	mntput_no_expire(mnt);
	return ret;
}

static int ksys_umount(char __user *name, int flags)
{
	int lookup_flags = LOOKUP_MOUNTPOINT;
	struct path path;
	int ret;

	// basic validity checks done first
	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
		return -EINVAL;

	if (!(flags & UMOUNT_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;
	ret = user_path_at(AT_FDCWD, name, lookup_flags, &path);
	if (ret)
		return ret;
	return path_umount(&path, flags);
}

SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	return ksys_umount(name, flags);
}
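
/*
 * Editor's sketch, not part of the original file: from userspace this
 * entry point is reached via umount(2)/umount2(2), e.g.:
 *
 *	umount2("/mnt", MNT_DETACH);	// lazy detach; see do_umount()
 */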

#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 *	The 2.0 compatible umount. No flags.
 */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return ksys_umount(name, 0);
}

#endif

static bool is_mnt_ns_file(struct dentry *dentry)
{
	/* Is this a proxy for a mount namespace? */
	return dentry->d_op == &ns_dentry_operations &&
	       dentry->d_fsdata == &mntns_operations;
}

static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
	return container_of(ns, struct mnt_namespace, ns);
}

struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
{
	return &mnt->ns;
}

static bool mnt_ns_loop(struct dentry *dentry)
{
	/* Could bind mounting the mount namespace inode cause a
	 * mount namespace loop?
	 */
	struct mnt_namespace *mnt_ns;
	if (!is_mnt_ns_file(dentry))
		return false;

	mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}

struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
					int flag)
{
	struct mount *res, *p, *q, *r, *parent;

	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
		return ERR_PTR(-EINVAL);

	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		struct mount *s;
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			if (!(flag & CL_COPY_UNBINDABLE) &&
			    IS_MNT_UNBINDABLE(s)) {
				if (s->mnt.mnt_flags & MNT_LOCKED) {
					/* Both unbindable and locked. */
					q = ERR_PTR(-EPERM);
					goto out;
				} else {
					s = skip_mnt_tree(s);
					continue;
				}
			}
			if (!(flag & CL_COPY_MNT_NS_FILE) &&
			    is_mnt_ns_file(s->mnt.mnt_root)) {
				s = skip_mnt_tree(s);
				continue;
			}
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			parent = q;
			q = clone_mnt(p, p->mnt.mnt_root, flag);
			if (IS_ERR(q))
				goto out;
			lock_mount_hash();
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, parent, p->mnt_mp);
			unlock_mount_hash();
		}
	}
	return res;
out:
	if (res) {
		lock_mount_hash();
		umount_tree(res, UMOUNT_SYNC);
		unlock_mount_hash();
	}
	return q;
}

/* Caller should check returned pointer for errors */

struct vfsmount *collect_mounts(const struct path *path)
{
	struct mount *tree;
	namespace_lock();
	if (!check_mnt(real_mount(path->mnt)))
		tree = ERR_PTR(-EINVAL);
	else
		tree = copy_tree(real_mount(path->mnt), path->dentry,
				 CL_COPY_ALL | CL_PRIVATE);
	namespace_unlock();
	if (IS_ERR(tree))
		return ERR_CAST(tree);
	return &tree->mnt;
}

static void free_mnt_ns(struct mnt_namespace *);
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);

void dissolve_on_fput(struct vfsmount *mnt)
{
	struct mnt_namespace *ns;
	namespace_lock();
	lock_mount_hash();
	ns = real_mount(mnt)->mnt_ns;
	if (ns) {
		if (is_anon_ns(ns))
			umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
		else
			ns = NULL;
	}
	unlock_mount_hash();
	namespace_unlock();
	if (ns)
		free_mnt_ns(ns);
}

void drop_collected_mounts(struct vfsmount *mnt)
{
	namespace_lock();
	lock_mount_hash();
	umount_tree(real_mount(mnt), 0);
	unlock_mount_hash();
	namespace_unlock();
}

static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	struct mount *child;

	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (!is_subdir(child->mnt_mountpoint, dentry))
			continue;

		if (child->mnt.mnt_flags & MNT_LOCKED)
			return true;
	}
	return false;
}

/**
 * clone_private_mount - create a private clone of a path
 * @path: path to clone
 *
 * This creates a new vfsmount, which will be the clone of @path.  The new
 * mount will not be attached anywhere and will not have a parent.  It is
 * therefore a private mount, to be released with mntput().
 */
struct vfsmount *clone_private_mount(const struct path *path)
{
	struct mount *old_mnt = real_mount(path->mnt);
	struct mount *new_mnt;

	down_read(&namespace_sem);
	if (IS_MNT_UNBINDABLE(old_mnt))
		goto invalid;

	if (!check_mnt(old_mnt))
		goto invalid;

	if (has_locked_children(old_mnt, path->dentry))
		goto invalid;

	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
	up_read(&namespace_sem);

	if (IS_ERR(new_mnt))
		return ERR_CAST(new_mnt);

	/* Longterm mount to be removed by kern_unmount*() */
	new_mnt->mnt_ns = MNT_NS_INTERNAL;

	return &new_mnt->mnt;

invalid:
	up_read(&namespace_sem);
	return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(clone_private_mount);
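
/*
 * Editor's sketch, not part of the original file: a subsystem wanting a
 * stable, private view of a path (an overlay-style filesystem, say)
 * would do roughly:
 *
 *	struct vfsmount *m = clone_private_mount(&path);
 *	if (IS_ERR(m))
 *		return PTR_ERR(m);
 *	... use m; it is detached, so release it with mntput(m) ...
 */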

int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
		   struct vfsmount *root)
{
	struct mount *mnt;
	int res = f(root, arg);
	if (res)
		return res;
	list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
		res = f(&mnt->mnt, arg);
		if (res)
			return res;
	}
	return 0;
}
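
/*
 * Editor's sketch, not part of the original file: iterate_mounts() stops
 * at the first callback returning non-zero.  A hypothetical counting
 * callback:
 *
 *	static int count_one(struct vfsmount *mnt, void *arg)
 *	{
 *		(*(int *)arg)++;
 *		return 0;	// keep iterating
 *	}
 *	...
 *	int n = 0;
 *	iterate_mounts(count_one, &n, root);
 */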

static void lock_mnt_tree(struct mount *mnt)
{
	struct mount *p;

	for (p = mnt; p; p = next_mnt(p, mnt)) {
		int flags = p->mnt.mnt_flags;
		/* Don't allow unprivileged users to change mount flags */
		flags |= MNT_LOCK_ATIME;

		if (flags & MNT_READONLY)
			flags |= MNT_LOCK_READONLY;

		if (flags & MNT_NODEV)
			flags |= MNT_LOCK_NODEV;

		if (flags & MNT_NOSUID)
			flags |= MNT_LOCK_NOSUID;

		if (flags & MNT_NOEXEC)
			flags |= MNT_LOCK_NOEXEC;
		/* Don't allow unprivileged users to reveal what is under a mount */
		if (list_empty(&p->mnt_expire))
			flags |= MNT_LOCKED;
		p->mnt.mnt_flags = flags;
	}
}

static void cleanup_group_ids(struct mount *mnt, struct mount *end)
{
	struct mount *p;

	for (p = mnt; p != end; p = next_mnt(p, mnt)) {
		if (p->mnt_group_id && !IS_MNT_SHARED(p))
			mnt_release_group_id(p);
	}
}

static int invent_group_ids(struct mount *mnt, bool recurse)
{
	struct mount *p;

	for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
		if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
			int err = mnt_alloc_group_id(p);
			if (err) {
				cleanup_group_ids(mnt, p);
				return err;
			}
		}
	}

	return 0;
}

int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
{
	unsigned int max = READ_ONCE(sysctl_mount_max);
	unsigned int mounts = 0, old, pending, sum;
	struct mount *p;

	for (p = mnt; p; p = next_mnt(p, mnt))
		mounts++;

	old = ns->mounts;
	pending = ns->pending_mounts;
	sum = old + pending;
	if ((old > sum) ||
	    (pending > sum) ||
	    (max < sum) ||
	    (mounts > (max - sum)))
		return -ENOSPC;

	ns->pending_mounts = pending + mounts;
	return 0;
}
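
/*
 * Editor's note, not part of the original file: the checks above are an
 * overflow-safe way of asking "old + pending + mounts <= max".  Worked
 * example with max = 100000, old = 99990, pending = 5: sum = 99995, so
 * attaching mounts = 6 fails because 6 > max - sum = 5.
 */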

/*
 *  @source_mnt : mount tree to be attached
 *  @dest_mnt   : mount on which @source_mnt will be attached
 *  @dest_mp    : the mountpoint on @dest_mnt
 *  @moving     : whether @source_mnt is being moved rather than attached
 *
 *  NOTE: the table below explains the semantics when a source mount
 *  of a given type is attached to a destination mount of a given type.
 * ---------------------------------------------------------------------------
 * |         BIND MOUNT OPERATION                                            |
 * |**************************************************************************
 * | source-->| shared        |       private  |       slave    | unbindable |
 * | dest     |               |                |                |            |
 * |   |      |               |                |                |            |
 * |   v      |               |                |                |            |
 * |**************************************************************************
 * |  shared  | shared (++)   |     shared (+) |     shared(+++)|  invalid   |
 * |          |               |                |                |            |
 * |non-shared| shared (+)    |      private   |      slave (*) |  invalid   |
 * ***************************************************************************
 * A bind operation clones the source mount and mounts the clone on the
 * destination mount.
 *
 * (++)  the cloned mount is propagated to all the mounts in the propagation
 *	 tree of the destination mount and the cloned mount is added to
 *	 the peer group of the destination mount.
 * (+)   the cloned mount is created under the destination mount and is marked
 *       as shared. The cloned mount is added to the peer group of the
 *	 destination mount.
 * (+++) the mount is propagated to all the mounts in the propagation tree
 *       of the destination mount and the cloned mount is made slave
 *	 of the same master as that of the source mount. The cloned mount
 *       is marked as 'shared and slave'.
 * (*)   the cloned mount is made a slave of the same master as that of the
 *	 source mount.
 *
 * ---------------------------------------------------------------------------
 * |         		MOVE MOUNT OPERATION                                 |
 * |**************************************************************************
 * | source-->| shared        |       private  |       slave    | unbindable |
 * | dest     |               |                |                |            |
 * |   |      |               |                |                |            |
 * |   v      |               |                |                |            |
 * |**************************************************************************
 * |  shared  | shared (+)    |     shared (+) |    shared(+++) |  invalid   |
 * |          |               |                |                |            |
 * |non-shared| shared (+*)   |      private   |    slave (*)   | unbindable |
 * ***************************************************************************
 *
 * (+)   the mount is moved to the destination, and is then propagated to
 *	 all the mounts in the propagation tree of the destination mount.
 * (+*)  the mount is moved to the destination.
 * (+++) the mount is moved to the destination and is then propagated to
 *	 all the mounts belonging to the destination mount's propagation tree;
 *	 the mount is marked as 'shared and slave'.
 * (*)	 the mount continues to be a slave of the master of its source mount.
 *
 * If the source mount is a tree, the operations explained above are
 * applied to each mount in the tree.
 * Must be called without spinlocks held, because this function can sleep
 * in allocations.
 */
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			bool moving)
{
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	HLIST_HEAD(tree_list);
	struct mnt_namespace *ns = dest_mnt->mnt_ns;
	struct mountpoint *smp;
	struct mount *child, *p;
	struct hlist_node *n;
	int err;

	/* Preallocate a mountpoint in case the new mounts need
	 * to be tucked under other mounts.
	 */
	smp = get_mountpoint(source_mnt->mnt.mnt_root);
	if (IS_ERR(smp))
		return PTR_ERR(smp);

	/* Is there space to add these mounts to the mount namespace? */
	if (!moving) {
		err = count_mounts(ns, source_mnt);
		if (err)
			goto out;
	}

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		lock_mount_hash();
		if (err)
			goto out_cleanup_ids;
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	} else {
		lock_mount_hash();
	}
	if (moving) {
		unhash_mnt(source_mnt);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		if (source_mnt->mnt_ns) {
			/* move from anon - the caller will destroy */
			list_del_init(&source_mnt->mnt_ns->list);
		}
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt);
	}

	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
		struct mount *q;
		hlist_del_init(&child->mnt_hash);
		q = __lookup_mnt(&child->mnt_parent->mnt,
				 child->mnt_mountpoint);
		if (q)
			mnt_change_mountpoint(child, smp, q);
		/* Notice when we are propagating across user namespaces */
		if (child->mnt_parent->mnt_ns->user_ns != user_ns)
			lock_mnt_tree(child);
		child->mnt.mnt_flags &= ~MNT_LOCKED;
		commit_tree(child);
	}
	put_mountpoint(smp);
	unlock_mount_hash();

	return 0;

 out_cleanup_ids:
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		child->mnt_parent->mnt_ns->pending_mounts = 0;
		umount_tree(child, UMOUNT_SYNC);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
 out:
	ns->pending_mounts = 0;

	read_seqlock_excl(&mount_lock);
	put_mountpoint(smp);
	read_sequnlock_excl(&mount_lock);

	return err;
}

static struct mountpoint *lock_mount(struct path *path)
{
	struct vfsmount *mnt;
	struct dentry *dentry = path->dentry;
retry:
	inode_lock(dentry->d_inode);
	if (unlikely(cant_mount(dentry))) {
		inode_unlock(dentry->d_inode);
		return ERR_PTR(-ENOENT);
	}
	namespace_lock();
	mnt = lookup_mnt(path);
	if (likely(!mnt)) {
		struct mountpoint *mp = get_mountpoint(dentry);
		if (IS_ERR(mp)) {
			namespace_unlock();
			inode_unlock(dentry->d_inode);
			return mp;
		}
		return mp;
	}
	namespace_unlock();
	inode_unlock(path->dentry->d_inode);
	path_put(path);
	path->mnt = mnt;
	dentry = path->dentry = dget(mnt->mnt_root);
	goto retry;
}

static void unlock_mount(struct mountpoint *where)
{
	struct dentry *dentry = where->m_dentry;

	read_seqlock_excl(&mount_lock);
	put_mountpoint(where);
	read_sequnlock_excl(&mount_lock);

	namespace_unlock();
	inode_unlock(dentry->d_inode);
}

static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
{
	if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
		return -EINVAL;

	if (d_is_dir(mp->m_dentry) !=
	      d_is_dir(mnt->mnt.mnt_root))
		return -ENOTDIR;

	return attach_recursive_mnt(mnt, p, mp, false);
}

/*
 * Sanity check the flags to change_mnt_propagation.
 */
static int flags_to_propagation_type(int ms_flags)
{
	int type = ms_flags & ~(MS_REC | MS_SILENT);

	/* Fail if any non-propagation flags are set */
	if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		return 0;
	/* Only one propagation flag should be set */
	if (!is_power_of_2(type))
		return 0;
	return type;
}
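
/*
 * Editor's note, not part of the original file: example inputs and
 * results.  MS_SHARED | MS_REC yields MS_SHARED; MS_SHARED | MS_SLAVE
 * yields 0 (two propagation bits set, not a power of two); MS_BIND
 * yields 0 (not a propagation flag at all).
 */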

/*
 * recursively change the type of the mountpoint.
 */
static int do_change_type(struct path *path, int ms_flags)
{
	struct mount *m;
	struct mount *mnt = real_mount(path->mnt);
	int recurse = ms_flags & MS_REC;
	int type;
	int err = 0;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	type = flags_to_propagation_type(ms_flags);
	if (!type)
		return -EINVAL;

	namespace_lock();
	if (type == MS_SHARED) {
		err = invent_group_ids(mnt, recurse);
		if (err)
			goto out_unlock;
	}

	lock_mount_hash();
	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
		change_mnt_propagation(m, type);
	unlock_mount_hash();

 out_unlock:
	namespace_unlock();
	return err;
}

static struct mount *__do_loopback(struct path *old_path, int recurse)
{
	struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);

	if (IS_MNT_UNBINDABLE(old))
		return mnt;

	if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
		return mnt;

	if (!recurse && has_locked_children(old, old_path->dentry))
		return mnt;

	if (recurse)
		mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
	else
		mnt = clone_mnt(old, old_path->dentry, 0);

	if (!IS_ERR(mnt))
		mnt->mnt.mnt_flags &= ~MNT_LOCKED;

	return mnt;
}

/*
 * do loopback mount.
 */
static int do_loopback(struct path *path, const char *old_name,
				int recurse)
{
	struct path old_path;
	struct mount *mnt = NULL, *parent;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
	if (err)
		return err;

	err = -EINVAL;
	if (mnt_ns_loop(old_path.dentry))
		goto out;

	mp = lock_mount(path);
	if (IS_ERR(mp)) {
		err = PTR_ERR(mp);
		goto out;
	}

	parent = real_mount(path->mnt);
	if (!check_mnt(parent))
		goto out2;

	mnt = __do_loopback(&old_path, recurse);
	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto out2;
	}

	err = graft_tree(mnt, parent, mp);
	if (err) {
		lock_mount_hash();
		umount_tree(mnt, UMOUNT_SYNC);
		unlock_mount_hash();
	}
out2:
	unlock_mount(mp);
out:
	path_put(&old_path);
	return err;
}

static struct file *open_detached_copy(struct path *path, bool recursive)
{
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
	struct mount *mnt, *p;
	struct file *file;

	if (IS_ERR(ns))
		return ERR_CAST(ns);

	namespace_lock();
	mnt = __do_loopback(path, recursive);
	if (IS_ERR(mnt)) {
		namespace_unlock();
		free_mnt_ns(ns);
		return ERR_CAST(mnt);
	}

	lock_mount_hash();
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		p->mnt_ns = ns;
		ns->mounts++;
	}
	ns->root = mnt;
	list_add_tail(&ns->list, &mnt->mnt_list);
	mntget(&mnt->mnt);
	unlock_mount_hash();
	namespace_unlock();

	mntput(path->mnt);
	path->mnt = &mnt->mnt;
	file = dentry_open(path, O_PATH, current_cred());
	if (IS_ERR(file))
		dissolve_on_fput(path->mnt);
	else
		file->f_mode |= FMODE_NEED_UNMOUNT;
	return file;
}

SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
{
	struct file *file;
	struct path path;
	int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
	bool detached = flags & OPEN_TREE_CLONE;
	int error;
	int fd;

	BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);

	if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
		      AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
		      OPEN_TREE_CLOEXEC))
		return -EINVAL;

	if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
		return -EINVAL;

	if (flags & AT_NO_AUTOMOUNT)
		lookup_flags &= ~LOOKUP_AUTOMOUNT;
	if (flags & AT_SYMLINK_NOFOLLOW)
		lookup_flags &= ~LOOKUP_FOLLOW;
	if (flags & AT_EMPTY_PATH)
		lookup_flags |= LOOKUP_EMPTY;

	if (detached && !may_mount())
		return -EPERM;

	fd = get_unused_fd_flags(flags & O_CLOEXEC);
	if (fd < 0)
		return fd;

	error = user_path_at(dfd, filename, lookup_flags, &path);
	if (unlikely(error)) {
		file = ERR_PTR(error);
	} else {
		if (detached)
			file = open_detached_copy(&path, flags & AT_RECURSIVE);
		else
			file = dentry_open(&path, O_PATH, current_cred());
		path_put(&path);
	}
	if (IS_ERR(file)) {
		put_unused_fd(fd);
		return PTR_ERR(file);
	}
	fd_install(fd, file);
	return fd;
}
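
/*
 * Editor's sketch, not part of the original file: typical userspace use
 * of open_tree(2) to grab a detached recursive copy of a subtree, for
 * later attachment with move_mount(2):
 *
 *	int fd = open_tree(AT_FDCWD, "/mnt",
 *			   OPEN_TREE_CLONE | AT_RECURSIVE | OPEN_TREE_CLOEXEC);
 */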

/*
 * Don't allow locked mount flags to be cleared.
 *
 * No locks need to be held here while testing the various MNT_LOCK
 * flags because those flags can never be cleared once they are set.
 */
static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
{
	unsigned int fl = mnt->mnt.mnt_flags;

	if ((fl & MNT_LOCK_READONLY) &&
	    !(mnt_flags & MNT_READONLY))
		return false;

	if ((fl & MNT_LOCK_NODEV) &&
	    !(mnt_flags & MNT_NODEV))
		return false;

	if ((fl & MNT_LOCK_NOSUID) &&
	    !(mnt_flags & MNT_NOSUID))
		return false;

	if ((fl & MNT_LOCK_NOEXEC) &&
	    !(mnt_flags & MNT_NOEXEC))
		return false;

	if ((fl & MNT_LOCK_ATIME) &&
	    ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
		return false;

	return true;
}

static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
{
	bool readonly_request = (mnt_flags & MNT_READONLY);

	if (readonly_request == __mnt_is_readonly(&mnt->mnt))
		return 0;

	if (readonly_request)
		return mnt_make_readonly(mnt);

	mnt->mnt.mnt_flags &= ~MNT_READONLY;
	return 0;
}

static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
{
	mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
	mnt->mnt.mnt_flags = mnt_flags;
	touch_mnt_namespace(mnt->mnt_ns);
}

static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
{
	struct super_block *sb = mnt->mnt_sb;

	if (!__mnt_is_readonly(mnt) &&
	   (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
		char *buf = (char *)__get_free_page(GFP_KERNEL);
		char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);
		struct tm tm;

		time64_to_tm(sb->s_time_max, 0, &tm);

		pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n",
			sb->s_type->name,
			is_mounted(mnt) ? "remounted" : "mounted",
			mntpath,
			tm.tm_year+1900, (unsigned long long)sb->s_time_max);

		free_page((unsigned long)buf);
	}
}

/*
 * Handle reconfiguration of the mountpoint only without alteration of the
 * superblock it refers to.  This is triggered by specifying MS_REMOUNT|MS_BIND
 * to mount(2).
 */
static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
{
	struct super_block *sb = path->mnt->mnt_sb;
	struct mount *mnt = real_mount(path->mnt);
	int ret;

	if (!check_mnt(mnt))
		return -EINVAL;

	if (path->dentry != mnt->mnt.mnt_root)
		return -EINVAL;

	if (!can_change_locked_flags(mnt, mnt_flags))
		return -EPERM;

	/*
	 * We're only checking whether the superblock is read-only not
	 * changing it, so only take down_read(&sb->s_umount).
	 */
	down_read(&sb->s_umount);
	lock_mount_hash();
	ret = change_mount_ro_state(mnt, mnt_flags);
	if (ret == 0)
		set_mount_attributes(mnt, mnt_flags);
	unlock_mount_hash();
	up_read(&sb->s_umount);

	mnt_warn_timestamp_expiry(path, &mnt->mnt);

	return ret;
}

/*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tough luck.
 */
static int do_remount(struct path *path, int ms_flags, int sb_flags,
		      int mnt_flags, void *data)
{
	int err;
	struct super_block *sb = path->mnt->mnt_sb;
	struct mount *mnt = real_mount(path->mnt);
	struct fs_context *fc;

	if (!check_mnt(mnt))
		return -EINVAL;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	if (!can_change_locked_flags(mnt, mnt_flags))
		return -EPERM;

	fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK);
	if (IS_ERR(fc))
		return PTR_ERR(fc);

	fc->oldapi = true;
	err = parse_monolithic_mount_data(fc, data);
	if (!err) {
		down_write(&sb->s_umount);
		err = -EPERM;
		if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
			err = reconfigure_super(fc);
			if (!err) {
				lock_mount_hash();
				set_mount_attributes(mnt, mnt_flags);
				unlock_mount_hash();
			}
		}
		up_write(&sb->s_umount);
	}

	mnt_warn_timestamp_expiry(path, &mnt->mnt);

	put_fs_context(fc);
	return err;
}

static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *p;
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		if (IS_MNT_UNBINDABLE(p))
			return 1;
	}
	return 0;
}

/*
 * Check that there aren't references to earlier/same mount namespaces in the
 * specified subtree.  Such references can act as pins for mount namespaces
 * that aren't checked by the mount-cycle checking code, thereby allowing
 * cycles to be made.
 */
static bool check_for_nsfs_mounts(struct mount *subtree)
{
	struct mount *p;
	bool ret = false;

	lock_mount_hash();
	for (p = subtree; p; p = next_mnt(p, subtree))
		if (mnt_ns_loop(p->mnt.mnt_root))
			goto out;

	ret = true;
out:
	unlock_mount_hash();
	return ret;
}
2704
2705static int do_move_mount(struct path *old_path, struct path *new_path)
2706{
2707 struct mnt_namespace *ns;
2708 struct mount *p;
2709 struct mount *old;
2710 struct mount *parent;
2711 struct mountpoint *mp, *old_mp;
2712 int err;
2713 bool attached;
2714
2715 mp = lock_mount(new_path);
2716 if (IS_ERR(mp))
2717 return PTR_ERR(mp);
2718
2719 old = real_mount(old_path->mnt);
2720 p = real_mount(new_path->mnt);
2721 parent = old->mnt_parent;
2722 attached = mnt_has_parent(old);
2723 old_mp = old->mnt_mp;
2724 ns = old->mnt_ns;
2725
2726 err = -EINVAL;
	/* The mountpoint must be in our namespace. */
2728 if (!check_mnt(p))
2729 goto out;

	/* The thing moved must be mounted... */
2732 if (!is_mounted(&old->mnt))
2733 goto out;

	/* ... and either ours or the root of an anonymous namespace */
2736 if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
2737 goto out;
2738
2739 if (old->mnt.mnt_flags & MNT_LOCKED)
2740 goto out;
2741
2742 if (old_path->dentry != old_path->mnt->mnt_root)
2743 goto out;
2744
2745 if (d_is_dir(new_path->dentry) !=
2746 d_is_dir(old_path->dentry))
2747 goto out;

	/*
	 * Don't move a mount residing in a shared parent.
	 */
2751 if (attached && IS_MNT_SHARED(parent))
2752 goto out;

	/*
	 * Don't move a mount tree containing unbindable mounts to a
	 * destination mount which is shared.
	 */
2757 if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
2758 goto out;
2759 err = -ELOOP;
2760 if (!check_for_nsfs_mounts(old))
2761 goto out;
2762 for (; mnt_has_parent(p); p = p->mnt_parent)
2763 if (p == old)
2764 goto out;
2765
2766 err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
2767 attached);
2768 if (err)
2769 goto out;

	/* if the mount is moved, it should no longer be auto-expired */
2773 list_del_init(&old->mnt_expire);
2774 if (attached)
2775 put_mountpoint(old_mp);
2776out:
2777 unlock_mount(mp);
2778 if (!err) {
2779 if (attached)
2780 mntput_no_expire(parent);
2781 else
2782 free_mnt_ns(ns);
2783 }
2784 return err;
2785}
2786
2787static int do_move_mount_old(struct path *path, const char *old_name)
2788{
2789 struct path old_path;
2790 int err;
2791
2792 if (!old_name || !*old_name)
2793 return -EINVAL;
2794
2795 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
2796 if (err)
2797 return err;
2798
2799 err = do_move_mount(&old_path, path);
2800 path_put(&old_path);
2801 return err;
2802}
2803
/*
 * add a mount into a namespace's mount tree
 */
2807static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
2808 struct path *path, int mnt_flags)
2809{
2810 struct mount *parent = real_mount(path->mnt);
2811
2812 mnt_flags &= ~MNT_INTERNAL_FLAGS;
2813
2814 if (unlikely(!check_mnt(parent))) {
		/* that's acceptable only for automounts done in private ns */
2816 if (!(mnt_flags & MNT_SHRINKABLE))
2817 return -EINVAL;
		/* ... and for those we'd better have mountpoint still alive */
2819 if (!parent->mnt_ns)
2820 return -EINVAL;
2821 }

	/* Refuse the same filesystem on the same mount point */
2824 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
2825 path->mnt->mnt_root == path->dentry)
2826 return -EBUSY;
2827
2828 if (d_is_symlink(newmnt->mnt.mnt_root))
2829 return -EINVAL;
2830
2831 newmnt->mnt.mnt_flags = mnt_flags;
2832 return graft_tree(newmnt, parent, mp);
2833}
2834
2835static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
2836
/*
 * Create a new mount using a superblock configuration and request it
 * be added to the namespace tree.
 */
2841static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
2842 unsigned int mnt_flags)
2843{
2844 struct vfsmount *mnt;
2845 struct mountpoint *mp;
2846 struct super_block *sb = fc->root->d_sb;
2847 int error;
2848
2849 error = security_sb_kern_mount(sb);
2850 if (!error && mount_too_revealing(sb, &mnt_flags))
2851 error = -EPERM;
2852
2853 if (unlikely(error)) {
2854 fc_drop_locked(fc);
2855 return error;
2856 }
2857
2858 up_write(&sb->s_umount);
2859
2860 mnt = vfs_create_mount(fc);
2861 if (IS_ERR(mnt))
2862 return PTR_ERR(mnt);
2863
2864 mnt_warn_timestamp_expiry(mountpoint, mnt);
2865
2866 mp = lock_mount(mountpoint);
2867 if (IS_ERR(mp)) {
2868 mntput(mnt);
2869 return PTR_ERR(mp);
2870 }
2871 error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags);
2872 unlock_mount(mp);
2873 if (error < 0)
2874 mntput(mnt);
2875 return error;
2876}
2877
/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
2882static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
2883 int mnt_flags, const char *name, void *data)
2884{
2885 struct file_system_type *type;
2886 struct fs_context *fc;
2887 const char *subtype = NULL;
2888 int err = 0;
2889
2890 if (!fstype)
2891 return -EINVAL;
2892
2893 type = get_fs_type(fstype);
2894 if (!type)
2895 return -ENODEV;
2896
2897 if (type->fs_flags & FS_HAS_SUBTYPE) {
2898 subtype = strchr(fstype, '.');
2899 if (subtype) {
2900 subtype++;
2901 if (!*subtype) {
2902 put_filesystem(type);
2903 return -EINVAL;
2904 }
2905 }
2906 }
2907
2908 fc = fs_context_for_mount(type, sb_flags);
2909 put_filesystem(type);
2910 if (IS_ERR(fc))
2911 return PTR_ERR(fc);
2912
2913 if (subtype)
2914 err = vfs_parse_fs_string(fc, "subtype",
2915 subtype, strlen(subtype));
2916 if (!err && name)
2917 err = vfs_parse_fs_string(fc, "source", name, strlen(name));
2918 if (!err)
2919 err = parse_monolithic_mount_data(fc, data);
2920 if (!err && !mount_capable(fc))
2921 err = -EPERM;
2922 if (!err)
2923 err = vfs_get_tree(fc);
2924 if (!err)
2925 err = do_new_mount_fc(fc, path, mnt_flags);
2926
2927 put_fs_context(fc);
2928 return err;
2929}
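
/*
 * For illustration (not from the original source): a classic mount(2) call
 * such as
 *
 *	mount("/dev/sda1", "/mnt", "ext4", MS_NOSUID, "errors=remount-ro");
 *
 * arrives here with fstype "ext4", sb_flags derived from the MS_* bits and
 * the option string passed through as monolithic mount data.
 */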
2930
2931int finish_automount(struct vfsmount *m, struct path *path)
2932{
2933 struct dentry *dentry = path->dentry;
2934 struct mountpoint *mp;
2935 struct mount *mnt;
2936 int err;
2937
2938 if (!m)
2939 return 0;
2940 if (IS_ERR(m))
2941 return PTR_ERR(m);
2942
2943 mnt = real_mount(m);
2944
	/* The new mount record should have at least 2 refs to prevent it being
	 * expired before we get a chance to add it
	 */
2947 BUG_ON(mnt_get_count(mnt) < 2);
2948
2949 if (m->mnt_sb == path->mnt->mnt_sb &&
2950 m->mnt_root == dentry) {
2951 err = -ELOOP;
2952 goto discard;
2953 }
2954
	/*
	 * we don't want to use lock_mount() - in this case finding something
	 * that overmounts our mountpoint means "quietly drop what we've
	 * got", not "try to mount it on top".
	 */
2960 inode_lock(dentry->d_inode);
2961 namespace_lock();
2962 if (unlikely(cant_mount(dentry))) {
2963 err = -ENOENT;
2964 goto discard_locked;
2965 }
2966 rcu_read_lock();
2967 if (unlikely(__lookup_mnt(path->mnt, dentry))) {
2968 rcu_read_unlock();
2969 err = 0;
2970 goto discard_locked;
2971 }
2972 rcu_read_unlock();
2973 mp = get_mountpoint(dentry);
2974 if (IS_ERR(mp)) {
2975 err = PTR_ERR(mp);
2976 goto discard_locked;
2977 }
2978
2979 err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
2980 unlock_mount(mp);
2981 if (unlikely(err))
2982 goto discard;
2983 mntput(m);
2984 return 0;
2985
2986discard_locked:
2987 namespace_unlock();
2988 inode_unlock(dentry->d_inode);
2989discard:
	/* remove m from any expiration list it may be on */
2991 if (!list_empty(&mnt->mnt_expire)) {
2992 namespace_lock();
2993 list_del_init(&mnt->mnt_expire);
2994 namespace_unlock();
2995 }
2996 mntput(m);
2997 mntput(m);
2998 return err;
2999}
3000
/**
 * mnt_set_expiry - Put a mount on an expiration list
 * @mnt: The mount to put on the list.
 * @expiry_list: The list to add the mount to.
 */
3006void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
3007{
3008 namespace_lock();
3009
3010 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
3011
3012 namespace_unlock();
3013}
3014EXPORT_SYMBOL(mnt_set_expiry);
3015
/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here
 */
3021void mark_mounts_for_expiry(struct list_head *mounts)
3022{
3023 struct mount *mnt, *next;
3024 LIST_HEAD(graveyard);
3025
3026 if (list_empty(mounts))
3027 return;
3028
3029 namespace_lock();
3030 lock_mount_hash();

	/* extract from the expiration list every vfsmount that matches the
	 * following criteria:
	 * - only referenced by its parent vfsmount
	 * - still marked for expiry (marked on the last call here; marks are
	 *   cleared by mntput())
	 */
3038 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
3039 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
3040 propagate_mount_busy(mnt, 1))
3041 continue;
3042 list_move(&mnt->mnt_expire, &graveyard);
3043 }
3044 while (!list_empty(&graveyard)) {
3045 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
3046 touch_mnt_namespace(mnt->mnt_ns);
3047 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3048 }
3049 unlock_mount_hash();
3050 namespace_unlock();
3051}
3052
3053EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
3054
/*
 * Ripoff of 'select_parent()'
 *
 * search the list of submounts for a given mountpoint, and move any
 * shrinkable submounts to the 'graveyard' list.
 */
3061static int select_submounts(struct mount *parent, struct list_head *graveyard)
3062{
3063 struct mount *this_parent = parent;
3064 struct list_head *next;
3065 int found = 0;
3066
3067repeat:
3068 next = this_parent->mnt_mounts.next;
3069resume:
3070 while (next != &this_parent->mnt_mounts) {
3071 struct list_head *tmp = next;
3072 struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
3073
3074 next = tmp->next;
3075 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
3076 continue;

		/* Descend a level if the mnt_mounts list is non-empty. */
3080 if (!list_empty(&mnt->mnt_mounts)) {
3081 this_parent = mnt;
3082 goto repeat;
3083 }
3084
3085 if (!propagate_mount_busy(mnt, 1)) {
3086 list_move_tail(&mnt->mnt_expire, graveyard);
3087 found++;
3088 }
3089 }

	/* All done at this level ... ascend and resume the search */
3093 if (this_parent != parent) {
3094 next = this_parent->mnt_child.next;
3095 this_parent = this_parent->mnt_parent;
3096 goto resume;
3097 }
3098 return found;
3099}
3100
/*
 * process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint
 *
 * mount_lock must be held for write
 */
3107static void shrink_submounts(struct mount *mnt)
3108{
3109 LIST_HEAD(graveyard);
3110 struct mount *m;
3111
	/* extract submounts of 'mnt' from the expiration list */
3113 while (select_submounts(mnt, &graveyard)) {
3114 while (!list_empty(&graveyard)) {
3115 m = list_first_entry(&graveyard, struct mount,
3116 mnt_expire);
3117 touch_mnt_namespace(m->mnt_ns);
3118 umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3119 }
3120 }
3121}
3122
3123static void *copy_mount_options(const void __user * data)
3124{
3125 char *copy;
3126 unsigned left, offset;
3127
3128 if (!data)
3129 return NULL;
3130
3131 copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
3132 if (!copy)
3133 return ERR_PTR(-ENOMEM);
3134
3135 left = copy_from_user(copy, data, PAGE_SIZE);

	/*
	 * Not all architectures have an exact copy_from_user(). Resort to
	 * byte at a time.
	 */
3141 offset = PAGE_SIZE - left;
3142 while (left) {
3143 char c;
3144 if (get_user(c, (const char __user *)data + offset))
3145 break;
3146 copy[offset] = c;
3147 left--;
3148 offset++;
3149 }
3150
3151 if (left == PAGE_SIZE) {
3152 kfree(copy);
3153 return ERR_PTR(-EFAULT);
3154 }
3155
3156 return copy;
3157}
3158
3159static char *copy_mount_string(const void __user *data)
3160{
3161 return data ? strndup_user(data, PATH_MAX) : NULL;
3162}
3163
/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * data.  Some filesystems may not support these flags.
 *
 * When changes are being made to devices it is the caller's responsibility
 * to ensure that the change is compatible with all previously mounted devices.
 *
 * Pre-0.97 versions of mount() didn't have a flags word.  When the
 * flags word was introduced its top half was required to have the
 * magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */
3178int path_mount(const char *dev_name, struct path *path,
3179 const char *type_page, unsigned long flags, void *data_page)
3180{
3181 unsigned int mnt_flags = 0, sb_flags;
3182 int ret;
3183
	/* Discard magic */
3185 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
3186 flags &= ~MS_MGC_MSK;
3187
	/* Basic sanity checks */
3189 if (data_page)
3190 ((char *)data_page)[PAGE_SIZE - 1] = 0;
3191
3192 if (flags & MS_NOUSER)
3193 return -EINVAL;
3194
3195 ret = security_sb_mount(dev_name, path, type_page, flags, data_page);
3196 if (ret)
3197 return ret;
3198 if (!may_mount())
3199 return -EPERM;
3200 if ((flags & SB_MANDLOCK) && !may_mandlock())
3201 return -EPERM;
3202
	/* Default to relatime unless overridden */
3204 if (!(flags & MS_NOATIME))
3205 mnt_flags |= MNT_RELATIME;
3206
	/* Separate the per-mountpoint flags */
3208 if (flags & MS_NOSUID)
3209 mnt_flags |= MNT_NOSUID;
3210 if (flags & MS_NODEV)
3211 mnt_flags |= MNT_NODEV;
3212 if (flags & MS_NOEXEC)
3213 mnt_flags |= MNT_NOEXEC;
3214 if (flags & MS_NOATIME)
3215 mnt_flags |= MNT_NOATIME;
3216 if (flags & MS_NODIRATIME)
3217 mnt_flags |= MNT_NODIRATIME;
3218 if (flags & MS_STRICTATIME)
3219 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
3220 if (flags & MS_RDONLY)
3221 mnt_flags |= MNT_READONLY;
3222 if (flags & MS_NOSYMFOLLOW)
3223 mnt_flags |= MNT_NOSYMFOLLOW;
3224
	/* The default atime for remount is preservation */
3226 if ((flags & MS_REMOUNT) &&
3227 ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
3228 MS_STRICTATIME)) == 0)) {
3229 mnt_flags &= ~MNT_ATIME_MASK;
3230 mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK;
3231 }
3232
3233 sb_flags = flags & (SB_RDONLY |
3234 SB_SYNCHRONOUS |
3235 SB_MANDLOCK |
3236 SB_DIRSYNC |
3237 SB_SILENT |
3238 SB_POSIXACL |
3239 SB_LAZYTIME |
3240 SB_I_VERSION);
3241
3242 if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
3243 return do_reconfigure_mnt(path, mnt_flags);
3244 if (flags & MS_REMOUNT)
3245 return do_remount(path, flags, sb_flags, mnt_flags, data_page);
3246 if (flags & MS_BIND)
3247 return do_loopback(path, dev_name, flags & MS_REC);
3248 if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
3249 return do_change_type(path, flags);
3250 if (flags & MS_MOVE)
3251 return do_move_mount_old(path, dev_name);
3252
3253 return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name,
3254 data_page);
3255}
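
/*
 * For illustration (not from the original source): the dispatch above maps
 * mount(2) flag combinations onto helpers roughly as follows:
 *
 *	MS_REMOUNT | MS_BIND         -> do_reconfigure_mnt()
 *	MS_REMOUNT                   -> do_remount()
 *	MS_BIND                      -> do_loopback()
 *	MS_SHARED / MS_PRIVATE /
 *	MS_SLAVE / MS_UNBINDABLE     -> do_change_type()
 *	MS_MOVE                      -> do_move_mount_old()
 *	(none of the above)          -> do_new_mount()
 */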
3256
3257long do_mount(const char *dev_name, const char __user *dir_name,
3258 const char *type_page, unsigned long flags, void *data_page)
3259{
3260 struct path path;
3261 int ret;
3262
3263 ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path);
3264 if (ret)
3265 return ret;
3266 ret = path_mount(dev_name, &path, type_page, flags, data_page);
3267 path_put(&path);
3268 return ret;
3269}
3270
3271static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
3272{
3273 return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
3274}
3275
3276static void dec_mnt_namespaces(struct ucounts *ucounts)
3277{
3278 dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
3279}
3280
3281static void free_mnt_ns(struct mnt_namespace *ns)
3282{
3283 if (!is_anon_ns(ns))
3284 ns_free_inum(&ns->ns);
3285 dec_mnt_namespaces(ns->ucounts);
3286 put_user_ns(ns->user_ns);
3287 kfree(ns);
3288}
3289
/*
 * Assign a sequence number so we can detect when we attempt to bind
 * mount a reference to an older mount namespace into the current
 * mount namespace, preventing reference counting loops.  A 64bit
 * number incrementing at 10Ghz will take 12,427 years to wrap which
 * is effectively never, so we can ignore the possibility.
 */
3297static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
3298
3299static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
3300{
3301 struct mnt_namespace *new_ns;
3302 struct ucounts *ucounts;
3303 int ret;
3304
3305 ucounts = inc_mnt_namespaces(user_ns);
3306 if (!ucounts)
3307 return ERR_PTR(-ENOSPC);
3308
3309 new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
3310 if (!new_ns) {
3311 dec_mnt_namespaces(ucounts);
3312 return ERR_PTR(-ENOMEM);
3313 }
3314 if (!anon) {
3315 ret = ns_alloc_inum(&new_ns->ns);
3316 if (ret) {
3317 kfree(new_ns);
3318 dec_mnt_namespaces(ucounts);
3319 return ERR_PTR(ret);
3320 }
3321 }
3322 new_ns->ns.ops = &mntns_operations;
3323 if (!anon)
3324 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
3325 refcount_set(&new_ns->ns.count, 1);
3326 INIT_LIST_HEAD(&new_ns->list);
3327 init_waitqueue_head(&new_ns->poll);
3328 spin_lock_init(&new_ns->ns_lock);
3329 new_ns->user_ns = get_user_ns(user_ns);
3330 new_ns->ucounts = ucounts;
3331 return new_ns;
3332}
3333
3334__latent_entropy
3335struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
3336 struct user_namespace *user_ns, struct fs_struct *new_fs)
3337{
3338 struct mnt_namespace *new_ns;
3339 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
3340 struct mount *p, *q;
3341 struct mount *old;
3342 struct mount *new;
3343 int copy_flags;
3344
3345 BUG_ON(!ns);
3346
3347 if (likely(!(flags & CLONE_NEWNS))) {
3348 get_mnt_ns(ns);
3349 return ns;
3350 }
3351
3352 old = ns->root;
3353
3354 new_ns = alloc_mnt_ns(user_ns, false);
3355 if (IS_ERR(new_ns))
3356 return new_ns;
3357
3358 namespace_lock();
3359
3360 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
3361 if (user_ns != ns->user_ns)
3362 copy_flags |= CL_SHARED_TO_SLAVE;
3363 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
3364 if (IS_ERR(new)) {
3365 namespace_unlock();
3366 free_mnt_ns(new_ns);
3367 return ERR_CAST(new);
3368 }
3369 if (user_ns != ns->user_ns) {
3370 lock_mount_hash();
3371 lock_mnt_tree(new);
3372 unlock_mount_hash();
3373 }
3374 new_ns->root = new;
3375 list_add_tail(&new_ns->list, &new->mnt_list);

	/*
	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
	 * as belonging to new namespace.  We have already acquired a private
	 * fs_struct, so tsk->fs->lock is not needed.
	 */
3382 p = old;
3383 q = new;
3384 while (p) {
3385 q->mnt_ns = new_ns;
3386 new_ns->mounts++;
3387 if (new_fs) {
3388 if (&p->mnt == new_fs->root.mnt) {
3389 new_fs->root.mnt = mntget(&q->mnt);
3390 rootmnt = &p->mnt;
3391 }
3392 if (&p->mnt == new_fs->pwd.mnt) {
3393 new_fs->pwd.mnt = mntget(&q->mnt);
3394 pwdmnt = &p->mnt;
3395 }
3396 }
3397 p = next_mnt(p, old);
3398 q = next_mnt(q, new);
3399 if (!q)
3400 break;
3401 while (p->mnt.mnt_root != q->mnt.mnt_root)
3402 p = next_mnt(p, old);
3403 }
3404 namespace_unlock();
3405
3406 if (rootmnt)
3407 mntput(rootmnt);
3408 if (pwdmnt)
3409 mntput(pwdmnt);
3410
3411 return new_ns;
3412}
3413
3414struct dentry *mount_subtree(struct vfsmount *m, const char *name)
3415{
3416 struct mount *mnt = real_mount(m);
3417 struct mnt_namespace *ns;
3418 struct super_block *s;
3419 struct path path;
3420 int err;
3421
3422 ns = alloc_mnt_ns(&init_user_ns, true);
3423 if (IS_ERR(ns)) {
3424 mntput(m);
3425 return ERR_CAST(ns);
3426 }
3427 mnt->mnt_ns = ns;
3428 ns->root = mnt;
3429 ns->mounts++;
3430 list_add(&mnt->mnt_list, &ns->list);
3431
3432 err = vfs_path_lookup(m->mnt_root, m,
3433 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
3434
3435 put_mnt_ns(ns);
3436
3437 if (err)
3438 return ERR_PTR(err);
3439
	/* trade a vfsmount reference for active sb one */
3441 s = path.mnt->mnt_sb;
3442 atomic_inc(&s->s_active);
3443 mntput(path.mnt);
	/* lock the sucker */
3445 down_write(&s->s_umount);
	/* ... and return the root of (sub)tree on it */
3447 return path.dentry;
3448}
3449EXPORT_SYMBOL(mount_subtree);
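
/*
 * For illustration (not from the original source): a filesystem that wants
 * the caller to land on an interior directory of a freshly mounted tree can
 * do
 *
 *	mnt = vfs_kern_mount(type, flags, name, data);
 *	dentry = mount_subtree(mnt, "/some/subdir");
 *
 * mount_subtree() consumes the vfsmount reference and hands back the
 * looked-up dentry with an active superblock reference and s_umount held
 * for write.
 */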
3450
3451SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
3452 char __user *, type, unsigned long, flags, void __user *, data)
3453{
3454 int ret;
3455 char *kernel_type;
3456 char *kernel_dev;
3457 void *options;
3458
3459 kernel_type = copy_mount_string(type);
3460 ret = PTR_ERR(kernel_type);
3461 if (IS_ERR(kernel_type))
3462 goto out_type;
3463
3464 kernel_dev = copy_mount_string(dev_name);
3465 ret = PTR_ERR(kernel_dev);
3466 if (IS_ERR(kernel_dev))
3467 goto out_dev;
3468
3469 options = copy_mount_options(data);
3470 ret = PTR_ERR(options);
3471 if (IS_ERR(options))
3472 goto out_data;
3473
3474 ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
3475
3476 kfree(options);
3477out_data:
3478 kfree(kernel_dev);
3479out_dev:
3480 kfree(kernel_type);
3481out_type:
3482 return ret;
3483}
3484
3485#define FSMOUNT_VALID_FLAGS \
3486 (MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV | \
3487 MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME | \
3488 MOUNT_ATTR_NOSYMFOLLOW)
3489
3490#define MOUNT_SETATTR_VALID_FLAGS (FSMOUNT_VALID_FLAGS | MOUNT_ATTR_IDMAP)
3491
3492#define MOUNT_SETATTR_PROPAGATION_FLAGS \
3493 (MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED)
3494
3495static unsigned int attr_flags_to_mnt_flags(u64 attr_flags)
3496{
3497 unsigned int mnt_flags = 0;
3498
3499 if (attr_flags & MOUNT_ATTR_RDONLY)
3500 mnt_flags |= MNT_READONLY;
3501 if (attr_flags & MOUNT_ATTR_NOSUID)
3502 mnt_flags |= MNT_NOSUID;
3503 if (attr_flags & MOUNT_ATTR_NODEV)
3504 mnt_flags |= MNT_NODEV;
3505 if (attr_flags & MOUNT_ATTR_NOEXEC)
3506 mnt_flags |= MNT_NOEXEC;
3507 if (attr_flags & MOUNT_ATTR_NODIRATIME)
3508 mnt_flags |= MNT_NODIRATIME;
3509 if (attr_flags & MOUNT_ATTR_NOSYMFOLLOW)
3510 mnt_flags |= MNT_NOSYMFOLLOW;
3511
3512 return mnt_flags;
3513}
3514
/*
 * Create a kernel mount representation for a new, prepared superblock
 * (specified by fs_fd) and attach to an open_tree-like file descriptor.
 */
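
/*
 * For illustration (not from the original source): the sequence that reaches
 * this syscall from userspace is roughly
 *
 *	fd = fsopen("ext4", FSOPEN_CLOEXEC);
 *	fsconfig(fd, FSCONFIG_SET_STRING, "source", "/dev/sda1", 0);
 *	fsconfig(fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
 *	mfd = fsmount(fd, FSMOUNT_CLOEXEC, MOUNT_ATTR_RDONLY);
 *	move_mount(mfd, "", AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH);
 */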
3519SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
3520 unsigned int, attr_flags)
3521{
3522 struct mnt_namespace *ns;
3523 struct fs_context *fc;
3524 struct file *file;
3525 struct path newmount;
3526 struct mount *mnt;
3527 struct fd f;
3528 unsigned int mnt_flags = 0;
3529 long ret;
3530
3531 if (!may_mount())
3532 return -EPERM;
3533
3534 if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
3535 return -EINVAL;
3536
3537 if (attr_flags & ~FSMOUNT_VALID_FLAGS)
3538 return -EINVAL;
3539
3540 mnt_flags = attr_flags_to_mnt_flags(attr_flags);
3541
3542 switch (attr_flags & MOUNT_ATTR__ATIME) {
3543 case MOUNT_ATTR_STRICTATIME:
3544 break;
3545 case MOUNT_ATTR_NOATIME:
3546 mnt_flags |= MNT_NOATIME;
3547 break;
3548 case MOUNT_ATTR_RELATIME:
3549 mnt_flags |= MNT_RELATIME;
3550 break;
3551 default:
3552 return -EINVAL;
3553 }
3554
3555 f = fdget(fs_fd);
3556 if (!f.file)
3557 return -EBADF;
3558
3559 ret = -EINVAL;
3560 if (f.file->f_op != &fscontext_fops)
3561 goto err_fsfd;
3562
3563 fc = f.file->private_data;
3564
3565 ret = mutex_lock_interruptible(&fc->uapi_mutex);
3566 if (ret < 0)
3567 goto err_fsfd;
3568
	/* There must be a valid superblock or we can't mount it */
3570 ret = -EINVAL;
3571 if (!fc->root)
3572 goto err_unlock;
3573
3574 ret = -EPERM;
3575 if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
3576 pr_warn("VFS: Mount too revealing\n");
3577 goto err_unlock;
3578 }
3579
3580 ret = -EBUSY;
3581 if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
3582 goto err_unlock;
3583
3584 ret = -EPERM;
3585 if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
3586 goto err_unlock;
3587
3588 newmount.mnt = vfs_create_mount(fc);
3589 if (IS_ERR(newmount.mnt)) {
3590 ret = PTR_ERR(newmount.mnt);
3591 goto err_unlock;
3592 }
3593 newmount.dentry = dget(fc->root);
3594 newmount.mnt->mnt_flags = mnt_flags;

	/* We've done the mount bit - now move the file context into more or
	 * less the same state as if we'd done an fspick().  We don't want to
	 * do any memory allocation or anything like that at this point as we
	 * don't want to have to handle any errors incurred.
	 */
3601 vfs_clean_context(fc);
3602
3603 ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
3604 if (IS_ERR(ns)) {
3605 ret = PTR_ERR(ns);
3606 goto err_path;
3607 }
3608 mnt = real_mount(newmount.mnt);
3609 mnt->mnt_ns = ns;
3610 ns->root = mnt;
3611 ns->mounts = 1;
3612 list_add(&mnt->mnt_list, &ns->list);
3613 mntget(newmount.mnt);

	/* Attach to an apparent O_PATH fd with a note that we need to unmount
	 * it, not just simply put it.
	 */
3618 file = dentry_open(&newmount, O_PATH, fc->cred);
3619 if (IS_ERR(file)) {
3620 dissolve_on_fput(newmount.mnt);
3621 ret = PTR_ERR(file);
3622 goto err_path;
3623 }
3624 file->f_mode |= FMODE_NEED_UNMOUNT;
3625
3626 ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
3627 if (ret >= 0)
3628 fd_install(ret, file);
3629 else
3630 fput(file);
3631
3632err_path:
3633 path_put(&newmount);
3634err_unlock:
3635 mutex_unlock(&fc->uapi_mutex);
3636err_fsfd:
3637 fdput(f);
3638 return ret;
3639}
3640
/*
 * Move a mount from one place to another.  In combination with
 * fsopen()/fsmount() this is used to install a new mount and in combination
 * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy
 * a mount subtree.
 *
 * Note the flags value is a combination of MOVE_MOUNT_* flags.
 */
3649SYSCALL_DEFINE5(move_mount,
3650 int, from_dfd, const char __user *, from_pathname,
3651 int, to_dfd, const char __user *, to_pathname,
3652 unsigned int, flags)
3653{
3654 struct path from_path, to_path;
3655 unsigned int lflags;
3656 int ret = 0;
3657
3658 if (!may_mount())
3659 return -EPERM;
3660
3661 if (flags & ~MOVE_MOUNT__MASK)
3662 return -EINVAL;
3663
	/* If someone gives a pathname, they aren't permitted to move
	 * from an fd that requires unmount as we can't get at the flag
	 * to clear it afterwards.
	 */
3668 lflags = 0;
3669 if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3670 if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3671 if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3672
3673 ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
3674 if (ret < 0)
3675 return ret;
3676
3677 lflags = 0;
3678 if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3679 if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3680 if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3681
3682 ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
3683 if (ret < 0)
3684 goto out_from;
3685
3686 ret = security_move_mount(&from_path, &to_path);
3687 if (ret < 0)
3688 goto out_to;
3689
3690 ret = do_move_mount(&from_path, &to_path);
3691
3692out_to:
3693 path_put(&to_path);
3694out_from:
3695 path_put(&from_path);
3696 return ret;
3697}
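
/*
 * For illustration (not from the original source): copying a mount subtree
 * with the new API uses a detached clone plus move_mount():
 *
 *	fd = open_tree(AT_FDCWD, "/src", OPEN_TREE_CLONE | AT_RECURSIVE);
 *	move_mount(fd, "", AT_FDCWD, "/dst", MOVE_MOUNT_F_EMPTY_PATH);
 */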
3698
/*
 * Return true if path is reachable from root
 *
 * namespace_sem or mount_lock is held
 */
3704bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
3705 const struct path *root)
3706{
3707 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
3708 dentry = mnt->mnt_mountpoint;
3709 mnt = mnt->mnt_parent;
3710 }
3711 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
3712}
3713
3714bool path_is_under(const struct path *path1, const struct path *path2)
3715{
3716 bool res;
3717 read_seqlock_excl(&mount_lock);
3718 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
3719 read_sequnlock_excl(&mount_lock);
3720 return res;
3721}
3722EXPORT_SYMBOL(path_is_under);
3723
/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root the new root file system of the current process, and sets
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root. The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root. No other
 * file system may be mounted on put_old. After all, new_root is a mountpoint.
 *
 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
 * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
 * in this situation.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
 *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
 *    first.
 */
3749SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3750 const char __user *, put_old)
3751{
3752 struct path new, old, root;
3753 struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
3754 struct mountpoint *old_mp, *root_mp;
3755 int error;
3756
3757 if (!may_mount())
3758 return -EPERM;
3759
3760 error = user_path_at(AT_FDCWD, new_root,
3761 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new);
3762 if (error)
3763 goto out0;
3764
3765 error = user_path_at(AT_FDCWD, put_old,
3766 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old);
3767 if (error)
3768 goto out1;
3769
3770 error = security_sb_pivotroot(&old, &new);
3771 if (error)
3772 goto out2;
3773
3774 get_fs_root(current->fs, &root);
3775 old_mp = lock_mount(&old);
3776 error = PTR_ERR(old_mp);
3777 if (IS_ERR(old_mp))
3778 goto out3;
3779
3780 error = -EINVAL;
3781 new_mnt = real_mount(new.mnt);
3782 root_mnt = real_mount(root.mnt);
3783 old_mnt = real_mount(old.mnt);
3784 ex_parent = new_mnt->mnt_parent;
3785 root_parent = root_mnt->mnt_parent;
3786 if (IS_MNT_SHARED(old_mnt) ||
3787 IS_MNT_SHARED(ex_parent) ||
3788 IS_MNT_SHARED(root_parent))
3789 goto out4;
3790 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
3791 goto out4;
3792 if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
3793 goto out4;
3794 error = -ENOENT;
3795 if (d_unlinked(new.dentry))
3796 goto out4;
3797 error = -EBUSY;
3798 if (new_mnt == root_mnt || old_mnt == root_mnt)
3799 goto out4;
3800 error = -EINVAL;
3801 if (root.mnt->mnt_root != root.dentry)
3802 goto out4;
3803 if (!mnt_has_parent(root_mnt))
3804 goto out4;
3805 if (new.mnt->mnt_root != new.dentry)
3806 goto out4;
3807 if (!mnt_has_parent(new_mnt))
3808 goto out4;
	/* make sure we can reach put_old from new_root */
3810 if (!is_path_reachable(old_mnt, old.dentry, &new))
3811 goto out4;
	/* make certain new is below the root */
3813 if (!is_path_reachable(new_mnt, new.dentry, &root))
3814 goto out4;
3815 lock_mount_hash();
3816 umount_mnt(new_mnt);
3817 root_mp = unhash_mnt(root_mnt);
3818 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
3819 new_mnt->mnt.mnt_flags |= MNT_LOCKED;
3820 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
3821 }
	/* mount old root on put_old */
3823 attach_mnt(root_mnt, old_mnt, old_mp);
	/* mount new_root on / */
3825 attach_mnt(new_mnt, root_parent, root_mp);
3826 mnt_add_count(root_parent, -1);
3827 touch_mnt_namespace(current->nsproxy->mnt_ns);
	/* A moved mount should not expire automatically */
3829 list_del_init(&new_mnt->mnt_expire);
3830 put_mountpoint(root_mp);
3831 unlock_mount_hash();
3832 chroot_fs_refs(&root, &new);
3833 error = 0;
3834out4:
3835 unlock_mount(old_mp);
3836 if (!error)
3837 mntput_no_expire(ex_parent);
3838out3:
3839 path_put(&root);
3840out2:
3841 path_put(&old);
3842out1:
3843 path_put(&new);
3844out0:
3845 return error;
3846}
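
/*
 * For illustration (not from the original source): the common container
 * idiom, with the current directory already on the new root:
 *
 *	chdir("/new_root");
 *	pivot_root(".", ".");
 *	umount2(".", MNT_DETACH);
 *
 * which stacks the old root on top of the new one and then detaches it,
 * avoiding the need for a dedicated put_old directory.
 */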
3847
3848static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
3849{
3850 unsigned int flags = mnt->mnt.mnt_flags;

	/* flags to clear */
3853 flags &= ~kattr->attr_clr;
	/* flags to raise */
3855 flags |= kattr->attr_set;
3856
3857 return flags;
3858}
3859
3860static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
3861{
3862 struct vfsmount *m = &mnt->mnt;
3863
3864 if (!kattr->mnt_userns)
3865 return 0;

	/*
	 * Once a mount has been idmapped we don't allow it to change its
	 * mapping. It makes things simpler and callers can just create
	 * another bind-mount they can idmap if they want to.
	 */
3872 if (mnt_user_ns(m) != &init_user_ns)
3873 return -EPERM;

	/* The underlying filesystem doesn't support idmapped mounts yet. */
3876 if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
3877 return -EINVAL;
3878
	/* Don't yet support filesystems mountable in user namespaces. */
3880 if (m->mnt_sb->s_user_ns != &init_user_ns)
3881 return -EINVAL;
3882
	/* We're not controlling the superblock. */
3884 if (!capable(CAP_SYS_ADMIN))
3885 return -EPERM;
3886
	/* Mount has already been visible in the filesystem hierarchy. */
3888 if (!is_anon_ns(mnt->mnt_ns))
3889 return -EINVAL;
3890
3891 return 0;
3892}
3893
3894static struct mount *mount_setattr_prepare(struct mount_kattr *kattr,
3895 struct mount *mnt, int *err)
3896{
3897 struct mount *m = mnt, *last = NULL;
3898
3899 if (!is_mounted(&m->mnt)) {
3900 *err = -EINVAL;
3901 goto out;
3902 }
3903
3904 if (!(mnt_has_parent(m) ? check_mnt(m) : is_anon_ns(m->mnt_ns))) {
3905 *err = -EINVAL;
3906 goto out;
3907 }
3908
3909 do {
3910 unsigned int flags;
3911
3912 flags = recalc_flags(kattr, m);
3913 if (!can_change_locked_flags(m, flags)) {
3914 *err = -EPERM;
3915 goto out;
3916 }
3917
3918 *err = can_idmap_mount(kattr, m);
3919 if (*err)
3920 goto out;
3921
3922 last = m;
3923
3924 if ((kattr->attr_set & MNT_READONLY) &&
3925 !(m->mnt.mnt_flags & MNT_READONLY)) {
3926 *err = mnt_hold_writers(m);
3927 if (*err)
3928 goto out;
3929 }
3930 } while (kattr->recurse && (m = next_mnt(m, mnt)));
3931
3932out:
3933 return last;
3934}
3935
3936static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
3937{
3938 struct user_namespace *mnt_userns;
3939
3940 if (!kattr->mnt_userns)
3941 return;
3942
3943 mnt_userns = get_user_ns(kattr->mnt_userns);
3944
3945 smp_store_release(&mnt->mnt.mnt_userns, mnt_userns);
3946}
3947
3948static void mount_setattr_commit(struct mount_kattr *kattr,
3949 struct mount *mnt, struct mount *last,
3950 int err)
3951{
3952 struct mount *m = mnt;
3953
3954 do {
3955 if (!err) {
3956 unsigned int flags;
3957
3958 do_idmap_mount(kattr, m);
3959 flags = recalc_flags(kattr, m);
3960 WRITE_ONCE(m->mnt.mnt_flags, flags);
3961 }

		/*
		 * We either set MNT_READONLY above so make it visible
		 * before ~MNT_WRITE_HOLD or we failed to recursively
		 * apply mount options.
		 */
3968 if ((kattr->attr_set & MNT_READONLY) &&
3969 (m->mnt.mnt_flags & MNT_WRITE_HOLD))
3970 mnt_unhold_writers(m);
3971
3972 if (!err && kattr->propagation)
3973 change_mnt_propagation(m, kattr->propagation);

		/*
		 * On failure, only cleanup until we found the first mount
		 * we failed to handle.
		 */
3979 if (err && m == last)
3980 break;
3981 } while (kattr->recurse && (m = next_mnt(m, mnt)));
3982
3983 if (!err)
3984 touch_mnt_namespace(mnt->mnt_ns);
3985}
3986
3987static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
3988{
3989 struct mount *mnt = real_mount(path->mnt), *last = NULL;
3990 int err = 0;
3991
3992 if (path->dentry != mnt->mnt.mnt_root)
3993 return -EINVAL;
3994
3995 if (kattr->propagation) {
		/*
		 * Only take namespace_lock() if we're actually changing
		 * propagation.
		 */
4000 namespace_lock();
4001 if (kattr->propagation == MS_SHARED) {
4002 err = invent_group_ids(mnt, kattr->recurse);
4003 if (err) {
4004 namespace_unlock();
4005 return err;
4006 }
4007 }
4008 }
4009
4010 lock_mount_hash();

	/*
	 * Get the mount tree in a shape where we can change mount
	 * properties without failure.
	 */
4016 last = mount_setattr_prepare(kattr, mnt, &err);
4017 if (last)
4018 mount_setattr_commit(kattr, mnt, last, err);
4019
4020 unlock_mount_hash();
4021
4022 if (kattr->propagation) {
4023 namespace_unlock();
4024 if (err)
4025 cleanup_group_ids(mnt, NULL);
4026 }
4027
4028 return err;
4029}
4030
4031static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
4032 struct mount_kattr *kattr, unsigned int flags)
4033{
4034 int err = 0;
4035 struct ns_common *ns;
4036 struct user_namespace *mnt_userns;
4037 struct file *file;
4038
4039 if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP))
4040 return 0;
4041
	/*
	 * We currently do not support clearing an idmapped mount. If this ever
	 * is a use-case we can revisit this but for now let's keep it simple
	 * and not allow it.
	 */
4047 if (attr->attr_clr & MOUNT_ATTR_IDMAP)
4048 return -EINVAL;
4049
4050 if (attr->userns_fd > INT_MAX)
4051 return -EINVAL;
4052
4053 file = fget(attr->userns_fd);
4054 if (!file)
4055 return -EBADF;
4056
4057 if (!proc_ns_file(file)) {
4058 err = -EINVAL;
4059 goto out_fput;
4060 }
4061
4062 ns = get_proc_ns(file_inode(file));
4063 if (ns->ops->type != CLONE_NEWUSER) {
4064 err = -EINVAL;
4065 goto out_fput;
4066 }
4067
	/*
	 * The init_user_ns is used to indicate that a vfsmount is not idmapped.
	 * This is simpler than just having to treat NULL as unmapped. Users
	 * wanting to idmap a mount to init_user_ns can just use a namespace
	 * with an identity mapping.
	 */
4074 mnt_userns = container_of(ns, struct user_namespace, ns);
4075 if (mnt_userns == &init_user_ns) {
4076 err = -EPERM;
4077 goto out_fput;
4078 }
4079 kattr->mnt_userns = get_user_ns(mnt_userns);
4080
4081out_fput:
4082 fput(file);
4083 return err;
4084}
4085
4086static int build_mount_kattr(const struct mount_attr *attr, size_t usize,
4087 struct mount_kattr *kattr, unsigned int flags)
4088{
4089 unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
4090
4091 if (flags & AT_NO_AUTOMOUNT)
4092 lookup_flags &= ~LOOKUP_AUTOMOUNT;
4093 if (flags & AT_SYMLINK_NOFOLLOW)
4094 lookup_flags &= ~LOOKUP_FOLLOW;
4095 if (flags & AT_EMPTY_PATH)
4096 lookup_flags |= LOOKUP_EMPTY;
4097
4098 *kattr = (struct mount_kattr) {
4099 .lookup_flags = lookup_flags,
4100 .recurse = !!(flags & AT_RECURSIVE),
4101 };
4102
4103 if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS)
4104 return -EINVAL;
4105 if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1)
4106 return -EINVAL;
4107 kattr->propagation = attr->propagation;
4108
4109 if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS)
4110 return -EINVAL;
4111
4112 kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set);
4113 kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr);
4114
	/*
	 * Since the MOUNT_ATTR_<atime> values are an enum, not a bitmap,
	 * users wanting to transition to a different atime setting cannot
	 * simply specify the atime setting in @attr_set, but must also
	 * specify MOUNT_ATTR__ATIME in the @attr_clr field.
	 * So ensure that MOUNT_ATTR__ATIME can't be partially set in
	 * @attr_clr and that @attr_set can't have any atime bits set if
	 * @attr_clr doesn't have MOUNT_ATTR__ATIME set.
	 */
4124 if (attr->attr_clr & MOUNT_ATTR__ATIME) {
4125 if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME)
4126 return -EINVAL;
4127
		/*
		 * Clear all previous time settings as they are mutually
		 * exclusive.
		 */
4132 kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME;
4133 switch (attr->attr_set & MOUNT_ATTR__ATIME) {
4134 case MOUNT_ATTR_RELATIME:
4135 kattr->attr_set |= MNT_RELATIME;
4136 break;
4137 case MOUNT_ATTR_NOATIME:
4138 kattr->attr_set |= MNT_NOATIME;
4139 break;
4140 case MOUNT_ATTR_STRICTATIME:
4141 break;
4142 default:
4143 return -EINVAL;
4144 }
4145 } else {
4146 if (attr->attr_set & MOUNT_ATTR__ATIME)
4147 return -EINVAL;
4148 }
4149
4150 return build_mount_idmapped(attr, usize, kattr, flags);
4151}
4152
4153static void finish_mount_kattr(struct mount_kattr *kattr)
4154{
4155 put_user_ns(kattr->mnt_userns);
4156 kattr->mnt_userns = NULL;
4157}
4158
4159SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
4160 unsigned int, flags, struct mount_attr __user *, uattr,
4161 size_t, usize)
4162{
4163 int err;
4164 struct path target;
4165 struct mount_attr attr;
4166 struct mount_kattr kattr;
4167
4168 BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0);
4169
4170 if (flags & ~(AT_EMPTY_PATH |
4171 AT_RECURSIVE |
4172 AT_SYMLINK_NOFOLLOW |
4173 AT_NO_AUTOMOUNT))
4174 return -EINVAL;
4175
4176 if (unlikely(usize > PAGE_SIZE))
4177 return -E2BIG;
4178 if (unlikely(usize < MOUNT_ATTR_SIZE_VER0))
4179 return -EINVAL;
4180
4181 if (!may_mount())
4182 return -EPERM;
4183
4184 err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
4185 if (err)
4186 return err;
4187
	/* Don't bother walking through the mounts if this is a nop. */
4189 if (attr.attr_set == 0 &&
4190 attr.attr_clr == 0 &&
4191 attr.propagation == 0)
4192 return 0;
4193
4194 err = build_mount_kattr(&attr, usize, &kattr, flags);
4195 if (err)
4196 return err;
4197
4198 err = user_path_at(dfd, path, kattr.lookup_flags, &target);
4199 if (err)
4200 return err;
4201
4202 err = do_mount_setattr(&target, &kattr);
4203 finish_mount_kattr(&kattr);
4204 path_put(&target);
4205 return err;
4206}
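
/*
 * For illustration (not from the original source): making a subtree
 * recursively read-only and nosuid from userspace:
 *
 *	struct mount_attr attr = {
 *		.attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
 *	};
 *
 *	mount_setattr(AT_FDCWD, "/mnt", AT_RECURSIVE, &attr, sizeof(attr));
 */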
4207
4208static void __init init_mount_tree(void)
4209{
4210 struct vfsmount *mnt;
4211 struct mount *m;
4212 struct mnt_namespace *ns;
4213 struct path root;
4214
4215 mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
4216 if (IS_ERR(mnt))
4217 panic("Can't create rootfs");
4218
4219 ns = alloc_mnt_ns(&init_user_ns, false);
4220 if (IS_ERR(ns))
4221 panic("Can't allocate initial namespace");
4222 m = real_mount(mnt);
4223 m->mnt_ns = ns;
4224 ns->root = m;
4225 ns->mounts = 1;
4226 list_add(&m->mnt_list, &ns->list);
4227 init_task.nsproxy->mnt_ns = ns;
4228 get_mnt_ns(ns);
4229
4230 root.mnt = mnt;
4231 root.dentry = mnt->mnt_root;
4232 mnt->mnt_flags |= MNT_LOCKED;
4233
4234 set_fs_pwd(current->fs, &root);
4235 set_fs_root(current->fs, &root);
4236}
4237
4238void __init mnt_init(void)
4239{
4240 int err;
4241
4242 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
4243 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
4244
4245 mount_hashtable = alloc_large_system_hash("Mount-cache",
4246 sizeof(struct hlist_head),
4247 mhash_entries, 19,
4248 HASH_ZERO,
4249 &m_hash_shift, &m_hash_mask, 0, 0);
4250 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
4251 sizeof(struct hlist_head),
4252 mphash_entries, 19,
4253 HASH_ZERO,
4254 &mp_hash_shift, &mp_hash_mask, 0, 0);
4255
4256 if (!mount_hashtable || !mountpoint_hashtable)
4257 panic("Failed to allocate mount hash table\n");
4258
4259 kernfs_init();
4260
4261 err = sysfs_init();
4262 if (err)
4263 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
4264 __func__, err);
4265 fs_kobj = kobject_create_and_add("fs", NULL);
4266 if (!fs_kobj)
4267 printk(KERN_WARNING "%s: kobj create error\n", __func__);
4268 shmem_init();
4269 init_rootfs();
4270 init_mount_tree();
4271}
4272
4273void put_mnt_ns(struct mnt_namespace *ns)
4274{
4275 if (!refcount_dec_and_test(&ns->ns.count))
4276 return;
4277 drop_collected_mounts(&ns->root->mnt);
4278 free_mnt_ns(ns);
4279}
4280
4281struct vfsmount *kern_mount(struct file_system_type *type)
4282{
4283 struct vfsmount *mnt;
4284 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
4285 if (!IS_ERR(mnt)) {
		/*
		 * it is a longterm mount, don't release mnt until
		 * we unmount before file sys is unregistered
		 */
4290 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
4291 }
4292 return mnt;
4293}
4294EXPORT_SYMBOL_GPL(kern_mount);
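
/*
 * For illustration (not from the original source): in-kernel users pin a
 * filesystem instance for internal use, e.g. fs/pipe.c does
 *
 *	pipe_mnt = kern_mount(&pipe_fs_type);
 *	if (IS_ERR(pipe_mnt))
 *		return PTR_ERR(pipe_mnt);
 *
 * and such a mount is only torn down again via kern_unmount().
 */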
4295
4296void kern_unmount(struct vfsmount *mnt)
4297{
	/* release long term mount so mount point can be released */
4299 if (!IS_ERR_OR_NULL(mnt)) {
4300 real_mount(mnt)->mnt_ns = NULL;
4301 synchronize_rcu();
4302 mntput(mnt);
4303 }
4304}
4305EXPORT_SYMBOL(kern_unmount);
4306
4307void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
4308{
4309 unsigned int i;
4310
4311 for (i = 0; i < num; i++)
4312 if (mnt[i])
4313 real_mount(mnt[i])->mnt_ns = NULL;
4314 synchronize_rcu_expedited();
4315 for (i = 0; i < num; i++)
4316 mntput(mnt[i]);
4317}
4318EXPORT_SYMBOL(kern_unmount_array);
4319
4320bool our_mnt(struct vfsmount *mnt)
4321{
4322 return check_mnt(real_mount(mnt));
4323}
4324
4325bool current_chrooted(void)
4326{
	/* Does the current process have a non-standard root */
4328 struct path ns_root;
4329 struct path fs_root;
4330 bool chrooted;

	/* Find the namespace root */
	ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
4334 ns_root.dentry = ns_root.mnt->mnt_root;
4335 path_get(&ns_root);
4336 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
4337 ;
4338
4339 get_fs_root(current->fs, &fs_root);
4340
4341 chrooted = !path_equal(&fs_root, &ns_root);
4342
4343 path_put(&fs_root);
4344 path_put(&ns_root);
4345
4346 return chrooted;
4347}
4348
4349static bool mnt_already_visible(struct mnt_namespace *ns,
4350 const struct super_block *sb,
4351 int *new_mnt_flags)
4352{
4353 int new_flags = *new_mnt_flags;
4354 struct mount *mnt;
4355 bool visible = false;
4356
4357 down_read(&namespace_sem);
4358 lock_ns_list(ns);
4359 list_for_each_entry(mnt, &ns->list, mnt_list) {
4360 struct mount *child;
4361 int mnt_flags;
4362
4363 if (mnt_is_cursor(mnt))
4364 continue;
4365
4366 if (mnt->mnt.mnt_sb->s_type != sb->s_type)
4367 continue;

		/* This mount is not fully visible if its root directory
		 * is not the root directory of the filesystem.
		 */
4372 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
4373 continue;

		/* A local view of the mount flags */
4376 mnt_flags = mnt->mnt.mnt_flags;

		/* Don't miss readonly hidden in the superblock flags */
4379 if (sb_rdonly(mnt->mnt.mnt_sb))
4380 mnt_flags |= MNT_LOCK_READONLY;

		/* Verify the mount flags are equal to or more permissive
		 * than the proposed new mount.
		 */
4385 if ((mnt_flags & MNT_LOCK_READONLY) &&
4386 !(new_flags & MNT_READONLY))
4387 continue;
4388 if ((mnt_flags & MNT_LOCK_ATIME) &&
4389 ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
4390 continue;

		/* This mount is not fully visible if there are any
		 * locked child mounts that cover anything except for
		 * empty directories.
		 */
4396 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
4397 struct inode *inode = child->mnt_mountpoint->d_inode;
			/* Only worry about locked mounts */
4399 if (!(child->mnt.mnt_flags & MNT_LOCKED))
4400 continue;
			/* Is the directory permanently empty? */
4402 if (!is_empty_dir_inode(inode))
4403 goto next;
4404 }
4405
4406 *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
4407 MNT_LOCK_ATIME);
4408 visible = true;
4409 goto found;
4410 next: ;
4411 }
4412found:
4413 unlock_ns_list(ns);
4414 up_read(&namespace_sem);
4415 return visible;
4416}
4417
4418static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
4419{
4420 const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
4421 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
4422 unsigned long s_iflags;
4423
4424 if (ns->user_ns == &init_user_ns)
4425 return false;
4426
	/* Can this filesystem be too revealing? */
4428 s_iflags = sb->s_iflags;
4429 if (!(s_iflags & SB_I_USERNS_VISIBLE))
4430 return false;
4431
4432 if ((s_iflags & required_iflags) != required_iflags) {
4433 WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
4434 required_iflags);
4435 return true;
4436 }
4437
4438 return !mnt_already_visible(ns, sb, new_mnt_flags);
4439}
4440
4441bool mnt_may_suid(struct vfsmount *mnt)
4442{
	/*
	 * Foreign mounts (accessed via fchdir or through /proc
	 * symlinks) are always treated as if they are nosuid.  This
	 * prevents namespaces from trusting potentially unsafe
	 * suid/sgid bits, file caps, or security labels that originate
	 * in other namespaces.
	 */
4450 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
4451 current_in_userns(mnt->mnt_sb->s_user_ns);
4452}
4453
4454static struct ns_common *mntns_get(struct task_struct *task)
4455{
4456 struct ns_common *ns = NULL;
4457 struct nsproxy *nsproxy;
4458
4459 task_lock(task);
4460 nsproxy = task->nsproxy;
4461 if (nsproxy) {
4462 ns = &nsproxy->mnt_ns->ns;
4463 get_mnt_ns(to_mnt_ns(ns));
4464 }
4465 task_unlock(task);
4466
4467 return ns;
4468}
4469
4470static void mntns_put(struct ns_common *ns)
4471{
4472 put_mnt_ns(to_mnt_ns(ns));
4473}
4474
4475static int mntns_install(struct nsset *nsset, struct ns_common *ns)
4476{
4477 struct nsproxy *nsproxy = nsset->nsproxy;
4478 struct fs_struct *fs = nsset->fs;
4479 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
4480 struct user_namespace *user_ns = nsset->cred->user_ns;
4481 struct path root;
4482 int err;
4483
4484 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
4485 !ns_capable(user_ns, CAP_SYS_CHROOT) ||
4486 !ns_capable(user_ns, CAP_SYS_ADMIN))
4487 return -EPERM;
4488
4489 if (is_anon_ns(mnt_ns))
4490 return -EINVAL;
4491
4492 if (fs->users != 1)
4493 return -EINVAL;
4494
4495 get_mnt_ns(mnt_ns);
4496 old_mnt_ns = nsproxy->mnt_ns;
4497 nsproxy->mnt_ns = mnt_ns;
4498
	/* Find the root */
4500 err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
4501 "/", LOOKUP_DOWN, &root);
4502 if (err) {
		/* revert to old namespace */
4504 nsproxy->mnt_ns = old_mnt_ns;
4505 put_mnt_ns(mnt_ns);
4506 return err;
4507 }
4508
4509 put_mnt_ns(old_mnt_ns);
4510
	/* Update the pwd and root */
4512 set_fs_pwd(fs, &root);
4513 set_fs_root(fs, &root);
4514
4515 path_put(&root);
4516 return 0;
4517}
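
/*
 * For illustration (not from the original source): this is the path taken
 * when userspace switches mount namespaces via setns(2):
 *
 *	fd = open("/proc/<pid>/ns/mnt", O_RDONLY);
 *	setns(fd, CLONE_NEWNS);
 */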
4518
4519static struct user_namespace *mntns_owner(struct ns_common *ns)
4520{
4521 return to_mnt_ns(ns)->user_ns;
4522}
4523
4524const struct proc_ns_operations mntns_operations = {
4525 .name = "mnt",
4526 .type = CLONE_NEWNS,
4527 .get = mntns_get,
4528 .put = mntns_put,
4529 .install = mntns_install,
4530 .owner = mntns_owner,
4531};
4532