1
2
3
4
5
6
7
8
9
10
11#include <linux/syscalls.h>
12#include <linux/export.h>
13#include <linux/capability.h>
14#include <linux/mnt_namespace.h>
15#include <linux/user_namespace.h>
16#include <linux/namei.h>
17#include <linux/security.h>
18#include <linux/cred.h>
19#include <linux/idr.h>
20#include <linux/init.h>
21#include <linux/fs_struct.h>
22#include <linux/fsnotify.h>
23#include <linux/file.h>
24#include <linux/uaccess.h>
25#include <linux/proc_ns.h>
26#include <linux/magic.h>
27#include <linux/memblock.h>
28#include <linux/task_work.h>
29#include <linux/sched/task.h>
30#include <uapi/linux/mount.h>
31#include <linux/fs_context.h>
32
33#include "pnode.h"
34#include "internal.h"
35
36
37unsigned int sysctl_mount_max __read_mostly = 100000;
38
39static unsigned int m_hash_mask __read_mostly;
40static unsigned int m_hash_shift __read_mostly;
41static unsigned int mp_hash_mask __read_mostly;
42static unsigned int mp_hash_shift __read_mostly;
43
44static __initdata unsigned long mhash_entries;
45static int __init set_mhash_entries(char *str)
46{
47 if (!str)
48 return 0;
49 mhash_entries = simple_strtoul(str, &str, 0);
50 return 1;
51}
52__setup("mhash_entries=", set_mhash_entries);
53
54static __initdata unsigned long mphash_entries;
55static int __init set_mphash_entries(char *str)
56{
57 if (!str)
58 return 0;
59 mphash_entries = simple_strtoul(str, &str, 0);
60 return 1;
61}
62__setup("mphash_entries=", set_mphash_entries);
63
64static u64 event;
65static DEFINE_IDA(mnt_id_ida);
66static DEFINE_IDA(mnt_group_ida);
67
68static struct hlist_head *mount_hashtable __read_mostly;
69static struct hlist_head *mountpoint_hashtable __read_mostly;
70static struct kmem_cache *mnt_cache __read_mostly;
71static DECLARE_RWSEM(namespace_sem);
72
73
74struct kobject *fs_kobj;
75EXPORT_SYMBOL_GPL(fs_kobj);
76
77
78
79
80
81
82
83
84
85__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
86
87static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
88{
89 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
90 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
91 tmp = tmp + (tmp >> m_hash_shift);
92 return &mount_hashtable[tmp & m_hash_mask];
93}
94
95static inline struct hlist_head *mp_hash(struct dentry *dentry)
96{
97 unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
98 tmp = tmp + (tmp >> mp_hash_shift);
99 return &mountpoint_hashtable[tmp & mp_hash_mask];
100}
101
102static int mnt_alloc_id(struct mount *mnt)
103{
104 int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);
105
106 if (res < 0)
107 return res;
108 mnt->mnt_id = res;
109 return 0;
110}
111
112static void mnt_free_id(struct mount *mnt)
113{
114 ida_free(&mnt_id_ida, mnt->mnt_id);
115}
116
117
118
119
120static int mnt_alloc_group_id(struct mount *mnt)
121{
122 int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL);
123
124 if (res < 0)
125 return res;
126 mnt->mnt_group_id = res;
127 return 0;
128}
129
130
131
132
133void mnt_release_group_id(struct mount *mnt)
134{
135 ida_free(&mnt_group_ida, mnt->mnt_group_id);
136 mnt->mnt_group_id = 0;
137}
138
139
140
141
142static inline void mnt_add_count(struct mount *mnt, int n)
143{
144#ifdef CONFIG_SMP
145 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
146#else
147 preempt_disable();
148 mnt->mnt_count += n;
149 preempt_enable();
150#endif
151}
152
153
154
155
156unsigned int mnt_get_count(struct mount *mnt)
157{
158#ifdef CONFIG_SMP
159 unsigned int count = 0;
160 int cpu;
161
162 for_each_possible_cpu(cpu) {
163 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
164 }
165
166 return count;
167#else
168 return mnt->mnt_count;
169#endif
170}
171
172static void drop_mountpoint(struct fs_pin *p)
173{
174 struct mount *m = container_of(p, struct mount, mnt_umount);
175 dput(m->mnt_ex_mountpoint);
176 pin_remove(p);
177 mntput(&m->mnt);
178}
179
180static struct mount *alloc_vfsmnt(const char *name)
181{
182 struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
183 if (mnt) {
184 int err;
185
186 err = mnt_alloc_id(mnt);
187 if (err)
188 goto out_free_cache;
189
190 if (name) {
191 mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL);
192 if (!mnt->mnt_devname)
193 goto out_free_id;
194 }
195
196#ifdef CONFIG_SMP
197 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
198 if (!mnt->mnt_pcp)
199 goto out_free_devname;
200
201 this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
202#else
203 mnt->mnt_count = 1;
204 mnt->mnt_writers = 0;
205#endif
206
207 INIT_HLIST_NODE(&mnt->mnt_hash);
208 INIT_LIST_HEAD(&mnt->mnt_child);
209 INIT_LIST_HEAD(&mnt->mnt_mounts);
210 INIT_LIST_HEAD(&mnt->mnt_list);
211 INIT_LIST_HEAD(&mnt->mnt_expire);
212 INIT_LIST_HEAD(&mnt->mnt_share);
213 INIT_LIST_HEAD(&mnt->mnt_slave_list);
214 INIT_LIST_HEAD(&mnt->mnt_slave);
215 INIT_HLIST_NODE(&mnt->mnt_mp_list);
216 INIT_LIST_HEAD(&mnt->mnt_umounting);
217 init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
218 }
219 return mnt;
220
221#ifdef CONFIG_SMP
222out_free_devname:
223 kfree_const(mnt->mnt_devname);
224#endif
225out_free_id:
226 mnt_free_id(mnt);
227out_free_cache:
228 kmem_cache_free(mnt_cache, mnt);
229 return NULL;
230}
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251bool __mnt_is_readonly(struct vfsmount *mnt)
252{
253 return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
254}
255EXPORT_SYMBOL_GPL(__mnt_is_readonly);
256
257static inline void mnt_inc_writers(struct mount *mnt)
258{
259#ifdef CONFIG_SMP
260 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
261#else
262 mnt->mnt_writers++;
263#endif
264}
265
266static inline void mnt_dec_writers(struct mount *mnt)
267{
268#ifdef CONFIG_SMP
269 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
270#else
271 mnt->mnt_writers--;
272#endif
273}
274
275static unsigned int mnt_get_writers(struct mount *mnt)
276{
277#ifdef CONFIG_SMP
278 unsigned int count = 0;
279 int cpu;
280
281 for_each_possible_cpu(cpu) {
282 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
283 }
284
285 return count;
286#else
287 return mnt->mnt_writers;
288#endif
289}
290
291static int mnt_is_readonly(struct vfsmount *mnt)
292{
293 if (mnt->mnt_sb->s_readonly_remount)
294 return 1;
295
296 smp_rmb();
297 return __mnt_is_readonly(mnt);
298}
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316int __mnt_want_write(struct vfsmount *m)
317{
318 struct mount *mnt = real_mount(m);
319 int ret = 0;
320
321 preempt_disable();
322 mnt_inc_writers(mnt);
323
324
325
326
327
328 smp_mb();
329 while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
330 cpu_relax();
331
332
333
334
335
336 smp_rmb();
337 if (mnt_is_readonly(m)) {
338 mnt_dec_writers(mnt);
339 ret = -EROFS;
340 }
341 preempt_enable();
342
343 return ret;
344}
345
346
347
348
349
350
351
352
353
354
355int mnt_want_write(struct vfsmount *m)
356{
357 int ret;
358
359 sb_start_write(m->mnt_sb);
360 ret = __mnt_want_write(m);
361 if (ret)
362 sb_end_write(m->mnt_sb);
363 return ret;
364}
365EXPORT_SYMBOL_GPL(mnt_want_write);
366
367
368
369
370
371
372
373
374
375
376
377
378
379int mnt_clone_write(struct vfsmount *mnt)
380{
381
382 if (__mnt_is_readonly(mnt))
383 return -EROFS;
384 preempt_disable();
385 mnt_inc_writers(real_mount(mnt));
386 preempt_enable();
387 return 0;
388}
389EXPORT_SYMBOL_GPL(mnt_clone_write);
390
391
392
393
394
395
396
397
398int __mnt_want_write_file(struct file *file)
399{
400 if (!(file->f_mode & FMODE_WRITER))
401 return __mnt_want_write(file->f_path.mnt);
402 else
403 return mnt_clone_write(file->f_path.mnt);
404}
405
406
407
408
409
410
411
412
413int mnt_want_write_file(struct file *file)
414{
415 int ret;
416
417 sb_start_write(file_inode(file)->i_sb);
418 ret = __mnt_want_write_file(file);
419 if (ret)
420 sb_end_write(file_inode(file)->i_sb);
421 return ret;
422}
423EXPORT_SYMBOL_GPL(mnt_want_write_file);
424
425
426
427
428
429
430
431
432
/*
 * Drop one writer count on @mnt, with preemption disabled around the
 * decrement.  Superblock write accounting is not touched.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	struct mount *m = real_mount(mnt);

	preempt_disable();
	mnt_dec_writers(m);
	preempt_enable();
}
439
440
441
442
443
444
445
446
447
448void mnt_drop_write(struct vfsmount *mnt)
449{
450 __mnt_drop_write(mnt);
451 sb_end_write(mnt->mnt_sb);
452}
453EXPORT_SYMBOL_GPL(mnt_drop_write);
454
455void __mnt_drop_write_file(struct file *file)
456{
457 __mnt_drop_write(file->f_path.mnt);
458}
459
460void mnt_drop_write_file(struct file *file)
461{
462 __mnt_drop_write_file(file);
463 sb_end_write(file_inode(file)->i_sb);
464}
465EXPORT_SYMBOL(mnt_drop_write_file);
466
467static int mnt_make_readonly(struct mount *mnt)
468{
469 int ret = 0;
470
471 lock_mount_hash();
472 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
473
474
475
476
477 smp_mb();
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495 if (mnt_get_writers(mnt) > 0)
496 ret = -EBUSY;
497 else
498 mnt->mnt.mnt_flags |= MNT_READONLY;
499
500
501
502
503 smp_wmb();
504 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
505 unlock_mount_hash();
506 return ret;
507}
508
509static int __mnt_unmake_readonly(struct mount *mnt)
510{
511 lock_mount_hash();
512 mnt->mnt.mnt_flags &= ~MNT_READONLY;
513 unlock_mount_hash();
514 return 0;
515}
516
517int sb_prepare_remount_readonly(struct super_block *sb)
518{
519 struct mount *mnt;
520 int err = 0;
521
522
523 if (atomic_long_read(&sb->s_remove_count))
524 return -EBUSY;
525
526 lock_mount_hash();
527 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
528 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
529 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
530 smp_mb();
531 if (mnt_get_writers(mnt) > 0) {
532 err = -EBUSY;
533 break;
534 }
535 }
536 }
537 if (!err && atomic_long_read(&sb->s_remove_count))
538 err = -EBUSY;
539
540 if (!err) {
541 sb->s_readonly_remount = 1;
542 smp_wmb();
543 }
544 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
545 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
546 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
547 }
548 unlock_mount_hash();
549
550 return err;
551}
552
553static void free_vfsmnt(struct mount *mnt)
554{
555 kfree_const(mnt->mnt_devname);
556#ifdef CONFIG_SMP
557 free_percpu(mnt->mnt_pcp);
558#endif
559 kmem_cache_free(mnt_cache, mnt);
560}
561
562static void delayed_free_vfsmnt(struct rcu_head *head)
563{
564 free_vfsmnt(container_of(head, struct mount, mnt_rcu));
565}
566
567
568int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
569{
570 struct mount *mnt;
571 if (read_seqretry(&mount_lock, seq))
572 return 1;
573 if (bastard == NULL)
574 return 0;
575 mnt = real_mount(bastard);
576 mnt_add_count(mnt, 1);
577 smp_mb();
578 if (likely(!read_seqretry(&mount_lock, seq)))
579 return 0;
580 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
581 mnt_add_count(mnt, -1);
582 return 1;
583 }
584 lock_mount_hash();
585 if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
586 mnt_add_count(mnt, -1);
587 unlock_mount_hash();
588 return 1;
589 }
590 unlock_mount_hash();
591
592 return -1;
593}
594
595
596bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
597{
598 int res = __legitimize_mnt(bastard, seq);
599 if (likely(!res))
600 return true;
601 if (unlikely(res < 0)) {
602 rcu_read_unlock();
603 mntput(bastard);
604 rcu_read_lock();
605 }
606 return false;
607}
608
609
610
611
612
613struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
614{
615 struct hlist_head *head = m_hash(mnt, dentry);
616 struct mount *p;
617
618 hlist_for_each_entry_rcu(p, head, mnt_hash)
619 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
620 return p;
621 return NULL;
622}
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640struct vfsmount *lookup_mnt(const struct path *path)
641{
642 struct mount *child_mnt;
643 struct vfsmount *m;
644 unsigned seq;
645
646 rcu_read_lock();
647 do {
648 seq = read_seqbegin(&mount_lock);
649 child_mnt = __lookup_mnt(path->mnt, path->dentry);
650 m = child_mnt ? &child_mnt->mnt : NULL;
651 } while (!legitimize_mnt(m, seq));
652 rcu_read_unlock();
653 return m;
654}
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671bool __is_local_mountpoint(struct dentry *dentry)
672{
673 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
674 struct mount *mnt;
675 bool is_covered = false;
676
677 if (!d_mountpoint(dentry))
678 goto out;
679
680 down_read(&namespace_sem);
681 list_for_each_entry(mnt, &ns->list, mnt_list) {
682 is_covered = (mnt->mnt_mountpoint == dentry);
683 if (is_covered)
684 break;
685 }
686 up_read(&namespace_sem);
687out:
688 return is_covered;
689}
690
691static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
692{
693 struct hlist_head *chain = mp_hash(dentry);
694 struct mountpoint *mp;
695
696 hlist_for_each_entry(mp, chain, m_hash) {
697 if (mp->m_dentry == dentry) {
698 mp->m_count++;
699 return mp;
700 }
701 }
702 return NULL;
703}
704
705static struct mountpoint *get_mountpoint(struct dentry *dentry)
706{
707 struct mountpoint *mp, *new = NULL;
708 int ret;
709
710 if (d_mountpoint(dentry)) {
711
712 if (d_unlinked(dentry))
713 return ERR_PTR(-ENOENT);
714mountpoint:
715 read_seqlock_excl(&mount_lock);
716 mp = lookup_mountpoint(dentry);
717 read_sequnlock_excl(&mount_lock);
718 if (mp)
719 goto done;
720 }
721
722 if (!new)
723 new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
724 if (!new)
725 return ERR_PTR(-ENOMEM);
726
727
728
729 ret = d_set_mounted(dentry);
730
731
732 if (ret == -EBUSY)
733 goto mountpoint;
734
735
736 mp = ERR_PTR(ret);
737 if (ret)
738 goto done;
739
740
741 read_seqlock_excl(&mount_lock);
742 new->m_dentry = dentry;
743 new->m_count = 1;
744 hlist_add_head(&new->m_hash, mp_hash(dentry));
745 INIT_HLIST_HEAD(&new->m_list);
746 read_sequnlock_excl(&mount_lock);
747
748 mp = new;
749 new = NULL;
750done:
751 kfree(new);
752 return mp;
753}
754
755static void put_mountpoint(struct mountpoint *mp)
756{
757 if (!--mp->m_count) {
758 struct dentry *dentry = mp->m_dentry;
759 BUG_ON(!hlist_empty(&mp->m_list));
760 spin_lock(&dentry->d_lock);
761 dentry->d_flags &= ~DCACHE_MOUNTED;
762 spin_unlock(&dentry->d_lock);
763 hlist_del(&mp->m_hash);
764 kfree(mp);
765 }
766}
767
768static inline int check_mnt(struct mount *mnt)
769{
770 return mnt->mnt_ns == current->nsproxy->mnt_ns;
771}
772
773
774
775
776static void touch_mnt_namespace(struct mnt_namespace *ns)
777{
778 if (ns) {
779 ns->event = ++event;
780 wake_up_interruptible(&ns->poll);
781 }
782}
783
784
785
786
787static void __touch_mnt_namespace(struct mnt_namespace *ns)
788{
789 if (ns && ns->event != event) {
790 ns->event = event;
791 wake_up_interruptible(&ns->poll);
792 }
793}
794
795
796
797
798static void unhash_mnt(struct mount *mnt)
799{
800 mnt->mnt_parent = mnt;
801 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
802 list_del_init(&mnt->mnt_child);
803 hlist_del_init_rcu(&mnt->mnt_hash);
804 hlist_del_init(&mnt->mnt_mp_list);
805 put_mountpoint(mnt->mnt_mp);
806 mnt->mnt_mp = NULL;
807}
808
809
810
811
812static void detach_mnt(struct mount *mnt, struct path *old_path)
813{
814 old_path->dentry = mnt->mnt_mountpoint;
815 old_path->mnt = &mnt->mnt_parent->mnt;
816 unhash_mnt(mnt);
817}
818
819
820
821
822static void umount_mnt(struct mount *mnt)
823{
824
825 mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
826 unhash_mnt(mnt);
827}
828
829
830
831
832void mnt_set_mountpoint(struct mount *mnt,
833 struct mountpoint *mp,
834 struct mount *child_mnt)
835{
836 mp->m_count++;
837 mnt_add_count(mnt, 1);
838 child_mnt->mnt_mountpoint = dget(mp->m_dentry);
839 child_mnt->mnt_parent = mnt;
840 child_mnt->mnt_mp = mp;
841 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
842}
843
844static void __attach_mnt(struct mount *mnt, struct mount *parent)
845{
846 hlist_add_head_rcu(&mnt->mnt_hash,
847 m_hash(&parent->mnt, mnt->mnt_mountpoint));
848 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
849}
850
851
852
853
854static void attach_mnt(struct mount *mnt,
855 struct mount *parent,
856 struct mountpoint *mp)
857{
858 mnt_set_mountpoint(parent, mp, mnt);
859 __attach_mnt(mnt, parent);
860}
861
862void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
863{
864 struct mountpoint *old_mp = mnt->mnt_mp;
865 struct dentry *old_mountpoint = mnt->mnt_mountpoint;
866 struct mount *old_parent = mnt->mnt_parent;
867
868 list_del_init(&mnt->mnt_child);
869 hlist_del_init(&mnt->mnt_mp_list);
870 hlist_del_init_rcu(&mnt->mnt_hash);
871
872 attach_mnt(mnt, parent, mp);
873
874 put_mountpoint(old_mp);
875
876
877
878
879
880
881
882
883
884
885
886
887 spin_lock(&old_mountpoint->d_lock);
888 old_mountpoint->d_lockref.count--;
889 spin_unlock(&old_mountpoint->d_lock);
890
891 mnt_add_count(old_parent, -1);
892}
893
894
895
896
897static void commit_tree(struct mount *mnt)
898{
899 struct mount *parent = mnt->mnt_parent;
900 struct mount *m;
901 LIST_HEAD(head);
902 struct mnt_namespace *n = parent->mnt_ns;
903
904 BUG_ON(parent == mnt);
905
906 list_add_tail(&head, &mnt->mnt_list);
907 list_for_each_entry(m, &head, mnt_list)
908 m->mnt_ns = n;
909
910 list_splice(&head, n->list.prev);
911
912 n->mounts += n->pending_mounts;
913 n->pending_mounts = 0;
914
915 __attach_mnt(mnt, parent);
916 touch_mnt_namespace(n);
917}
918
919static struct mount *next_mnt(struct mount *p, struct mount *root)
920{
921 struct list_head *next = p->mnt_mounts.next;
922 if (next == &p->mnt_mounts) {
923 while (1) {
924 if (p == root)
925 return NULL;
926 next = p->mnt_child.next;
927 if (next != &p->mnt_parent->mnt_mounts)
928 break;
929 p = p->mnt_parent;
930 }
931 }
932 return list_entry(next, struct mount, mnt_child);
933}
934
935static struct mount *skip_mnt_tree(struct mount *p)
936{
937 struct list_head *prev = p->mnt_mounts.prev;
938 while (prev != &p->mnt_mounts) {
939 p = list_entry(prev, struct mount, mnt_child);
940 prev = p->mnt_mounts.prev;
941 }
942 return p;
943}
944
945
946
947
948
949
950
951
952
953
954struct vfsmount *vfs_create_mount(struct fs_context *fc)
955{
956 struct mount *mnt;
957
958 if (!fc->root)
959 return ERR_PTR(-EINVAL);
960
961 mnt = alloc_vfsmnt(fc->source ?: "none");
962 if (!mnt)
963 return ERR_PTR(-ENOMEM);
964
965 if (fc->sb_flags & SB_KERNMOUNT)
966 mnt->mnt.mnt_flags = MNT_INTERNAL;
967
968 atomic_inc(&fc->root->d_sb->s_active);
969 mnt->mnt.mnt_sb = fc->root->d_sb;
970 mnt->mnt.mnt_root = dget(fc->root);
971 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
972 mnt->mnt_parent = mnt;
973
974 lock_mount_hash();
975 list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
976 unlock_mount_hash();
977 return &mnt->mnt;
978}
979EXPORT_SYMBOL(vfs_create_mount);
980
981struct vfsmount *fc_mount(struct fs_context *fc)
982{
983 int err = vfs_get_tree(fc);
984 if (!err) {
985 up_write(&fc->root->d_sb->s_umount);
986 return vfs_create_mount(fc);
987 }
988 return ERR_PTR(err);
989}
990EXPORT_SYMBOL(fc_mount);
991
992struct vfsmount *vfs_kern_mount(struct file_system_type *type,
993 int flags, const char *name,
994 void *data)
995{
996 struct fs_context *fc;
997 struct vfsmount *mnt;
998 int ret = 0;
999
1000 if (!type)
1001 return ERR_PTR(-EINVAL);
1002
1003 fc = fs_context_for_mount(type, flags);
1004 if (IS_ERR(fc))
1005 return ERR_CAST(fc);
1006
1007 if (name)
1008 ret = vfs_parse_fs_string(fc, "source",
1009 name, strlen(name));
1010 if (!ret)
1011 ret = parse_monolithic_mount_data(fc, data);
1012 if (!ret)
1013 mnt = fc_mount(fc);
1014 else
1015 mnt = ERR_PTR(ret);
1016
1017 put_fs_context(fc);
1018 return mnt;
1019}
1020EXPORT_SYMBOL_GPL(vfs_kern_mount);
1021
1022struct vfsmount *
1023vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
1024 const char *name, void *data)
1025{
1026
1027
1028
1029
1030 if (mountpoint->d_sb->s_user_ns != &init_user_ns)
1031 return ERR_PTR(-EPERM);
1032
1033 return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
1034}
1035EXPORT_SYMBOL_GPL(vfs_submount);
1036
1037static struct mount *clone_mnt(struct mount *old, struct dentry *root,
1038 int flag)
1039{
1040 struct super_block *sb = old->mnt.mnt_sb;
1041 struct mount *mnt;
1042 int err;
1043
1044 mnt = alloc_vfsmnt(old->mnt_devname);
1045 if (!mnt)
1046 return ERR_PTR(-ENOMEM);
1047
1048 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
1049 mnt->mnt_group_id = 0;
1050 else
1051 mnt->mnt_group_id = old->mnt_group_id;
1052
1053 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
1054 err = mnt_alloc_group_id(mnt);
1055 if (err)
1056 goto out_free;
1057 }
1058
1059 mnt->mnt.mnt_flags = old->mnt.mnt_flags;
1060 mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
1061
1062 atomic_inc(&sb->s_active);
1063 mnt->mnt.mnt_sb = sb;
1064 mnt->mnt.mnt_root = dget(root);
1065 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
1066 mnt->mnt_parent = mnt;
1067 lock_mount_hash();
1068 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
1069 unlock_mount_hash();
1070
1071 if ((flag & CL_SLAVE) ||
1072 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
1073 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
1074 mnt->mnt_master = old;
1075 CLEAR_MNT_SHARED(mnt);
1076 } else if (!(flag & CL_PRIVATE)) {
1077 if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
1078 list_add(&mnt->mnt_share, &old->mnt_share);
1079 if (IS_MNT_SLAVE(old))
1080 list_add(&mnt->mnt_slave, &old->mnt_slave);
1081 mnt->mnt_master = old->mnt_master;
1082 } else {
1083 CLEAR_MNT_SHARED(mnt);
1084 }
1085 if (flag & CL_MAKE_SHARED)
1086 set_mnt_shared(mnt);
1087
1088
1089
1090 if (flag & CL_EXPIRE) {
1091 if (!list_empty(&old->mnt_expire))
1092 list_add(&mnt->mnt_expire, &old->mnt_expire);
1093 }
1094
1095 return mnt;
1096
1097 out_free:
1098 mnt_free_id(mnt);
1099 free_vfsmnt(mnt);
1100 return ERR_PTR(err);
1101}
1102
1103static void cleanup_mnt(struct mount *mnt)
1104{
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115 WARN_ON(mnt_get_writers(mnt));
1116 if (unlikely(mnt->mnt_pins.first))
1117 mnt_pin_kill(mnt);
1118 fsnotify_vfsmount_delete(&mnt->mnt);
1119 dput(mnt->mnt.mnt_root);
1120 deactivate_super(mnt->mnt.mnt_sb);
1121 mnt_free_id(mnt);
1122 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
1123}
1124
1125static void __cleanup_mnt(struct rcu_head *head)
1126{
1127 cleanup_mnt(container_of(head, struct mount, mnt_rcu));
1128}
1129
1130static LLIST_HEAD(delayed_mntput_list);
1131static void delayed_mntput(struct work_struct *unused)
1132{
1133 struct llist_node *node = llist_del_all(&delayed_mntput_list);
1134 struct mount *m, *t;
1135
1136 llist_for_each_entry_safe(m, t, node, mnt_llist)
1137 cleanup_mnt(m);
1138}
1139static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
1140
1141static void mntput_no_expire(struct mount *mnt)
1142{
1143 rcu_read_lock();
1144 if (likely(READ_ONCE(mnt->mnt_ns))) {
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154 mnt_add_count(mnt, -1);
1155 rcu_read_unlock();
1156 return;
1157 }
1158 lock_mount_hash();
1159
1160
1161
1162
1163 smp_mb();
1164 mnt_add_count(mnt, -1);
1165 if (mnt_get_count(mnt)) {
1166 rcu_read_unlock();
1167 unlock_mount_hash();
1168 return;
1169 }
1170 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
1171 rcu_read_unlock();
1172 unlock_mount_hash();
1173 return;
1174 }
1175 mnt->mnt.mnt_flags |= MNT_DOOMED;
1176 rcu_read_unlock();
1177
1178 list_del(&mnt->mnt_instance);
1179
1180 if (unlikely(!list_empty(&mnt->mnt_mounts))) {
1181 struct mount *p, *tmp;
1182 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1183 umount_mnt(p);
1184 }
1185 }
1186 unlock_mount_hash();
1187
1188 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
1189 struct task_struct *task = current;
1190 if (likely(!(task->flags & PF_KTHREAD))) {
1191 init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
1192 if (!task_work_add(task, &mnt->mnt_rcu, true))
1193 return;
1194 }
1195 if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
1196 schedule_delayed_work(&delayed_mntput_work, 1);
1197 return;
1198 }
1199 cleanup_mnt(mnt);
1200}
1201
1202void mntput(struct vfsmount *mnt)
1203{
1204 if (mnt) {
1205 struct mount *m = real_mount(mnt);
1206
1207 if (unlikely(m->mnt_expiry_mark))
1208 m->mnt_expiry_mark = 0;
1209 mntput_no_expire(m);
1210 }
1211}
1212EXPORT_SYMBOL(mntput);
1213
1214struct vfsmount *mntget(struct vfsmount *mnt)
1215{
1216 if (mnt)
1217 mnt_add_count(real_mount(mnt), 1);
1218 return mnt;
1219}
1220EXPORT_SYMBOL(mntget);
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232bool path_is_mountpoint(const struct path *path)
1233{
1234 unsigned seq;
1235 bool res;
1236
1237 if (!d_mountpoint(path->dentry))
1238 return false;
1239
1240 rcu_read_lock();
1241 do {
1242 seq = read_seqbegin(&mount_lock);
1243 res = __path_is_mountpoint(path);
1244 } while (read_seqretry(&mount_lock, seq));
1245 rcu_read_unlock();
1246
1247 return res;
1248}
1249EXPORT_SYMBOL(path_is_mountpoint);
1250
1251struct vfsmount *mnt_clone_internal(const struct path *path)
1252{
1253 struct mount *p;
1254 p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
1255 if (IS_ERR(p))
1256 return ERR_CAST(p);
1257 p->mnt.mnt_flags |= MNT_INTERNAL;
1258 return &p->mnt;
1259}
1260
1261#ifdef CONFIG_PROC_FS
1262
1263static void *m_start(struct seq_file *m, loff_t *pos)
1264{
1265 struct proc_mounts *p = m->private;
1266
1267 down_read(&namespace_sem);
1268 if (p->cached_event == p->ns->event) {
1269 void *v = p->cached_mount;
1270 if (*pos == p->cached_index)
1271 return v;
1272 if (*pos == p->cached_index + 1) {
1273 v = seq_list_next(v, &p->ns->list, &p->cached_index);
1274 return p->cached_mount = v;
1275 }
1276 }
1277
1278 p->cached_event = p->ns->event;
1279 p->cached_mount = seq_list_start(&p->ns->list, *pos);
1280 p->cached_index = *pos;
1281 return p->cached_mount;
1282}
1283
1284static void *m_next(struct seq_file *m, void *v, loff_t *pos)
1285{
1286 struct proc_mounts *p = m->private;
1287
1288 p->cached_mount = seq_list_next(v, &p->ns->list, pos);
1289 p->cached_index = *pos;
1290 return p->cached_mount;
1291}
1292
1293static void m_stop(struct seq_file *m, void *v)
1294{
1295 up_read(&namespace_sem);
1296}
1297
1298static int m_show(struct seq_file *m, void *v)
1299{
1300 struct proc_mounts *p = m->private;
1301 struct mount *r = list_entry(v, struct mount, mnt_list);
1302 return p->show(m, &r->mnt);
1303}
1304
1305const struct seq_operations mounts_op = {
1306 .start = m_start,
1307 .next = m_next,
1308 .stop = m_stop,
1309 .show = m_show,
1310};
1311#endif
1312
1313
1314
1315
1316
1317
1318
1319
1320
/*
 * Check whether the mount tree rooted at @m looks unused.
 *
 * Under the mount hash lock, the reference counts of all mounts in the
 * tree are summed and compared against an allowance of two per mount.
 * Returns 1 if the sum does not exceed the allowance, 0 otherwise.
 */
int may_umount_tree(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int actual_refs = 0;
	int minimum_refs = 0;
	struct mount *p;
	BUG_ON(!m);

	lock_mount_hash();
	p = mnt;
	while (p) {
		actual_refs += mnt_get_count(p);
		minimum_refs += 2;
		p = next_mnt(p, mnt);
	}
	unlock_mount_hash();

	return actual_refs <= minimum_refs;
}

EXPORT_SYMBOL(may_umount_tree);
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358int may_umount(struct vfsmount *mnt)
1359{
1360 int ret = 1;
1361 down_read(&namespace_sem);
1362 lock_mount_hash();
1363 if (propagate_mount_busy(real_mount(mnt), 2))
1364 ret = 0;
1365 unlock_mount_hash();
1366 up_read(&namespace_sem);
1367 return ret;
1368}
1369
1370EXPORT_SYMBOL(may_umount);
1371
1372static HLIST_HEAD(unmounted);
1373
1374static void namespace_unlock(void)
1375{
1376 struct hlist_head head;
1377
1378 hlist_move_list(&unmounted, &head);
1379
1380 up_write(&namespace_sem);
1381
1382 if (likely(hlist_empty(&head)))
1383 return;
1384
1385 synchronize_rcu_expedited();
1386
1387 group_pin_kill(&head);
1388}
1389
1390static inline void namespace_lock(void)
1391{
1392 down_write(&namespace_sem);
1393}
1394
/* Behaviour bits accepted by umount_tree() and disconnect_mount(). */
enum umount_tree_flags {
	UMOUNT_SYNC		= 1 << 0,	/* mark mounts MNT_SYNC_UMOUNT; always disconnect */
	UMOUNT_PROPAGATE	= 1 << 1,	/* run the propagate_*() helpers */
	UMOUNT_CONNECTED	= 1 << 2,	/* keep mounts under an unmounted parent connected */
};
1400
1401static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
1402{
1403
1404 if (how & UMOUNT_SYNC)
1405 return true;
1406
1407
1408 if (!mnt_has_parent(mnt))
1409 return true;
1410
1411
1412
1413
1414
1415 if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
1416 return true;
1417
1418
1419 if (how & UMOUNT_CONNECTED)
1420 return false;
1421
1422
1423 if (IS_MNT_LOCKED(mnt))
1424 return false;
1425
1426
1427 return true;
1428}
1429
1430
1431
1432
1433
1434static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1435{
1436 LIST_HEAD(tmp_list);
1437 struct mount *p;
1438
1439 if (how & UMOUNT_PROPAGATE)
1440 propagate_mount_unlock(mnt);
1441
1442
1443 for (p = mnt; p; p = next_mnt(p, mnt)) {
1444 p->mnt.mnt_flags |= MNT_UMOUNT;
1445 list_move(&p->mnt_list, &tmp_list);
1446 }
1447
1448
1449 list_for_each_entry(p, &tmp_list, mnt_list) {
1450 list_del_init(&p->mnt_child);
1451 }
1452
1453
1454 if (how & UMOUNT_PROPAGATE)
1455 propagate_umount(&tmp_list);
1456
1457 while (!list_empty(&tmp_list)) {
1458 struct mnt_namespace *ns;
1459 bool disconnect;
1460 p = list_first_entry(&tmp_list, struct mount, mnt_list);
1461 list_del_init(&p->mnt_expire);
1462 list_del_init(&p->mnt_list);
1463 ns = p->mnt_ns;
1464 if (ns) {
1465 ns->mounts--;
1466 __touch_mnt_namespace(ns);
1467 }
1468 p->mnt_ns = NULL;
1469 if (how & UMOUNT_SYNC)
1470 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
1471
1472 disconnect = disconnect_mount(p, how);
1473
1474 pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
1475 disconnect ? &unmounted : NULL);
1476 if (mnt_has_parent(p)) {
1477 mnt_add_count(p->mnt_parent, -1);
1478 if (!disconnect) {
1479
1480 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
1481 } else {
1482 umount_mnt(p);
1483 }
1484 }
1485 change_mnt_propagation(p, MS_PRIVATE);
1486 }
1487}
1488
1489static void shrink_submounts(struct mount *mnt);
1490
1491static int do_umount_root(struct super_block *sb)
1492{
1493 int ret = 0;
1494
1495 down_write(&sb->s_umount);
1496 if (!sb_rdonly(sb)) {
1497 struct fs_context *fc;
1498
1499 fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY,
1500 SB_RDONLY);
1501 if (IS_ERR(fc)) {
1502 ret = PTR_ERR(fc);
1503 } else {
1504 ret = parse_monolithic_mount_data(fc, NULL);
1505 if (!ret)
1506 ret = reconfigure_super(fc);
1507 put_fs_context(fc);
1508 }
1509 }
1510 up_write(&sb->s_umount);
1511 return ret;
1512}
1513
1514static int do_umount(struct mount *mnt, int flags)
1515{
1516 struct super_block *sb = mnt->mnt.mnt_sb;
1517 int retval;
1518
1519 retval = security_sb_umount(&mnt->mnt, flags);
1520 if (retval)
1521 return retval;
1522
1523
1524
1525
1526
1527
1528
1529 if (flags & MNT_EXPIRE) {
1530 if (&mnt->mnt == current->fs->root.mnt ||
1531 flags & (MNT_FORCE | MNT_DETACH))
1532 return -EINVAL;
1533
1534
1535
1536
1537
1538 lock_mount_hash();
1539 if (mnt_get_count(mnt) != 2) {
1540 unlock_mount_hash();
1541 return -EBUSY;
1542 }
1543 unlock_mount_hash();
1544
1545 if (!xchg(&mnt->mnt_expiry_mark, 1))
1546 return -EAGAIN;
1547 }
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559 if (flags & MNT_FORCE && sb->s_op->umount_begin) {
1560 sb->s_op->umount_begin(sb);
1561 }
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
1573
1574
1575
1576
1577 if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
1578 return -EPERM;
1579 return do_umount_root(sb);
1580 }
1581
1582 namespace_lock();
1583 lock_mount_hash();
1584
1585
1586 retval = -EINVAL;
1587 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1588 goto out;
1589
1590 event++;
1591 if (flags & MNT_DETACH) {
1592 if (!list_empty(&mnt->mnt_list))
1593 umount_tree(mnt, UMOUNT_PROPAGATE);
1594 retval = 0;
1595 } else {
1596 shrink_submounts(mnt);
1597 retval = -EBUSY;
1598 if (!propagate_mount_busy(mnt, 2)) {
1599 if (!list_empty(&mnt->mnt_list))
1600 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
1601 retval = 0;
1602 }
1603 }
1604out:
1605 unlock_mount_hash();
1606 namespace_unlock();
1607 return retval;
1608}
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620void __detach_mounts(struct dentry *dentry)
1621{
1622 struct mountpoint *mp;
1623 struct mount *mnt;
1624
1625 namespace_lock();
1626 lock_mount_hash();
1627 mp = lookup_mountpoint(dentry);
1628 if (IS_ERR_OR_NULL(mp))
1629 goto out_unlock;
1630
1631 event++;
1632 while (!hlist_empty(&mp->m_list)) {
1633 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
1634 if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
1635 hlist_add_head(&mnt->mnt_umount.s_list, &unmounted);
1636 umount_mnt(mnt);
1637 }
1638 else umount_tree(mnt, UMOUNT_CONNECTED);
1639 }
1640 put_mountpoint(mp);
1641out_unlock:
1642 unlock_mount_hash();
1643 namespace_unlock();
1644}
1645
1646
1647
1648
1649static inline bool may_mount(void)
1650{
1651 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
1652}
1653
1654static inline bool may_mandlock(void)
1655{
1656#ifndef CONFIG_MANDATORY_FILE_LOCKING
1657 return false;
1658#endif
1659 return capable(CAP_SYS_ADMIN);
1660}
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670int ksys_umount(char __user *name, int flags)
1671{
1672 struct path path;
1673 struct mount *mnt;
1674 int retval;
1675 int lookup_flags = 0;
1676
1677 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1678 return -EINVAL;
1679
1680 if (!may_mount())
1681 return -EPERM;
1682
1683 if (!(flags & UMOUNT_NOFOLLOW))
1684 lookup_flags |= LOOKUP_FOLLOW;
1685
1686 lookup_flags |= LOOKUP_NO_EVAL;
1687
1688 retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
1689 if (retval)
1690 goto out;
1691 mnt = real_mount(path.mnt);
1692 retval = -EINVAL;
1693 if (path.dentry != path.mnt->mnt_root)
1694 goto dput_and_out;
1695 if (!check_mnt(mnt))
1696 goto dput_and_out;
1697 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1698 goto dput_and_out;
1699 retval = -EPERM;
1700 if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
1701 goto dput_and_out;
1702
1703 retval = do_umount(mnt, flags);
1704dput_and_out:
1705
1706 dput(path.dentry);
1707 mntput_no_expire(mnt);
1708out:
1709 return retval;
1710}
1711
/* umount system call: forwards both arguments unchanged to ksys_umount(). */
SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	return ksys_umount(name, flags);
}
1716
1717#ifdef __ARCH_WANT_SYS_OLDUMOUNT
1718
1719
1720
1721
/*
 * Flag-less umount variant, only built when __ARCH_WANT_SYS_OLDUMOUNT is
 * defined: behaves like umount with flags == 0.
 */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return ksys_umount(name, 0);
}
1726
1727#endif
1728
1729static bool is_mnt_ns_file(struct dentry *dentry)
1730{
1731
1732 return dentry->d_op == &ns_dentry_operations &&
1733 dentry->d_fsdata == &mntns_operations;
1734}
1735
1736struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
1737{
1738 return container_of(ns, struct mnt_namespace, ns);
1739}
1740
1741static bool mnt_ns_loop(struct dentry *dentry)
1742{
1743
1744
1745
1746 struct mnt_namespace *mnt_ns;
1747 if (!is_mnt_ns_file(dentry))
1748 return false;
1749
1750 mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
1751 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
1752}
1753
/*
 * copy_tree - clone @mnt (rooted at @dentry) together with its child mounts.
 * @mnt:    mount to copy
 * @dentry: root of the copy; only children mounted below it are copied
 * @flag:   CL_* flags handed to clone_mnt(); CL_COPY_UNBINDABLE and
 *          CL_COPY_MNT_NS_FILE additionally decide which mounts are copied
 *
 * Returns the root of the new tree, or an ERR_PTR() on failure (in which
 * case anything already copied has been torn down again).
 */
struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
					int flag)
{
	struct mount *res, *p, *q, *r, *parent;

	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
		return ERR_PTR(-EINVAL);

	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt_mountpoint = mnt->mnt_mountpoint;

	/* p walks the source tree, q tracks the matching mount in the copy. */
	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		struct mount *s;
		/* Ignore children that are not mounted below @dentry. */
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			if (!(flag & CL_COPY_UNBINDABLE) &&
			    IS_MNT_UNBINDABLE(s)) {
				if (s->mnt.mnt_flags & MNT_LOCKED) {
					/* Unbindable and locked: refuse the whole copy. */
					q = ERR_PTR(-EPERM);
					goto out;
				} else {
					/* Unbindable only: leave this subtree out. */
					s = skip_mnt_tree(s);
					continue;
				}
			}
			/* Mount namespace files are left out unless asked for. */
			if (!(flag & CL_COPY_MNT_NS_FILE) &&
			    is_mnt_ns_file(s->mnt.mnt_root)) {
				s = skip_mnt_tree(s);
				continue;
			}
			/*
			 * Climb p and q in lockstep until p is the parent of s;
			 * q is then the copy the new clone must hang off.
			 */
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			parent = q;
			q = clone_mnt(p, p->mnt.mnt_root, flag);
			if (IS_ERR(q))
				goto out;
			lock_mount_hash();
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, parent, p->mnt_mp);
			unlock_mount_hash();
		}
	}
	return res;
out:
	/* q holds the error; discard whatever has been copied so far. */
	if (res) {
		lock_mount_hash();
		umount_tree(res, UMOUNT_SYNC);
		unlock_mount_hash();
	}
	return q;
}
1818
1819
1820
1821struct vfsmount *collect_mounts(const struct path *path)
1822{
1823 struct mount *tree;
1824 namespace_lock();
1825 if (!check_mnt(real_mount(path->mnt)))
1826 tree = ERR_PTR(-EINVAL);
1827 else
1828 tree = copy_tree(real_mount(path->mnt), path->dentry,
1829 CL_COPY_ALL | CL_PRIVATE);
1830 namespace_unlock();
1831 if (IS_ERR(tree))
1832 return ERR_CAST(tree);
1833 return &tree->mnt;
1834}
1835
1836static void free_mnt_ns(struct mnt_namespace *);
1837static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);
1838
1839void dissolve_on_fput(struct vfsmount *mnt)
1840{
1841 struct mnt_namespace *ns;
1842 namespace_lock();
1843 lock_mount_hash();
1844 ns = real_mount(mnt)->mnt_ns;
1845 if (ns) {
1846 if (is_anon_ns(ns))
1847 umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
1848 else
1849 ns = NULL;
1850 }
1851 unlock_mount_hash();
1852 namespace_unlock();
1853 if (ns)
1854 free_mnt_ns(ns);
1855}
1856
/*
 * drop_collected_mounts - unmount the tree rooted at @mnt.
 *
 * Calls umount_tree() with no flags while holding namespace_lock() and
 * the mount hash lock.
 */
void drop_collected_mounts(struct vfsmount *mnt)
{
	struct mount *tree = real_mount(mnt);

	namespace_lock();
	lock_mount_hash();

	umount_tree(tree, 0);

	unlock_mount_hash();
	namespace_unlock();
}
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875struct vfsmount *clone_private_mount(const struct path *path)
1876{
1877 struct mount *old_mnt = real_mount(path->mnt);
1878 struct mount *new_mnt;
1879
1880 if (IS_MNT_UNBINDABLE(old_mnt))
1881 return ERR_PTR(-EINVAL);
1882
1883 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
1884 if (IS_ERR(new_mnt))
1885 return ERR_CAST(new_mnt);
1886
1887 return &new_mnt->mnt;
1888}
1889EXPORT_SYMBOL_GPL(clone_private_mount);
1890
1891int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1892 struct vfsmount *root)
1893{
1894 struct mount *mnt;
1895 int res = f(root, arg);
1896 if (res)
1897 return res;
1898 list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
1899 res = f(&mnt->mnt, arg);
1900 if (res)
1901 return res;
1902 }
1903 return 0;
1904}
1905
1906static void lock_mnt_tree(struct mount *mnt)
1907{
1908 struct mount *p;
1909
1910 for (p = mnt; p; p = next_mnt(p, mnt)) {
1911 int flags = p->mnt.mnt_flags;
1912
1913 flags |= MNT_LOCK_ATIME;
1914
1915 if (flags & MNT_READONLY)
1916 flags |= MNT_LOCK_READONLY;
1917
1918 if (flags & MNT_NODEV)
1919 flags |= MNT_LOCK_NODEV;
1920
1921 if (flags & MNT_NOSUID)
1922 flags |= MNT_LOCK_NOSUID;
1923
1924 if (flags & MNT_NOEXEC)
1925 flags |= MNT_LOCK_NOEXEC;
1926
1927 if (list_empty(&p->mnt_expire))
1928 flags |= MNT_LOCKED;
1929 p->mnt.mnt_flags = flags;
1930 }
1931}
1932
1933static void cleanup_group_ids(struct mount *mnt, struct mount *end)
1934{
1935 struct mount *p;
1936
1937 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
1938 if (p->mnt_group_id && !IS_MNT_SHARED(p))
1939 mnt_release_group_id(p);
1940 }
1941}
1942
1943static int invent_group_ids(struct mount *mnt, bool recurse)
1944{
1945 struct mount *p;
1946
1947 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
1948 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
1949 int err = mnt_alloc_group_id(p);
1950 if (err) {
1951 cleanup_group_ids(mnt, p);
1952 return err;
1953 }
1954 }
1955 }
1956
1957 return 0;
1958}
1959
1960int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
1961{
1962 unsigned int max = READ_ONCE(sysctl_mount_max);
1963 unsigned int mounts = 0, old, pending, sum;
1964 struct mount *p;
1965
1966 for (p = mnt; p; p = next_mnt(p, mnt))
1967 mounts++;
1968
1969 old = ns->mounts;
1970 pending = ns->pending_mounts;
1971 sum = old + pending;
1972 if ((old > sum) ||
1973 (pending > sum) ||
1974 (max < sum) ||
1975 (mounts > (max - sum)))
1976 return -ENOSPC;
1977
1978 ns->pending_mounts = pending + mounts;
1979 return 0;
1980}
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
/*
 * attach_recursive_mnt - attach the tree at @source_mnt onto @dest_mp.
 * @source_mnt:  root of the tree being attached
 * @dest_mnt:    mount that will become its parent
 * @dest_mp:     mountpoint within @dest_mnt
 * @parent_path: non-NULL when an already attached mount is being moved;
 *               handed to detach_mnt().  NULL when a tree is being added.
 *
 * If @dest_mnt is shared the tree is also propagated via propagate_mnt().
 * Returns 0 on success or a negative errno, in which case propagated
 * copies and reserved counts have been undone.
 */
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			struct path *parent_path)
{
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	HLIST_HEAD(tree_list);
	struct mnt_namespace *ns = dest_mnt->mnt_ns;
	struct mountpoint *smp;
	struct mount *child, *p;
	struct hlist_node *n;
	int err;

	/*
	 * Get a mountpoint for the source root up front; it is needed by
	 * mnt_change_mountpoint() in the propagation loop below.
	 */
	smp = get_mountpoint(source_mnt->mnt.mnt_root);
	if (IS_ERR(smp))
		return PTR_ERR(smp);

	/* Adding (not moving) mounts: reserve room in the namespace first. */
	if (!parent_path) {
		err = count_mounts(ns, source_mnt);
		if (err)
			goto out;
	}

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		/*
		 * Taken before looking at err: the out_cleanup_ids path
		 * expects the mount hash lock to be held.
		 */
		lock_mount_hash();
		if (err)
			goto out_cleanup_ids;
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	} else {
		lock_mount_hash();
	}
	if (parent_path) {
		/* Move: unhook from the old parent and hook onto the new one. */
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		if (source_mnt->mnt_ns) {
			/*
			 * The tree already has a namespace: unlink that
			 * namespace's list head from the tree's mount list.
			 */
			list_del_init(&source_mnt->mnt_ns->list);
		}
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt);
	}

	/* Commit every copy that propagate_mnt() queued on tree_list. */
	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
		struct mount *q;
		hlist_del_init(&child->mnt_hash);
		/* Something already mounted there gets re-parented via smp. */
		q = __lookup_mnt(&child->mnt_parent->mnt,
				 child->mnt_mountpoint);
		if (q)
			mnt_change_mountpoint(child, smp, q);

		/* Copies landing in another user namespace get locked ... */
		if (child->mnt_parent->mnt_ns->user_ns != user_ns)
			lock_mnt_tree(child);
		/* ... but the root of each copy never keeps MNT_LOCKED. */
		child->mnt.mnt_flags &= ~MNT_LOCKED;
		commit_tree(child);
	}
	put_mountpoint(smp);
	unlock_mount_hash();

	return 0;

 out_cleanup_ids:
	/* Propagation failed: drop the copies created so far. */
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		child->mnt_parent->mnt_ns->pending_mounts = 0;
		umount_tree(child, UMOUNT_SYNC);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
 out:
	ns->pending_mounts = 0;

	/* The mount hash lock is not held here, so take mount_lock. */
	read_seqlock_excl(&mount_lock);
	put_mountpoint(smp);
	read_sequnlock_excl(&mount_lock);

	return err;
}
2133
2134static struct mountpoint *lock_mount(struct path *path)
2135{
2136 struct vfsmount *mnt;
2137 struct dentry *dentry = path->dentry;
2138retry:
2139 inode_lock(dentry->d_inode);
2140 if (unlikely(cant_mount(dentry))) {
2141 inode_unlock(dentry->d_inode);
2142 return ERR_PTR(-ENOENT);
2143 }
2144 namespace_lock();
2145 mnt = lookup_mnt(path);
2146 if (likely(!mnt)) {
2147 struct mountpoint *mp = get_mountpoint(dentry);
2148 if (IS_ERR(mp)) {
2149 namespace_unlock();
2150 inode_unlock(dentry->d_inode);
2151 return mp;
2152 }
2153 return mp;
2154 }
2155 namespace_unlock();
2156 inode_unlock(path->dentry->d_inode);
2157 path_put(path);
2158 path->mnt = mnt;
2159 dentry = path->dentry = dget(mnt->mnt_root);
2160 goto retry;
2161}
2162
2163static void unlock_mount(struct mountpoint *where)
2164{
2165 struct dentry *dentry = where->m_dentry;
2166
2167 read_seqlock_excl(&mount_lock);
2168 put_mountpoint(where);
2169 read_sequnlock_excl(&mount_lock);
2170
2171 namespace_unlock();
2172 inode_unlock(dentry->d_inode);
2173}
2174
2175static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
2176{
2177 if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
2178 return -EINVAL;
2179
2180 if (d_is_dir(mp->m_dentry) !=
2181 d_is_dir(mnt->mnt.mnt_root))
2182 return -ENOTDIR;
2183
2184 return attach_recursive_mnt(mnt, p, mp, NULL);
2185}
2186
2187
2188
2189
2190
2191static int flags_to_propagation_type(int ms_flags)
2192{
2193 int type = ms_flags & ~(MS_REC | MS_SILENT);
2194
2195
2196 if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2197 return 0;
2198
2199 if (!is_power_of_2(type))
2200 return 0;
2201 return type;
2202}
2203
2204
2205
2206
2207static int do_change_type(struct path *path, int ms_flags)
2208{
2209 struct mount *m;
2210 struct mount *mnt = real_mount(path->mnt);
2211 int recurse = ms_flags & MS_REC;
2212 int type;
2213 int err = 0;
2214
2215 if (path->dentry != path->mnt->mnt_root)
2216 return -EINVAL;
2217
2218 type = flags_to_propagation_type(ms_flags);
2219 if (!type)
2220 return -EINVAL;
2221
2222 namespace_lock();
2223 if (type == MS_SHARED) {
2224 err = invent_group_ids(mnt, recurse);
2225 if (err)
2226 goto out_unlock;
2227 }
2228
2229 lock_mount_hash();
2230 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
2231 change_mnt_propagation(m, type);
2232 unlock_mount_hash();
2233
2234 out_unlock:
2235 namespace_unlock();
2236 return err;
2237}
2238
2239static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
2240{
2241 struct mount *child;
2242 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
2243 if (!is_subdir(child->mnt_mountpoint, dentry))
2244 continue;
2245
2246 if (child->mnt.mnt_flags & MNT_LOCKED)
2247 return true;
2248 }
2249 return false;
2250}
2251
2252static struct mount *__do_loopback(struct path *old_path, int recurse)
2253{
2254 struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
2255
2256 if (IS_MNT_UNBINDABLE(old))
2257 return mnt;
2258
2259 if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
2260 return mnt;
2261
2262 if (!recurse && has_locked_children(old, old_path->dentry))
2263 return mnt;
2264
2265 if (recurse)
2266 mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
2267 else
2268 mnt = clone_mnt(old, old_path->dentry, 0);
2269
2270 if (!IS_ERR(mnt))
2271 mnt->mnt.mnt_flags &= ~MNT_LOCKED;
2272
2273 return mnt;
2274}
2275
2276
2277
2278
/*
 * do_loopback - bind mount @old_name onto @path.
 * @path:     destination of the bind
 * @old_name: kernel-space path name of the source; must be non-empty
 * @recurse:  non-zero to bind the whole tree rather than a single mount
 *
 * Returns 0 on success or a negative errno.
 */
static int do_loopback(struct path *path, const char *old_name,
				int recurse)
{
	struct path old_path;
	struct mount *mnt = NULL, *parent;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
	if (err)
		return err;

	/* -EINVAL covers both the namespace-loop and check_mnt() failures. */
	err = -EINVAL;
	if (mnt_ns_loop(old_path.dentry))
		goto out;

	mp = lock_mount(path);
	if (IS_ERR(mp)) {
		err = PTR_ERR(mp);
		goto out;
	}

	/* lock_mount() may have advanced @path; read its mount afterwards. */
	parent = real_mount(path->mnt);
	if (!check_mnt(parent))
		goto out2;

	mnt = __do_loopback(&old_path, recurse);
	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto out2;
	}

	err = graft_tree(mnt, parent, mp);
	if (err) {
		/* Attaching failed: discard the copy made above. */
		lock_mount_hash();
		umount_tree(mnt, UMOUNT_SYNC);
		unlock_mount_hash();
	}
out2:
	unlock_mount(mp);
out:
	path_put(&old_path);
	return err;
}
2324
/*
 * open_detached_copy - clone the mount at @path into a new anonymous
 * namespace and open it as an O_PATH file.
 * @path:      source; on return @path->mnt refers to the clone
 * @recursive: clone the whole tree rather than a single mount
 *
 * Returns the opened file or an ERR_PTR().
 */
static struct file *open_detached_copy(struct path *path, bool recursive)
{
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
	struct mount *mnt, *p;
	struct file *file;

	if (IS_ERR(ns))
		return ERR_CAST(ns);

	namespace_lock();
	mnt = __do_loopback(path, recursive);
	if (IS_ERR(mnt)) {
		namespace_unlock();
		free_mnt_ns(ns);
		return ERR_CAST(mnt);
	}

	/* Hand every mount of the copy to the new namespace and count it. */
	lock_mount_hash();
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		p->mnt_ns = ns;
		ns->mounts++;
	}
	ns->root = mnt;
	/* Link the namespace's list head into the copy's mount list. */
	list_add_tail(&ns->list, &mnt->mnt_list);
	/* Extra reference on the copy, which replaces @path->mnt below. */
	mntget(&mnt->mnt);
	unlock_mount_hash();
	namespace_unlock();

	/* Swap the caller's reference on the original for the copy. */
	mntput(path->mnt);
	path->mnt = &mnt->mnt;
	file = dentry_open(path, O_PATH, current_cred());
	if (IS_ERR(file))
		dissolve_on_fput(path->mnt);
	else
		/* NOTE(review): presumably makes fput dissolve the tree - confirm. */
		file->f_mode |= FMODE_NEED_UNMOUNT;
	return file;
}
2363
2364SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags)
2365{
2366 struct file *file;
2367 struct path path;
2368 int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
2369 bool detached = flags & OPEN_TREE_CLONE;
2370 int error;
2371 int fd;
2372
2373 BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
2374
2375 if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
2376 AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
2377 OPEN_TREE_CLOEXEC))
2378 return -EINVAL;
2379
2380 if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
2381 return -EINVAL;
2382
2383 if (flags & AT_NO_AUTOMOUNT)
2384 lookup_flags &= ~LOOKUP_AUTOMOUNT;
2385 if (flags & AT_SYMLINK_NOFOLLOW)
2386 lookup_flags &= ~LOOKUP_FOLLOW;
2387 if (flags & AT_EMPTY_PATH)
2388 lookup_flags |= LOOKUP_EMPTY;
2389
2390 if (detached && !may_mount())
2391 return -EPERM;
2392
2393 fd = get_unused_fd_flags(flags & O_CLOEXEC);
2394 if (fd < 0)
2395 return fd;
2396
2397 error = user_path_at(dfd, filename, lookup_flags, &path);
2398 if (unlikely(error)) {
2399 file = ERR_PTR(error);
2400 } else {
2401 if (detached)
2402 file = open_detached_copy(&path, flags & AT_RECURSIVE);
2403 else
2404 file = dentry_open(&path, O_PATH, current_cred());
2405 path_put(&path);
2406 }
2407 if (IS_ERR(file)) {
2408 put_unused_fd(fd);
2409 return PTR_ERR(file);
2410 }
2411 fd_install(fd, file);
2412 return fd;
2413}
2414
2415
2416
2417
2418
2419
2420
2421static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
2422{
2423 unsigned int fl = mnt->mnt.mnt_flags;
2424
2425 if ((fl & MNT_LOCK_READONLY) &&
2426 !(mnt_flags & MNT_READONLY))
2427 return false;
2428
2429 if ((fl & MNT_LOCK_NODEV) &&
2430 !(mnt_flags & MNT_NODEV))
2431 return false;
2432
2433 if ((fl & MNT_LOCK_NOSUID) &&
2434 !(mnt_flags & MNT_NOSUID))
2435 return false;
2436
2437 if ((fl & MNT_LOCK_NOEXEC) &&
2438 !(mnt_flags & MNT_NOEXEC))
2439 return false;
2440
2441 if ((fl & MNT_LOCK_ATIME) &&
2442 ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
2443 return false;
2444
2445 return true;
2446}
2447
2448static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
2449{
2450 bool readonly_request = (mnt_flags & MNT_READONLY);
2451
2452 if (readonly_request == __mnt_is_readonly(&mnt->mnt))
2453 return 0;
2454
2455 if (readonly_request)
2456 return mnt_make_readonly(mnt);
2457
2458 return __mnt_unmake_readonly(mnt);
2459}
2460
2461
2462
2463
2464
2465static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
2466{
2467 lock_mount_hash();
2468 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
2469 mnt->mnt.mnt_flags = mnt_flags;
2470 touch_mnt_namespace(mnt->mnt_ns);
2471 unlock_mount_hash();
2472}
2473
2474
2475
2476
2477
2478
2479static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
2480{
2481 struct super_block *sb = path->mnt->mnt_sb;
2482 struct mount *mnt = real_mount(path->mnt);
2483 int ret;
2484
2485 if (!check_mnt(mnt))
2486 return -EINVAL;
2487
2488 if (path->dentry != mnt->mnt.mnt_root)
2489 return -EINVAL;
2490
2491 if (!can_change_locked_flags(mnt, mnt_flags))
2492 return -EPERM;
2493
2494 down_write(&sb->s_umount);
2495 ret = change_mount_ro_state(mnt, mnt_flags);
2496 if (ret == 0)
2497 set_mount_attributes(mnt, mnt_flags);
2498 up_write(&sb->s_umount);
2499 return ret;
2500}
2501
2502
2503
2504
2505
2506
2507static int do_remount(struct path *path, int ms_flags, int sb_flags,
2508 int mnt_flags, void *data)
2509{
2510 int err;
2511 struct super_block *sb = path->mnt->mnt_sb;
2512 struct mount *mnt = real_mount(path->mnt);
2513 struct fs_context *fc;
2514
2515 if (!check_mnt(mnt))
2516 return -EINVAL;
2517
2518 if (path->dentry != path->mnt->mnt_root)
2519 return -EINVAL;
2520
2521 if (!can_change_locked_flags(mnt, mnt_flags))
2522 return -EPERM;
2523
2524 fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK);
2525 if (IS_ERR(fc))
2526 return PTR_ERR(fc);
2527
2528 err = parse_monolithic_mount_data(fc, data);
2529 if (!err) {
2530 down_write(&sb->s_umount);
2531 err = -EPERM;
2532 if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
2533 err = reconfigure_super(fc);
2534 if (!err)
2535 set_mount_attributes(mnt, mnt_flags);
2536 }
2537 up_write(&sb->s_umount);
2538 }
2539 put_fs_context(fc);
2540 return err;
2541}
2542
/*
 * tree_contains_unbindable - returns 1 if any mount in the tree rooted at
 * @mnt is unbindable, 0 otherwise.
 */
static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *cur = mnt;

	while (cur) {
		if (IS_MNT_UNBINDABLE(cur))
			return 1;
		cur = next_mnt(cur, mnt);
	}

	return 0;
}
2552
2553
2554
2555
2556
2557
2558
2559static bool check_for_nsfs_mounts(struct mount *subtree)
2560{
2561 struct mount *p;
2562 bool ret = false;
2563
2564 lock_mount_hash();
2565 for (p = subtree; p; p = next_mnt(p, subtree))
2566 if (mnt_ns_loop(p->mnt.mnt_root))
2567 goto out;
2568
2569 ret = true;
2570out:
2571 unlock_mount_hash();
2572 return ret;
2573}
2574
/*
 * do_move_mount - move the mount at @old_path onto @new_path.
 *
 * The source is either an attached mount (it has a parent) or the root of
 * a tree living in an anonymous namespace.  Returns 0 on success, -EINVAL
 * when one of the sanity checks fails, -ELOOP when the move would create
 * a loop, or the error from lock_mount()/attach_recursive_mnt().
 */
static int do_move_mount(struct path *old_path, struct path *new_path)
{
	struct path parent_path = {.mnt = NULL, .dentry = NULL};
	struct mnt_namespace *ns;
	struct mount *p;
	struct mount *old;
	struct mountpoint *mp;
	int err;
	bool attached;

	mp = lock_mount(new_path);
	if (IS_ERR(mp))
		return PTR_ERR(mp);

	old = real_mount(old_path->mnt);
	p = real_mount(new_path->mnt);
	attached = mnt_has_parent(old);
	ns = old->mnt_ns;

	err = -EINVAL;
	/* The destination mount must pass check_mnt(). */
	if (!check_mnt(p))
		goto out;

	/* The mount being moved must be mounted ... */
	if (!is_mounted(&old->mnt))
		goto out;

	/* ... and pass check_mnt() if attached, else be in an anon ns. */
	if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
		goto out;

	if (old->mnt.mnt_flags & MNT_LOCKED)
		goto out;

	/* Only the root of a mount can be moved. */
	if (old_path->dentry != old_path->mnt->mnt_root)
		goto out;

	/* Directory onto directory, non-directory onto non-directory. */
	if (d_is_dir(new_path->dentry) !=
	    d_is_dir(old_path->dentry))
		goto out;

	/* A mount whose parent is shared cannot be moved. */
	if (attached && IS_MNT_SHARED(old->mnt_parent))
		goto out;

	/* No unbindable mounts may end up under a shared destination. */
	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
		goto out;
	err = -ELOOP;
	if (!check_for_nsfs_mounts(old))
		goto out;
	/* The destination must not be @old or one of its descendants. */
	for (; mnt_has_parent(p); p = p->mnt_parent)
		if (p == old)
			goto out;

	err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
				   attached ? &parent_path : NULL);
	if (err)
		goto out;

	/* A moved mount is taken off any expiry list it was on. */
	list_del_init(&old->mnt_expire);
out:
	unlock_mount(mp);
	if (!err) {
		/* parent_path is only filled in when an attached mount moved. */
		path_put(&parent_path);
		/* The anonymous namespace that held the tree is now unused. */
		if (!attached)
			free_mnt_ns(ns);
	}
	return err;
}
2651
2652static int do_move_mount_old(struct path *path, const char *old_name)
2653{
2654 struct path old_path;
2655 int err;
2656
2657 if (!old_name || !*old_name)
2658 return -EINVAL;
2659
2660 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
2661 if (err)
2662 return err;
2663
2664 err = do_move_mount(&old_path, path);
2665 path_put(&old_path);
2666 return err;
2667}
2668
2669
2670
2671
/*
 * do_add_mount - attach the new mount @newmnt at @path.
 * @newmnt:    mount to add
 * @path:      where to add it; may be advanced by lock_mount()
 * @mnt_flags: MNT_* flags for the new mount (internal flags are stripped)
 *
 * Returns 0 on success or a negative errno.
 */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
	struct mountpoint *mp;
	struct mount *parent;
	int err;

	mnt_flags &= ~MNT_INTERNAL_FLAGS;

	mp = lock_mount(path);
	if (IS_ERR(mp))
		return PTR_ERR(mp);

	parent = real_mount(path->mnt);
	err = -EINVAL;
	if (unlikely(!check_mnt(parent))) {
		/* A parent failing check_mnt() is tolerated only for
		 * MNT_SHRINKABLE mounts ... */
		if (!(mnt_flags & MNT_SHRINKABLE))
			goto unlock;
		/* ... and only while the parent still has a namespace. */
		if (!parent->mnt_ns)
			goto unlock;
	}

	/* Refuse the same superblock on top of the root of its own mount. */
	err = -EBUSY;
	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
	    path->mnt->mnt_root == path->dentry)
		goto unlock;

	/* The root of the new mount must not be a symlink. */
	err = -EINVAL;
	if (d_is_symlink(newmnt->mnt.mnt_root))
		goto unlock;

	newmnt->mnt.mnt_flags = mnt_flags;
	err = graft_tree(newmnt, parent, mp);

unlock:
	unlock_mount(mp);
	return err;
}
2712
2713static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
2714
2715
2716
2717
2718
/*
 * do_new_mount_fc - create a mount for the superblock in @fc and attach
 * it at @mountpoint.
 * @fc:         filesystem context with fc->root set
 * @mountpoint: where to attach the new mount
 * @mnt_flags:  MNT_* flags; may be adjusted by mount_too_revealing()
 *
 * NOTE(review): sb->s_umount appears to be held for writing on entry
 * (it is released here, or via fc_drop_locked() on error) - confirm
 * against the caller.
 *
 * Returns 0 on success or a negative errno.
 */
static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
			   unsigned int mnt_flags)
{
	struct vfsmount *mnt;
	struct super_block *sb = fc->root->d_sb;
	int error;

	error = security_sb_kern_mount(sb);
	if (!error && mount_too_revealing(sb, &mnt_flags))
		error = -EPERM;

	if (unlikely(error)) {
		fc_drop_locked(fc);
		return error;
	}

	up_write(&sb->s_umount);

	mnt = vfs_create_mount(fc);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
	if (error < 0)
		/* Could not attach it: drop the reference we were given. */
		mntput(mnt);
	return error;
}
2746
2747
2748
2749
2750
/*
 * do_new_mount - create a new mount of filesystem type @fstype at @path.
 * @path:      where to mount
 * @fstype:    filesystem type name, optionally "type.subtype"
 * @sb_flags:  SB_* flags for the new superblock
 * @mnt_flags: MNT_* flags for the new mount
 * @name:      source (device) name, may be NULL
 * @data:      monolithic option data
 *
 * Returns 0 on success, -EINVAL for a missing type or empty subtype,
 * -ENODEV for an unknown type, or the error of the first failing step.
 */
static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
			int mnt_flags, const char *name, void *data)
{
	struct file_system_type *type;
	struct fs_context *fc;
	const char *subtype = NULL;
	int err = 0;

	if (!fstype)
		return -EINVAL;

	type = get_fs_type(fstype);
	if (!type)
		return -ENODEV;

	/*
	 * For types with FS_HAS_SUBTYPE the subtype is whatever follows the
	 * first '.' in @fstype; "type." with nothing after it is rejected
	 * and a name without '.' yields an empty subtype.
	 */
	if (type->fs_flags & FS_HAS_SUBTYPE) {
		subtype = strchr(fstype, '.');
		if (subtype) {
			subtype++;
			if (!*subtype) {
				put_filesystem(type);
				return -EINVAL;
			}
		} else {
			subtype = "";
		}
	}

	fc = fs_context_for_mount(type, sb_flags);
	/* Our reference on the type is dropped whether or not that worked. */
	put_filesystem(type);
	if (IS_ERR(fc))
		return PTR_ERR(fc);

	/* Each step below only runs if all previous ones succeeded. */
	if (subtype)
		err = vfs_parse_fs_string(fc, "subtype",
					  subtype, strlen(subtype));
	if (!err && name)
		err = vfs_parse_fs_string(fc, "source", name, strlen(name));
	if (!err)
		err = parse_monolithic_mount_data(fc, data);
	if (!err)
		err = vfs_get_tree(fc);
	if (!err)
		err = do_new_mount_fc(fc, path, mnt_flags);

	put_fs_context(fc);
	return err;
}
2799
/*
 * finish_automount - attach the automounted mount @m at @path.
 *
 * The mount is added with the flags of @path->mnt plus MNT_SHRINKABLE.
 * Returns 0 on success.  On failure (-ELOOP when @m would be mounted on
 * its own root, or the do_add_mount() error) @m is taken off any expiry
 * list and two references to it are dropped.
 */
int finish_automount(struct vfsmount *m, struct path *path)
{
	struct mount *mnt = real_mount(m);
	int err;

	/*
	 * The caller must hand us a mount with at least two references.
	 * NOTE(review): presumably so it cannot be expired before it has
	 * been added - confirm.
	 */
	BUG_ON(mnt_get_count(mnt) < 2);

	if (m->mnt_sb == path->mnt->mnt_sb &&
	    m->mnt_root == path->dentry) {
		err = -ELOOP;
		goto fail;
	}

	err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
	if (!err)
		return 0;
fail:
	/* Take the mount off any expiry list it may be on. */
	if (!list_empty(&mnt->mnt_expire)) {
		namespace_lock();
		list_del_init(&mnt->mnt_expire);
		namespace_unlock();
	}
	/* Drop both of the references checked for above. */
	mntput(m);
	mntput(m);
	return err;
}
2829
2830
2831
2832
2833
2834
2835void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
2836{
2837 namespace_lock();
2838
2839 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
2840
2841 namespace_unlock();
2842}
2843EXPORT_SYMBOL(mnt_set_expiry);
2844
2845
2846
2847
2848
2849
/*
 * mark_mounts_for_expiry - expire the unused mounts on @mounts.
 *
 * A mount is unmounted only if its expiry mark was already set before
 * this call and propagate_mount_busy() does not report it as busy; every
 * mount on the list has its mark set as a side effect.
 */
void mark_mounts_for_expiry(struct list_head *mounts)
{
	struct mount *mnt, *next;
	LIST_HEAD(graveyard);

	if (list_empty(mounts))
		return;

	namespace_lock();
	lock_mount_hash();

	/*
	 * First pass: set the mark on every mount and move those that were
	 * marked before and are not busy onto the graveyard.  Mounts whose
	 * mark was clear are left for a later call.
	 */
	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
			propagate_mount_busy(mnt, 1))
			continue;
		list_move(&mnt->mnt_expire, &graveyard);
	}
	/* Second pass: unmount everything collected on the graveyard. */
	while (!list_empty(&graveyard)) {
		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
		touch_mnt_namespace(mnt->mnt_ns);
		umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
	}
	unlock_mount_hash();
	namespace_unlock();
}

EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
2883
2884
2885
2886
2887
2888
2889
/*
 * select_submounts - collect unused shrinkable submounts of @parent.
 * @parent:    root of the tree to search
 * @graveyard: list that receives the selected mounts (via mnt_expire)
 *
 * Walks the tree below @parent without recursion.  Only MNT_SHRINKABLE
 * mounts are considered (a non-shrinkable mount is skipped together with
 * everything below it); shrinkable mounts without children that are not
 * busy are moved to @graveyard.  Returns the number of mounts moved.
 */
static int select_submounts(struct mount *parent, struct list_head *graveyard)
{
	struct mount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	/* Start on the first child of the current parent. */
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct mount *mnt = list_entry(tmp, struct mount, mnt_child);

		/* Advance first: mnt may be moved to another list below. */
		next = tmp->next;
		if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
			continue;

		/* Descend to the lowest nesting level first. */
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		if (!propagate_mount_busy(mnt, 1)) {
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}

	/*
	 * This level is finished: go back up and continue with the sibling
	 * that follows the mount we descended into.
	 */
	if (this_parent != parent) {
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}
2929
2930
2931
2932
2933
2934
2935
2936static void shrink_submounts(struct mount *mnt)
2937{
2938 LIST_HEAD(graveyard);
2939 struct mount *m;
2940
2941
2942 while (select_submounts(mnt, &graveyard)) {
2943 while (!list_empty(&graveyard)) {
2944 m = list_first_entry(&graveyard, struct mount,
2945 mnt_expire);
2946 touch_mnt_namespace(m->mnt_ns);
2947 umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
2948 }
2949 }
2950}
2951
2952
2953
2954
2955
2956
2957
2958static long exact_copy_from_user(void *to, const void __user * from,
2959 unsigned long n)
2960{
2961 char *t = to;
2962 const char __user *f = from;
2963 char c;
2964
2965 if (!access_ok(from, n))
2966 return n;
2967
2968 while (n) {
2969 if (__get_user(c, f)) {
2970 memset(t, 0, n);
2971 break;
2972 }
2973 *t++ = c;
2974 f++;
2975 n--;
2976 }
2977 return n;
2978}
2979
2980void *copy_mount_options(const void __user * data)
2981{
2982 int i;
2983 unsigned long size;
2984 char *copy;
2985
2986 if (!data)
2987 return NULL;
2988
2989 copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
2990 if (!copy)
2991 return ERR_PTR(-ENOMEM);
2992
2993
2994
2995
2996
2997
2998 size = TASK_SIZE - (unsigned long)data;
2999 if (size > PAGE_SIZE)
3000 size = PAGE_SIZE;
3001
3002 i = size - exact_copy_from_user(copy, data, size);
3003 if (!i) {
3004 kfree(copy);
3005 return ERR_PTR(-EFAULT);
3006 }
3007 if (i != PAGE_SIZE)
3008 memset(copy + i, 0, PAGE_SIZE - i);
3009 return copy;
3010}
3011
3012char *copy_mount_string(const void __user *data)
3013{
3014 return data ? strndup_user(data, PATH_MAX) : NULL;
3015}
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
/*
 * do_mount - common back end of the mount system call.
 * @dev_name:  kernel copy of the source/device name
 * @dir_name:  user-space path of the mount point
 * @type_page: kernel copy of the filesystem type name
 * @flags:     MS_* flags from user space
 * @data_page: kernel page of option data, or NULL
 *
 * Translates @flags into per-mount (MNT_*) and per-superblock (SB_*)
 * flags and dispatches to the remount, bind, propagation-change, move or
 * new-mount helper.  Returns 0 on success or a negative errno.
 */
long do_mount(const char *dev_name, const char __user *dir_name,
		const char *type_page, unsigned long flags, void *data_page)
{
	struct path path;
	unsigned int mnt_flags = 0, sb_flags;
	int retval = 0;

	/* Strip the MS_MGC_VAL magic from the flags if it is present. */
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

	/* Make sure the option page is NUL-terminated. */
	if (data_page)
		((char *)data_page)[PAGE_SIZE - 1] = 0;

	if (flags & MS_NOUSER)
		return -EINVAL;

	/* Look up the mount point. */
	retval = user_path(dir_name, &path);
	if (retval)
		return retval;

	/* Security hook first, then the capability checks. */
	retval = security_sb_mount(dev_name, &path,
				   type_page, flags, data_page);
	if (!retval && !may_mount())
		retval = -EPERM;
	if (!retval && (flags & SB_MANDLOCK) && !may_mandlock())
		retval = -EPERM;
	if (retval)
		goto dput_out;

	/* relatime is the default unless MS_NOATIME is given. */
	if (!(flags & MS_NOATIME))
		mnt_flags |= MNT_RELATIME;

	/* Translate the per-mount MS_* flags into MNT_* flags. */
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	if (flags & MS_NOATIME)
		mnt_flags |= MNT_NOATIME;
	if (flags & MS_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;
	if (flags & MS_STRICTATIME)
		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
	if (flags & MS_RDONLY)
		mnt_flags |= MNT_READONLY;

	/* A remount that names no atime option keeps the current setting. */
	if ((flags & MS_REMOUNT) &&
	    ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
		       MS_STRICTATIME)) == 0)) {
		mnt_flags &= ~MNT_ATIME_MASK;
		mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
	}

	/* Flags that are passed on to the superblock. */
	sb_flags = flags & (SB_RDONLY |
			    SB_SYNCHRONOUS |
			    SB_MANDLOCK |
			    SB_DIRSYNC |
			    SB_SILENT |
			    SB_POSIXACL |
			    SB_LAZYTIME |
			    SB_I_VERSION);

	/* Dispatch; the order of the tests sets the precedence of the flags. */
	if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
		retval = do_reconfigure_mnt(&path, mnt_flags);
	else if (flags & MS_REMOUNT)
		retval = do_remount(&path, flags, sb_flags, mnt_flags,
				    data_page);
	else if (flags & MS_BIND)
		retval = do_loopback(&path, dev_name, flags & MS_REC);
	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		retval = do_change_type(&path, flags);
	else if (flags & MS_MOVE)
		retval = do_move_mount_old(&path, dev_name);
	else
		retval = do_new_mount(&path, type_page, sb_flags, mnt_flags,
				      dev_name, data_page);
dput_out:
	path_put(&path);
	return retval;
}
3118
/*
 * Bump the UCOUNT_MNT_NAMESPACES counter for the caller's effective UID in
 * @ns and return whatever inc_ucount() hands back.  alloc_mnt_ns() treats a
 * NULL return as failure.
 */
static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
{
	return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
}
3123
/*
 * Drop one UCOUNT_MNT_NAMESPACES count from @ucounts; the counterpart of
 * inc_mnt_namespaces().
 */
static void dec_mnt_namespaces(struct ucounts *ucounts)
{
	dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
}
3128
/*
 * Release the bookkeeping attached to @ns and free the structure itself.
 * The mounts of the namespace are not touched here.
 */
static void free_mnt_ns(struct mnt_namespace *ns)
{
	/* Anonymous namespaces never got an inode number in alloc_mnt_ns(). */
	if (!is_anon_ns(ns))
		ns_free_inum(&ns->ns);
	dec_mnt_namespaces(ns->ucounts);
	put_user_ns(ns->user_ns);
	kfree(ns);
}
3137
3138
3139
3140
3141
3142
3143
3144
3145static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
3146
3147static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
3148{
3149 struct mnt_namespace *new_ns;
3150 struct ucounts *ucounts;
3151 int ret;
3152
3153 ucounts = inc_mnt_namespaces(user_ns);
3154 if (!ucounts)
3155 return ERR_PTR(-ENOSPC);
3156
3157 new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
3158 if (!new_ns) {
3159 dec_mnt_namespaces(ucounts);
3160 return ERR_PTR(-ENOMEM);
3161 }
3162 if (!anon) {
3163 ret = ns_alloc_inum(&new_ns->ns);
3164 if (ret) {
3165 kfree(new_ns);
3166 dec_mnt_namespaces(ucounts);
3167 return ERR_PTR(ret);
3168 }
3169 }
3170 new_ns->ns.ops = &mntns_operations;
3171 if (!anon)
3172 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
3173 atomic_set(&new_ns->count, 1);
3174 INIT_LIST_HEAD(&new_ns->list);
3175 init_waitqueue_head(&new_ns->poll);
3176 new_ns->user_ns = get_user_ns(user_ns);
3177 new_ns->ucounts = ucounts;
3178 return new_ns;
3179}
3180
/*
 * copy_mnt_ns - hand out the mount namespace for a new task.
 * @flags:   clone flags; only CLONE_NEWNS is examined here
 * @ns:      mount namespace of the parent; must not be NULL
 * @user_ns: user namespace the resulting namespace belongs to
 * @new_fs:  fs_struct whose root/pwd mounts are redirected to the copy,
 *           or NULL
 *
 * Without CLONE_NEWNS this just takes another reference on @ns.  Otherwise
 * a new namespace is allocated and populated with a copy of @ns's mount
 * tree.  Returns the namespace to use or an ERR_PTR().
 */
__latent_entropy
struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
		struct user_namespace *user_ns, struct fs_struct *new_fs)
{
	struct mnt_namespace *new_ns;
	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
	struct mount *p, *q;
	struct mount *old;
	struct mount *new;
	int copy_flags;

	BUG_ON(!ns);

	/* Common case: share the existing namespace. */
	if (likely(!(flags & CLONE_NEWNS))) {
		get_mnt_ns(ns);
		return ns;
	}

	old = ns->root;

	new_ns = alloc_mnt_ns(user_ns, false);
	if (IS_ERR(new_ns))
		return new_ns;

	namespace_lock();
	/* First pass: copy the tree topology */
	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
	/* Crossing into a different user namespace adds CL_SHARED_TO_SLAVE. */
	if (user_ns != ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		namespace_unlock();
		free_mnt_ns(new_ns);
		return ERR_CAST(new);
	}
	/* Same condition as above: the copied tree is additionally locked. */
	if (user_ns != ns->user_ns) {
		lock_mount_hash();
		lock_mnt_tree(new);
		unlock_mount_hash();
	}
	new_ns->root = new;
	list_add_tail(&new_ns->list, &new->mnt_list);

	/*
	 * Second pass: walk the old and the new tree side by side, assign
	 * every copied mount to new_ns and, when @new_fs is given, switch
	 * its root and pwd from the old mount to the corresponding copy.
	 */
	p = old;
	q = new;
	while (p) {
		q->mnt_ns = new_ns;
		new_ns->mounts++;
		if (new_fs) {
			if (&p->mnt == new_fs->root.mnt) {
				new_fs->root.mnt = mntget(&q->mnt);
				/* old mount is released after unlocking */
				rootmnt = &p->mnt;
			}
			if (&p->mnt == new_fs->pwd.mnt) {
				new_fs->pwd.mnt = mntget(&q->mnt);
				/* old mount is released after unlocking */
				pwdmnt = &p->mnt;
			}
		}
		p = next_mnt(p, old);
		q = next_mnt(q, new);
		if (!q)
			break;
		/*
		 * Advance p until its root dentry matches q's again.
		 * NOTE(review): presumably skips mounts copy_tree() left
		 * out of the copy -- confirm against copy_tree().
		 */
		while (p->mnt.mnt_root != q->mnt.mnt_root)
			p = next_mnt(p, old);
	}
	namespace_unlock();

	/* Drop the old root/pwd mounts that were replaced in @new_fs. */
	if (rootmnt)
		mntput(rootmnt);
	if (pwdmnt)
		mntput(pwdmnt);

	return new_ns;
}
3260
3261struct dentry *mount_subtree(struct vfsmount *m, const char *name)
3262{
3263 struct mount *mnt = real_mount(m);
3264 struct mnt_namespace *ns;
3265 struct super_block *s;
3266 struct path path;
3267 int err;
3268
3269 ns = alloc_mnt_ns(&init_user_ns, true);
3270 if (IS_ERR(ns)) {
3271 mntput(m);
3272 return ERR_CAST(ns);
3273 }
3274 mnt->mnt_ns = ns;
3275 ns->root = mnt;
3276 ns->mounts++;
3277 list_add(&mnt->mnt_list, &ns->list);
3278
3279 err = vfs_path_lookup(m->mnt_root, m,
3280 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
3281
3282 put_mnt_ns(ns);
3283
3284 if (err)
3285 return ERR_PTR(err);
3286
3287
3288 s = path.mnt->mnt_sb;
3289 atomic_inc(&s->s_active);
3290 mntput(path.mnt);
3291
3292 down_write(&s->s_umount);
3293
3294 return path.dentry;
3295}
3296EXPORT_SYMBOL(mount_subtree);
3297
3298int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type,
3299 unsigned long flags, void __user *data)
3300{
3301 int ret;
3302 char *kernel_type;
3303 char *kernel_dev;
3304 void *options;
3305
3306 kernel_type = copy_mount_string(type);
3307 ret = PTR_ERR(kernel_type);
3308 if (IS_ERR(kernel_type))
3309 goto out_type;
3310
3311 kernel_dev = copy_mount_string(dev_name);
3312 ret = PTR_ERR(kernel_dev);
3313 if (IS_ERR(kernel_dev))
3314 goto out_dev;
3315
3316 options = copy_mount_options(data);
3317 ret = PTR_ERR(options);
3318 if (IS_ERR(options))
3319 goto out_data;
3320
3321 ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
3322
3323 kfree(options);
3324out_data:
3325 kfree(kernel_dev);
3326out_dev:
3327 kfree(kernel_type);
3328out_type:
3329 return ret;
3330}
3331
/* mount(2) entry point: forwards all five arguments unchanged to ksys_mount(). */
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	return ksys_mount(dev_name, dir_name, type, flags, data);
}
3337
3338
3339
3340
3341
/*
 * fsmount(2): create a mount from the filesystem context behind @fs_fd and
 * hand it back to the caller as a new file descriptor.
 *
 * @fs_fd:      descriptor whose file must be backed by fscontext_fops
 * @flags:      FSMOUNT_CLOEXEC is the only bit accepted
 * @attr_flags: MOUNT_ATTR_* bits, translated below into MNT_* mount flags
 *
 * Returns the new descriptor on success, otherwise a negative errno
 * (-EPERM, -EINVAL, -EBADF, -EBUSY, or an error propagated from a callee).
 */
SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
		unsigned int, attr_flags)
{
	struct mnt_namespace *ns;
	struct fs_context *fc;
	struct file *file;
	struct path newmount;
	struct mount *mnt;
	struct fd f;
	unsigned int mnt_flags = 0;
	long ret;

	if (!may_mount())
		return -EPERM;

	if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
		return -EINVAL;

	/* Reject any attribute bit that is not handled below. */
	if (attr_flags & ~(MOUNT_ATTR_RDONLY |
			   MOUNT_ATTR_NOSUID |
			   MOUNT_ATTR_NODEV |
			   MOUNT_ATTR_NOEXEC |
			   MOUNT_ATTR__ATIME |
			   MOUNT_ATTR_NODIRATIME))
		return -EINVAL;

	if (attr_flags & MOUNT_ATTR_RDONLY)
		mnt_flags |= MNT_READONLY;
	if (attr_flags & MOUNT_ATTR_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (attr_flags & MOUNT_ATTR_NODEV)
		mnt_flags |= MNT_NODEV;
	if (attr_flags & MOUNT_ATTR_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	if (attr_flags & MOUNT_ATTR_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;

	/* Exactly one of the recognised atime modes must be selected. */
	switch (attr_flags & MOUNT_ATTR__ATIME) {
	case MOUNT_ATTR_STRICTATIME:
		break;
	case MOUNT_ATTR_NOATIME:
		mnt_flags |= MNT_NOATIME;
		break;
	case MOUNT_ATTR_RELATIME:
		mnt_flags |= MNT_RELATIME;
		break;
	default:
		return -EINVAL;
	}

	f = fdget(fs_fd);
	if (!f.file)
		return -EBADF;

	/* Only descriptors backed by fscontext_fops carry an fs_context. */
	ret = -EINVAL;
	if (f.file->f_op != &fscontext_fops)
		goto err_fsfd;

	fc = f.file->private_data;

	ret = mutex_lock_interruptible(&fc->uapi_mutex);
	if (ret < 0)
		goto err_fsfd;

	/* The context must already have a root dentry to mount. */
	ret = -EINVAL;
	if (!fc->root)
		goto err_unlock;

	ret = -EPERM;
	if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
		pr_warn("VFS: Mount too revealing\n");
		goto err_unlock;
	}

	ret = -EBUSY;
	if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
		goto err_unlock;

	ret = -EPERM;
	if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
		goto err_unlock;

	newmount.mnt = vfs_create_mount(fc);
	if (IS_ERR(newmount.mnt)) {
		ret = PTR_ERR(newmount.mnt);
		goto err_unlock;
	}
	newmount.dentry = dget(fc->root);
	newmount.mnt->mnt_flags = mnt_flags;

	/*
	 * The mount now exists, so the context is cleaned up.
	 * NOTE(review): presumably this prepares fc for reuse -- confirm
	 * against vfs_clean_context().
	 */
	vfs_clean_context(fc);

	/* Put the new mount, alone, into an anonymous namespace. */
	ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
	if (IS_ERR(ns)) {
		ret = PTR_ERR(ns);
		goto err_path;
	}
	mnt = real_mount(newmount.mnt);
	mnt->mnt_ns = ns;
	ns->root = mnt;
	ns->mounts = 1;
	list_add(&mnt->mnt_list, &ns->list);
	/*
	 * Extra mount reference.
	 * NOTE(review): presumably the one consumed when the mount is
	 * dissolved on the final fput -- confirm.
	 */
	mntget(newmount.mnt);

	/*
	 * Wrap the mount in an O_PATH file, flagged FMODE_NEED_UNMOUNT so
	 * that it is treated as needing an unmount rather than a plain put.
	 */
	file = dentry_open(&newmount, O_PATH, fc->cred);
	if (IS_ERR(file)) {
		dissolve_on_fput(newmount.mnt);
		ret = PTR_ERR(file);
		goto err_path;
	}
	file->f_mode |= FMODE_NEED_UNMOUNT;

	ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
	if (ret >= 0)
		fd_install(ret, file);
	else
		fput(file);

	/* The success path also runs through the labels below. */
err_path:
	path_put(&newmount);
err_unlock:
	mutex_unlock(&fc->uapi_mutex);
err_fsfd:
	fdput(f);
	return ret;
}
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486SYSCALL_DEFINE5(move_mount,
3487 int, from_dfd, const char *, from_pathname,
3488 int, to_dfd, const char *, to_pathname,
3489 unsigned int, flags)
3490{
3491 struct path from_path, to_path;
3492 unsigned int lflags;
3493 int ret = 0;
3494
3495 if (!may_mount())
3496 return -EPERM;
3497
3498 if (flags & ~MOVE_MOUNT__MASK)
3499 return -EINVAL;
3500
3501
3502
3503
3504
3505 lflags = 0;
3506 if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3507 if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3508 if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3509
3510 ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
3511 if (ret < 0)
3512 return ret;
3513
3514 lflags = 0;
3515 if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3516 if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3517 if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3518
3519 ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
3520 if (ret < 0)
3521 goto out_from;
3522
3523 ret = security_move_mount(&from_path, &to_path);
3524 if (ret < 0)
3525 goto out_to;
3526
3527 ret = do_move_mount(&from_path, &to_path);
3528
3529out_to:
3530 path_put(&to_path);
3531out_from:
3532 path_put(&from_path);
3533 return ret;
3534}
3535
3536
3537
3538
3539
3540
3541bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
3542 const struct path *root)
3543{
3544 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
3545 dentry = mnt->mnt_mountpoint;
3546 mnt = mnt->mnt_parent;
3547 }
3548 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
3549}
3550
3551bool path_is_under(const struct path *path1, const struct path *path2)
3552{
3553 bool res;
3554 read_seqlock_excl(&mount_lock);
3555 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
3556 read_sequnlock_excl(&mount_lock);
3557 return res;
3558}
3559EXPORT_SYMBOL(path_is_under);
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
/*
 * pivot_root(2): attach the mount at @new_root where the caller's current
 * root mount is attached, and re-attach that old root mount at @put_old.
 *
 * @new_root: user path of the directory that becomes the new root
 * @put_old:  user path of the directory that receives the old root
 *
 * Returns 0 on success or a negative errno: -EPERM if the caller may not
 * mount, -EINVAL / -ENOENT / -EBUSY from the sanity checks below, or an
 * error from the path lookups, the security hook or lock_mount().
 */
SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
		const char __user *, put_old)
{
	struct path new, old, parent_path, root_parent, root;
	struct mount *new_mnt, *root_mnt, *old_mnt;
	struct mountpoint *old_mp, *root_mp;
	int error;

	if (!may_mount())
		return -EPERM;

	error = user_path_dir(new_root, &new);
	if (error)
		goto out0;

	error = user_path_dir(put_old, &old);
	if (error)
		goto out1;

	error = security_sb_pivotroot(&old, &new);
	if (error)
		goto out2;

	get_fs_root(current->fs, &root);
	old_mp = lock_mount(&old);
	error = PTR_ERR(old_mp);
	if (IS_ERR(old_mp))
		goto out3;

	error = -EINVAL;
	new_mnt = real_mount(new.mnt);
	root_mnt = real_mount(root.mnt);
	old_mnt = real_mount(old.mnt);
	/* None of the mounts being rearranged around may be shared. */
	if (IS_MNT_SHARED(old_mnt) ||
		IS_MNT_SHARED(new_mnt->mnt_parent) ||
		IS_MNT_SHARED(root_mnt->mnt_parent))
		goto out4;
	if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
		goto out4;
	/* A locked mount cannot become the new root. */
	if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
		goto out4;
	error = -ENOENT;
	if (d_unlinked(new.dentry))
		goto out4;
	error = -EBUSY;
	/* new_root and put_old must not be on the current root mount. */
	if (new_mnt == root_mnt || old_mnt == root_mnt)
		goto out4;
	error = -EINVAL;
	/* The caller's root must be the root dentry of its mount ... */
	if (root.mnt->mnt_root != root.dentry)
		goto out4;
	/* ... and that mount must have a parent. */
	if (!mnt_has_parent(root_mnt))
		goto out4;
	root_mp = root_mnt->mnt_mp;
	/* The same two conditions apply to new_root. */
	if (new.mnt->mnt_root != new.dentry)
		goto out4;
	if (!mnt_has_parent(new_mnt))
		goto out4;
	/* put_old must be reachable from new_root */
	if (!is_path_reachable(old_mnt, old.dentry, &new))
		goto out4;
	/* new_root must be reachable from the current root */
	if (!is_path_reachable(new_mnt, new.dentry, &root))
		goto out4;
	/* Extra count on root_mp; dropped by put_mountpoint() below. */
	root_mp->m_count++;
	lock_mount_hash();
	detach_mnt(new_mnt, &parent_path);
	detach_mnt(root_mnt, &root_parent);
	/* If the old root was locked, the lock moves to the new root. */
	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
		new_mnt->mnt.mnt_flags |= MNT_LOCKED;
		root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
	}
	/* attach the old root mount at put_old */
	attach_mnt(root_mnt, old_mnt, old_mp);
	/* attach new_root where the old root mount used to be attached */
	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
	touch_mnt_namespace(current->nsproxy->mnt_ns);
	/* take the moved mount off any expiry list */
	list_del_init(&new_mnt->mnt_expire);
	put_mountpoint(root_mp);
	unlock_mount_hash();
	/*
	 * NOTE(review): presumably switches fs_struct root/pwd references
	 * from the old root to the new one -- confirm against
	 * chroot_fs_refs().
	 */
	chroot_fs_refs(&root, &new);
	error = 0;
out4:
	unlock_mount(old_mp);
	/* parent_path/root_parent are only filled in on the success path. */
	if (!error) {
		path_put(&root_parent);
		path_put(&parent_path);
	}
out3:
	path_put(&root);
out2:
	path_put(&old);
out1:
	path_put(&new);
out0:
	return error;
}
3683
3684static void __init init_mount_tree(void)
3685{
3686 struct vfsmount *mnt;
3687 struct mount *m;
3688 struct mnt_namespace *ns;
3689 struct path root;
3690 struct file_system_type *type;
3691
3692 type = get_fs_type("rootfs");
3693 if (!type)
3694 panic("Can't find rootfs type");
3695 mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
3696 put_filesystem(type);
3697 if (IS_ERR(mnt))
3698 panic("Can't create rootfs");
3699
3700 ns = alloc_mnt_ns(&init_user_ns, false);
3701 if (IS_ERR(ns))
3702 panic("Can't allocate initial namespace");
3703 m = real_mount(mnt);
3704 m->mnt_ns = ns;
3705 ns->root = m;
3706 ns->mounts = 1;
3707 list_add(&m->mnt_list, &ns->list);
3708 init_task.nsproxy->mnt_ns = ns;
3709 get_mnt_ns(ns);
3710
3711 root.mnt = mnt;
3712 root.dentry = mnt->mnt_root;
3713 mnt->mnt_flags |= MNT_LOCKED;
3714
3715 set_fs_pwd(current->fs, &root);
3716 set_fs_root(current->fs, &root);
3717}
3718
3719void __init mnt_init(void)
3720{
3721 int err;
3722
3723 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
3724 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3725
3726 mount_hashtable = alloc_large_system_hash("Mount-cache",
3727 sizeof(struct hlist_head),
3728 mhash_entries, 19,
3729 HASH_ZERO,
3730 &m_hash_shift, &m_hash_mask, 0, 0);
3731 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
3732 sizeof(struct hlist_head),
3733 mphash_entries, 19,
3734 HASH_ZERO,
3735 &mp_hash_shift, &mp_hash_mask, 0, 0);
3736
3737 if (!mount_hashtable || !mountpoint_hashtable)
3738 panic("Failed to allocate mount hash table\n");
3739
3740 kernfs_init();
3741
3742 err = sysfs_init();
3743 if (err)
3744 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
3745 __func__, err);
3746 fs_kobj = kobject_create_and_add("fs", NULL);
3747 if (!fs_kobj)
3748 printk(KERN_WARNING "%s: kobj create error\n", __func__);
3749 init_rootfs();
3750 init_mount_tree();
3751}
3752
3753void put_mnt_ns(struct mnt_namespace *ns)
3754{
3755 if (!atomic_dec_and_test(&ns->count))
3756 return;
3757 drop_collected_mounts(&ns->root->mnt);
3758 free_mnt_ns(ns);
3759}
3760
3761struct vfsmount *kern_mount(struct file_system_type *type)
3762{
3763 struct vfsmount *mnt;
3764 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
3765 if (!IS_ERR(mnt)) {
3766
3767
3768
3769
3770 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
3771 }
3772 return mnt;
3773}
3774EXPORT_SYMBOL_GPL(kern_mount);
3775
3776void kern_unmount(struct vfsmount *mnt)
3777{
3778
3779 if (!IS_ERR_OR_NULL(mnt)) {
3780 real_mount(mnt)->mnt_ns = NULL;
3781 synchronize_rcu();
3782 mntput(mnt);
3783 }
3784}
3785EXPORT_SYMBOL(kern_unmount);
3786
/*
 * Return the check_mnt() verdict for real_mount(@mnt).
 * NOTE(review): presumably "does this mount belong to the caller's mount
 * namespace" -- confirm against check_mnt().
 */
bool our_mnt(struct vfsmount *mnt)
{
	return check_mnt(real_mount(mnt));
}
3791
3792bool current_chrooted(void)
3793{
3794
3795 struct path ns_root;
3796 struct path fs_root;
3797 bool chrooted;
3798
3799
3800 ns_root.mnt = ¤t->nsproxy->mnt_ns->root->mnt;
3801 ns_root.dentry = ns_root.mnt->mnt_root;
3802 path_get(&ns_root);
3803 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
3804 ;
3805
3806 get_fs_root(current->fs, &fs_root);
3807
3808 chrooted = !path_equal(&fs_root, &ns_root);
3809
3810 path_put(&fs_root);
3811 path_put(&ns_root);
3812
3813 return chrooted;
3814}
3815
/*
 * mnt_already_visible - look for an existing mount in @ns that justifies
 * creating a new mount of @sb's filesystem type.
 * @ns:            mount namespace whose mount list is searched
 * @sb:            superblock about to be mounted
 * @new_mnt_flags: in: flags requested for the new mount; out: additionally
 *                 carries the MNT_LOCK_READONLY / MNT_LOCK_ATIME bits of
 *                 the matching mount
 *
 * The mount list is walked under namespace_sem held for reading.  Returns
 * true if a suitable mount was found, false otherwise.
 */
static bool mnt_already_visible(struct mnt_namespace *ns,
				const struct super_block *sb,
				int *new_mnt_flags)
{
	int new_flags = *new_mnt_flags;
	struct mount *mnt;
	bool visible = false;

	down_read(&namespace_sem);
	list_for_each_entry(mnt, &ns->list, mnt_list) {
		struct mount *child;
		int mnt_flags;

		/* Only mounts of the same filesystem type count. */
		if (mnt->mnt.mnt_sb->s_type != sb->s_type)
			continue;

		/*
		 * Only mounts whose root is the root of their superblock
		 * count (i.e. the whole filesystem, not a subtree of it).
		 */
		if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
			continue;

		/* Work on a private copy of the mount's flags. */
		mnt_flags = mnt->mnt.mnt_flags;

		/* A read-only superblock is treated like a locked ro mount. */
		if (sb_rdonly(mnt->mnt.mnt_sb))
			mnt_flags |= MNT_LOCK_READONLY;

		/*
		 * Skip this mount if the new mount would drop a locked
		 * read-only setting or change locked atime settings.
		 */
		if ((mnt_flags & MNT_LOCK_READONLY) &&
		    !(new_flags & MNT_READONLY))
			continue;
		if ((mnt_flags & MNT_LOCK_ATIME) &&
		    ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
			continue;

		/*
		 * Inspect the children: a locked child mounted on a
		 * directory that is not empty disqualifies this mount.
		 */
		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
			struct inode *inode = child->mnt_mountpoint->d_inode;
			/* Children that are not locked are ignored. */
			if (!(child->mnt.mnt_flags & MNT_LOCKED))
				continue;
			/* Locked child on a non-empty directory: try the next mount. */
			if (!is_empty_dir_inode(inode))
				goto next;
		}
		/* Carry the lock bits of the match over to the new mount. */
		*new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
					       MNT_LOCK_ATIME);
		visible = true;
		goto found;
	next:	;
	}
found:
	up_read(&namespace_sem);
	return visible;
}
3879
3880static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
3881{
3882 const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
3883 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
3884 unsigned long s_iflags;
3885
3886 if (ns->user_ns == &init_user_ns)
3887 return false;
3888
3889
3890 s_iflags = sb->s_iflags;
3891 if (!(s_iflags & SB_I_USERNS_VISIBLE))
3892 return false;
3893
3894 if ((s_iflags & required_iflags) != required_iflags) {
3895 WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
3896 required_iflags);
3897 return true;
3898 }
3899
3900 return !mnt_already_visible(ns, sb, new_mnt_flags);
3901}
3902
3903bool mnt_may_suid(struct vfsmount *mnt)
3904{
3905
3906
3907
3908
3909
3910
3911
3912 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
3913 current_in_userns(mnt->mnt_sb->s_user_ns);
3914}
3915
3916static struct ns_common *mntns_get(struct task_struct *task)
3917{
3918 struct ns_common *ns = NULL;
3919 struct nsproxy *nsproxy;
3920
3921 task_lock(task);
3922 nsproxy = task->nsproxy;
3923 if (nsproxy) {
3924 ns = &nsproxy->mnt_ns->ns;
3925 get_mnt_ns(to_mnt_ns(ns));
3926 }
3927 task_unlock(task);
3928
3929 return ns;
3930}
3931
/*
 * proc_ns_operations ->put(): drop one reference on the mount namespace
 * that @ns maps to via to_mnt_ns().
 */
static void mntns_put(struct ns_common *ns)
{
	put_mnt_ns(to_mnt_ns(ns));
}
3936
3937static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
3938{
3939 struct fs_struct *fs = current->fs;
3940 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
3941 struct path root;
3942 int err;
3943
3944 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
3945 !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
3946 !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
3947 return -EPERM;
3948
3949 if (is_anon_ns(mnt_ns))
3950 return -EINVAL;
3951
3952 if (fs->users != 1)
3953 return -EINVAL;
3954
3955 get_mnt_ns(mnt_ns);
3956 old_mnt_ns = nsproxy->mnt_ns;
3957 nsproxy->mnt_ns = mnt_ns;
3958
3959
3960 err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
3961 "/", LOOKUP_DOWN, &root);
3962 if (err) {
3963
3964 nsproxy->mnt_ns = old_mnt_ns;
3965 put_mnt_ns(mnt_ns);
3966 return err;
3967 }
3968
3969 put_mnt_ns(old_mnt_ns);
3970
3971
3972 set_fs_pwd(fs, &root);
3973 set_fs_root(fs, &root);
3974
3975 path_put(&root);
3976 return 0;
3977}
3978
/*
 * proc_ns_operations ->owner(): return the user namespace recorded in the
 * mount namespace that @ns maps to.  No reference is taken here.
 */
static struct user_namespace *mntns_owner(struct ns_common *ns)
{
	return to_mnt_ns(ns)->user_ns;
}
3983
/*
 * proc_ns_operations table for mount namespaces: named "mnt", typed
 * CLONE_NEWNS, and wired to the mntns_* callbacks in this file.
 * alloc_mnt_ns() stores its address in the ns.ops of every namespace it
 * creates.
 */
const struct proc_ns_operations mntns_operations = {
	.name		= "mnt",
	.type		= CLONE_NEWNS,
	.get		= mntns_get,
	.put		= mntns_put,
	.install	= mntns_install,
	.owner		= mntns_owner,
};
3992