1
2
3
4
5
6
7
8
9
10
11#include <linux/syscalls.h>
12#include <linux/export.h>
13#include <linux/capability.h>
14#include <linux/mnt_namespace.h>
15#include <linux/user_namespace.h>
16#include <linux/namei.h>
17#include <linux/security.h>
18#include <linux/cred.h>
19#include <linux/idr.h>
20#include <linux/init.h>
21#include <linux/fs_struct.h>
22#include <linux/fsnotify.h>
23#include <linux/file.h>
24#include <linux/uaccess.h>
25#include <linux/proc_ns.h>
26#include <linux/magic.h>
27#include <linux/memblock.h>
28#include <linux/task_work.h>
29#include <linux/sched/task.h>
30#include <uapi/linux/mount.h>
31#include <linux/fs_context.h>
32#include <linux/shmem_fs.h>
33
34#include "pnode.h"
35#include "internal.h"
36
37
38unsigned int sysctl_mount_max __read_mostly = 100000;
39
40static unsigned int m_hash_mask __read_mostly;
41static unsigned int m_hash_shift __read_mostly;
42static unsigned int mp_hash_mask __read_mostly;
43static unsigned int mp_hash_shift __read_mostly;
44
45static __initdata unsigned long mhash_entries;
46static int __init set_mhash_entries(char *str)
47{
48 if (!str)
49 return 0;
50 mhash_entries = simple_strtoul(str, &str, 0);
51 return 1;
52}
53__setup("mhash_entries=", set_mhash_entries);
54
55static __initdata unsigned long mphash_entries;
56static int __init set_mphash_entries(char *str)
57{
58 if (!str)
59 return 0;
60 mphash_entries = simple_strtoul(str, &str, 0);
61 return 1;
62}
63__setup("mphash_entries=", set_mphash_entries);
64
65static u64 event;
66static DEFINE_IDA(mnt_id_ida);
67static DEFINE_IDA(mnt_group_ida);
68
69static struct hlist_head *mount_hashtable __read_mostly;
70static struct hlist_head *mountpoint_hashtable __read_mostly;
71static struct kmem_cache *mnt_cache __read_mostly;
72static DECLARE_RWSEM(namespace_sem);
73static HLIST_HEAD(unmounted);
74static LIST_HEAD(ex_mountpoints);
75
76
/*
 * Exported kobject pointer.  It is only declared here; whatever assigns it
 * lives outside this section of the file.
 */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);
79
80
81
82
83
84
85
86
87
88__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
89
90static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
91{
92 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
93 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
94 tmp = tmp + (tmp >> m_hash_shift);
95 return &mount_hashtable[tmp & m_hash_mask];
96}
97
98static inline struct hlist_head *mp_hash(struct dentry *dentry)
99{
100 unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
101 tmp = tmp + (tmp >> mp_hash_shift);
102 return &mountpoint_hashtable[tmp & mp_hash_mask];
103}
104
105static int mnt_alloc_id(struct mount *mnt)
106{
107 int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);
108
109 if (res < 0)
110 return res;
111 mnt->mnt_id = res;
112 return 0;
113}
114
115static void mnt_free_id(struct mount *mnt)
116{
117 ida_free(&mnt_id_ida, mnt->mnt_id);
118}
119
120
121
122
123static int mnt_alloc_group_id(struct mount *mnt)
124{
125 int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL);
126
127 if (res < 0)
128 return res;
129 mnt->mnt_group_id = res;
130 return 0;
131}
132
133
134
135
136void mnt_release_group_id(struct mount *mnt)
137{
138 ida_free(&mnt_group_ida, mnt->mnt_group_id);
139 mnt->mnt_group_id = 0;
140}
141
142
143
144
145static inline void mnt_add_count(struct mount *mnt, int n)
146{
147#ifdef CONFIG_SMP
148 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
149#else
150 preempt_disable();
151 mnt->mnt_count += n;
152 preempt_enable();
153#endif
154}
155
156
157
158
159unsigned int mnt_get_count(struct mount *mnt)
160{
161#ifdef CONFIG_SMP
162 unsigned int count = 0;
163 int cpu;
164
165 for_each_possible_cpu(cpu) {
166 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
167 }
168
169 return count;
170#else
171 return mnt->mnt_count;
172#endif
173}
174
175static struct mount *alloc_vfsmnt(const char *name)
176{
177 struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
178 if (mnt) {
179 int err;
180
181 err = mnt_alloc_id(mnt);
182 if (err)
183 goto out_free_cache;
184
185 if (name) {
186 mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL);
187 if (!mnt->mnt_devname)
188 goto out_free_id;
189 }
190
191#ifdef CONFIG_SMP
192 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
193 if (!mnt->mnt_pcp)
194 goto out_free_devname;
195
196 this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
197#else
198 mnt->mnt_count = 1;
199 mnt->mnt_writers = 0;
200#endif
201
202 INIT_HLIST_NODE(&mnt->mnt_hash);
203 INIT_LIST_HEAD(&mnt->mnt_child);
204 INIT_LIST_HEAD(&mnt->mnt_mounts);
205 INIT_LIST_HEAD(&mnt->mnt_list);
206 INIT_LIST_HEAD(&mnt->mnt_expire);
207 INIT_LIST_HEAD(&mnt->mnt_share);
208 INIT_LIST_HEAD(&mnt->mnt_slave_list);
209 INIT_LIST_HEAD(&mnt->mnt_slave);
210 INIT_HLIST_NODE(&mnt->mnt_mp_list);
211 INIT_LIST_HEAD(&mnt->mnt_umounting);
212 INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
213 }
214 return mnt;
215
216#ifdef CONFIG_SMP
217out_free_devname:
218 kfree_const(mnt->mnt_devname);
219#endif
220out_free_id:
221 mnt_free_id(mnt);
222out_free_cache:
223 kmem_cache_free(mnt_cache, mnt);
224 return NULL;
225}
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246bool __mnt_is_readonly(struct vfsmount *mnt)
247{
248 return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
249}
250EXPORT_SYMBOL_GPL(__mnt_is_readonly);
251
252static inline void mnt_inc_writers(struct mount *mnt)
253{
254#ifdef CONFIG_SMP
255 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
256#else
257 mnt->mnt_writers++;
258#endif
259}
260
261static inline void mnt_dec_writers(struct mount *mnt)
262{
263#ifdef CONFIG_SMP
264 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
265#else
266 mnt->mnt_writers--;
267#endif
268}
269
270static unsigned int mnt_get_writers(struct mount *mnt)
271{
272#ifdef CONFIG_SMP
273 unsigned int count = 0;
274 int cpu;
275
276 for_each_possible_cpu(cpu) {
277 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
278 }
279
280 return count;
281#else
282 return mnt->mnt_writers;
283#endif
284}
285
286static int mnt_is_readonly(struct vfsmount *mnt)
287{
288 if (mnt->mnt_sb->s_readonly_remount)
289 return 1;
290
291 smp_rmb();
292 return __mnt_is_readonly(mnt);
293}
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311int __mnt_want_write(struct vfsmount *m)
312{
313 struct mount *mnt = real_mount(m);
314 int ret = 0;
315
316 preempt_disable();
317 mnt_inc_writers(mnt);
318
319
320
321
322
323 smp_mb();
324 while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
325 cpu_relax();
326
327
328
329
330
331 smp_rmb();
332 if (mnt_is_readonly(m)) {
333 mnt_dec_writers(mnt);
334 ret = -EROFS;
335 }
336 preempt_enable();
337
338 return ret;
339}
340
341
342
343
344
345
346
347
348
349
350int mnt_want_write(struct vfsmount *m)
351{
352 int ret;
353
354 sb_start_write(m->mnt_sb);
355 ret = __mnt_want_write(m);
356 if (ret)
357 sb_end_write(m->mnt_sb);
358 return ret;
359}
360EXPORT_SYMBOL_GPL(mnt_want_write);
361
362
363
364
365
366
367
368
369
370
371
372
373
374int mnt_clone_write(struct vfsmount *mnt)
375{
376
377 if (__mnt_is_readonly(mnt))
378 return -EROFS;
379 preempt_disable();
380 mnt_inc_writers(real_mount(mnt));
381 preempt_enable();
382 return 0;
383}
384EXPORT_SYMBOL_GPL(mnt_clone_write);
385
386
387
388
389
390
391
392
393int __mnt_want_write_file(struct file *file)
394{
395 if (!(file->f_mode & FMODE_WRITER))
396 return __mnt_want_write(file->f_path.mnt);
397 else
398 return mnt_clone_write(file->f_path.mnt);
399}
400
401
402
403
404
405
406
407
408int mnt_want_write_file(struct file *file)
409{
410 int ret;
411
412 sb_start_write(file_inode(file)->i_sb);
413 ret = __mnt_want_write_file(file);
414 if (ret)
415 sb_end_write(file_inode(file)->i_sb);
416 return ret;
417}
418EXPORT_SYMBOL_GPL(mnt_want_write_file);
419
420
421
422
423
424
425
426
427
/*
 * __mnt_drop_write - give up a writer reference on a mount
 * @mnt: mount previously passed to __mnt_want_write()/mnt_clone_write()
 *
 * Decrements the writer count with preemption disabled.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	struct mount *m = real_mount(mnt);

	preempt_disable();
	mnt_dec_writers(m);
	preempt_enable();
}
434
435
436
437
438
439
440
441
442
443void mnt_drop_write(struct vfsmount *mnt)
444{
445 __mnt_drop_write(mnt);
446 sb_end_write(mnt->mnt_sb);
447}
448EXPORT_SYMBOL_GPL(mnt_drop_write);
449
450void __mnt_drop_write_file(struct file *file)
451{
452 __mnt_drop_write(file->f_path.mnt);
453}
454
455void mnt_drop_write_file(struct file *file)
456{
457 __mnt_drop_write_file(file);
458 sb_end_write(file_inode(file)->i_sb);
459}
460EXPORT_SYMBOL(mnt_drop_write_file);
461
462static int mnt_make_readonly(struct mount *mnt)
463{
464 int ret = 0;
465
466 lock_mount_hash();
467 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
468
469
470
471
472 smp_mb();
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490 if (mnt_get_writers(mnt) > 0)
491 ret = -EBUSY;
492 else
493 mnt->mnt.mnt_flags |= MNT_READONLY;
494
495
496
497
498 smp_wmb();
499 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
500 unlock_mount_hash();
501 return ret;
502}
503
504static int __mnt_unmake_readonly(struct mount *mnt)
505{
506 lock_mount_hash();
507 mnt->mnt.mnt_flags &= ~MNT_READONLY;
508 unlock_mount_hash();
509 return 0;
510}
511
512int sb_prepare_remount_readonly(struct super_block *sb)
513{
514 struct mount *mnt;
515 int err = 0;
516
517
518 if (atomic_long_read(&sb->s_remove_count))
519 return -EBUSY;
520
521 lock_mount_hash();
522 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
523 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
524 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
525 smp_mb();
526 if (mnt_get_writers(mnt) > 0) {
527 err = -EBUSY;
528 break;
529 }
530 }
531 }
532 if (!err && atomic_long_read(&sb->s_remove_count))
533 err = -EBUSY;
534
535 if (!err) {
536 sb->s_readonly_remount = 1;
537 smp_wmb();
538 }
539 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
540 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
541 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
542 }
543 unlock_mount_hash();
544
545 return err;
546}
547
548static void free_vfsmnt(struct mount *mnt)
549{
550 kfree_const(mnt->mnt_devname);
551#ifdef CONFIG_SMP
552 free_percpu(mnt->mnt_pcp);
553#endif
554 kmem_cache_free(mnt_cache, mnt);
555}
556
557static void delayed_free_vfsmnt(struct rcu_head *head)
558{
559 free_vfsmnt(container_of(head, struct mount, mnt_rcu));
560}
561
562
563int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
564{
565 struct mount *mnt;
566 if (read_seqretry(&mount_lock, seq))
567 return 1;
568 if (bastard == NULL)
569 return 0;
570 mnt = real_mount(bastard);
571 mnt_add_count(mnt, 1);
572 smp_mb();
573 if (likely(!read_seqretry(&mount_lock, seq)))
574 return 0;
575 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
576 mnt_add_count(mnt, -1);
577 return 1;
578 }
579 lock_mount_hash();
580 if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
581 mnt_add_count(mnt, -1);
582 unlock_mount_hash();
583 return 1;
584 }
585 unlock_mount_hash();
586
587 return -1;
588}
589
590
591bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
592{
593 int res = __legitimize_mnt(bastard, seq);
594 if (likely(!res))
595 return true;
596 if (unlikely(res < 0)) {
597 rcu_read_unlock();
598 mntput(bastard);
599 rcu_read_lock();
600 }
601 return false;
602}
603
604
605
606
607
608struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
609{
610 struct hlist_head *head = m_hash(mnt, dentry);
611 struct mount *p;
612
613 hlist_for_each_entry_rcu(p, head, mnt_hash)
614 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
615 return p;
616 return NULL;
617}
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635struct vfsmount *lookup_mnt(const struct path *path)
636{
637 struct mount *child_mnt;
638 struct vfsmount *m;
639 unsigned seq;
640
641 rcu_read_lock();
642 do {
643 seq = read_seqbegin(&mount_lock);
644 child_mnt = __lookup_mnt(path->mnt, path->dentry);
645 m = child_mnt ? &child_mnt->mnt : NULL;
646 } while (!legitimize_mnt(m, seq));
647 rcu_read_unlock();
648 return m;
649}
650
651static inline void lock_ns_list(struct mnt_namespace *ns)
652{
653 spin_lock(&ns->ns_lock);
654}
655
656static inline void unlock_ns_list(struct mnt_namespace *ns)
657{
658 spin_unlock(&ns->ns_lock);
659}
660
661static inline bool mnt_is_cursor(struct mount *mnt)
662{
663 return mnt->mnt.mnt_flags & MNT_CURSOR;
664}
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681bool __is_local_mountpoint(struct dentry *dentry)
682{
683 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
684 struct mount *mnt;
685 bool is_covered = false;
686
687 down_read(&namespace_sem);
688 lock_ns_list(ns);
689 list_for_each_entry(mnt, &ns->list, mnt_list) {
690 if (mnt_is_cursor(mnt))
691 continue;
692 is_covered = (mnt->mnt_mountpoint == dentry);
693 if (is_covered)
694 break;
695 }
696 unlock_ns_list(ns);
697 up_read(&namespace_sem);
698
699 return is_covered;
700}
701
702static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
703{
704 struct hlist_head *chain = mp_hash(dentry);
705 struct mountpoint *mp;
706
707 hlist_for_each_entry(mp, chain, m_hash) {
708 if (mp->m_dentry == dentry) {
709 mp->m_count++;
710 return mp;
711 }
712 }
713 return NULL;
714}
715
716static struct mountpoint *get_mountpoint(struct dentry *dentry)
717{
718 struct mountpoint *mp, *new = NULL;
719 int ret;
720
721 if (d_mountpoint(dentry)) {
722
723 if (d_unlinked(dentry))
724 return ERR_PTR(-ENOENT);
725mountpoint:
726 read_seqlock_excl(&mount_lock);
727 mp = lookup_mountpoint(dentry);
728 read_sequnlock_excl(&mount_lock);
729 if (mp)
730 goto done;
731 }
732
733 if (!new)
734 new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
735 if (!new)
736 return ERR_PTR(-ENOMEM);
737
738
739
740 ret = d_set_mounted(dentry);
741
742
743 if (ret == -EBUSY)
744 goto mountpoint;
745
746
747 mp = ERR_PTR(ret);
748 if (ret)
749 goto done;
750
751
752 read_seqlock_excl(&mount_lock);
753 new->m_dentry = dget(dentry);
754 new->m_count = 1;
755 hlist_add_head(&new->m_hash, mp_hash(dentry));
756 INIT_HLIST_HEAD(&new->m_list);
757 read_sequnlock_excl(&mount_lock);
758
759 mp = new;
760 new = NULL;
761done:
762 kfree(new);
763 return mp;
764}
765
766
767
768
769
770static void __put_mountpoint(struct mountpoint *mp, struct list_head *list)
771{
772 if (!--mp->m_count) {
773 struct dentry *dentry = mp->m_dentry;
774 BUG_ON(!hlist_empty(&mp->m_list));
775 spin_lock(&dentry->d_lock);
776 dentry->d_flags &= ~DCACHE_MOUNTED;
777 spin_unlock(&dentry->d_lock);
778 dput_to_list(dentry, list);
779 hlist_del(&mp->m_hash);
780 kfree(mp);
781 }
782}
783
784
785static void put_mountpoint(struct mountpoint *mp)
786{
787 __put_mountpoint(mp, &ex_mountpoints);
788}
789
790static inline int check_mnt(struct mount *mnt)
791{
792 return mnt->mnt_ns == current->nsproxy->mnt_ns;
793}
794
795
796
797
798static void touch_mnt_namespace(struct mnt_namespace *ns)
799{
800 if (ns) {
801 ns->event = ++event;
802 wake_up_interruptible(&ns->poll);
803 }
804}
805
806
807
808
809static void __touch_mnt_namespace(struct mnt_namespace *ns)
810{
811 if (ns && ns->event != event) {
812 ns->event = event;
813 wake_up_interruptible(&ns->poll);
814 }
815}
816
817
818
819
820static struct mountpoint *unhash_mnt(struct mount *mnt)
821{
822 struct mountpoint *mp;
823 mnt->mnt_parent = mnt;
824 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
825 list_del_init(&mnt->mnt_child);
826 hlist_del_init_rcu(&mnt->mnt_hash);
827 hlist_del_init(&mnt->mnt_mp_list);
828 mp = mnt->mnt_mp;
829 mnt->mnt_mp = NULL;
830 return mp;
831}
832
833
834
835
/* Detach @mnt via unhash_mnt() and drop the mountpoint reference it held. */
static void umount_mnt(struct mount *mnt)
{
	struct mountpoint *mp = unhash_mnt(mnt);

	put_mountpoint(mp);
}
840
841
842
843
844void mnt_set_mountpoint(struct mount *mnt,
845 struct mountpoint *mp,
846 struct mount *child_mnt)
847{
848 mp->m_count++;
849 mnt_add_count(mnt, 1);
850 child_mnt->mnt_mountpoint = mp->m_dentry;
851 child_mnt->mnt_parent = mnt;
852 child_mnt->mnt_mp = mp;
853 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
854}
855
856static void __attach_mnt(struct mount *mnt, struct mount *parent)
857{
858 hlist_add_head_rcu(&mnt->mnt_hash,
859 m_hash(&parent->mnt, mnt->mnt_mountpoint));
860 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
861}
862
863
864
865
/*
 * attach_mnt - attach @mnt to @parent at mountpoint @mp
 *
 * Combines mnt_set_mountpoint() (bookkeeping and references) with
 * __attach_mnt() (hash and child-list insertion).
 */
static void attach_mnt(struct mount *mnt,
		       struct mount *parent,
		       struct mountpoint *mp)
{
	mnt_set_mountpoint(parent, mp, mnt);
	__attach_mnt(mnt, parent);
}
873
874void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
875{
876 struct mountpoint *old_mp = mnt->mnt_mp;
877 struct mount *old_parent = mnt->mnt_parent;
878
879 list_del_init(&mnt->mnt_child);
880 hlist_del_init(&mnt->mnt_mp_list);
881 hlist_del_init_rcu(&mnt->mnt_hash);
882
883 attach_mnt(mnt, parent, mp);
884
885 put_mountpoint(old_mp);
886 mnt_add_count(old_parent, -1);
887}
888
889
890
891
892static void commit_tree(struct mount *mnt)
893{
894 struct mount *parent = mnt->mnt_parent;
895 struct mount *m;
896 LIST_HEAD(head);
897 struct mnt_namespace *n = parent->mnt_ns;
898
899 BUG_ON(parent == mnt);
900
901 list_add_tail(&head, &mnt->mnt_list);
902 list_for_each_entry(m, &head, mnt_list)
903 m->mnt_ns = n;
904
905 list_splice(&head, n->list.prev);
906
907 n->mounts += n->pending_mounts;
908 n->pending_mounts = 0;
909
910 __attach_mnt(mnt, parent);
911 touch_mnt_namespace(n);
912}
913
914static struct mount *next_mnt(struct mount *p, struct mount *root)
915{
916 struct list_head *next = p->mnt_mounts.next;
917 if (next == &p->mnt_mounts) {
918 while (1) {
919 if (p == root)
920 return NULL;
921 next = p->mnt_child.next;
922 if (next != &p->mnt_parent->mnt_mounts)
923 break;
924 p = p->mnt_parent;
925 }
926 }
927 return list_entry(next, struct mount, mnt_child);
928}
929
930static struct mount *skip_mnt_tree(struct mount *p)
931{
932 struct list_head *prev = p->mnt_mounts.prev;
933 while (prev != &p->mnt_mounts) {
934 p = list_entry(prev, struct mount, mnt_child);
935 prev = p->mnt_mounts.prev;
936 }
937 return p;
938}
939
940
941
942
943
944
945
946
947
948
949struct vfsmount *vfs_create_mount(struct fs_context *fc)
950{
951 struct mount *mnt;
952
953 if (!fc->root)
954 return ERR_PTR(-EINVAL);
955
956 mnt = alloc_vfsmnt(fc->source ?: "none");
957 if (!mnt)
958 return ERR_PTR(-ENOMEM);
959
960 if (fc->sb_flags & SB_KERNMOUNT)
961 mnt->mnt.mnt_flags = MNT_INTERNAL;
962
963 atomic_inc(&fc->root->d_sb->s_active);
964 mnt->mnt.mnt_sb = fc->root->d_sb;
965 mnt->mnt.mnt_root = dget(fc->root);
966 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
967 mnt->mnt_parent = mnt;
968
969 lock_mount_hash();
970 list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
971 unlock_mount_hash();
972 return &mnt->mnt;
973}
974EXPORT_SYMBOL(vfs_create_mount);
975
976struct vfsmount *fc_mount(struct fs_context *fc)
977{
978 int err = vfs_get_tree(fc);
979 if (!err) {
980 up_write(&fc->root->d_sb->s_umount);
981 return vfs_create_mount(fc);
982 }
983 return ERR_PTR(err);
984}
985EXPORT_SYMBOL(fc_mount);
986
987struct vfsmount *vfs_kern_mount(struct file_system_type *type,
988 int flags, const char *name,
989 void *data)
990{
991 struct fs_context *fc;
992 struct vfsmount *mnt;
993 int ret = 0;
994
995 if (!type)
996 return ERR_PTR(-EINVAL);
997
998 fc = fs_context_for_mount(type, flags);
999 if (IS_ERR(fc))
1000 return ERR_CAST(fc);
1001
1002 if (name)
1003 ret = vfs_parse_fs_string(fc, "source",
1004 name, strlen(name));
1005 if (!ret)
1006 ret = parse_monolithic_mount_data(fc, data);
1007 if (!ret)
1008 mnt = fc_mount(fc);
1009 else
1010 mnt = ERR_PTR(ret);
1011
1012 put_fs_context(fc);
1013 return mnt;
1014}
1015EXPORT_SYMBOL_GPL(vfs_kern_mount);
1016
1017struct vfsmount *
1018vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
1019 const char *name, void *data)
1020{
1021
1022
1023
1024
1025 if (mountpoint->d_sb->s_user_ns != &init_user_ns)
1026 return ERR_PTR(-EPERM);
1027
1028 return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
1029}
1030EXPORT_SYMBOL_GPL(vfs_submount);
1031
1032static struct mount *clone_mnt(struct mount *old, struct dentry *root,
1033 int flag)
1034{
1035 struct super_block *sb = old->mnt.mnt_sb;
1036 struct mount *mnt;
1037 int err;
1038
1039 mnt = alloc_vfsmnt(old->mnt_devname);
1040 if (!mnt)
1041 return ERR_PTR(-ENOMEM);
1042
1043 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
1044 mnt->mnt_group_id = 0;
1045 else
1046 mnt->mnt_group_id = old->mnt_group_id;
1047
1048 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
1049 err = mnt_alloc_group_id(mnt);
1050 if (err)
1051 goto out_free;
1052 }
1053
1054 mnt->mnt.mnt_flags = old->mnt.mnt_flags;
1055 mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
1056
1057 atomic_inc(&sb->s_active);
1058 mnt->mnt.mnt_sb = sb;
1059 mnt->mnt.mnt_root = dget(root);
1060 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
1061 mnt->mnt_parent = mnt;
1062 lock_mount_hash();
1063 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
1064 unlock_mount_hash();
1065
1066 if ((flag & CL_SLAVE) ||
1067 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
1068 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
1069 mnt->mnt_master = old;
1070 CLEAR_MNT_SHARED(mnt);
1071 } else if (!(flag & CL_PRIVATE)) {
1072 if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
1073 list_add(&mnt->mnt_share, &old->mnt_share);
1074 if (IS_MNT_SLAVE(old))
1075 list_add(&mnt->mnt_slave, &old->mnt_slave);
1076 mnt->mnt_master = old->mnt_master;
1077 } else {
1078 CLEAR_MNT_SHARED(mnt);
1079 }
1080 if (flag & CL_MAKE_SHARED)
1081 set_mnt_shared(mnt);
1082
1083
1084
1085 if (flag & CL_EXPIRE) {
1086 if (!list_empty(&old->mnt_expire))
1087 list_add(&mnt->mnt_expire, &old->mnt_expire);
1088 }
1089
1090 return mnt;
1091
1092 out_free:
1093 mnt_free_id(mnt);
1094 free_vfsmnt(mnt);
1095 return ERR_PTR(err);
1096}
1097
1098static void cleanup_mnt(struct mount *mnt)
1099{
1100 struct hlist_node *p;
1101 struct mount *m;
1102
1103
1104
1105
1106
1107
1108
1109 WARN_ON(mnt_get_writers(mnt));
1110 if (unlikely(mnt->mnt_pins.first))
1111 mnt_pin_kill(mnt);
1112 hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
1113 hlist_del(&m->mnt_umount);
1114 mntput(&m->mnt);
1115 }
1116 fsnotify_vfsmount_delete(&mnt->mnt);
1117 dput(mnt->mnt.mnt_root);
1118 deactivate_super(mnt->mnt.mnt_sb);
1119 mnt_free_id(mnt);
1120 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
1121}
1122
1123static void __cleanup_mnt(struct rcu_head *head)
1124{
1125 cleanup_mnt(container_of(head, struct mount, mnt_rcu));
1126}
1127
1128static LLIST_HEAD(delayed_mntput_list);
1129static void delayed_mntput(struct work_struct *unused)
1130{
1131 struct llist_node *node = llist_del_all(&delayed_mntput_list);
1132 struct mount *m, *t;
1133
1134 llist_for_each_entry_safe(m, t, node, mnt_llist)
1135 cleanup_mnt(m);
1136}
1137static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
1138
1139static void mntput_no_expire(struct mount *mnt)
1140{
1141 LIST_HEAD(list);
1142
1143 rcu_read_lock();
1144 if (likely(READ_ONCE(mnt->mnt_ns))) {
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154 mnt_add_count(mnt, -1);
1155 rcu_read_unlock();
1156 return;
1157 }
1158 lock_mount_hash();
1159
1160
1161
1162
1163 smp_mb();
1164 mnt_add_count(mnt, -1);
1165 if (mnt_get_count(mnt)) {
1166 rcu_read_unlock();
1167 unlock_mount_hash();
1168 return;
1169 }
1170 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
1171 rcu_read_unlock();
1172 unlock_mount_hash();
1173 return;
1174 }
1175 mnt->mnt.mnt_flags |= MNT_DOOMED;
1176 rcu_read_unlock();
1177
1178 list_del(&mnt->mnt_instance);
1179
1180 if (unlikely(!list_empty(&mnt->mnt_mounts))) {
1181 struct mount *p, *tmp;
1182 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1183 __put_mountpoint(unhash_mnt(p), &list);
1184 hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children);
1185 }
1186 }
1187 unlock_mount_hash();
1188 shrink_dentry_list(&list);
1189
1190 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
1191 struct task_struct *task = current;
1192 if (likely(!(task->flags & PF_KTHREAD))) {
1193 init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
1194 if (!task_work_add(task, &mnt->mnt_rcu, true))
1195 return;
1196 }
1197 if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
1198 schedule_delayed_work(&delayed_mntput_work, 1);
1199 return;
1200 }
1201 cleanup_mnt(mnt);
1202}
1203
1204void mntput(struct vfsmount *mnt)
1205{
1206 if (mnt) {
1207 struct mount *m = real_mount(mnt);
1208
1209 if (unlikely(m->mnt_expiry_mark))
1210 m->mnt_expiry_mark = 0;
1211 mntput_no_expire(m);
1212 }
1213}
1214EXPORT_SYMBOL(mntput);
1215
/*
 * mntget - take a reference on a mount
 * @mnt: mount to pin, may be NULL
 *
 * Return: @mnt, unchanged, so the call can be used inline.
 */
struct vfsmount *mntget(struct vfsmount *mnt)
{
	if (!mnt)
		return mnt;

	mnt_add_count(real_mount(mnt), 1);
	return mnt;
}
EXPORT_SYMBOL(mntget);
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234bool path_is_mountpoint(const struct path *path)
1235{
1236 unsigned seq;
1237 bool res;
1238
1239 if (!d_mountpoint(path->dentry))
1240 return false;
1241
1242 rcu_read_lock();
1243 do {
1244 seq = read_seqbegin(&mount_lock);
1245 res = __path_is_mountpoint(path);
1246 } while (read_seqretry(&mount_lock, seq));
1247 rcu_read_unlock();
1248
1249 return res;
1250}
1251EXPORT_SYMBOL(path_is_mountpoint);
1252
1253struct vfsmount *mnt_clone_internal(const struct path *path)
1254{
1255 struct mount *p;
1256 p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
1257 if (IS_ERR(p))
1258 return ERR_CAST(p);
1259 p->mnt.mnt_flags |= MNT_INTERNAL;
1260 return &p->mnt;
1261}
1262
1263#ifdef CONFIG_PROC_FS
1264static struct mount *mnt_list_next(struct mnt_namespace *ns,
1265 struct list_head *p)
1266{
1267 struct mount *mnt, *ret = NULL;
1268
1269 lock_ns_list(ns);
1270 list_for_each_continue(p, &ns->list) {
1271 mnt = list_entry(p, typeof(*mnt), mnt_list);
1272 if (!mnt_is_cursor(mnt)) {
1273 ret = mnt;
1274 break;
1275 }
1276 }
1277 unlock_ns_list(ns);
1278
1279 return ret;
1280}
1281
1282
1283static void *m_start(struct seq_file *m, loff_t *pos)
1284{
1285 struct proc_mounts *p = m->private;
1286 struct list_head *prev;
1287
1288 down_read(&namespace_sem);
1289 if (!*pos) {
1290 prev = &p->ns->list;
1291 } else {
1292 prev = &p->cursor.mnt_list;
1293
1294
1295 if (list_empty(prev))
1296 return NULL;
1297 }
1298
1299 return mnt_list_next(p->ns, prev);
1300}
1301
1302static void *m_next(struct seq_file *m, void *v, loff_t *pos)
1303{
1304 struct proc_mounts *p = m->private;
1305 struct mount *mnt = v;
1306
1307 ++*pos;
1308 return mnt_list_next(p->ns, &mnt->mnt_list);
1309}
1310
1311static void m_stop(struct seq_file *m, void *v)
1312{
1313 struct proc_mounts *p = m->private;
1314 struct mount *mnt = v;
1315
1316 lock_ns_list(p->ns);
1317 if (mnt)
1318 list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
1319 else
1320 list_del_init(&p->cursor.mnt_list);
1321 unlock_ns_list(p->ns);
1322 up_read(&namespace_sem);
1323}
1324
1325static int m_show(struct seq_file *m, void *v)
1326{
1327 struct proc_mounts *p = m->private;
1328 struct mount *r = v;
1329 return p->show(m, &r->mnt);
1330}
1331
1332const struct seq_operations mounts_op = {
1333 .start = m_start,
1334 .next = m_next,
1335 .stop = m_stop,
1336 .show = m_show,
1337};
1338
1339void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
1340{
1341 down_read(&namespace_sem);
1342 lock_ns_list(ns);
1343 list_del(&cursor->mnt_list);
1344 unlock_ns_list(ns);
1345 up_read(&namespace_sem);
1346}
1347#endif
1348
1349
1350
1351
1352
1353
1354
1355
1356
/*
 * may_umount_tree - check whether a whole mount tree looks idle
 * @m: root of the tree
 *
 * Sums the reference counts of every mount in the tree under
 * lock_mount_hash() and compares the total against an allowance of two
 * references per mount.
 *
 * Return: 1 if the total does not exceed the allowance, 0 if the tree is
 * busy.
 */
int may_umount_tree(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int actual_refs = 0;
	int minimum_refs = 0;
	struct mount *p;

	BUG_ON(!m);

	/* Write lock needed for mnt_get_count(). */
	lock_mount_hash();
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		actual_refs += mnt_get_count(p);
		minimum_refs += 2;
	}
	unlock_mount_hash();

	return actual_refs <= minimum_refs;
}

EXPORT_SYMBOL(may_umount_tree);
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394int may_umount(struct vfsmount *mnt)
1395{
1396 int ret = 1;
1397 down_read(&namespace_sem);
1398 lock_mount_hash();
1399 if (propagate_mount_busy(real_mount(mnt), 2))
1400 ret = 0;
1401 unlock_mount_hash();
1402 up_read(&namespace_sem);
1403 return ret;
1404}
1405
1406EXPORT_SYMBOL(may_umount);
1407
1408static void namespace_unlock(void)
1409{
1410 struct hlist_head head;
1411 struct hlist_node *p;
1412 struct mount *m;
1413 LIST_HEAD(list);
1414
1415 hlist_move_list(&unmounted, &head);
1416 list_splice_init(&ex_mountpoints, &list);
1417
1418 up_write(&namespace_sem);
1419
1420 shrink_dentry_list(&list);
1421
1422 if (likely(hlist_empty(&head)))
1423 return;
1424
1425 synchronize_rcu_expedited();
1426
1427 hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
1428 hlist_del(&m->mnt_umount);
1429 mntput(&m->mnt);
1430 }
1431}
1432
1433static inline void namespace_lock(void)
1434{
1435 down_write(&namespace_sem);
1436}
1437
/* Bit flags for the @how argument of umount_tree()/disconnect_mount(). */
enum umount_tree_flags {
	UMOUNT_SYNC		= 1,	/* mark mounts MNT_SYNC_UMOUNT, always disconnect */
	UMOUNT_PROPAGATE	= 2,	/* also run the propagate_*() umount helpers */
	UMOUNT_CONNECTED	= 4,	/* keep mounts under an unmounted parent connected */
};
1443
1444static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
1445{
1446
1447 if (how & UMOUNT_SYNC)
1448 return true;
1449
1450
1451 if (!mnt_has_parent(mnt))
1452 return true;
1453
1454
1455
1456
1457
1458 if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
1459 return true;
1460
1461
1462 if (how & UMOUNT_CONNECTED)
1463 return false;
1464
1465
1466 if (IS_MNT_LOCKED(mnt))
1467 return false;
1468
1469
1470 return true;
1471}
1472
1473
1474
1475
1476
1477static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1478{
1479 LIST_HEAD(tmp_list);
1480 struct mount *p;
1481
1482 if (how & UMOUNT_PROPAGATE)
1483 propagate_mount_unlock(mnt);
1484
1485
1486 for (p = mnt; p; p = next_mnt(p, mnt)) {
1487 p->mnt.mnt_flags |= MNT_UMOUNT;
1488 list_move(&p->mnt_list, &tmp_list);
1489 }
1490
1491
1492 list_for_each_entry(p, &tmp_list, mnt_list) {
1493 list_del_init(&p->mnt_child);
1494 }
1495
1496
1497 if (how & UMOUNT_PROPAGATE)
1498 propagate_umount(&tmp_list);
1499
1500 while (!list_empty(&tmp_list)) {
1501 struct mnt_namespace *ns;
1502 bool disconnect;
1503 p = list_first_entry(&tmp_list, struct mount, mnt_list);
1504 list_del_init(&p->mnt_expire);
1505 list_del_init(&p->mnt_list);
1506 ns = p->mnt_ns;
1507 if (ns) {
1508 ns->mounts--;
1509 __touch_mnt_namespace(ns);
1510 }
1511 p->mnt_ns = NULL;
1512 if (how & UMOUNT_SYNC)
1513 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
1514
1515 disconnect = disconnect_mount(p, how);
1516 if (mnt_has_parent(p)) {
1517 mnt_add_count(p->mnt_parent, -1);
1518 if (!disconnect) {
1519
1520 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
1521 } else {
1522 umount_mnt(p);
1523 }
1524 }
1525 change_mnt_propagation(p, MS_PRIVATE);
1526 if (disconnect)
1527 hlist_add_head(&p->mnt_umount, &unmounted);
1528 }
1529}
1530
/* Forward declaration: used by do_umount(), defined later in the file. */
static void shrink_submounts(struct mount *mnt);
1532
1533static int do_umount_root(struct super_block *sb)
1534{
1535 int ret = 0;
1536
1537 down_write(&sb->s_umount);
1538 if (!sb_rdonly(sb)) {
1539 struct fs_context *fc;
1540
1541 fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY,
1542 SB_RDONLY);
1543 if (IS_ERR(fc)) {
1544 ret = PTR_ERR(fc);
1545 } else {
1546 ret = parse_monolithic_mount_data(fc, NULL);
1547 if (!ret)
1548 ret = reconfigure_super(fc);
1549 put_fs_context(fc);
1550 }
1551 }
1552 up_write(&sb->s_umount);
1553 return ret;
1554}
1555
1556static int do_umount(struct mount *mnt, int flags)
1557{
1558 struct super_block *sb = mnt->mnt.mnt_sb;
1559 int retval;
1560
1561 retval = security_sb_umount(&mnt->mnt, flags);
1562 if (retval)
1563 return retval;
1564
1565
1566
1567
1568
1569
1570
1571 if (flags & MNT_EXPIRE) {
1572 if (&mnt->mnt == current->fs->root.mnt ||
1573 flags & (MNT_FORCE | MNT_DETACH))
1574 return -EINVAL;
1575
1576
1577
1578
1579
1580 lock_mount_hash();
1581 if (mnt_get_count(mnt) != 2) {
1582 unlock_mount_hash();
1583 return -EBUSY;
1584 }
1585 unlock_mount_hash();
1586
1587 if (!xchg(&mnt->mnt_expiry_mark, 1))
1588 return -EAGAIN;
1589 }
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601 if (flags & MNT_FORCE && sb->s_op->umount_begin) {
1602 sb->s_op->umount_begin(sb);
1603 }
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
1615
1616
1617
1618
1619 if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
1620 return -EPERM;
1621 return do_umount_root(sb);
1622 }
1623
1624 namespace_lock();
1625 lock_mount_hash();
1626
1627
1628 retval = -EINVAL;
1629 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1630 goto out;
1631
1632 event++;
1633 if (flags & MNT_DETACH) {
1634 if (!list_empty(&mnt->mnt_list))
1635 umount_tree(mnt, UMOUNT_PROPAGATE);
1636 retval = 0;
1637 } else {
1638 shrink_submounts(mnt);
1639 retval = -EBUSY;
1640 if (!propagate_mount_busy(mnt, 2)) {
1641 if (!list_empty(&mnt->mnt_list))
1642 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
1643 retval = 0;
1644 }
1645 }
1646out:
1647 unlock_mount_hash();
1648 namespace_unlock();
1649 return retval;
1650}
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662void __detach_mounts(struct dentry *dentry)
1663{
1664 struct mountpoint *mp;
1665 struct mount *mnt;
1666
1667 namespace_lock();
1668 lock_mount_hash();
1669 mp = lookup_mountpoint(dentry);
1670 if (!mp)
1671 goto out_unlock;
1672
1673 event++;
1674 while (!hlist_empty(&mp->m_list)) {
1675 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
1676 if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
1677 umount_mnt(mnt);
1678 hlist_add_head(&mnt->mnt_umount, &unmounted);
1679 }
1680 else umount_tree(mnt, UMOUNT_CONNECTED);
1681 }
1682 put_mountpoint(mp);
1683out_unlock:
1684 unlock_mount_hash();
1685 namespace_unlock();
1686}
1687
1688
1689
1690
1691static inline bool may_mount(void)
1692{
1693 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
1694}
1695
1696#ifdef CONFIG_MANDATORY_FILE_LOCKING
1697static inline bool may_mandlock(void)
1698{
1699 return capable(CAP_SYS_ADMIN);
1700}
1701#else
1702static inline bool may_mandlock(void)
1703{
1704 pr_warn("VFS: \"mand\" mount option not supported");
1705 return false;
1706}
1707#endif
1708
1709static int can_umount(const struct path *path, int flags)
1710{
1711 struct mount *mnt = real_mount(path->mnt);
1712
1713 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1714 return -EINVAL;
1715 if (!may_mount())
1716 return -EPERM;
1717 if (path->dentry != path->mnt->mnt_root)
1718 return -EINVAL;
1719 if (!check_mnt(mnt))
1720 return -EINVAL;
1721 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1722 return -EINVAL;
1723 if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
1724 return -EPERM;
1725 return 0;
1726}
1727
1728int path_umount(struct path *path, int flags)
1729{
1730 struct mount *mnt = real_mount(path->mnt);
1731 int ret;
1732
1733 ret = can_umount(path, flags);
1734 if (!ret)
1735 ret = do_umount(mnt, flags);
1736
1737
1738 dput(path->dentry);
1739 mntput_no_expire(mnt);
1740 return ret;
1741}
1742
1743static int ksys_umount(char __user *name, int flags)
1744{
1745 int lookup_flags = LOOKUP_MOUNTPOINT;
1746 struct path path;
1747 int ret;
1748
1749 if (!(flags & UMOUNT_NOFOLLOW))
1750 lookup_flags |= LOOKUP_FOLLOW;
1751 ret = user_path_at(AT_FDCWD, name, lookup_flags, &path);
1752 if (ret)
1753 return ret;
1754 return path_umount(&path, flags);
1755}
1756
1757SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1758{
1759 return ksys_umount(name, flags);
1760}
1761
#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 * Flag-less umount variant, built only where the architecture asks for
 * it: behaves like umount with flags == 0.
 */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	int ret = ksys_umount(name, 0);

	return ret;
}

#endif
1773
1774static bool is_mnt_ns_file(struct dentry *dentry)
1775{
1776
1777 return dentry->d_op == &ns_dentry_operations &&
1778 dentry->d_fsdata == &mntns_operations;
1779}
1780
1781static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
1782{
1783 return container_of(ns, struct mnt_namespace, ns);
1784}
1785
1786struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
1787{
1788 return &mnt->ns;
1789}
1790
1791static bool mnt_ns_loop(struct dentry *dentry)
1792{
1793
1794
1795
1796 struct mnt_namespace *mnt_ns;
1797 if (!is_mnt_ns_file(dentry))
1798 return false;
1799
1800 mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
1801 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
1802}
1803
1804struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1805 int flag)
1806{
1807 struct mount *res, *p, *q, *r, *parent;
1808
1809 if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
1810 return ERR_PTR(-EINVAL);
1811
1812 if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
1813 return ERR_PTR(-EINVAL);
1814
1815 res = q = clone_mnt(mnt, dentry, flag);
1816 if (IS_ERR(q))
1817 return q;
1818
1819 q->mnt_mountpoint = mnt->mnt_mountpoint;
1820
1821 p = mnt;
1822 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1823 struct mount *s;
1824 if (!is_subdir(r->mnt_mountpoint, dentry))
1825 continue;
1826
1827 for (s = r; s; s = next_mnt(s, r)) {
1828 if (!(flag & CL_COPY_UNBINDABLE) &&
1829 IS_MNT_UNBINDABLE(s)) {
1830 if (s->mnt.mnt_flags & MNT_LOCKED) {
1831
1832 q = ERR_PTR(-EPERM);
1833 goto out;
1834 } else {
1835 s = skip_mnt_tree(s);
1836 continue;
1837 }
1838 }
1839 if (!(flag & CL_COPY_MNT_NS_FILE) &&
1840 is_mnt_ns_file(s->mnt.mnt_root)) {
1841 s = skip_mnt_tree(s);
1842 continue;
1843 }
1844 while (p != s->mnt_parent) {
1845 p = p->mnt_parent;
1846 q = q->mnt_parent;
1847 }
1848 p = s;
1849 parent = q;
1850 q = clone_mnt(p, p->mnt.mnt_root, flag);
1851 if (IS_ERR(q))
1852 goto out;
1853 lock_mount_hash();
1854 list_add_tail(&q->mnt_list, &res->mnt_list);
1855 attach_mnt(q, parent, p->mnt_mp);
1856 unlock_mount_hash();
1857 }
1858 }
1859 return res;
1860out:
1861 if (res) {
1862 lock_mount_hash();
1863 umount_tree(res, UMOUNT_SYNC);
1864 unlock_mount_hash();
1865 }
1866 return q;
1867}
1868
1869
1870
1871struct vfsmount *collect_mounts(const struct path *path)
1872{
1873 struct mount *tree;
1874 namespace_lock();
1875 if (!check_mnt(real_mount(path->mnt)))
1876 tree = ERR_PTR(-EINVAL);
1877 else
1878 tree = copy_tree(real_mount(path->mnt), path->dentry,
1879 CL_COPY_ALL | CL_PRIVATE);
1880 namespace_unlock();
1881 if (IS_ERR(tree))
1882 return ERR_CAST(tree);
1883 return &tree->mnt;
1884}
1885
1886static void free_mnt_ns(struct mnt_namespace *);
1887static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);
1888
1889void dissolve_on_fput(struct vfsmount *mnt)
1890{
1891 struct mnt_namespace *ns;
1892 namespace_lock();
1893 lock_mount_hash();
1894 ns = real_mount(mnt)->mnt_ns;
1895 if (ns) {
1896 if (is_anon_ns(ns))
1897 umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
1898 else
1899 ns = NULL;
1900 }
1901 unlock_mount_hash();
1902 namespace_unlock();
1903 if (ns)
1904 free_mnt_ns(ns);
1905}
1906
/*
 * drop_collected_mounts - unmount the whole tree rooted at @mnt.
 * NOTE(review): presumably the counterpart of collect_mounts() - confirm.
 */
void drop_collected_mounts(struct vfsmount *mnt)
{
	struct mount *tree = real_mount(mnt);

	namespace_lock();
	lock_mount_hash();
	umount_tree(tree, 0);
	unlock_mount_hash();
	namespace_unlock();
}
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925struct vfsmount *clone_private_mount(const struct path *path)
1926{
1927 struct mount *old_mnt = real_mount(path->mnt);
1928 struct mount *new_mnt;
1929
1930 if (IS_MNT_UNBINDABLE(old_mnt))
1931 return ERR_PTR(-EINVAL);
1932
1933 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
1934 if (IS_ERR(new_mnt))
1935 return ERR_CAST(new_mnt);
1936
1937
1938 new_mnt->mnt_ns = MNT_NS_INTERNAL;
1939
1940 return &new_mnt->mnt;
1941}
1942EXPORT_SYMBOL_GPL(clone_private_mount);
1943
1944int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1945 struct vfsmount *root)
1946{
1947 struct mount *mnt;
1948 int res = f(root, arg);
1949 if (res)
1950 return res;
1951 list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
1952 res = f(&mnt->mnt, arg);
1953 if (res)
1954 return res;
1955 }
1956 return 0;
1957}
1958
1959static void lock_mnt_tree(struct mount *mnt)
1960{
1961 struct mount *p;
1962
1963 for (p = mnt; p; p = next_mnt(p, mnt)) {
1964 int flags = p->mnt.mnt_flags;
1965
1966 flags |= MNT_LOCK_ATIME;
1967
1968 if (flags & MNT_READONLY)
1969 flags |= MNT_LOCK_READONLY;
1970
1971 if (flags & MNT_NODEV)
1972 flags |= MNT_LOCK_NODEV;
1973
1974 if (flags & MNT_NOSUID)
1975 flags |= MNT_LOCK_NOSUID;
1976
1977 if (flags & MNT_NOEXEC)
1978 flags |= MNT_LOCK_NOEXEC;
1979
1980 if (list_empty(&p->mnt_expire))
1981 flags |= MNT_LOCKED;
1982 p->mnt.mnt_flags = flags;
1983 }
1984}
1985
1986static void cleanup_group_ids(struct mount *mnt, struct mount *end)
1987{
1988 struct mount *p;
1989
1990 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
1991 if (p->mnt_group_id && !IS_MNT_SHARED(p))
1992 mnt_release_group_id(p);
1993 }
1994}
1995
1996static int invent_group_ids(struct mount *mnt, bool recurse)
1997{
1998 struct mount *p;
1999
2000 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
2001 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
2002 int err = mnt_alloc_group_id(p);
2003 if (err) {
2004 cleanup_group_ids(mnt, p);
2005 return err;
2006 }
2007 }
2008 }
2009
2010 return 0;
2011}
2012
2013int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
2014{
2015 unsigned int max = READ_ONCE(sysctl_mount_max);
2016 unsigned int mounts = 0, old, pending, sum;
2017 struct mount *p;
2018
2019 for (p = mnt; p; p = next_mnt(p, mnt))
2020 mounts++;
2021
2022 old = ns->mounts;
2023 pending = ns->pending_mounts;
2024 sum = old + pending;
2025 if ((old > sum) ||
2026 (pending > sum) ||
2027 (max < sum) ||
2028 (mounts > (max - sum)))
2029 return -ENOSPC;
2030
2031 ns->pending_mounts = pending + mounts;
2032 return 0;
2033}
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098static int attach_recursive_mnt(struct mount *source_mnt,
2099 struct mount *dest_mnt,
2100 struct mountpoint *dest_mp,
2101 bool moving)
2102{
2103 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
2104 HLIST_HEAD(tree_list);
2105 struct mnt_namespace *ns = dest_mnt->mnt_ns;
2106 struct mountpoint *smp;
2107 struct mount *child, *p;
2108 struct hlist_node *n;
2109 int err;
2110
2111
2112
2113
2114 smp = get_mountpoint(source_mnt->mnt.mnt_root);
2115 if (IS_ERR(smp))
2116 return PTR_ERR(smp);
2117
2118
2119 if (!moving) {
2120 err = count_mounts(ns, source_mnt);
2121 if (err)
2122 goto out;
2123 }
2124
2125 if (IS_MNT_SHARED(dest_mnt)) {
2126 err = invent_group_ids(source_mnt, true);
2127 if (err)
2128 goto out;
2129 err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
2130 lock_mount_hash();
2131 if (err)
2132 goto out_cleanup_ids;
2133 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
2134 set_mnt_shared(p);
2135 } else {
2136 lock_mount_hash();
2137 }
2138 if (moving) {
2139 unhash_mnt(source_mnt);
2140 attach_mnt(source_mnt, dest_mnt, dest_mp);
2141 touch_mnt_namespace(source_mnt->mnt_ns);
2142 } else {
2143 if (source_mnt->mnt_ns) {
2144
2145 list_del_init(&source_mnt->mnt_ns->list);
2146 }
2147 mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
2148 commit_tree(source_mnt);
2149 }
2150
2151 hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
2152 struct mount *q;
2153 hlist_del_init(&child->mnt_hash);
2154 q = __lookup_mnt(&child->mnt_parent->mnt,
2155 child->mnt_mountpoint);
2156 if (q)
2157 mnt_change_mountpoint(child, smp, q);
2158
2159 if (child->mnt_parent->mnt_ns->user_ns != user_ns)
2160 lock_mnt_tree(child);
2161 child->mnt.mnt_flags &= ~MNT_LOCKED;
2162 commit_tree(child);
2163 }
2164 put_mountpoint(smp);
2165 unlock_mount_hash();
2166
2167 return 0;
2168
2169 out_cleanup_ids:
2170 while (!hlist_empty(&tree_list)) {
2171 child = hlist_entry(tree_list.first, struct mount, mnt_hash);
2172 child->mnt_parent->mnt_ns->pending_mounts = 0;
2173 umount_tree(child, UMOUNT_SYNC);
2174 }
2175 unlock_mount_hash();
2176 cleanup_group_ids(source_mnt, NULL);
2177 out:
2178 ns->pending_mounts = 0;
2179
2180 read_seqlock_excl(&mount_lock);
2181 put_mountpoint(smp);
2182 read_sequnlock_excl(&mount_lock);
2183
2184 return err;
2185}
2186
2187static struct mountpoint *lock_mount(struct path *path)
2188{
2189 struct vfsmount *mnt;
2190 struct dentry *dentry = path->dentry;
2191retry:
2192 inode_lock(dentry->d_inode);
2193 if (unlikely(cant_mount(dentry))) {
2194 inode_unlock(dentry->d_inode);
2195 return ERR_PTR(-ENOENT);
2196 }
2197 namespace_lock();
2198 mnt = lookup_mnt(path);
2199 if (likely(!mnt)) {
2200 struct mountpoint *mp = get_mountpoint(dentry);
2201 if (IS_ERR(mp)) {
2202 namespace_unlock();
2203 inode_unlock(dentry->d_inode);
2204 return mp;
2205 }
2206 return mp;
2207 }
2208 namespace_unlock();
2209 inode_unlock(path->dentry->d_inode);
2210 path_put(path);
2211 path->mnt = mnt;
2212 dentry = path->dentry = dget(mnt->mnt_root);
2213 goto retry;
2214}
2215
2216static void unlock_mount(struct mountpoint *where)
2217{
2218 struct dentry *dentry = where->m_dentry;
2219
2220 read_seqlock_excl(&mount_lock);
2221 put_mountpoint(where);
2222 read_sequnlock_excl(&mount_lock);
2223
2224 namespace_unlock();
2225 inode_unlock(dentry->d_inode);
2226}
2227
2228static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
2229{
2230 if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
2231 return -EINVAL;
2232
2233 if (d_is_dir(mp->m_dentry) !=
2234 d_is_dir(mnt->mnt.mnt_root))
2235 return -ENOTDIR;
2236
2237 return attach_recursive_mnt(mnt, p, mp, false);
2238}
2239
2240
2241
2242
2243
2244static int flags_to_propagation_type(int ms_flags)
2245{
2246 int type = ms_flags & ~(MS_REC | MS_SILENT);
2247
2248
2249 if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2250 return 0;
2251
2252 if (!is_power_of_2(type))
2253 return 0;
2254 return type;
2255}
2256
2257
2258
2259
2260static int do_change_type(struct path *path, int ms_flags)
2261{
2262 struct mount *m;
2263 struct mount *mnt = real_mount(path->mnt);
2264 int recurse = ms_flags & MS_REC;
2265 int type;
2266 int err = 0;
2267
2268 if (path->dentry != path->mnt->mnt_root)
2269 return -EINVAL;
2270
2271 type = flags_to_propagation_type(ms_flags);
2272 if (!type)
2273 return -EINVAL;
2274
2275 namespace_lock();
2276 if (type == MS_SHARED) {
2277 err = invent_group_ids(mnt, recurse);
2278 if (err)
2279 goto out_unlock;
2280 }
2281
2282 lock_mount_hash();
2283 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
2284 change_mnt_propagation(m, type);
2285 unlock_mount_hash();
2286
2287 out_unlock:
2288 namespace_unlock();
2289 return err;
2290}
2291
2292static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
2293{
2294 struct mount *child;
2295 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
2296 if (!is_subdir(child->mnt_mountpoint, dentry))
2297 continue;
2298
2299 if (child->mnt.mnt_flags & MNT_LOCKED)
2300 return true;
2301 }
2302 return false;
2303}
2304
2305static struct mount *__do_loopback(struct path *old_path, int recurse)
2306{
2307 struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
2308
2309 if (IS_MNT_UNBINDABLE(old))
2310 return mnt;
2311
2312 if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
2313 return mnt;
2314
2315 if (!recurse && has_locked_children(old, old_path->dentry))
2316 return mnt;
2317
2318 if (recurse)
2319 mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
2320 else
2321 mnt = clone_mnt(old, old_path->dentry, 0);
2322
2323 if (!IS_ERR(mnt))
2324 mnt->mnt.mnt_flags &= ~MNT_LOCKED;
2325
2326 return mnt;
2327}
2328
2329
2330
2331
2332static int do_loopback(struct path *path, const char *old_name,
2333 int recurse)
2334{
2335 struct path old_path;
2336 struct mount *mnt = NULL, *parent;
2337 struct mountpoint *mp;
2338 int err;
2339 if (!old_name || !*old_name)
2340 return -EINVAL;
2341 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
2342 if (err)
2343 return err;
2344
2345 err = -EINVAL;
2346 if (mnt_ns_loop(old_path.dentry))
2347 goto out;
2348
2349 mp = lock_mount(path);
2350 if (IS_ERR(mp)) {
2351 err = PTR_ERR(mp);
2352 goto out;
2353 }
2354
2355 parent = real_mount(path->mnt);
2356 if (!check_mnt(parent))
2357 goto out2;
2358
2359 mnt = __do_loopback(&old_path, recurse);
2360 if (IS_ERR(mnt)) {
2361 err = PTR_ERR(mnt);
2362 goto out2;
2363 }
2364
2365 err = graft_tree(mnt, parent, mp);
2366 if (err) {
2367 lock_mount_hash();
2368 umount_tree(mnt, UMOUNT_SYNC);
2369 unlock_mount_hash();
2370 }
2371out2:
2372 unlock_mount(mp);
2373out:
2374 path_put(&old_path);
2375 return err;
2376}
2377
2378static struct file *open_detached_copy(struct path *path, bool recursive)
2379{
2380 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
2381 struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
2382 struct mount *mnt, *p;
2383 struct file *file;
2384
2385 if (IS_ERR(ns))
2386 return ERR_CAST(ns);
2387
2388 namespace_lock();
2389 mnt = __do_loopback(path, recursive);
2390 if (IS_ERR(mnt)) {
2391 namespace_unlock();
2392 free_mnt_ns(ns);
2393 return ERR_CAST(mnt);
2394 }
2395
2396 lock_mount_hash();
2397 for (p = mnt; p; p = next_mnt(p, mnt)) {
2398 p->mnt_ns = ns;
2399 ns->mounts++;
2400 }
2401 ns->root = mnt;
2402 list_add_tail(&ns->list, &mnt->mnt_list);
2403 mntget(&mnt->mnt);
2404 unlock_mount_hash();
2405 namespace_unlock();
2406
2407 mntput(path->mnt);
2408 path->mnt = &mnt->mnt;
2409 file = dentry_open(path, O_PATH, current_cred());
2410 if (IS_ERR(file))
2411 dissolve_on_fput(path->mnt);
2412 else
2413 file->f_mode |= FMODE_NEED_UNMOUNT;
2414 return file;
2415}
2416
2417SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
2418{
2419 struct file *file;
2420 struct path path;
2421 int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
2422 bool detached = flags & OPEN_TREE_CLONE;
2423 int error;
2424 int fd;
2425
2426 BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
2427
2428 if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
2429 AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
2430 OPEN_TREE_CLOEXEC))
2431 return -EINVAL;
2432
2433 if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
2434 return -EINVAL;
2435
2436 if (flags & AT_NO_AUTOMOUNT)
2437 lookup_flags &= ~LOOKUP_AUTOMOUNT;
2438 if (flags & AT_SYMLINK_NOFOLLOW)
2439 lookup_flags &= ~LOOKUP_FOLLOW;
2440 if (flags & AT_EMPTY_PATH)
2441 lookup_flags |= LOOKUP_EMPTY;
2442
2443 if (detached && !may_mount())
2444 return -EPERM;
2445
2446 fd = get_unused_fd_flags(flags & O_CLOEXEC);
2447 if (fd < 0)
2448 return fd;
2449
2450 error = user_path_at(dfd, filename, lookup_flags, &path);
2451 if (unlikely(error)) {
2452 file = ERR_PTR(error);
2453 } else {
2454 if (detached)
2455 file = open_detached_copy(&path, flags & AT_RECURSIVE);
2456 else
2457 file = dentry_open(&path, O_PATH, current_cred());
2458 path_put(&path);
2459 }
2460 if (IS_ERR(file)) {
2461 put_unused_fd(fd);
2462 return PTR_ERR(file);
2463 }
2464 fd_install(fd, file);
2465 return fd;
2466}
2467
2468
2469
2470
2471
2472
2473
2474static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
2475{
2476 unsigned int fl = mnt->mnt.mnt_flags;
2477
2478 if ((fl & MNT_LOCK_READONLY) &&
2479 !(mnt_flags & MNT_READONLY))
2480 return false;
2481
2482 if ((fl & MNT_LOCK_NODEV) &&
2483 !(mnt_flags & MNT_NODEV))
2484 return false;
2485
2486 if ((fl & MNT_LOCK_NOSUID) &&
2487 !(mnt_flags & MNT_NOSUID))
2488 return false;
2489
2490 if ((fl & MNT_LOCK_NOEXEC) &&
2491 !(mnt_flags & MNT_NOEXEC))
2492 return false;
2493
2494 if ((fl & MNT_LOCK_ATIME) &&
2495 ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
2496 return false;
2497
2498 return true;
2499}
2500
2501static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
2502{
2503 bool readonly_request = (mnt_flags & MNT_READONLY);
2504
2505 if (readonly_request == __mnt_is_readonly(&mnt->mnt))
2506 return 0;
2507
2508 if (readonly_request)
2509 return mnt_make_readonly(mnt);
2510
2511 return __mnt_unmake_readonly(mnt);
2512}
2513
2514
2515
2516
2517
2518static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
2519{
2520 lock_mount_hash();
2521 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
2522 mnt->mnt.mnt_flags = mnt_flags;
2523 touch_mnt_namespace(mnt->mnt_ns);
2524 unlock_mount_hash();
2525}
2526
2527static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
2528{
2529 struct super_block *sb = mnt->mnt_sb;
2530
2531 if (!__mnt_is_readonly(mnt) &&
2532 (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
2533 char *buf = (char *)__get_free_page(GFP_KERNEL);
2534 char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);
2535 struct tm tm;
2536
2537 time64_to_tm(sb->s_time_max, 0, &tm);
2538
2539 pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n",
2540 sb->s_type->name,
2541 is_mounted(mnt) ? "remounted" : "mounted",
2542 mntpath,
2543 tm.tm_year+1900, (unsigned long long)sb->s_time_max);
2544
2545 free_page((unsigned long)buf);
2546 }
2547}
2548
2549
2550
2551
2552
2553
2554static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
2555{
2556 struct super_block *sb = path->mnt->mnt_sb;
2557 struct mount *mnt = real_mount(path->mnt);
2558 int ret;
2559
2560 if (!check_mnt(mnt))
2561 return -EINVAL;
2562
2563 if (path->dentry != mnt->mnt.mnt_root)
2564 return -EINVAL;
2565
2566 if (!can_change_locked_flags(mnt, mnt_flags))
2567 return -EPERM;
2568
2569 down_write(&sb->s_umount);
2570 ret = change_mount_ro_state(mnt, mnt_flags);
2571 if (ret == 0)
2572 set_mount_attributes(mnt, mnt_flags);
2573 up_write(&sb->s_umount);
2574
2575 mnt_warn_timestamp_expiry(path, &mnt->mnt);
2576
2577 return ret;
2578}
2579
2580
2581
2582
2583
2584
2585static int do_remount(struct path *path, int ms_flags, int sb_flags,
2586 int mnt_flags, void *data)
2587{
2588 int err;
2589 struct super_block *sb = path->mnt->mnt_sb;
2590 struct mount *mnt = real_mount(path->mnt);
2591 struct fs_context *fc;
2592
2593 if (!check_mnt(mnt))
2594 return -EINVAL;
2595
2596 if (path->dentry != path->mnt->mnt_root)
2597 return -EINVAL;
2598
2599 if (!can_change_locked_flags(mnt, mnt_flags))
2600 return -EPERM;
2601
2602 fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK);
2603 if (IS_ERR(fc))
2604 return PTR_ERR(fc);
2605
2606 fc->oldapi = true;
2607 err = parse_monolithic_mount_data(fc, data);
2608 if (!err) {
2609 down_write(&sb->s_umount);
2610 err = -EPERM;
2611 if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
2612 err = reconfigure_super(fc);
2613 if (!err)
2614 set_mount_attributes(mnt, mnt_flags);
2615 }
2616 up_write(&sb->s_umount);
2617 }
2618
2619 mnt_warn_timestamp_expiry(path, &mnt->mnt);
2620
2621 put_fs_context(fc);
2622 return err;
2623}
2624
/* Returns 1 if any mount in the tree rooted at @mnt is unbindable, else 0. */
static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *cur = mnt;

	while (cur) {
		if (IS_MNT_UNBINDABLE(cur))
			return 1;
		cur = next_mnt(cur, mnt);
	}
	return 0;
}
2634
2635
2636
2637
2638
2639
2640
2641static bool check_for_nsfs_mounts(struct mount *subtree)
2642{
2643 struct mount *p;
2644 bool ret = false;
2645
2646 lock_mount_hash();
2647 for (p = subtree; p; p = next_mnt(p, subtree))
2648 if (mnt_ns_loop(p->mnt.mnt_root))
2649 goto out;
2650
2651 ret = true;
2652out:
2653 unlock_mount_hash();
2654 return ret;
2655}
2656
2657static int do_move_mount(struct path *old_path, struct path *new_path)
2658{
2659 struct mnt_namespace *ns;
2660 struct mount *p;
2661 struct mount *old;
2662 struct mount *parent;
2663 struct mountpoint *mp, *old_mp;
2664 int err;
2665 bool attached;
2666
2667 mp = lock_mount(new_path);
2668 if (IS_ERR(mp))
2669 return PTR_ERR(mp);
2670
2671 old = real_mount(old_path->mnt);
2672 p = real_mount(new_path->mnt);
2673 parent = old->mnt_parent;
2674 attached = mnt_has_parent(old);
2675 old_mp = old->mnt_mp;
2676 ns = old->mnt_ns;
2677
2678 err = -EINVAL;
2679
2680 if (!check_mnt(p))
2681 goto out;
2682
2683
2684 if (!is_mounted(&old->mnt))
2685 goto out;
2686
2687
2688 if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
2689 goto out;
2690
2691 if (old->mnt.mnt_flags & MNT_LOCKED)
2692 goto out;
2693
2694 if (old_path->dentry != old_path->mnt->mnt_root)
2695 goto out;
2696
2697 if (d_is_dir(new_path->dentry) !=
2698 d_is_dir(old_path->dentry))
2699 goto out;
2700
2701
2702
2703 if (attached && IS_MNT_SHARED(parent))
2704 goto out;
2705
2706
2707
2708
2709 if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
2710 goto out;
2711 err = -ELOOP;
2712 if (!check_for_nsfs_mounts(old))
2713 goto out;
2714 for (; mnt_has_parent(p); p = p->mnt_parent)
2715 if (p == old)
2716 goto out;
2717
2718 err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
2719 attached);
2720 if (err)
2721 goto out;
2722
2723
2724
2725 list_del_init(&old->mnt_expire);
2726 if (attached)
2727 put_mountpoint(old_mp);
2728out:
2729 unlock_mount(mp);
2730 if (!err) {
2731 if (attached)
2732 mntput_no_expire(parent);
2733 else
2734 free_mnt_ns(ns);
2735 }
2736 return err;
2737}
2738
2739static int do_move_mount_old(struct path *path, const char *old_name)
2740{
2741 struct path old_path;
2742 int err;
2743
2744 if (!old_name || !*old_name)
2745 return -EINVAL;
2746
2747 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
2748 if (err)
2749 return err;
2750
2751 err = do_move_mount(&old_path, path);
2752 path_put(&old_path);
2753 return err;
2754}
2755
2756
2757
2758
2759static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
2760 struct path *path, int mnt_flags)
2761{
2762 struct mount *parent = real_mount(path->mnt);
2763
2764 mnt_flags &= ~MNT_INTERNAL_FLAGS;
2765
2766 if (unlikely(!check_mnt(parent))) {
2767
2768 if (!(mnt_flags & MNT_SHRINKABLE))
2769 return -EINVAL;
2770
2771 if (!parent->mnt_ns)
2772 return -EINVAL;
2773 }
2774
2775
2776 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
2777 path->mnt->mnt_root == path->dentry)
2778 return -EBUSY;
2779
2780 if (d_is_symlink(newmnt->mnt.mnt_root))
2781 return -EINVAL;
2782
2783 newmnt->mnt.mnt_flags = mnt_flags;
2784 return graft_tree(newmnt, parent, mp);
2785}
2786
2787static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
2788
2789
2790
2791
2792
2793static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
2794 unsigned int mnt_flags)
2795{
2796 struct vfsmount *mnt;
2797 struct mountpoint *mp;
2798 struct super_block *sb = fc->root->d_sb;
2799 int error;
2800
2801 error = security_sb_kern_mount(sb);
2802 if (!error && mount_too_revealing(sb, &mnt_flags))
2803 error = -EPERM;
2804
2805 if (unlikely(error)) {
2806 fc_drop_locked(fc);
2807 return error;
2808 }
2809
2810 up_write(&sb->s_umount);
2811
2812 mnt = vfs_create_mount(fc);
2813 if (IS_ERR(mnt))
2814 return PTR_ERR(mnt);
2815
2816 mnt_warn_timestamp_expiry(mountpoint, mnt);
2817
2818 mp = lock_mount(mountpoint);
2819 if (IS_ERR(mp)) {
2820 mntput(mnt);
2821 return PTR_ERR(mp);
2822 }
2823 error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags);
2824 unlock_mount(mp);
2825 if (error < 0)
2826 mntput(mnt);
2827 return error;
2828}
2829
2830
2831
2832
2833
2834static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
2835 int mnt_flags, const char *name, void *data)
2836{
2837 struct file_system_type *type;
2838 struct fs_context *fc;
2839 const char *subtype = NULL;
2840 int err = 0;
2841
2842 if (!fstype)
2843 return -EINVAL;
2844
2845 type = get_fs_type(fstype);
2846 if (!type)
2847 return -ENODEV;
2848
2849 if (type->fs_flags & FS_HAS_SUBTYPE) {
2850 subtype = strchr(fstype, '.');
2851 if (subtype) {
2852 subtype++;
2853 if (!*subtype) {
2854 put_filesystem(type);
2855 return -EINVAL;
2856 }
2857 }
2858 }
2859
2860 fc = fs_context_for_mount(type, sb_flags);
2861 put_filesystem(type);
2862 if (IS_ERR(fc))
2863 return PTR_ERR(fc);
2864
2865 if (subtype)
2866 err = vfs_parse_fs_string(fc, "subtype",
2867 subtype, strlen(subtype));
2868 if (!err && name)
2869 err = vfs_parse_fs_string(fc, "source", name, strlen(name));
2870 if (!err)
2871 err = parse_monolithic_mount_data(fc, data);
2872 if (!err && !mount_capable(fc))
2873 err = -EPERM;
2874 if (!err)
2875 err = vfs_get_tree(fc);
2876 if (!err)
2877 err = do_new_mount_fc(fc, path, mnt_flags);
2878
2879 put_fs_context(fc);
2880 return err;
2881}
2882
2883int finish_automount(struct vfsmount *m, struct path *path)
2884{
2885 struct dentry *dentry = path->dentry;
2886 struct mountpoint *mp;
2887 struct mount *mnt;
2888 int err;
2889
2890 if (!m)
2891 return 0;
2892 if (IS_ERR(m))
2893 return PTR_ERR(m);
2894
2895 mnt = real_mount(m);
2896
2897
2898
2899 BUG_ON(mnt_get_count(mnt) < 2);
2900
2901 if (m->mnt_sb == path->mnt->mnt_sb &&
2902 m->mnt_root == dentry) {
2903 err = -ELOOP;
2904 goto discard;
2905 }
2906
2907
2908
2909
2910
2911
2912 inode_lock(dentry->d_inode);
2913 namespace_lock();
2914 if (unlikely(cant_mount(dentry))) {
2915 err = -ENOENT;
2916 goto discard_locked;
2917 }
2918 rcu_read_lock();
2919 if (unlikely(__lookup_mnt(path->mnt, dentry))) {
2920 rcu_read_unlock();
2921 err = 0;
2922 goto discard_locked;
2923 }
2924 rcu_read_unlock();
2925 mp = get_mountpoint(dentry);
2926 if (IS_ERR(mp)) {
2927 err = PTR_ERR(mp);
2928 goto discard_locked;
2929 }
2930
2931 err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
2932 unlock_mount(mp);
2933 if (unlikely(err))
2934 goto discard;
2935 mntput(m);
2936 return 0;
2937
2938discard_locked:
2939 namespace_unlock();
2940 inode_unlock(dentry->d_inode);
2941discard:
2942
2943 if (!list_empty(&mnt->mnt_expire)) {
2944 namespace_lock();
2945 list_del_init(&mnt->mnt_expire);
2946 namespace_unlock();
2947 }
2948 mntput(m);
2949 mntput(m);
2950 return err;
2951}
2952
2953
2954
2955
2956
2957
2958void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
2959{
2960 namespace_lock();
2961
2962 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
2963
2964 namespace_unlock();
2965}
2966EXPORT_SYMBOL(mnt_set_expiry);
2967
2968
2969
2970
2971
2972
2973void mark_mounts_for_expiry(struct list_head *mounts)
2974{
2975 struct mount *mnt, *next;
2976 LIST_HEAD(graveyard);
2977
2978 if (list_empty(mounts))
2979 return;
2980
2981 namespace_lock();
2982 lock_mount_hash();
2983
2984
2985
2986
2987
2988
2989
2990 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
2991 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
2992 propagate_mount_busy(mnt, 1))
2993 continue;
2994 list_move(&mnt->mnt_expire, &graveyard);
2995 }
2996 while (!list_empty(&graveyard)) {
2997 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
2998 touch_mnt_namespace(mnt->mnt_ns);
2999 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3000 }
3001 unlock_mount_hash();
3002 namespace_unlock();
3003}
3004
3005EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
3006
3007
3008
3009
3010
3011
3012
3013static int select_submounts(struct mount *parent, struct list_head *graveyard)
3014{
3015 struct mount *this_parent = parent;
3016 struct list_head *next;
3017 int found = 0;
3018
3019repeat:
3020 next = this_parent->mnt_mounts.next;
3021resume:
3022 while (next != &this_parent->mnt_mounts) {
3023 struct list_head *tmp = next;
3024 struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
3025
3026 next = tmp->next;
3027 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
3028 continue;
3029
3030
3031
3032 if (!list_empty(&mnt->mnt_mounts)) {
3033 this_parent = mnt;
3034 goto repeat;
3035 }
3036
3037 if (!propagate_mount_busy(mnt, 1)) {
3038 list_move_tail(&mnt->mnt_expire, graveyard);
3039 found++;
3040 }
3041 }
3042
3043
3044
3045 if (this_parent != parent) {
3046 next = this_parent->mnt_child.next;
3047 this_parent = this_parent->mnt_parent;
3048 goto resume;
3049 }
3050 return found;
3051}
3052
3053
3054
3055
3056
3057
3058
3059static void shrink_submounts(struct mount *mnt)
3060{
3061 LIST_HEAD(graveyard);
3062 struct mount *m;
3063
3064
3065 while (select_submounts(mnt, &graveyard)) {
3066 while (!list_empty(&graveyard)) {
3067 m = list_first_entry(&graveyard, struct mount,
3068 mnt_expire);
3069 touch_mnt_namespace(m->mnt_ns);
3070 umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3071 }
3072 }
3073}
3074
3075void *copy_mount_options(const void __user * data)
3076{
3077 char *copy;
3078 unsigned size;
3079
3080 if (!data)
3081 return NULL;
3082
3083 copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
3084 if (!copy)
3085 return ERR_PTR(-ENOMEM);
3086
3087 size = PAGE_SIZE - offset_in_page(data);
3088
3089 if (copy_from_user(copy, data, size)) {
3090 kfree(copy);
3091 return ERR_PTR(-EFAULT);
3092 }
3093 if (size != PAGE_SIZE) {
3094 if (copy_from_user(copy + size, data + size, PAGE_SIZE - size))
3095 memset(copy + size, 0, PAGE_SIZE - size);
3096 }
3097 return copy;
3098}
3099
3100char *copy_mount_string(const void __user *data)
3101{
3102 return data ? strndup_user(data, PATH_MAX) : NULL;
3103}
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119int path_mount(const char *dev_name, struct path *path,
3120 const char *type_page, unsigned long flags, void *data_page)
3121{
3122 unsigned int mnt_flags = 0, sb_flags;
3123 int ret;
3124
3125
3126 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
3127 flags &= ~MS_MGC_MSK;
3128
3129
3130 if (data_page)
3131 ((char *)data_page)[PAGE_SIZE - 1] = 0;
3132
3133 if (flags & MS_NOUSER)
3134 return -EINVAL;
3135
3136 ret = security_sb_mount(dev_name, path, type_page, flags, data_page);
3137 if (ret)
3138 return ret;
3139 if (!may_mount())
3140 return -EPERM;
3141 if ((flags & SB_MANDLOCK) && !may_mandlock())
3142 return -EPERM;
3143
3144
3145 if (!(flags & MS_NOATIME))
3146 mnt_flags |= MNT_RELATIME;
3147
3148
3149 if (flags & MS_NOSUID)
3150 mnt_flags |= MNT_NOSUID;
3151 if (flags & MS_NODEV)
3152 mnt_flags |= MNT_NODEV;
3153 if (flags & MS_NOEXEC)
3154 mnt_flags |= MNT_NOEXEC;
3155 if (flags & MS_NOATIME)
3156 mnt_flags |= MNT_NOATIME;
3157 if (flags & MS_NODIRATIME)
3158 mnt_flags |= MNT_NODIRATIME;
3159 if (flags & MS_STRICTATIME)
3160 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
3161 if (flags & MS_RDONLY)
3162 mnt_flags |= MNT_READONLY;
3163
3164
3165 if ((flags & MS_REMOUNT) &&
3166 ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
3167 MS_STRICTATIME)) == 0)) {
3168 mnt_flags &= ~MNT_ATIME_MASK;
3169 mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK;
3170 }
3171
3172 sb_flags = flags & (SB_RDONLY |
3173 SB_SYNCHRONOUS |
3174 SB_MANDLOCK |
3175 SB_DIRSYNC |
3176 SB_SILENT |
3177 SB_POSIXACL |
3178 SB_LAZYTIME |
3179 SB_I_VERSION);
3180
3181 if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
3182 return do_reconfigure_mnt(path, mnt_flags);
3183 if (flags & MS_REMOUNT)
3184 return do_remount(path, flags, sb_flags, mnt_flags, data_page);
3185 if (flags & MS_BIND)
3186 return do_loopback(path, dev_name, flags & MS_REC);
3187 if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
3188 return do_change_type(path, flags);
3189 if (flags & MS_MOVE)
3190 return do_move_mount_old(path, dev_name);
3191
3192 return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name,
3193 data_page);
3194}
3195
3196long do_mount(const char *dev_name, const char __user *dir_name,
3197 const char *type_page, unsigned long flags, void *data_page)
3198{
3199 struct path path;
3200 int ret;
3201
3202 ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path);
3203 if (ret)
3204 return ret;
3205 ret = path_mount(dev_name, &path, type_page, flags, data_page);
3206 path_put(&path);
3207 return ret;
3208}
3209
3210static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
3211{
3212 return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
3213}
3214
3215static void dec_mnt_namespaces(struct ucounts *ucounts)
3216{
3217 dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
3218}
3219
3220static void free_mnt_ns(struct mnt_namespace *ns)
3221{
3222 if (!is_anon_ns(ns))
3223 ns_free_inum(&ns->ns);
3224 dec_mnt_namespaces(ns->ucounts);
3225 put_user_ns(ns->user_ns);
3226 kfree(ns);
3227}
3228
3229
3230
3231
3232
3233
3234
3235
3236static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
3237
3238static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
3239{
3240 struct mnt_namespace *new_ns;
3241 struct ucounts *ucounts;
3242 int ret;
3243
3244 ucounts = inc_mnt_namespaces(user_ns);
3245 if (!ucounts)
3246 return ERR_PTR(-ENOSPC);
3247
3248 new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
3249 if (!new_ns) {
3250 dec_mnt_namespaces(ucounts);
3251 return ERR_PTR(-ENOMEM);
3252 }
3253 if (!anon) {
3254 ret = ns_alloc_inum(&new_ns->ns);
3255 if (ret) {
3256 kfree(new_ns);
3257 dec_mnt_namespaces(ucounts);
3258 return ERR_PTR(ret);
3259 }
3260 }
3261 new_ns->ns.ops = &mntns_operations;
3262 if (!anon)
3263 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
3264 atomic_set(&new_ns->count, 1);
3265 INIT_LIST_HEAD(&new_ns->list);
3266 init_waitqueue_head(&new_ns->poll);
3267 spin_lock_init(&new_ns->ns_lock);
3268 new_ns->user_ns = get_user_ns(user_ns);
3269 new_ns->ucounts = ucounts;
3270 return new_ns;
3271}
3272
3273__latent_entropy
3274struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
3275 struct user_namespace *user_ns, struct fs_struct *new_fs)
3276{
3277 struct mnt_namespace *new_ns;
3278 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
3279 struct mount *p, *q;
3280 struct mount *old;
3281 struct mount *new;
3282 int copy_flags;
3283
3284 BUG_ON(!ns);
3285
3286 if (likely(!(flags & CLONE_NEWNS))) {
3287 get_mnt_ns(ns);
3288 return ns;
3289 }
3290
3291 old = ns->root;
3292
3293 new_ns = alloc_mnt_ns(user_ns, false);
3294 if (IS_ERR(new_ns))
3295 return new_ns;
3296
3297 namespace_lock();
3298
3299 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
3300 if (user_ns != ns->user_ns)
3301 copy_flags |= CL_SHARED_TO_SLAVE;
3302 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
3303 if (IS_ERR(new)) {
3304 namespace_unlock();
3305 free_mnt_ns(new_ns);
3306 return ERR_CAST(new);
3307 }
3308 if (user_ns != ns->user_ns) {
3309 lock_mount_hash();
3310 lock_mnt_tree(new);
3311 unlock_mount_hash();
3312 }
3313 new_ns->root = new;
3314 list_add_tail(&new_ns->list, &new->mnt_list);
3315
3316
3317
3318
3319
3320
3321 p = old;
3322 q = new;
3323 while (p) {
3324 q->mnt_ns = new_ns;
3325 new_ns->mounts++;
3326 if (new_fs) {
3327 if (&p->mnt == new_fs->root.mnt) {
3328 new_fs->root.mnt = mntget(&q->mnt);
3329 rootmnt = &p->mnt;
3330 }
3331 if (&p->mnt == new_fs->pwd.mnt) {
3332 new_fs->pwd.mnt = mntget(&q->mnt);
3333 pwdmnt = &p->mnt;
3334 }
3335 }
3336 p = next_mnt(p, old);
3337 q = next_mnt(q, new);
3338 if (!q)
3339 break;
3340 while (p->mnt.mnt_root != q->mnt.mnt_root)
3341 p = next_mnt(p, old);
3342 }
3343 namespace_unlock();
3344
3345 if (rootmnt)
3346 mntput(rootmnt);
3347 if (pwdmnt)
3348 mntput(pwdmnt);
3349
3350 return new_ns;
3351}
3352
3353struct dentry *mount_subtree(struct vfsmount *m, const char *name)
3354{
3355 struct mount *mnt = real_mount(m);
3356 struct mnt_namespace *ns;
3357 struct super_block *s;
3358 struct path path;
3359 int err;
3360
3361 ns = alloc_mnt_ns(&init_user_ns, true);
3362 if (IS_ERR(ns)) {
3363 mntput(m);
3364 return ERR_CAST(ns);
3365 }
3366 mnt->mnt_ns = ns;
3367 ns->root = mnt;
3368 ns->mounts++;
3369 list_add(&mnt->mnt_list, &ns->list);
3370
3371 err = vfs_path_lookup(m->mnt_root, m,
3372 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
3373
3374 put_mnt_ns(ns);
3375
3376 if (err)
3377 return ERR_PTR(err);
3378
3379
3380 s = path.mnt->mnt_sb;
3381 atomic_inc(&s->s_active);
3382 mntput(path.mnt);
3383
3384 down_write(&s->s_umount);
3385
3386 return path.dentry;
3387}
3388EXPORT_SYMBOL(mount_subtree);
3389
3390SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
3391 char __user *, type, unsigned long, flags, void __user *, data)
3392{
3393 int ret;
3394 char *kernel_type;
3395 char *kernel_dev;
3396 void *options;
3397
3398 kernel_type = copy_mount_string(type);
3399 ret = PTR_ERR(kernel_type);
3400 if (IS_ERR(kernel_type))
3401 goto out_type;
3402
3403 kernel_dev = copy_mount_string(dev_name);
3404 ret = PTR_ERR(kernel_dev);
3405 if (IS_ERR(kernel_dev))
3406 goto out_dev;
3407
3408 options = copy_mount_options(data);
3409 ret = PTR_ERR(options);
3410 if (IS_ERR(options))
3411 goto out_data;
3412
3413 ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
3414
3415 kfree(options);
3416out_data:
3417 kfree(kernel_dev);
3418out_dev:
3419 kfree(kernel_type);
3420out_type:
3421 return ret;
3422}
3423
3424
3425
3426
3427
3428SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
3429 unsigned int, attr_flags)
3430{
3431 struct mnt_namespace *ns;
3432 struct fs_context *fc;
3433 struct file *file;
3434 struct path newmount;
3435 struct mount *mnt;
3436 struct fd f;
3437 unsigned int mnt_flags = 0;
3438 long ret;
3439
3440 if (!may_mount())
3441 return -EPERM;
3442
3443 if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
3444 return -EINVAL;
3445
3446 if (attr_flags & ~(MOUNT_ATTR_RDONLY |
3447 MOUNT_ATTR_NOSUID |
3448 MOUNT_ATTR_NODEV |
3449 MOUNT_ATTR_NOEXEC |
3450 MOUNT_ATTR__ATIME |
3451 MOUNT_ATTR_NODIRATIME))
3452 return -EINVAL;
3453
3454 if (attr_flags & MOUNT_ATTR_RDONLY)
3455 mnt_flags |= MNT_READONLY;
3456 if (attr_flags & MOUNT_ATTR_NOSUID)
3457 mnt_flags |= MNT_NOSUID;
3458 if (attr_flags & MOUNT_ATTR_NODEV)
3459 mnt_flags |= MNT_NODEV;
3460 if (attr_flags & MOUNT_ATTR_NOEXEC)
3461 mnt_flags |= MNT_NOEXEC;
3462 if (attr_flags & MOUNT_ATTR_NODIRATIME)
3463 mnt_flags |= MNT_NODIRATIME;
3464
3465 switch (attr_flags & MOUNT_ATTR__ATIME) {
3466 case MOUNT_ATTR_STRICTATIME:
3467 break;
3468 case MOUNT_ATTR_NOATIME:
3469 mnt_flags |= MNT_NOATIME;
3470 break;
3471 case MOUNT_ATTR_RELATIME:
3472 mnt_flags |= MNT_RELATIME;
3473 break;
3474 default:
3475 return -EINVAL;
3476 }
3477
3478 f = fdget(fs_fd);
3479 if (!f.file)
3480 return -EBADF;
3481
3482 ret = -EINVAL;
3483 if (f.file->f_op != &fscontext_fops)
3484 goto err_fsfd;
3485
3486 fc = f.file->private_data;
3487
3488 ret = mutex_lock_interruptible(&fc->uapi_mutex);
3489 if (ret < 0)
3490 goto err_fsfd;
3491
3492
3493 ret = -EINVAL;
3494 if (!fc->root)
3495 goto err_unlock;
3496
3497 ret = -EPERM;
3498 if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
3499 pr_warn("VFS: Mount too revealing\n");
3500 goto err_unlock;
3501 }
3502
3503 ret = -EBUSY;
3504 if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
3505 goto err_unlock;
3506
3507 ret = -EPERM;
3508 if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
3509 goto err_unlock;
3510
3511 newmount.mnt = vfs_create_mount(fc);
3512 if (IS_ERR(newmount.mnt)) {
3513 ret = PTR_ERR(newmount.mnt);
3514 goto err_unlock;
3515 }
3516 newmount.dentry = dget(fc->root);
3517 newmount.mnt->mnt_flags = mnt_flags;
3518
3519
3520
3521
3522
3523
3524 vfs_clean_context(fc);
3525
3526 ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
3527 if (IS_ERR(ns)) {
3528 ret = PTR_ERR(ns);
3529 goto err_path;
3530 }
3531 mnt = real_mount(newmount.mnt);
3532 mnt->mnt_ns = ns;
3533 ns->root = mnt;
3534 ns->mounts = 1;
3535 list_add(&mnt->mnt_list, &ns->list);
3536 mntget(newmount.mnt);
3537
3538
3539
3540
3541 file = dentry_open(&newmount, O_PATH, fc->cred);
3542 if (IS_ERR(file)) {
3543 dissolve_on_fput(newmount.mnt);
3544 ret = PTR_ERR(file);
3545 goto err_path;
3546 }
3547 file->f_mode |= FMODE_NEED_UNMOUNT;
3548
3549 ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
3550 if (ret >= 0)
3551 fd_install(ret, file);
3552 else
3553 fput(file);
3554
3555err_path:
3556 path_put(&newmount);
3557err_unlock:
3558 mutex_unlock(&fc->uapi_mutex);
3559err_fsfd:
3560 fdput(f);
3561 return ret;
3562}
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572SYSCALL_DEFINE5(move_mount,
3573 int, from_dfd, const char __user *, from_pathname,
3574 int, to_dfd, const char __user *, to_pathname,
3575 unsigned int, flags)
3576{
3577 struct path from_path, to_path;
3578 unsigned int lflags;
3579 int ret = 0;
3580
3581 if (!may_mount())
3582 return -EPERM;
3583
3584 if (flags & ~MOVE_MOUNT__MASK)
3585 return -EINVAL;
3586
3587
3588
3589
3590
3591 lflags = 0;
3592 if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3593 if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3594 if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3595
3596 ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
3597 if (ret < 0)
3598 return ret;
3599
3600 lflags = 0;
3601 if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3602 if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3603 if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3604
3605 ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
3606 if (ret < 0)
3607 goto out_from;
3608
3609 ret = security_move_mount(&from_path, &to_path);
3610 if (ret < 0)
3611 goto out_to;
3612
3613 ret = do_move_mount(&from_path, &to_path);
3614
3615out_to:
3616 path_put(&to_path);
3617out_from:
3618 path_put(&from_path);
3619 return ret;
3620}
3621
3622
3623
3624
3625
3626
3627bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
3628 const struct path *root)
3629{
3630 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
3631 dentry = mnt->mnt_mountpoint;
3632 mnt = mnt->mnt_parent;
3633 }
3634 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
3635}
3636
3637bool path_is_under(const struct path *path1, const struct path *path2)
3638{
3639 bool res;
3640 read_seqlock_excl(&mount_lock);
3641 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
3642 read_sequnlock_excl(&mount_lock);
3643 return res;
3644}
3645EXPORT_SYMBOL(path_is_under);
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3673 const char __user *, put_old)
3674{
3675 struct path new, old, root;
3676 struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
3677 struct mountpoint *old_mp, *root_mp;
3678 int error;
3679
3680 if (!may_mount())
3681 return -EPERM;
3682
3683 error = user_path_at(AT_FDCWD, new_root,
3684 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new);
3685 if (error)
3686 goto out0;
3687
3688 error = user_path_at(AT_FDCWD, put_old,
3689 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old);
3690 if (error)
3691 goto out1;
3692
3693 error = security_sb_pivotroot(&old, &new);
3694 if (error)
3695 goto out2;
3696
3697 get_fs_root(current->fs, &root);
3698 old_mp = lock_mount(&old);
3699 error = PTR_ERR(old_mp);
3700 if (IS_ERR(old_mp))
3701 goto out3;
3702
3703 error = -EINVAL;
3704 new_mnt = real_mount(new.mnt);
3705 root_mnt = real_mount(root.mnt);
3706 old_mnt = real_mount(old.mnt);
3707 ex_parent = new_mnt->mnt_parent;
3708 root_parent = root_mnt->mnt_parent;
3709 if (IS_MNT_SHARED(old_mnt) ||
3710 IS_MNT_SHARED(ex_parent) ||
3711 IS_MNT_SHARED(root_parent))
3712 goto out4;
3713 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
3714 goto out4;
3715 if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
3716 goto out4;
3717 error = -ENOENT;
3718 if (d_unlinked(new.dentry))
3719 goto out4;
3720 error = -EBUSY;
3721 if (new_mnt == root_mnt || old_mnt == root_mnt)
3722 goto out4;
3723 error = -EINVAL;
3724 if (root.mnt->mnt_root != root.dentry)
3725 goto out4;
3726 if (!mnt_has_parent(root_mnt))
3727 goto out4;
3728 if (new.mnt->mnt_root != new.dentry)
3729 goto out4;
3730 if (!mnt_has_parent(new_mnt))
3731 goto out4;
3732
3733 if (!is_path_reachable(old_mnt, old.dentry, &new))
3734 goto out4;
3735
3736 if (!is_path_reachable(new_mnt, new.dentry, &root))
3737 goto out4;
3738 lock_mount_hash();
3739 umount_mnt(new_mnt);
3740 root_mp = unhash_mnt(root_mnt);
3741 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
3742 new_mnt->mnt.mnt_flags |= MNT_LOCKED;
3743 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
3744 }
3745
3746 attach_mnt(root_mnt, old_mnt, old_mp);
3747
3748 attach_mnt(new_mnt, root_parent, root_mp);
3749 mnt_add_count(root_parent, -1);
3750 touch_mnt_namespace(current->nsproxy->mnt_ns);
3751
3752 list_del_init(&new_mnt->mnt_expire);
3753 put_mountpoint(root_mp);
3754 unlock_mount_hash();
3755 chroot_fs_refs(&root, &new);
3756 error = 0;
3757out4:
3758 unlock_mount(old_mp);
3759 if (!error)
3760 mntput_no_expire(ex_parent);
3761out3:
3762 path_put(&root);
3763out2:
3764 path_put(&old);
3765out1:
3766 path_put(&new);
3767out0:
3768 return error;
3769}
3770
3771static void __init init_mount_tree(void)
3772{
3773 struct vfsmount *mnt;
3774 struct mount *m;
3775 struct mnt_namespace *ns;
3776 struct path root;
3777
3778 mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
3779 if (IS_ERR(mnt))
3780 panic("Can't create rootfs");
3781
3782 ns = alloc_mnt_ns(&init_user_ns, false);
3783 if (IS_ERR(ns))
3784 panic("Can't allocate initial namespace");
3785 m = real_mount(mnt);
3786 m->mnt_ns = ns;
3787 ns->root = m;
3788 ns->mounts = 1;
3789 list_add(&m->mnt_list, &ns->list);
3790 init_task.nsproxy->mnt_ns = ns;
3791 get_mnt_ns(ns);
3792
3793 root.mnt = mnt;
3794 root.dentry = mnt->mnt_root;
3795 mnt->mnt_flags |= MNT_LOCKED;
3796
3797 set_fs_pwd(current->fs, &root);
3798 set_fs_root(current->fs, &root);
3799}
3800
3801void __init mnt_init(void)
3802{
3803 int err;
3804
3805 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
3806 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3807
3808 mount_hashtable = alloc_large_system_hash("Mount-cache",
3809 sizeof(struct hlist_head),
3810 mhash_entries, 19,
3811 HASH_ZERO,
3812 &m_hash_shift, &m_hash_mask, 0, 0);
3813 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
3814 sizeof(struct hlist_head),
3815 mphash_entries, 19,
3816 HASH_ZERO,
3817 &mp_hash_shift, &mp_hash_mask, 0, 0);
3818
3819 if (!mount_hashtable || !mountpoint_hashtable)
3820 panic("Failed to allocate mount hash table\n");
3821
3822 kernfs_init();
3823
3824 err = sysfs_init();
3825 if (err)
3826 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
3827 __func__, err);
3828 fs_kobj = kobject_create_and_add("fs", NULL);
3829 if (!fs_kobj)
3830 printk(KERN_WARNING "%s: kobj create error\n", __func__);
3831 shmem_init();
3832 init_rootfs();
3833 init_mount_tree();
3834}
3835
3836void put_mnt_ns(struct mnt_namespace *ns)
3837{
3838 if (!atomic_dec_and_test(&ns->count))
3839 return;
3840 drop_collected_mounts(&ns->root->mnt);
3841 free_mnt_ns(ns);
3842}
3843
3844struct vfsmount *kern_mount(struct file_system_type *type)
3845{
3846 struct vfsmount *mnt;
3847 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
3848 if (!IS_ERR(mnt)) {
3849
3850
3851
3852
3853 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
3854 }
3855 return mnt;
3856}
3857EXPORT_SYMBOL_GPL(kern_mount);
3858
3859void kern_unmount(struct vfsmount *mnt)
3860{
3861
3862 if (!IS_ERR_OR_NULL(mnt)) {
3863 real_mount(mnt)->mnt_ns = NULL;
3864 synchronize_rcu();
3865 mntput(mnt);
3866 }
3867}
3868EXPORT_SYMBOL(kern_unmount);
3869
3870void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
3871{
3872 unsigned int i;
3873
3874 for (i = 0; i < num; i++)
3875 if (mnt[i])
3876 real_mount(mnt[i])->mnt_ns = NULL;
3877 synchronize_rcu_expedited();
3878 for (i = 0; i < num; i++)
3879 mntput(mnt[i]);
3880}
3881EXPORT_SYMBOL(kern_unmount_array);
3882
3883bool our_mnt(struct vfsmount *mnt)
3884{
3885 return check_mnt(real_mount(mnt));
3886}
3887
3888bool current_chrooted(void)
3889{
3890
3891 struct path ns_root;
3892 struct path fs_root;
3893 bool chrooted;
3894
3895
3896 ns_root.mnt = ¤t->nsproxy->mnt_ns->root->mnt;
3897 ns_root.dentry = ns_root.mnt->mnt_root;
3898 path_get(&ns_root);
3899 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
3900 ;
3901
3902 get_fs_root(current->fs, &fs_root);
3903
3904 chrooted = !path_equal(&fs_root, &ns_root);
3905
3906 path_put(&fs_root);
3907 path_put(&ns_root);
3908
3909 return chrooted;
3910}
3911
3912static bool mnt_already_visible(struct mnt_namespace *ns,
3913 const struct super_block *sb,
3914 int *new_mnt_flags)
3915{
3916 int new_flags = *new_mnt_flags;
3917 struct mount *mnt;
3918 bool visible = false;
3919
3920 down_read(&namespace_sem);
3921 lock_ns_list(ns);
3922 list_for_each_entry(mnt, &ns->list, mnt_list) {
3923 struct mount *child;
3924 int mnt_flags;
3925
3926 if (mnt_is_cursor(mnt))
3927 continue;
3928
3929 if (mnt->mnt.mnt_sb->s_type != sb->s_type)
3930 continue;
3931
3932
3933
3934
3935 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
3936 continue;
3937
3938
3939 mnt_flags = mnt->mnt.mnt_flags;
3940
3941
3942 if (sb_rdonly(mnt->mnt.mnt_sb))
3943 mnt_flags |= MNT_LOCK_READONLY;
3944
3945
3946
3947
3948 if ((mnt_flags & MNT_LOCK_READONLY) &&
3949 !(new_flags & MNT_READONLY))
3950 continue;
3951 if ((mnt_flags & MNT_LOCK_ATIME) &&
3952 ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
3953 continue;
3954
3955
3956
3957
3958
3959 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
3960 struct inode *inode = child->mnt_mountpoint->d_inode;
3961
3962 if (!(child->mnt.mnt_flags & MNT_LOCKED))
3963 continue;
3964
3965 if (!is_empty_dir_inode(inode))
3966 goto next;
3967 }
3968
3969 *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
3970 MNT_LOCK_ATIME);
3971 visible = true;
3972 goto found;
3973 next: ;
3974 }
3975found:
3976 unlock_ns_list(ns);
3977 up_read(&namespace_sem);
3978 return visible;
3979}
3980
3981static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
3982{
3983 const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
3984 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
3985 unsigned long s_iflags;
3986
3987 if (ns->user_ns == &init_user_ns)
3988 return false;
3989
3990
3991 s_iflags = sb->s_iflags;
3992 if (!(s_iflags & SB_I_USERNS_VISIBLE))
3993 return false;
3994
3995 if ((s_iflags & required_iflags) != required_iflags) {
3996 WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
3997 required_iflags);
3998 return true;
3999 }
4000
4001 return !mnt_already_visible(ns, sb, new_mnt_flags);
4002}
4003
4004bool mnt_may_suid(struct vfsmount *mnt)
4005{
4006
4007
4008
4009
4010
4011
4012
4013 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
4014 current_in_userns(mnt->mnt_sb->s_user_ns);
4015}
4016
4017static struct ns_common *mntns_get(struct task_struct *task)
4018{
4019 struct ns_common *ns = NULL;
4020 struct nsproxy *nsproxy;
4021
4022 task_lock(task);
4023 nsproxy = task->nsproxy;
4024 if (nsproxy) {
4025 ns = &nsproxy->mnt_ns->ns;
4026 get_mnt_ns(to_mnt_ns(ns));
4027 }
4028 task_unlock(task);
4029
4030 return ns;
4031}
4032
/* proc_ns_operations ->put: drop the reference taken by mntns_get(). */
static void mntns_put(struct ns_common *ns)
{
	struct mnt_namespace *mnt_ns = to_mnt_ns(ns);

	put_mnt_ns(mnt_ns);
}
4037
4038static int mntns_install(struct nsset *nsset, struct ns_common *ns)
4039{
4040 struct nsproxy *nsproxy = nsset->nsproxy;
4041 struct fs_struct *fs = nsset->fs;
4042 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
4043 struct user_namespace *user_ns = nsset->cred->user_ns;
4044 struct path root;
4045 int err;
4046
4047 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
4048 !ns_capable(user_ns, CAP_SYS_CHROOT) ||
4049 !ns_capable(user_ns, CAP_SYS_ADMIN))
4050 return -EPERM;
4051
4052 if (is_anon_ns(mnt_ns))
4053 return -EINVAL;
4054
4055 if (fs->users != 1)
4056 return -EINVAL;
4057
4058 get_mnt_ns(mnt_ns);
4059 old_mnt_ns = nsproxy->mnt_ns;
4060 nsproxy->mnt_ns = mnt_ns;
4061
4062
4063 err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
4064 "/", LOOKUP_DOWN, &root);
4065 if (err) {
4066
4067 nsproxy->mnt_ns = old_mnt_ns;
4068 put_mnt_ns(mnt_ns);
4069 return err;
4070 }
4071
4072 put_mnt_ns(old_mnt_ns);
4073
4074
4075 set_fs_pwd(fs, &root);
4076 set_fs_root(fs, &root);
4077
4078 path_put(&root);
4079 return 0;
4080}
4081
4082static struct user_namespace *mntns_owner(struct ns_common *ns)
4083{
4084 return to_mnt_ns(ns)->user_ns;
4085}
4086
4087const struct proc_ns_operations mntns_operations = {
4088 .name = "mnt",
4089 .type = CLONE_NEWNS,
4090 .get = mntns_get,
4091 .put = mntns_put,
4092 .install = mntns_install,
4093 .owner = mntns_owner,
4094};
4095