/*
 *  linux/fs/namespace.c
 *
 *  (C) Copyright Al Viro 2000, 2001
 *	Released under GPL v2.
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */

#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/fs_struct.h>
#include <linux/fsnotify.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/bootmem.h>
#include <linux/task_work.h>
#include <linux/sched/task.h>

#include "pnode.h"
#include "internal.h"

/* Maximum number of mounts in a mount namespace */
unsigned int sysctl_mount_max __read_mostly = 100000;

static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
static unsigned int mp_hash_mask __read_mostly;
static unsigned int mp_hash_shift __read_mostly;

static __initdata unsigned long mhash_entries;
static int __init set_mhash_entries(char *str)
{
	if (!str)
		return 0;
	mhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mhash_entries=", set_mhash_entries);

static __initdata unsigned long mphash_entries;
static int __init set_mphash_entries(char *str)
{
	if (!str)
		return 0;
	mphash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mphash_entries=", set_mphash_entries);
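
/*
 * Illustrative note: both hash table sizes can be pinned from the boot
 * command line, e.g. "mhash_entries=131072 mphash_entries=16384".
 * When the parameters are absent the tables are sized automatically
 * from available memory at boot (by mnt_init(), later in this file,
 * outside the section shown here).
 */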

static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
static DEFINE_SPINLOCK(mnt_id_lock);
static int mnt_id_start = 0;
static int mnt_group_start = 1;

static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);

/* /sys/fs */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);

/*
 * vfsmount lock may be taken for read to prevent changes to the
 * vfsmount hash, its serial number, and the dentry tree structure.
 *
 * It should be taken for write mode if the mount hash needs to be
 * changed, i.e. when mounts are attached, detached or torn down.
 */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);

static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> m_hash_shift);
	return &mount_hashtable[tmp & m_hash_mask];
}

static inline struct hlist_head *mp_hash(struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> mp_hash_shift);
	return &mountpoint_hashtable[tmp & mp_hash_mask];
}

static int mnt_alloc_id(struct mount *mnt)
{
	int res;

retry:
	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
	spin_lock(&mnt_id_lock);
	res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
	if (!res)
		mnt_id_start = mnt->mnt_id + 1;
	spin_unlock(&mnt_id_lock);
	if (res == -EAGAIN)
		goto retry;

	return res;
}

static void mnt_free_id(struct mount *mnt)
{
	int id = mnt->mnt_id;
	spin_lock(&mnt_id_lock);
	ida_remove(&mnt_id_ida, id);
	if (mnt_id_start > id)
		mnt_id_start = id;
	spin_unlock(&mnt_id_lock);
}

/*
 * Allocate a new peer group ID
 *
 * mnt_group_ida is protected by namespace_sem
 */
static int mnt_alloc_group_id(struct mount *mnt)
{
	int res;

	if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
		return -ENOMEM;

	res = ida_get_new_above(&mnt_group_ida,
				mnt_group_start,
				&mnt->mnt_group_id);
	if (!res)
		mnt_group_start = mnt->mnt_group_id + 1;

	return res;
}

/*
 * Release a peer group ID
 */
void mnt_release_group_id(struct mount *mnt)
{
	int id = mnt->mnt_group_id;
	ida_remove(&mnt_group_ida, id);
	if (mnt_group_start > id)
		mnt_group_start = id;
	mnt->mnt_group_id = 0;
}

/*
 * vfsmount lock must be held for read
 */
static inline void mnt_add_count(struct mount *mnt, int n)
{
#ifdef CONFIG_SMP
	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
#else
	preempt_disable();
	mnt->mnt_count += n;
	preempt_enable();
#endif
}

/*
 * vfsmount lock must be held for write
 */
unsigned int mnt_get_count(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
	}

	return count;
#else
	return mnt->mnt_count;
#endif
}

static void drop_mountpoint(struct fs_pin *p)
{
	struct mount *m = container_of(p, struct mount, mnt_umount);
	dput(m->mnt_ex_mountpoint);
	pin_remove(p);
	mntput(&m->mnt);
}

static struct mount *alloc_vfsmnt(const char *name)
{
	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

#ifdef CONFIG_SMP
		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
		if (!mnt->mnt_pcp)
			goto out_free_devname;

		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else
		mnt->mnt_count = 1;
		mnt->mnt_writers = 0;
#endif

		INIT_HLIST_NODE(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
		INIT_HLIST_NODE(&mnt->mnt_mp_list);
		INIT_LIST_HEAD(&mnt->mnt_umounting);
		init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
	}
	return mnt;

#ifdef CONFIG_SMP
out_free_devname:
	kfree_const(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}

/*
 * Most r/o checks on a vfsmount can take place on a per-cpu basis.
 * The per-cpu writer counts are only summed up (under MNT_WRITE_HOLD)
 * when the mount is being flipped read-only, so the common want/drop
 * write paths stay lock-free.
 */

/*
 * __mnt_is_readonly: check whether a mount is read-only
 * @mnt: the mount to check for its write status
 *
 * This shouldn't be used by most callers; use mnt_want_write() and
 * friends instead, which also account for an in-progress remount-ro
 * (see mnt_is_readonly() below).
 */
int __mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_flags & MNT_READONLY)
		return 1;
	if (sb_rdonly(mnt->mnt_sb))
		return 1;
	return 0;
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

static inline void mnt_inc_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers++;
#endif
}

static inline void mnt_dec_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers--;
#endif
}

static unsigned int mnt_get_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
	}

	return count;
#else
	return mnt->mnt_writers;
#endif
}

static int mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_sb->s_readonly_remount)
		return 1;
	/* Order wrt setting s_flags/s_readonly_remount in do_remount() */
	smp_rmb();
	return __mnt_is_readonly(mnt);
}

/**
 * __mnt_want_write - get write access to a mount without freeze protection
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed to
 * it, and makes sure that writes are allowed (mnt is read-write) before
 * returning success. This operation does not protect against filesystem being
 * frozen. When the write operation is finished, __mnt_drop_write() must be
 * called. This is effectively a refcount.
 */
int __mnt_want_write(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int ret = 0;

	preempt_disable();
	mnt_inc_writers(mnt);
	/*
	 * The store to mnt_inc_writers must be visible before we pass
	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
	 * incremented count after it has set MNT_WRITE_HOLD.
	 */
	smp_mb();
	while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
		cpu_relax();
	/*
	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
	 * be set to match its requirements. So we must not load that until
	 * MNT_WRITE_HOLD is cleared.
	 */
	smp_rmb();
	if (mnt_is_readonly(m)) {
		mnt_dec_writers(mnt);
		ret = -EROFS;
	}
	preempt_enable();

	return ret;
}

/**
 * mnt_want_write - get write access to a mount
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed to
 * it, and makes sure that writes are allowed (mount is read-write, filesystem
 * is not frozen) before returning success.  When the write operation is
 * finished, mnt_drop_write() must be called.  This is effectively a refcount.
 */
int mnt_want_write(struct vfsmount *m)
{
	int ret;

	sb_start_write(m->mnt_sb);
	ret = __mnt_want_write(m);
	if (ret)
		sb_end_write(m->mnt_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
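
/*
 * Typical usage (a minimal sketch, not lifted from a specific caller):
 * modifications made through a vfsmount are bracketed by the want/drop
 * pair, e.g.
 *
 *	err = mnt_want_write(path->mnt);
 *	if (err)
 *		return err;
 *	err = vfs_unlink(dir, dentry, NULL);	(or any other write)
 *	mnt_drop_write(path->mnt);
 *
 * so a concurrent remount-ro or freeze either waits for the write or
 * fails this call with -EROFS.
 */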

/**
 * mnt_clone_write - get write access to a mount
 * @mnt: the mount on which to take a write
 *
 * This is effectively like mnt_want_write, except
 * it must only be used to take an extra write reference
 * inside a filesystem that is already making a write.
 *
 * After finished, mnt_drop_write must be called as usual to
 * drop the reference.
 */
int mnt_clone_write(struct vfsmount *mnt)
{
	/* superblock may be r/o */
	if (__mnt_is_readonly(mnt))
		return -EROFS;
	preempt_disable();
	mnt_inc_writers(real_mount(mnt));
	preempt_enable();
	return 0;
}
EXPORT_SYMBOL_GPL(mnt_clone_write);

/**
 * __mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like __mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already
 */
int __mnt_want_write_file(struct file *file)
{
	if (!(file->f_mode & FMODE_WRITER))
		return __mnt_want_write(file->f_path.mnt);
	else
		return mnt_clone_write(file->f_path.mnt);
}

/**
 * mnt_want_write_file_path - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already.
 * Freeze protection is taken on the superblock of file->f_path.mnt;
 * see mnt_want_write_file() below for the overlayfs-aware variant.
 */
int mnt_want_write_file_path(struct file *file)
{
	int ret;

	sb_start_write(file->f_path.mnt->mnt_sb);
	ret = __mnt_want_write_file(file);
	if (ret)
		sb_end_write(file->f_path.mnt->mnt_sb);
	return ret;
}

static inline int may_write_real(struct file *file)
{
	struct dentry *dentry = file->f_path.dentry;
	struct dentry *upperdentry;

	/* Writable file? */
	if (file->f_mode & FMODE_WRITER)
		return 0;

	/* Not overlayfs? */
	if (likely(!(dentry->d_flags & DCACHE_OP_REAL)))
		return 0;

	/* File refers to upper, writable layer? */
	upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
	if (upperdentry &&
	    (file_inode(file) == d_inode(upperdentry) ||
	     file_inode(file) == d_inode(dentry)))
		return 0;

	/* Lower layer: can't write to real file, sorry... */
	return -EPERM;
}

/**
 * mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already.
 *
 * Unlike mnt_want_write_file_path(), this refuses writes to files that
 * resolve to a read-only lower layer of an overlay (see
 * may_write_real() above), and takes freeze protection on the
 * superblock of the file's inode rather than that of the mount.
 */
int mnt_want_write_file(struct file *file)
{
	int ret;

	ret = may_write_real(file);
	if (!ret) {
		sb_start_write(file_inode(file)->i_sb);
		ret = __mnt_want_write_file(file);
		if (ret)
			sb_end_write(file_inode(file)->i_sb);
	}
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);

/**
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done
 * performing writes to it.  Must be matched with
 * __mnt_want_write() call above.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	preempt_disable();
	mnt_dec_writers(real_mount(mnt));
	preempt_enable();
}

/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it and
 * also allows filesystem to be frozen again.  Must be matched with
 * mnt_want_write() call above.
 */
void mnt_drop_write(struct vfsmount *mnt)
{
	__mnt_drop_write(mnt);
	sb_end_write(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

void __mnt_drop_write_file(struct file *file)
{
	__mnt_drop_write(file->f_path.mnt);
}

void mnt_drop_write_file_path(struct file *file)
{
	mnt_drop_write(file->f_path.mnt);
}

void mnt_drop_write_file(struct file *file)
{
	__mnt_drop_write(file->f_path.mnt);
	sb_end_write(file_inode(file)->i_sb);
}
EXPORT_SYMBOL(mnt_drop_write_file);

static int mnt_make_readonly(struct mount *mnt)
{
	int ret = 0;

	lock_mount_hash();
	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
	/*
	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
	 * should be visible before we do.
	 */
	smp_mb();

	/*
	 * With writers on hold, if this value is zero, then there are
	 * definitely no active writers (although held writers may subsequently
	 * increment the count, they'll have to wait, and decrement it after
	 * seeing MNT_READONLY).
	 *
	 * It is OK to have counter incremented on one CPU and decremented on
	 * another: the sum will add up correctly. The danger would be when we
	 * sum up each counter, if we read a counter before it is incremented,
	 * but then read another CPU's count which it has been subsequently
	 * decremented from -- we would see more decrements than we should.
	 * MNT_WRITE_HOLD protects against this scenario, because
	 * mnt_want_write first increments count, then smp_mb, then spins on
	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
	 * we're counting up here.
	 */
	if (mnt_get_writers(mnt) > 0)
		ret = -EBUSY;
	else
		mnt->mnt.mnt_flags |= MNT_READONLY;
	/*
	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
	 * that become unheld will see MNT_READONLY.
	 */
	smp_wmb();
	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	unlock_mount_hash();
	return ret;
}

static void __mnt_unmake_readonly(struct mount *mnt)
{
	lock_mount_hash();
	mnt->mnt.mnt_flags &= ~MNT_READONLY;
	unlock_mount_hash();
}

int sb_prepare_remount_readonly(struct super_block *sb)
{
	struct mount *mnt;
	int err = 0;

	/* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
	if (atomic_long_read(&sb->s_remove_count))
		return -EBUSY;

	lock_mount_hash();
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
			mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
			smp_mb();
			if (mnt_get_writers(mnt) > 0) {
				err = -EBUSY;
				break;
			}
		}
	}
	if (!err && atomic_long_read(&sb->s_remove_count))
		err = -EBUSY;

	if (!err) {
		sb->s_readonly_remount = 1;
		smp_wmb();
	}
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	}
	unlock_mount_hash();

	return err;
}

static void free_vfsmnt(struct mount *mnt)
{
	kfree_const(mnt->mnt_devname);
#ifdef CONFIG_SMP
	free_percpu(mnt->mnt_pcp);
#endif
	kmem_cache_free(mnt_cache, mnt);
}

static void delayed_free_vfsmnt(struct rcu_head *head)
{
	free_vfsmnt(container_of(head, struct mount, mnt_rcu));
}

/* call under rcu_read_lock */
int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
	struct mount *mnt;
	if (read_seqretry(&mount_lock, seq))
		return 1;
	if (bastard == NULL)
		return 0;
	mnt = real_mount(bastard);
	mnt_add_count(mnt, 1);
	smp_mb();			/* see mntput_no_expire() */
	if (likely(!read_seqretry(&mount_lock, seq)))
		return 0;
	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
		mnt_add_count(mnt, -1);
		return 1;
	}
	lock_mount_hash();
	if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
		mnt_add_count(mnt, -1);
		unlock_mount_hash();
		return 1;
	}
	unlock_mount_hash();
	/* caller will mntput() */
	return -1;
}

/* call under rcu_read_lock */
bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
	int res = __legitimize_mnt(bastard, seq);
	if (likely(!res))
		return true;
	if (unlikely(res < 0)) {
		rcu_read_unlock();
		mntput(bastard);
		rcu_read_lock();
	}
	return false;
}

/*
 * find the first mount at @dentry on vfsmount @mnt.
 * call under rcu_read_lock()
 */
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct hlist_head *head = m_hash(mnt, dentry);
	struct mount *p;

	hlist_for_each_entry_rcu(p, head, mnt_hash)
		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
			return p;
	return NULL;
}

/*
 * lookup_mnt - Return the first child mount mounted at path
 *
 * "First" means first mounted chronologically.  If you create the
 * following mounts:
 *
 * mount /dev/sda1 /mnt
 * mount /dev/sda2 /mnt
 * mount /dev/sda3 /mnt
 *
 * Then lookup_mnt() on the base /mnt dentry in the root mount will
 * return successively the root dentry and vfsmount of /dev/sda1, then
 * /dev/sda2, then /dev/sda3, then NULL.
 *
 * lookup_mnt takes a reference to the found vfsmount.
 */
struct vfsmount *lookup_mnt(const struct path *path)
{
	struct mount *child_mnt;
	struct vfsmount *m;
	unsigned seq;

	rcu_read_lock();
	do {
		seq = read_seqbegin(&mount_lock);
		child_mnt = __lookup_mnt(path->mnt, path->dentry);
		m = child_mnt ? &child_mnt->mnt : NULL;
	} while (!legitimize_mnt(m, seq));
	rcu_read_unlock();
	return m;
}
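
/*
 * A typical consumer (a rough sketch of what fs/namei.c's follow_mount()
 * does; not copied verbatim) walks down a stack of mounts like this:
 *
 *	while (d_mountpoint(path->dentry)) {
 *		struct vfsmount *mounted = lookup_mnt(path);
 *		if (!mounted)
 *			break;
 *		dput(path->dentry);
 *		mntput(path->mnt);
 *		path->mnt = mounted;
 *		path->dentry = dget(mounted->mnt_root);
 *	}
 *
 * relying on the reference lookup_mnt() already took for it.
 */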

/*
 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
 *                         current mount namespace.
 *
 * The mount_hashtable is not usable in the context because we
 * need to identify all mounts that may be in the current mount
 * namespace not just a mount that happens to have some specified
 * parent mount.
 */
bool __is_local_mountpoint(struct dentry *dentry)
{
	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
	struct mount *mnt;
	bool is_covered = false;

	if (!d_mountpoint(dentry))
		goto out;

	down_read(&namespace_sem);
	list_for_each_entry(mnt, &ns->list, mnt_list) {
		is_covered = (mnt->mnt_mountpoint == dentry);
		if (is_covered)
			break;
	}
	up_read(&namespace_sem);
out:
	return is_covered;
}

static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
{
	struct hlist_head *chain = mp_hash(dentry);
	struct mountpoint *mp;

	hlist_for_each_entry(mp, chain, m_hash) {
		if (mp->m_dentry == dentry) {
			/* might be worth a WARN_ON() */
			if (d_unlinked(dentry))
				return ERR_PTR(-ENOENT);
			mp->m_count++;
			return mp;
		}
	}
	return NULL;
}

static struct mountpoint *get_mountpoint(struct dentry *dentry)
{
	struct mountpoint *mp, *new = NULL;
	int ret;

	if (d_mountpoint(dentry)) {
mountpoint:
		read_seqlock_excl(&mount_lock);
		mp = lookup_mountpoint(dentry);
		read_sequnlock_excl(&mount_lock);
		if (mp)
			goto done;
	}

	if (!new)
		new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
	if (!new)
		return ERR_PTR(-ENOMEM);

	/* Exactly one process may set d_mounted */
	ret = d_set_mounted(dentry);

	/* Someone else set d_mounted? */
	if (ret == -EBUSY)
		goto mountpoint;

	/* The dentry is not available as a mountpoint? */
	mp = ERR_PTR(ret);
	if (ret)
		goto done;

	/* Add the new mountpoint to the hash table */
	read_seqlock_excl(&mount_lock);
	new->m_dentry = dentry;
	new->m_count = 1;
	hlist_add_head(&new->m_hash, mp_hash(dentry));
	INIT_HLIST_HEAD(&new->m_list);
	read_sequnlock_excl(&mount_lock);

	mp = new;
	new = NULL;
done:
	kfree(new);
	return mp;
}

static void put_mountpoint(struct mountpoint *mp)
{
	if (!--mp->m_count) {
		struct dentry *dentry = mp->m_dentry;
		BUG_ON(!hlist_empty(&mp->m_list));
		spin_lock(&dentry->d_lock);
		dentry->d_flags &= ~DCACHE_MOUNTED;
		spin_unlock(&dentry->d_lock);
		hlist_del(&mp->m_hash);
		kfree(mp);
	}
}

static inline int check_mnt(struct mount *mnt)
{
	return mnt->mnt_ns == current->nsproxy->mnt_ns;
}

/*
 * vfsmount lock must be held for write
 */
static void touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns) {
		ns->event = ++event;
		wake_up_interruptible(&ns->poll);
	}
}

/*
 * vfsmount lock must be held for write
 */
static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns && ns->event != event) {
		ns->event = event;
		wake_up_interruptible(&ns->poll);
	}
}

/*
 * vfsmount lock must be held for write
 */
static void unhash_mnt(struct mount *mnt)
{
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	list_del_init(&mnt->mnt_child);
	hlist_del_init_rcu(&mnt->mnt_hash);
	hlist_del_init(&mnt->mnt_mp_list);
	put_mountpoint(mnt->mnt_mp);
	mnt->mnt_mp = NULL;
}

/*
 * vfsmount lock must be held for write
 */
static void detach_mnt(struct mount *mnt, struct path *old_path)
{
	old_path->dentry = mnt->mnt_mountpoint;
	old_path->mnt = &mnt->mnt_parent->mnt;
	unhash_mnt(mnt);
}

/*
 * vfsmount lock must be held for write
 */
static void umount_mnt(struct mount *mnt)
{
	/* old mountpoint will be dropped when we can do that */
	mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
	unhash_mnt(mnt);
}

/*
 * vfsmount lock must be held for write
 */
void mnt_set_mountpoint(struct mount *mnt,
			struct mountpoint *mp,
			struct mount *child_mnt)
{
	mp->m_count++;
	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
	child_mnt->mnt_mountpoint = dget(mp->m_dentry);
	child_mnt->mnt_parent = mnt;
	child_mnt->mnt_mp = mp;
	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}

static void __attach_mnt(struct mount *mnt, struct mount *parent)
{
	hlist_add_head_rcu(&mnt->mnt_hash,
			   m_hash(&parent->mnt, mnt->mnt_mountpoint));
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
}

/*
 * vfsmount lock must be held for write
 */
static void attach_mnt(struct mount *mnt,
			struct mount *parent,
			struct mountpoint *mp)
{
	mnt_set_mountpoint(parent, mp, mnt);
	__attach_mnt(mnt, parent);
}

void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
{
	struct mountpoint *old_mp = mnt->mnt_mp;
	struct dentry *old_mountpoint = mnt->mnt_mountpoint;
	struct mount *old_parent = mnt->mnt_parent;

	list_del_init(&mnt->mnt_child);
	hlist_del_init(&mnt->mnt_mp_list);
	hlist_del_init_rcu(&mnt->mnt_hash);

	attach_mnt(mnt, parent, mp);

	put_mountpoint(old_mp);

	/*
	 * Safely avoid even the suggestion this code might sleep or
	 * lock the mount hash by taking advantage of the knowledge that
	 * mnt_change_mountpoint will not release the final reference
	 * to a mountpoint.
	 *
	 * During mounting, the mount passed in as the parent mount will
	 * continue to use the old mountpoint and during unmounting, the
	 * old mountpoint will continue to exist until namespace_unlock,
	 * which happens only after lock_mount_hash is released.
	 */
	spin_lock(&old_mountpoint->d_lock);
	old_mountpoint->d_lockref.count--;
	spin_unlock(&old_mountpoint->d_lock);

	mnt_add_count(old_parent, -1);
}

/*
 * vfsmount lock must be held for write
 */
static void commit_tree(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m;
	LIST_HEAD(head);
	struct mnt_namespace *n = parent->mnt_ns;

	BUG_ON(parent == mnt);

	list_add_tail(&head, &mnt->mnt_list);
	list_for_each_entry(m, &head, mnt_list)
		m->mnt_ns = n;

	list_splice(&head, n->list.prev);

	n->mounts += n->pending_mounts;
	n->pending_mounts = 0;

	__attach_mnt(mnt, parent);
	touch_mnt_namespace(n);
}

static struct mount *next_mnt(struct mount *p, struct mount *root)
{
	struct list_head *next = p->mnt_mounts.next;
	if (next == &p->mnt_mounts) {
		while (1) {
			if (p == root)
				return NULL;
			next = p->mnt_child.next;
			if (next != &p->mnt_parent->mnt_mounts)
				break;
			p = p->mnt_parent;
		}
	}
	return list_entry(next, struct mount, mnt_child);
}

static struct mount *skip_mnt_tree(struct mount *p)
{
	struct list_head *prev = p->mnt_mounts.prev;
	while (prev != &p->mnt_mounts) {
		p = list_entry(prev, struct mount, mnt_child);
		prev = p->mnt_mounts.prev;
	}
	return p;
}

struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct mount *mnt;
	struct dentry *root;

	if (!type)
		return ERR_PTR(-ENODEV);

	mnt = alloc_vfsmnt(name);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flags & SB_KERNMOUNT)
		mnt->mnt.mnt_flags = MNT_INTERNAL;

	root = mount_fs(type, flags, name, data);
	if (IS_ERR(root)) {
		mnt_free_id(mnt);
		free_vfsmnt(mnt);
		return ERR_CAST(root);
	}

	mnt->mnt.mnt_root = root;
	mnt->mnt.mnt_sb = root->d_sb;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
	unlock_mount_hash();
	return &mnt->mnt;
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);
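
/*
 * Example of in-kernel usage (a minimal sketch): kern_mount() and
 * friends boil down to this call,
 *
 *	mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
 *	if (IS_ERR(mnt))
 *		return PTR_ERR(mnt);
 *
 * and the returned vfsmount, never attached to any namespace tree, is
 * eventually released with mntput().
 */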

struct vfsmount *
vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
	     const char *name, void *data)
{
	/* Until it is worked out how to pass the user namespace
	 * through from the parent mount to the submount don't support
	 * unprivileged mounts with submounts.
	 */
	if (mountpoint->d_sb->s_user_ns != &init_user_ns)
		return ERR_PTR(-EPERM);

	return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
}
EXPORT_SYMBOL_GPL(vfs_submount);

static struct mount *clone_mnt(struct mount *old, struct dentry *root,
					int flag)
{
	struct super_block *sb = old->mnt.mnt_sb;
	struct mount *mnt;
	int err;

	mnt = alloc_vfsmnt(old->mnt_devname);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
		mnt->mnt_group_id = 0; /* not a peer of original */
	else
		mnt->mnt_group_id = old->mnt_group_id;

	if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
		err = mnt_alloc_group_id(mnt);
		if (err)
			goto out_free;
	}

	mnt->mnt.mnt_flags = old->mnt.mnt_flags;
	mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
	/* Don't allow unprivileged users to change mount flags */
	if (flag & CL_UNPRIVILEGED) {
		mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;

		if (mnt->mnt.mnt_flags & MNT_READONLY)
			mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;

		if (mnt->mnt.mnt_flags & MNT_NODEV)
			mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;

		if (mnt->mnt.mnt_flags & MNT_NOSUID)
			mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;

		if (mnt->mnt.mnt_flags & MNT_NOEXEC)
			mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
	}

	/* Don't allow unprivileged users to reveal what is under a mount */
	if ((flag & CL_UNPRIVILEGED) &&
	    (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
		mnt->mnt.mnt_flags |= MNT_LOCKED;

	atomic_inc(&sb->s_active);
	mnt->mnt.mnt_sb = sb;
	mnt->mnt.mnt_root = dget(root);
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
	unlock_mount_hash();

	if ((flag & CL_SLAVE) ||
	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
		list_add(&mnt->mnt_slave, &old->mnt_slave_list);
		mnt->mnt_master = old;
		CLEAR_MNT_SHARED(mnt);
	} else if (!(flag & CL_PRIVATE)) {
		if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
			list_add(&mnt->mnt_share, &old->mnt_share);
		if (IS_MNT_SLAVE(old))
			list_add(&mnt->mnt_slave, &old->mnt_slave);
		mnt->mnt_master = old->mnt_master;
	} else {
		CLEAR_MNT_SHARED(mnt);
	}
	if (flag & CL_MAKE_SHARED)
		set_mnt_shared(mnt);

	/* stick the duplicate mount on the same expiry list
	 * as the original if that was on one */
	if (flag & CL_EXPIRE) {
		if (!list_empty(&old->mnt_expire))
			list_add(&mnt->mnt_expire, &old->mnt_expire);
	}

	return mnt;

 out_free:
	mnt_free_id(mnt);
	free_vfsmnt(mnt);
	return ERR_PTR(err);
}

static void cleanup_mnt(struct mount *mnt)
{
	/*
	 * A non-zero writer count here probably indicates that somebody
	 * messed up a mnt_want/drop_write() pair.  If this WARN triggers,
	 * the filesystem was probably unable to make r/w->r/o transitions.
	 *
	 * The locking used to deal with mnt_count decrement provides barriers,
	 * so mnt_get_writers() below is safe.
	 */
	WARN_ON(mnt_get_writers(mnt));
	if (unlikely(mnt->mnt_pins.first))
		mnt_pin_kill(mnt);
	fsnotify_vfsmount_delete(&mnt->mnt);
	dput(mnt->mnt.mnt_root);
	deactivate_super(mnt->mnt.mnt_sb);
	mnt_free_id(mnt);
	call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
}

static void __cleanup_mnt(struct rcu_head *head)
{
	cleanup_mnt(container_of(head, struct mount, mnt_rcu));
}

static LLIST_HEAD(delayed_mntput_list);
static void delayed_mntput(struct work_struct *unused)
{
	struct llist_node *node = llist_del_all(&delayed_mntput_list);
	struct mount *m, *t;

	llist_for_each_entry_safe(m, t, node, mnt_llist)
		cleanup_mnt(m);
}
static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);

static void mntput_no_expire(struct mount *mnt)
{
	rcu_read_lock();
	if (likely(READ_ONCE(mnt->mnt_ns))) {
		/*
		 * Since we don't do lock_mount_hash() here,
		 * ->mnt_ns can change under us.  However, if it's
		 * non-NULL, then there's a reference that won't
		 * be dropped until after an RCU delay done after
		 * turning ->mnt_ns NULL.  So if we observe it
		 * non-NULL under rcu_read_lock(), the reference
		 * we are dropping is not the final one.
		 */
		mnt_add_count(mnt, -1);
		rcu_read_unlock();
		return;
	}
	lock_mount_hash();
	/*
	 * make sure that if __legitimize_mnt() has not seen us grab
	 * mount_lock, we'll see their refcount increment here.
	 */
	smp_mb();
	mnt_add_count(mnt, -1);
	if (mnt_get_count(mnt)) {
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	mnt->mnt.mnt_flags |= MNT_DOOMED;
	rcu_read_unlock();

	list_del(&mnt->mnt_instance);

	if (unlikely(!list_empty(&mnt->mnt_mounts))) {
		struct mount *p, *tmp;
		list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
			umount_mnt(p);
		}
	}
	unlock_mount_hash();

	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
		struct task_struct *task = current;
		if (likely(!(task->flags & PF_KTHREAD))) {
			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
			if (!task_work_add(task, &mnt->mnt_rcu, true))
				return;
		}
		if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
			schedule_delayed_work(&delayed_mntput_work, 1);
		return;
	}
	cleanup_mnt(mnt);
}

void mntput(struct vfsmount *mnt)
{
	if (mnt) {
		struct mount *m = real_mount(mnt);
		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
		if (unlikely(m->mnt_expiry_mark))
			m->mnt_expiry_mark = 0;
		mntput_no_expire(m);
	}
}
EXPORT_SYMBOL(mntput);

struct vfsmount *mntget(struct vfsmount *mnt)
{
	if (mnt)
		mnt_add_count(real_mount(mnt), 1);
	return mnt;
}
EXPORT_SYMBOL(mntget);
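
/*
 * Note on reference pairing: every mntget() (and every reference handed
 * out by lookup_mnt(), collect_mounts(), clone_private_mount() and the
 * like) must eventually be balanced by a mntput(); the final mntput()
 * is what triggers the teardown in mntput_no_expire() above.
 */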

/*
 * path_is_mountpoint() - Check if path is a mount in the current
 *                        namespace.
 *
 * d_mountpoint() can only be used reliably to establish if a dentry is
 * not mounted in any namespace and that common case is handled inline.
 * d_mountpoint() isn't aware of the possibility there may be multiple
 * mounts using a given dentry in a different namespace. This function
 * checks if the passed in path is a mountpoint rather than the mount
 * point dentry.
 */
static bool __path_is_mountpoint(const struct path *path)
{
	/* Minimal helper (reconstructed): the mount hash tells us whether
	 * anything is mounted on this exact (mnt, dentry) pair.  Call
	 * under rcu_read_lock().
	 */
	return __lookup_mnt(path->mnt, path->dentry) != NULL;
}

bool path_is_mountpoint(const struct path *path)
{
	unsigned seq;
	bool res;

	if (!d_mountpoint(path->dentry))
		return false;

	rcu_read_lock();
	do {
		seq = read_seqbegin(&mount_lock);
		res = __path_is_mountpoint(path);
	} while (read_seqretry(&mount_lock, seq));
	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL(path_is_mountpoint);

struct vfsmount *mnt_clone_internal(const struct path *path)
{
	struct mount *p;
	p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
	if (IS_ERR(p))
		return ERR_CAST(p);
	p->mnt.mnt_flags |= MNT_INTERNAL;
	return &p->mnt;
}

#ifdef CONFIG_PROC_FS
/* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct proc_mounts *p = m->private;

	down_read(&namespace_sem);
	if (p->cached_event == p->ns->event) {
		void *v = p->cached_mount;
		if (*pos == p->cached_index)
			return v;
		if (*pos == p->cached_index + 1) {
			v = seq_list_next(v, &p->ns->list, &p->cached_index);
			return p->cached_mount = v;
		}
	}

	p->cached_event = p->ns->event;
	p->cached_mount = seq_list_start(&p->ns->list, *pos);
	p->cached_index = *pos;
	return p->cached_mount;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_mounts *p = m->private;

	p->cached_mount = seq_list_next(v, &p->ns->list, pos);
	p->cached_index = *pos;
	return p->cached_mount;
}

static void m_stop(struct seq_file *m, void *v)
{
	up_read(&namespace_sem);
}

static int m_show(struct seq_file *m, void *v)
{
	struct proc_mounts *p = m->private;
	struct mount *r = list_entry(v, struct mount, mnt_list);
	return p->show(m, &r->mnt);
}

const struct seq_operations mounts_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= m_show,
};
#endif  /* CONFIG_PROC_FS */

/**
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
int may_umount_tree(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int actual_refs = 0;
	int minimum_refs = 0;
	struct mount *p;
	BUG_ON(!m);

	/* write lock needed for mnt_get_count */
	lock_mount_hash();
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		actual_refs += mnt_get_count(p);
		minimum_refs += 2;
	}
	unlock_mount_hash();

	if (actual_refs > minimum_refs)
		return 0;

	return 1;
}

EXPORT_SYMBOL(may_umount_tree);

/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
int may_umount(struct vfsmount *mnt)
{
	int ret = 1;
	down_read(&namespace_sem);
	lock_mount_hash();
	if (propagate_mount_busy(real_mount(mnt), 2))
		ret = 0;
	unlock_mount_hash();
	up_read(&namespace_sem);
	return ret;
}

EXPORT_SYMBOL(may_umount);

static HLIST_HEAD(unmounted);	/* protected by namespace_sem */

static void namespace_unlock(void)
{
	struct hlist_head head;

	hlist_move_list(&unmounted, &head);

	up_write(&namespace_sem);

	if (likely(hlist_empty(&head)))
		return;

	synchronize_rcu();

	group_pin_kill(&head);
}

static inline void namespace_lock(void)
{
	down_write(&namespace_sem);
}

enum umount_tree_flags {
	UMOUNT_SYNC = 1,
	UMOUNT_PROPAGATE = 2,
	UMOUNT_CONNECTED = 4,
};

static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
{
	/* Leaving mounts connected is only valid for lazy umounts */
	if (how & UMOUNT_SYNC)
		return true;

	/* A mount without a parent has nothing to be connected to */
	if (!mnt_has_parent(mnt))
		return true;

	/* Because the reference counting rules change when mounts are
	 * unmounted and connected, umounted mounts may not be
	 * connected to mounted mounts.
	 */
	if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
		return true;

	/* Has it been requested that the mount remain connected? */
	if (how & UMOUNT_CONNECTED)
		return false;

	/* Is the mount locked such that it needs to remain connected? */
	if (IS_MNT_LOCKED(mnt))
		return false;

	/* By default disconnect the mount */
	return true;
}

/*
 * mount_lock must be held
 * namespace_sem must be held for write
 */
static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
{
	LIST_HEAD(tmp_list);
	struct mount *p;

	if (how & UMOUNT_PROPAGATE)
		propagate_mount_unlock(mnt);

	/* Gather the mounts to umount */
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		p->mnt.mnt_flags |= MNT_UMOUNT;
		list_move(&p->mnt_list, &tmp_list);
	}

	/* Hide the mounts from mnt_mounts */
	list_for_each_entry(p, &tmp_list, mnt_list) {
		list_del_init(&p->mnt_child);
	}

	/* Add propagated mounts to the tmp_list */
	if (how & UMOUNT_PROPAGATE)
		propagate_umount(&tmp_list);

	while (!list_empty(&tmp_list)) {
		struct mnt_namespace *ns;
		bool disconnect;
		p = list_first_entry(&tmp_list, struct mount, mnt_list);
		list_del_init(&p->mnt_expire);
		list_del_init(&p->mnt_list);
		ns = p->mnt_ns;
		if (ns) {
			ns->mounts--;
			__touch_mnt_namespace(ns);
		}
		p->mnt_ns = NULL;
		if (how & UMOUNT_SYNC)
			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;

		disconnect = disconnect_mount(p, how);

		pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
				 disconnect ? &unmounted : NULL);
		if (mnt_has_parent(p)) {
			mnt_add_count(p->mnt_parent, -1);
			if (!disconnect) {
				/* Don't forget about p */
				list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
			} else {
				umount_mnt(p);
			}
		}
		change_mnt_propagation(p, MS_PRIVATE);
	}
}

static void shrink_submounts(struct mount *mnt);

static int do_umount(struct mount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt.mnt_sb;
	int retval;

	retval = security_sb_umount(&mnt->mnt, flags);
	if (retval)
		return retval;

	/*
	 * Allow userspace to request a mountpoint be expired rather than
	 * unmounting unconditionally. Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
	if (flags & MNT_EXPIRE) {
		if (&mnt->mnt == current->fs->root.mnt ||
		    flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		/*
		 * probably don't strictly need the lock here if we examined
		 * all race cases, but it's a slowpath.
		 */
		lock_mount_hash();
		if (mnt_get_count(mnt) != 2) {
			unlock_mount_hash();
			return -EBUSY;
		}
		unlock_mount_hash();

		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things. In the Unix tradition of
	 * 'Gee thats tricky lets do it in userspace' the umount_begin
	 * might fail to complete on the first run through as other tasks
	 * must return, and the like. Thats for the mount program to worry
	 * about for the moment.
	 */
	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
		sb->s_op->umount_begin(sb);
	}

	/*
	 * "Unmounting" the root of the current namespace doesn't detach
	 * anything; instead it is treated as a request to remount the
	 * filesystem read-only (unless MNT_DETACH was asked for, which is
	 * handled by the normal path below).
	 */
	if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
		if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
			return -EPERM;
		down_write(&sb->s_umount);
		if (!sb_rdonly(sb))
			retval = do_remount_sb(sb, SB_RDONLY, NULL, 0);
		up_write(&sb->s_umount);
		return retval;
	}

	namespace_lock();
	lock_mount_hash();
	event++;

	if (flags & MNT_DETACH) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, UMOUNT_PROPAGATE);
		retval = 0;
	} else {
		shrink_submounts(mnt);
		retval = -EBUSY;
		if (!propagate_mount_busy(mnt, 2)) {
			if (!list_empty(&mnt->mnt_list))
				umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
			retval = 0;
		}
	}
	unlock_mount_hash();
	namespace_unlock();
	return retval;
}

/*
 * __detach_mounts - lazily unmount all mounts on the specified dentry
 *
 * During unlink, rmdir, and d_drop it is possible to lose all of the
 * mounts mounted on a dentry that is about to go away.  Walk the
 * mountpoint's list of mounts and lazily unmount each of them (or, for
 * mounts that are already on their way out, just disconnect them).
 *
 * The caller may hold dentry->d_inode->i_rwsem.
 */
void __detach_mounts(struct dentry *dentry)
{
	struct mountpoint *mp;
	struct mount *mnt;

	namespace_lock();
	lock_mount_hash();
	mp = lookup_mountpoint(dentry);
	if (IS_ERR_OR_NULL(mp))
		goto out_unlock;

	event++;
	while (!hlist_empty(&mp->m_list)) {
		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
		if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
			hlist_add_head(&mnt->mnt_umount.s_list, &unmounted);
			umount_mnt(mnt);
		}
		else umount_tree(mnt, UMOUNT_CONNECTED);
	}
	put_mountpoint(mp);
out_unlock:
	unlock_mount_hash();
	namespace_unlock();
}

/*
 * Is the caller allowed to modify his namespace?
 */
static inline bool may_mount(void)
{
	return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}

static inline bool may_mandlock(void)
{
#ifndef CONFIG_MANDATORY_FILE_LOCKING
	return false;
#endif
	return capable(CAP_SYS_ADMIN);
}

/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
 */
int ksys_umount(char __user *name, int flags)
{
	struct path path;
	struct mount *mnt;
	int retval;
	int lookup_flags = 0;

	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
		return -EINVAL;

	if (!may_mount())
		return -EPERM;

	if (!(flags & UMOUNT_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;

	retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
	if (retval)
		goto out;
	mnt = real_mount(path.mnt);
	retval = -EINVAL;
	if (path.dentry != path.mnt->mnt_root)
		goto dput_and_out;
	if (!check_mnt(mnt))
		goto dput_and_out;
	if (mnt->mnt.mnt_flags & MNT_LOCKED)
		goto dput_and_out;
	retval = -EPERM;
	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
		goto dput_and_out;

	retval = do_umount(mnt, flags);
dput_and_out:
	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
	dput(path.dentry);
	mntput_no_expire(mnt);
out:
	return retval;
}

SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	return ksys_umount(name, flags);
}
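
/*
 * Userspace view (illustrative): glibc's umount2() maps directly onto
 * this syscall, e.g.
 *
 *	umount2("/mnt/data", MNT_DETACH);	detach lazily
 *	umount2("/mnt/data", 0);		regular unmount
 *
 * with 0 falling through to the busy checks in do_umount() above.
 */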

#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 *	The 2.0 compatible umount. No flags.
 */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return ksys_umount(name, 0);
}

#endif

static bool is_mnt_ns_file(struct dentry *dentry)
{
	/* Is this a proxy for a mount namespace? */
	return dentry->d_op == &ns_dentry_operations &&
	       dentry->d_fsdata == &mntns_operations;
}

struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
	return container_of(ns, struct mnt_namespace, ns);
}

static bool mnt_ns_loop(struct dentry *dentry)
{
	/* Could bind mounting the mount namespace inode cause a
	 * mount namespace loop?
	 */
	struct mnt_namespace *mnt_ns;
	if (!is_mnt_ns_file(dentry))
		return false;

	mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}

struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
					int flag)
{
	struct mount *res, *p, *q, *r, *parent;

	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
		return ERR_PTR(-EINVAL);

	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		struct mount *s;
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			if (!(flag & CL_COPY_UNBINDABLE) &&
			    IS_MNT_UNBINDABLE(s)) {
				s = skip_mnt_tree(s);
				continue;
			}
			if (!(flag & CL_COPY_MNT_NS_FILE) &&
			    is_mnt_ns_file(s->mnt.mnt_root)) {
				s = skip_mnt_tree(s);
				continue;
			}
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			parent = q;
			q = clone_mnt(p, p->mnt.mnt_root, flag);
			if (IS_ERR(q))
				goto out;
			lock_mount_hash();
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, parent, p->mnt_mp);
			unlock_mount_hash();
		}
	}
	return res;
out:
	if (res) {
		lock_mount_hash();
		umount_tree(res, UMOUNT_SYNC);
		unlock_mount_hash();
	}
	return q;
}

/* Caller should check returned pointer for errors */

struct vfsmount *collect_mounts(const struct path *path)
{
	struct mount *tree;
	namespace_lock();
	if (!check_mnt(real_mount(path->mnt)))
		tree = ERR_PTR(-EINVAL);
	else
		tree = copy_tree(real_mount(path->mnt), path->dentry,
				 CL_COPY_ALL | CL_PRIVATE);
	namespace_unlock();
	if (IS_ERR(tree))
		return ERR_CAST(tree);
	return &tree->mnt;
}

void drop_collected_mounts(struct vfsmount *mnt)
{
	namespace_lock();
	lock_mount_hash();
	umount_tree(real_mount(mnt), UMOUNT_SYNC);
	unlock_mount_hash();
	namespace_unlock();
}

/**
 * clone_private_mount - create a private clone of a path
 *
 * This creates a new vfsmount, which will be the clone of @path.  The new mount
 * will not be attached anywhere in the namespace and will be private (i.e.
 * changes to the originating mount won't be propagated into this).
 *
 * Release with mntput().
 */
struct vfsmount *clone_private_mount(const struct path *path)
{
	struct mount *old_mnt = real_mount(path->mnt);
	struct mount *new_mnt;

	if (IS_MNT_UNBINDABLE(old_mnt))
		return ERR_PTR(-EINVAL);

	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
	if (IS_ERR(new_mnt))
		return ERR_CAST(new_mnt);

	return &new_mnt->mnt;
}
EXPORT_SYMBOL_GPL(clone_private_mount);

int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
		   struct vfsmount *root)
{
	struct mount *mnt;
	int res = f(root, arg);
	if (res)
		return res;
	list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
		res = f(&mnt->mnt, arg);
		if (res)
			return res;
	}
	return 0;
}
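
/*
 * Example callback (a hypothetical sketch; count_one is not a real
 * helper): walkers over a collected tree pass a function that is
 * applied to the root and to every mount hanging off it, e.g.
 *
 *	static int count_one(struct vfsmount *mnt, void *arg)
 *	{
 *		(*(int *)arg)++;
 *		return 0;	a non-zero return aborts the walk
 *	}
 *
 *	int n = 0;
 *	iterate_mounts(count_one, &n, tree);
 */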

static void cleanup_group_ids(struct mount *mnt, struct mount *end)
{
	struct mount *p;

	for (p = mnt; p != end; p = next_mnt(p, mnt)) {
		if (p->mnt_group_id && !IS_MNT_SHARED(p))
			mnt_release_group_id(p);
	}
}

static int invent_group_ids(struct mount *mnt, bool recurse)
{
	struct mount *p;

	for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
		if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
			int err = mnt_alloc_group_id(p);
			if (err) {
				cleanup_group_ids(mnt, p);
				return err;
			}
		}
	}

	return 0;
}

int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
{
	unsigned int max = READ_ONCE(sysctl_mount_max);
	unsigned int mounts = 0, old, pending, sum;
	struct mount *p;

	for (p = mnt; p; p = next_mnt(p, mnt))
		mounts++;

	old = ns->mounts;
	pending = ns->pending_mounts;
	sum = old + pending;
	if ((old > sum) ||
	    (pending > sum) ||
	    (max < sum) ||
	    (mounts > (max - sum)))
		return -ENOSPC;

	ns->pending_mounts = pending + mounts;
	return 0;
}
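
/*
 * The limit checked above is sysctl_mount_max from the top of this
 * file, exposed to userspace as the "fs.mount-max" sysctl
 * (/proc/sys/fs/mount-max); the pending count is folded into
 * ns->mounts by commit_tree() on success or reset on failure.
 */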

/*
 * @source_mnt : mount tree to be attached
 * @dest_mnt   : mount to attach the tree under
 * @dest_mp    : the mountpoint @source_mnt is attached at
 * @parent_path: if non-NULL, detach @source_mnt from its current parent
 *               first and store the old parent and mountpoint here
 *               (this is the move-mount case)
 *
 * In short (the full bind/move propagation table lives in
 * Documentation/filesystems/sharedsubtree.txt): if @dest_mnt is shared,
 * the tree is replicated to every mount in @dest_mnt's propagation tree
 * and each copy is marked shared as well; if @dest_mnt is private or a
 * slave, nothing propagates and only @source_mnt itself is attached.
 *
 * Should be called without spinlocks held, because this function can
 * sleep in allocations.
 */
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			struct path *parent_path)
{
	HLIST_HEAD(tree_list);
	struct mnt_namespace *ns = dest_mnt->mnt_ns;
	struct mountpoint *smp;
	struct mount *child, *p;
	struct hlist_node *n;
	int err;

	/* Preallocate a mountpoint in case the new mounts need
	 * to be tucked under other mounts.
	 */
	smp = get_mountpoint(source_mnt->mnt.mnt_root);
	if (IS_ERR(smp))
		return PTR_ERR(smp);

	/* Is there space to add these mounts to the mount namespace? */
	if (!parent_path) {
		err = count_mounts(ns, source_mnt);
		if (err)
			goto out;
	}

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		lock_mount_hash();
		if (err)
			goto out_cleanup_ids;
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	} else {
		lock_mount_hash();
	}
	if (parent_path) {
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt);
	}

	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
		struct mount *q;
		hlist_del_init(&child->mnt_hash);
		q = __lookup_mnt(&child->mnt_parent->mnt,
				 child->mnt_mountpoint);
		if (q)
			mnt_change_mountpoint(child, smp, q);
		commit_tree(child);
	}
	put_mountpoint(smp);
	unlock_mount_hash();

	return 0;

 out_cleanup_ids:
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		child->mnt_parent->mnt_ns->pending_mounts = 0;
		umount_tree(child, UMOUNT_SYNC);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
 out:
	ns->pending_mounts = 0;

	read_seqlock_excl(&mount_lock);
	put_mountpoint(smp);
	read_sequnlock_excl(&mount_lock);

	return err;
}

static struct mountpoint *lock_mount(struct path *path)
{
	struct vfsmount *mnt;
	struct dentry *dentry = path->dentry;
retry:
	inode_lock(dentry->d_inode);
	if (unlikely(cant_mount(dentry))) {
		inode_unlock(dentry->d_inode);
		return ERR_PTR(-ENOENT);
	}
	namespace_lock();
	mnt = lookup_mnt(path);
	if (likely(!mnt)) {
		struct mountpoint *mp = get_mountpoint(dentry);
		if (IS_ERR(mp)) {
			namespace_unlock();
			inode_unlock(dentry->d_inode);
			return mp;
		}
		return mp;
	}
	namespace_unlock();
	inode_unlock(path->dentry->d_inode);
	path_put(path);
	path->mnt = mnt;
	dentry = path->dentry = dget(mnt->mnt_root);
	goto retry;
}

static void unlock_mount(struct mountpoint *where)
{
	struct dentry *dentry = where->m_dentry;

	read_seqlock_excl(&mount_lock);
	put_mountpoint(where);
	read_sequnlock_excl(&mount_lock);

	namespace_unlock();
	inode_unlock(dentry->d_inode);
}

static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
{
	if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
		return -EINVAL;

	if (d_is_dir(mp->m_dentry) !=
	      d_is_dir(mnt->mnt.mnt_root))
		return -ENOTDIR;

	return attach_recursive_mnt(mnt, p, mp, NULL);
}

/*
 * Sanity check the flags to change_mnt_propagation.
 */
static int flags_to_propagation_type(int ms_flags)
{
	int type = ms_flags & ~(MS_REC | MS_SILENT);

	/* Fail if any non-propagation flags are set */
	if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		return 0;
	/* Only one propagation flag should be set */
	if (!is_power_of_2(type))
		return 0;
	return type;
}
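
/*
 * Example (illustrative): "mount --make-rshared /" issues mount(2)
 * with MS_SHARED | MS_REC; this helper reduces that to MS_SHARED,
 * while do_change_type() below keeps MS_REC as the recursion flag.
 */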

/*
 * recursively change the type of the mountpoint.
 */
static int do_change_type(struct path *path, int ms_flags)
{
	struct mount *m;
	struct mount *mnt = real_mount(path->mnt);
	int recurse = ms_flags & MS_REC;
	int type;
	int err = 0;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	type = flags_to_propagation_type(ms_flags);
	if (!type)
		return -EINVAL;

	namespace_lock();
	if (type == MS_SHARED) {
		err = invent_group_ids(mnt, recurse);
		if (err)
			goto out_unlock;
	}

	lock_mount_hash();
	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
		change_mnt_propagation(m, type);
	unlock_mount_hash();

 out_unlock:
	namespace_unlock();
	return err;
}

static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	struct mount *child;
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (!is_subdir(child->mnt_mountpoint, dentry))
			continue;

		if (child->mnt.mnt_flags & MNT_LOCKED)
			return true;
	}
	return false;
}

/*
 * do loopback mount.
 */
static int do_loopback(struct path *path, const char *old_name,
				int recurse)
{
	struct path old_path;
	struct mount *mnt = NULL, *old, *parent;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
	if (err)
		return err;

	err = -EINVAL;
	if (mnt_ns_loop(old_path.dentry))
		goto out;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	parent = real_mount(path->mnt);

	err = -EINVAL;
	if (IS_MNT_UNBINDABLE(old))
		goto out2;

	if (!check_mnt(parent))
		goto out2;

	/* a source outside our namespace is OK only for nsfs dentries */
	if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
		goto out2;

	if (!recurse && has_locked_children(old, old_path.dentry))
		goto out2;

	if (recurse)
		mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
	else
		mnt = clone_mnt(old, old_path.dentry, 0);

	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto out2;
	}

	mnt->mnt.mnt_flags &= ~MNT_LOCKED;

	err = graft_tree(mnt, parent, mp);
	if (err) {
		lock_mount_hash();
		umount_tree(mnt, UMOUNT_SYNC);
		unlock_mount_hash();
	}
out2:
	unlock_mount(mp);
out:
	path_put(&old_path);
	return err;
}

static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
{
	int error = 0;
	int readonly_request = 0;

	if (ms_flags & MS_RDONLY)
		readonly_request = 1;
	if (readonly_request == __mnt_is_readonly(mnt))
		return 0;

	if (readonly_request)
		error = mnt_make_readonly(real_mount(mnt));
	else
		__mnt_unmake_readonly(real_mount(mnt));
	return error;
}

/*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tough luck.
 */
static int do_remount(struct path *path, int ms_flags, int sb_flags,
		      int mnt_flags, void *data)
{
	int err;
	struct super_block *sb = path->mnt->mnt_sb;
	struct mount *mnt = real_mount(path->mnt);

	if (!check_mnt(mnt))
		return -EINVAL;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	/* Don't allow changing of locked mnt flags.
	 *
	 * No locks need to be held here while testing the various
	 * MNT_LOCK flags because those flags can never be cleared
	 * once they are set.
	 */
	if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
	    !(mnt_flags & MNT_READONLY)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
	    !(mnt_flags & MNT_NODEV)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
	    !(mnt_flags & MNT_NOSUID)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
	    !(mnt_flags & MNT_NOEXEC)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
	    ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
		return -EPERM;
	}

	err = security_sb_remount(sb, data);
	if (err)
		return err;

	down_write(&sb->s_umount);
	if (ms_flags & MS_BIND)
		err = change_mount_flags(path->mnt, ms_flags);
	else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
		err = -EPERM;
	else
		err = do_remount_sb(sb, sb_flags, data, 0);
	if (!err) {
		lock_mount_hash();
		mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
		mnt->mnt.mnt_flags = mnt_flags;
		touch_mnt_namespace(mnt->mnt_ns);
		unlock_mount_hash();
	}
	up_write(&sb->s_umount);
	return err;
}

static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *p;
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		if (IS_MNT_UNBINDABLE(p))
			return 1;
	}
	return 0;
}

static int do_move_mount(struct path *path, const char *old_name)
{
	struct path old_path, parent_path;
	struct mount *p;
	struct mount *old;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	if (err)
		return err;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	p = real_mount(path->mnt);

	err = -EINVAL;
	if (!check_mnt(p) || !check_mnt(old))
		goto out1;

	if (old->mnt.mnt_flags & MNT_LOCKED)
		goto out1;

	err = -EINVAL;
	if (old_path.dentry != old_path.mnt->mnt_root)
		goto out1;

	if (!mnt_has_parent(old))
		goto out1;

	if (d_is_dir(path->dentry) !=
	      d_is_dir(old_path.dentry))
		goto out1;
	/*
	 * Don't move a mount residing in a shared parent.
	 */
	if (IS_MNT_SHARED(old->mnt_parent))
		goto out1;
	/*
	 * Don't move a mount tree containing unbindable mounts to a destination
	 * mount which is shared.
	 */
	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
		goto out1;
	err = -ELOOP;
	for (; mnt_has_parent(p); p = p->mnt_parent)
		if (p == old)
			goto out1;

	err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
	if (err)
		goto out1;

	/* if the mount is moved, it should no longer be expire
	 * automatically */
	list_del_init(&old->mnt_expire);
out1:
	unlock_mount(mp);
out:
	if (!err)
		path_put(&parent_path);
	path_put(&old_path);
	return err;
}

static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
{
	int err;
	const char *subtype = strchr(fstype, '.');
	if (subtype) {
		subtype++;
		err = -EINVAL;
		if (!subtype[0])
			goto err;
	} else
		subtype = "";

	mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
	err = -ENOMEM;
	if (!mnt->mnt_sb->s_subtype)
		goto err;
	return mnt;

 err:
	mntput(mnt);
	return ERR_PTR(err);
}
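
/*
 * Example (illustrative): mounting with fstype "fuse.sshfs" stores
 * "sshfs" in s_subtype, which is why /proc/mounts later reports the
 * filesystem type as "fuse.sshfs" rather than plain "fuse".
 */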

/*
 * add a mount into a namespace's mount tree
 */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
	struct mountpoint *mp;
	struct mount *parent;
	int err;

	mnt_flags &= ~MNT_INTERNAL_FLAGS;

	mp = lock_mount(path);
	if (IS_ERR(mp))
		return PTR_ERR(mp);

	parent = real_mount(path->mnt);
	err = -EINVAL;
	if (unlikely(!check_mnt(parent))) {
		/* that's acceptable only for automounts done in private ns */
		if (!(mnt_flags & MNT_SHRINKABLE))
			goto unlock;
		/* ... and for those we'd better have mountpoint still alive */
		if (!parent->mnt_ns)
			goto unlock;
	}

	/* Refuse the same filesystem on the same mount point */
	err = -EBUSY;
	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
	    path->mnt->mnt_root == path->dentry)
		goto unlock;

	err = -EINVAL;
	if (d_is_symlink(newmnt->mnt.mnt_root))
		goto unlock;

	newmnt->mnt.mnt_flags = mnt_flags;
	err = graft_tree(newmnt, parent, mp);

unlock:
	unlock_mount(mp);
	return err;
}

static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags);

/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
			int mnt_flags, const char *name, void *data)
{
	struct file_system_type *type;
	struct vfsmount *mnt;
	int err;

	if (!fstype)
		return -EINVAL;

	type = get_fs_type(fstype);
	if (!type)
		return -ENODEV;

	mnt = vfs_kern_mount(type, sb_flags, name, data);
	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
	    !mnt->mnt_sb->s_subtype)
		mnt = fs_set_subtype(mnt, fstype);

	put_filesystem(type);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	if (mount_too_revealing(mnt, &mnt_flags)) {
		mntput(mnt);
		return -EPERM;
	}

	err = do_add_mount(real_mount(mnt), path, mnt_flags);
	if (err)
		mntput(mnt);
	return err;
}

int finish_automount(struct vfsmount *m, struct path *path)
{
	struct mount *mnt = real_mount(m);
	int err;
	/* The new mount record should have at least 2 refs to prevent it being
	 * expired before we get a chance to add it
	 */
	BUG_ON(mnt_get_count(mnt) < 2);

	if (m->mnt_sb == path->mnt->mnt_sb &&
	    m->mnt_root == path->dentry) {
		err = -ELOOP;
		goto fail;
	}

	err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
	if (!err)
		return 0;
fail:
	/* remove m from any expiration list it may be on */
	if (!list_empty(&mnt->mnt_expire)) {
		namespace_lock();
		list_del_init(&mnt->mnt_expire);
		namespace_unlock();
	}
	/* the deliberate double mntput drops both references held on failure */
	mntput(m);
	mntput(m);
	return err;
}

/**
 * mnt_set_expiry - Put a mount on an expiration list
 * @mnt: The mount to put on the list.
 * @expiry_list: The list to put the mount on.
 */
void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
{
	namespace_lock();

	list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);

	namespace_unlock();
}
EXPORT_SYMBOL(mnt_set_expiry);

/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here
 */
void mark_mounts_for_expiry(struct list_head *mounts)
{
	struct mount *mnt, *next;
	LIST_HEAD(graveyard);

	if (list_empty(mounts))
		return;

	namespace_lock();
	lock_mount_hash();

	/* extract from the expiration list every vfsmount that matches the
	 * following criteria:
	 * - only referenced by its parent vfsmount
	 * - still marked for expiry (marked on the last call here; marks are
	 *   cleared by mntput())
	 */
	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
			propagate_mount_busy(mnt, 1))
			continue;
		list_move(&mnt->mnt_expire, &graveyard);
	}
	while (!list_empty(&graveyard)) {
		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
		touch_mnt_namespace(mnt->mnt_ns);
		umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
	}
	unlock_mount_hash();
	namespace_unlock();
}

EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
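
/*
 * Typical use (a sketch): filesystems such as NFS and AFS put
 * automounted submounts on a private list via mnt_set_expiry() and call
 * mark_mounts_for_expiry() on that list from a periodic job, so a
 * submount left untouched across two consecutive passes gets unmounted.
 */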

/*
 * Ripoff of 'select_parent()'
 *
 * search the list of submounts for a given mountpoint, and move any
 * shrinkable submounts to the 'graveyard' list.
 */
static int select_submounts(struct mount *parent, struct list_head *graveyard)
{
	struct mount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct mount *mnt = list_entry(tmp, struct mount, mnt_child);

		next = tmp->next;
		if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
			continue;
		/*
		 * Descend a level if the mnt_mounts list is non-empty.
		 */
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		if (!propagate_mount_busy(mnt, 1)) {
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search
	 */
	if (this_parent != parent) {
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}

/*
 * process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint
 *
 * mount_lock must be held for write
 */
static void shrink_submounts(struct mount *mnt)
{
	LIST_HEAD(graveyard);
	struct mount *m;

	/* extract submounts of 'mountpoint' from the expiration list */
	while (select_submounts(mnt, &graveyard)) {
		while (!list_empty(&graveyard)) {
			m = list_first_entry(&graveyard, struct mount,
						mnt_expire);
			touch_mnt_namespace(m->mnt_ns);
			umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
		}
	}
}

/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault.  But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy.
 */
static long exact_copy_from_user(void *to, const void __user * from,
				 unsigned long n)
{
	char *t = to;
	const char __user *f = from;
	char c;

	if (!access_ok(VERIFY_READ, from, n))
		return n;

	while (n) {
		if (__get_user(c, f)) {
			memset(t, 0, n);
			break;
		}
		*t++ = c;
		f++;
		n--;
	}
	return n;
}

void *copy_mount_options(const void __user * data)
{
	int i;
	unsigned long size;
	char *copy;

	if (!data)
		return NULL;

	copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!copy)
		return ERR_PTR(-ENOMEM);

	/* We only care that *some* data at the address the user
	 * gave us is valid.  Just in case, we'll zero
	 * the remainder of the page.
	 */
	/* copy_from_user cannot cross TASK_SIZE ! */
	size = TASK_SIZE - (unsigned long)data;
	if (size > PAGE_SIZE)
		size = PAGE_SIZE;

	i = size - exact_copy_from_user(copy, data, size);
	if (!i) {
		kfree(copy);
		return ERR_PTR(-EFAULT);
	}
	if (i != PAGE_SIZE)
		memset(copy + i, 0, PAGE_SIZE - i);
	return copy;
}

char *copy_mount_string(const void __user *data)
{
	return data ? strndup_user(data, PAGE_SIZE) : NULL;
}
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793long do_mount(const char *dev_name, const char __user *dir_name,
2794 const char *type_page, unsigned long flags, void *data_page)
2795{
2796 struct path path;
2797 unsigned int mnt_flags = 0, sb_flags;
2798 int retval = 0;
2799
2800
2801 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
2802 flags &= ~MS_MGC_MSK;
2803
2804
2805 if (data_page)
2806 ((char *)data_page)[PAGE_SIZE - 1] = 0;
2807
2808 if (flags & MS_NOUSER)
2809 return -EINVAL;
2810
	/* ... and get the mountpoint */
2812 retval = user_path(dir_name, &path);
2813 if (retval)
2814 return retval;
2815
2816 retval = security_sb_mount(dev_name, &path,
2817 type_page, flags, data_page);
2818 if (!retval && !may_mount())
2819 retval = -EPERM;
2820 if (!retval && (flags & SB_MANDLOCK) && !may_mandlock())
2821 retval = -EPERM;
2822 if (retval)
2823 goto dput_out;
2824
	/* Default to relatime unless overridden */
2826 if (!(flags & MS_NOATIME))
2827 mnt_flags |= MNT_RELATIME;
2828
	/* Separate the per-mountpoint flags */
2830 if (flags & MS_NOSUID)
2831 mnt_flags |= MNT_NOSUID;
2832 if (flags & MS_NODEV)
2833 mnt_flags |= MNT_NODEV;
2834 if (flags & MS_NOEXEC)
2835 mnt_flags |= MNT_NOEXEC;
2836 if (flags & MS_NOATIME)
2837 mnt_flags |= MNT_NOATIME;
2838 if (flags & MS_NODIRATIME)
2839 mnt_flags |= MNT_NODIRATIME;
2840 if (flags & MS_STRICTATIME)
2841 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
2842 if (flags & MS_RDONLY)
2843 mnt_flags |= MNT_READONLY;
2844
	/* The default atime for remount is preservation */
2846 if ((flags & MS_REMOUNT) &&
2847 ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
2848 MS_STRICTATIME)) == 0)) {
2849 mnt_flags &= ~MNT_ATIME_MASK;
2850 mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
2851 }
2852
2853 sb_flags = flags & (SB_RDONLY |
2854 SB_SYNCHRONOUS |
2855 SB_MANDLOCK |
2856 SB_DIRSYNC |
2857 SB_SILENT |
2858 SB_POSIXACL |
2859 SB_LAZYTIME |
2860 SB_I_VERSION);
2861
2862 if (flags & MS_REMOUNT)
2863 retval = do_remount(&path, flags, sb_flags, mnt_flags,
2864 data_page);
2865 else if (flags & MS_BIND)
2866 retval = do_loopback(&path, dev_name, flags & MS_REC);
2867 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2868 retval = do_change_type(&path, flags);
2869 else if (flags & MS_MOVE)
2870 retval = do_move_mount(&path, dev_name);
2871 else
2872 retval = do_new_mount(&path, type_page, sb_flags, mnt_flags,
2873 dev_name, data_page);
2874dput_out:
2875 path_put(&path);
2876 return retval;
2877}
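
/*
 * Illustrative examples (userspace view, not part of this file): the
 * dispatch above means that
 *
 *	mount("/src", "/dst", NULL, MS_BIND | MS_REC, NULL);
 *
 * is handled by do_loopback(), while
 *
 *	mount(NULL, "/mnt", NULL, MS_REMOUNT | MS_RDONLY, NULL);
 *
 * goes through do_remount() with SB_RDONLY set in sb_flags, and a plain
 *
 *	mount("/dev/sda1", "/mnt", "ext4", 0, "errors=remount-ro");
 *
 * falls through to do_new_mount().
 */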
2878
2879static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
2880{
2881 return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
2882}
2883
2884static void dec_mnt_namespaces(struct ucounts *ucounts)
2885{
2886 dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
2887}
2888
2889static void free_mnt_ns(struct mnt_namespace *ns)
2890{
2891 ns_free_inum(&ns->ns);
2892 dec_mnt_namespaces(ns->ucounts);
2893 put_user_ns(ns->user_ns);
2894 kfree(ns);
2895}

/*
 * Assign a sequence number so we can detect when we attempt to bind
 * mount a reference to an older mount namespace into the current
 * mount namespace, preventing reference counting loops.  A 64bit
 * number incrementing at 10Ghz will take 12,427 years to wrap which
 * is effectively never, so we can ignore the possibility.
 */
2904static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2905
2906static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2907{
2908 struct mnt_namespace *new_ns;
2909 struct ucounts *ucounts;
2910 int ret;
2911
2912 ucounts = inc_mnt_namespaces(user_ns);
2913 if (!ucounts)
2914 return ERR_PTR(-ENOSPC);
2915
2916 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2917 if (!new_ns) {
2918 dec_mnt_namespaces(ucounts);
2919 return ERR_PTR(-ENOMEM);
2920 }
2921 ret = ns_alloc_inum(&new_ns->ns);
2922 if (ret) {
2923 kfree(new_ns);
2924 dec_mnt_namespaces(ucounts);
2925 return ERR_PTR(ret);
2926 }
2927 new_ns->ns.ops = &mntns_operations;
2928 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
2929 atomic_set(&new_ns->count, 1);
2930 new_ns->root = NULL;
2931 INIT_LIST_HEAD(&new_ns->list);
2932 init_waitqueue_head(&new_ns->poll);
2933 new_ns->event = 0;
2934 new_ns->user_ns = get_user_ns(user_ns);
2935 new_ns->ucounts = ucounts;
2936 new_ns->mounts = 0;
2937 new_ns->pending_mounts = 0;
2938 return new_ns;
2939}
2940
2941__latent_entropy
2942struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2943 struct user_namespace *user_ns, struct fs_struct *new_fs)
2944{
2945 struct mnt_namespace *new_ns;
2946 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
2947 struct mount *p, *q;
2948 struct mount *old;
2949 struct mount *new;
2950 int copy_flags;
2951
2952 BUG_ON(!ns);
2953
2954 if (likely(!(flags & CLONE_NEWNS))) {
2955 get_mnt_ns(ns);
2956 return ns;
2957 }
2958
2959 old = ns->root;
2960
2961 new_ns = alloc_mnt_ns(user_ns);
2962 if (IS_ERR(new_ns))
2963 return new_ns;
2964
2965 namespace_lock();
2966
2967 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
2968 if (user_ns != ns->user_ns)
2969 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
2970 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2971 if (IS_ERR(new)) {
2972 namespace_unlock();
2973 free_mnt_ns(new_ns);
2974 return ERR_CAST(new);
2975 }
2976 new_ns->root = new;
2977 list_add_tail(&new_ns->list, &new->mnt_list);

	/*
	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
	 * as belonging to the new namespace.  We have already acquired a
	 * private fs_struct, so tsk->fs->lock is not needed.
	 */
2984 p = old;
2985 q = new;
2986 while (p) {
2987 q->mnt_ns = new_ns;
2988 new_ns->mounts++;
2989 if (new_fs) {
2990 if (&p->mnt == new_fs->root.mnt) {
2991 new_fs->root.mnt = mntget(&q->mnt);
2992 rootmnt = &p->mnt;
2993 }
2994 if (&p->mnt == new_fs->pwd.mnt) {
2995 new_fs->pwd.mnt = mntget(&q->mnt);
2996 pwdmnt = &p->mnt;
2997 }
2998 }
2999 p = next_mnt(p, old);
3000 q = next_mnt(q, new);
3001 if (!q)
3002 break;
3003 while (p->mnt.mnt_root != q->mnt.mnt_root)
3004 p = next_mnt(p, old);
3005 }
3006 namespace_unlock();
3007
3008 if (rootmnt)
3009 mntput(rootmnt);
3010 if (pwdmnt)
3011 mntput(pwdmnt);
3012
3013 return new_ns;
3014}

/**
 * create_mnt_ns - creates a private namespace and adds a root filesystem
 * @mnt: pointer to the new root filesystem mountpoint
 */
3020static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
3021{
3022 struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
3023 if (!IS_ERR(new_ns)) {
3024 struct mount *mnt = real_mount(m);
3025 mnt->mnt_ns = new_ns;
3026 new_ns->root = mnt;
3027 new_ns->mounts++;
3028 list_add(&mnt->mnt_list, &new_ns->list);
3029 } else {
3030 mntput(m);
3031 }
3032 return new_ns;
3033}
3034
3035struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
3036{
3037 struct mnt_namespace *ns;
3038 struct super_block *s;
3039 struct path path;
3040 int err;
3041
3042 ns = create_mnt_ns(mnt);
3043 if (IS_ERR(ns))
3044 return ERR_CAST(ns);
3045
3046 err = vfs_path_lookup(mnt->mnt_root, mnt,
3047 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
3048
3049 put_mnt_ns(ns);
3050
3051 if (err)
3052 return ERR_PTR(err);
3053
	/* trade a vfsmount reference for active sb one */
	s = path.mnt->mnt_sb;
	atomic_inc(&s->s_active);
	mntput(path.mnt);
	/* lock the superblock */
	down_write(&s->s_umount);
	/* ... and return the root of (sub)tree on it */
3061 return path.dentry;
3062}
3063EXPORT_SYMBOL(mount_subtree);
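
/*
 * Calling convention sketch (inferred from the code above, not from a
 * specific caller): the caller's vfsmount reference is consumed, and
 * the returned dentry carries an elevated s_active count with s_umount
 * held for write -- the same state in which a filesystem's ->mount()
 * method is expected to return its root dentry.
 */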
3064
3065int ksys_mount(char __user *dev_name, char __user *dir_name, char __user *type,
3066 unsigned long flags, void __user *data)
3067{
3068 int ret;
3069 char *kernel_type;
3070 char *kernel_dev;
3071 void *options;
3072
3073 kernel_type = copy_mount_string(type);
3074 ret = PTR_ERR(kernel_type);
3075 if (IS_ERR(kernel_type))
3076 goto out_type;
3077
3078 kernel_dev = copy_mount_string(dev_name);
3079 ret = PTR_ERR(kernel_dev);
3080 if (IS_ERR(kernel_dev))
3081 goto out_dev;
3082
3083 options = copy_mount_options(data);
3084 ret = PTR_ERR(options);
3085 if (IS_ERR(options))
3086 goto out_data;
3087
3088 ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
3089
3090 kfree(options);
3091out_data:
3092 kfree(kernel_dev);
3093out_dev:
3094 kfree(kernel_type);
3095out_type:
3096 return ret;
3097}
3098
3099SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
3100 char __user *, type, unsigned long, flags, void __user *, data)
3101{
3102 return ksys_mount(dev_name, dir_name, type, flags, data);
3103}
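
/*
 * Note the asymmetry: type, dev_name and data are copied into kernel
 * buffers up front, while dir_name deliberately stays a user pointer --
 * do_mount() resolves it with user_path(), which copies the string as
 * part of the path walk.
 */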

/*
 * Return true if path is reachable from root
 *
 * namespace_sem or mount_lock is held
 */
3110bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
3111 const struct path *root)
3112{
3113 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
3114 dentry = mnt->mnt_mountpoint;
3115 mnt = mnt->mnt_parent;
3116 }
3117 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
3118}
3119
3120bool path_is_under(const struct path *path1, const struct path *path2)
3121{
3122 bool res;
3123 read_seqlock_excl(&mount_lock);
3124 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
3125 read_sequnlock_excl(&mount_lock);
3126 return res;
3127}
3128EXPORT_SYMBOL(path_is_under);
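
/*
 * Example (illustrative): after "mount --bind /a /b", a path resolved
 * inside /b is under /b but not under /a -- reachability is computed
 * over the mount tree via mnt_parent/mnt_mountpoint, not by comparing
 * pathname strings.
 */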

/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory
 * put_old, makes new_root the new root file system of the current process,
 * and sets root/cwd of all processes which had them on the current root to
 * new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root. The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root. No other
 * file system may be mounted on put_old. After all, new_root is a mountpoint.
 *
 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
 * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
 * in this situation.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
 *    though; if you move the current root here, you should also move put_old
 *    here.
 */
3155SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3156 const char __user *, put_old)
3157{
3158 struct path new, old, parent_path, root_parent, root;
3159 struct mount *new_mnt, *root_mnt, *old_mnt;
3160 struct mountpoint *old_mp, *root_mp;
3161 int error;
3162
3163 if (!may_mount())
3164 return -EPERM;
3165
3166 error = user_path_dir(new_root, &new);
3167 if (error)
3168 goto out0;
3169
3170 error = user_path_dir(put_old, &old);
3171 if (error)
3172 goto out1;
3173
3174 error = security_sb_pivotroot(&old, &new);
3175 if (error)
3176 goto out2;
3177
3178 get_fs_root(current->fs, &root);
3179 old_mp = lock_mount(&old);
3180 error = PTR_ERR(old_mp);
3181 if (IS_ERR(old_mp))
3182 goto out3;
3183
3184 error = -EINVAL;
3185 new_mnt = real_mount(new.mnt);
3186 root_mnt = real_mount(root.mnt);
3187 old_mnt = real_mount(old.mnt);
3188 if (IS_MNT_SHARED(old_mnt) ||
3189 IS_MNT_SHARED(new_mnt->mnt_parent) ||
3190 IS_MNT_SHARED(root_mnt->mnt_parent))
3191 goto out4;
3192 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
3193 goto out4;
3194 if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
3195 goto out4;
	error = -ENOENT;
	if (d_unlinked(new.dentry))
		goto out4;
	error = -EBUSY;
	if (new_mnt == root_mnt || old_mnt == root_mnt)
		goto out4; /* loop, on the same file system */
	error = -EINVAL;
	if (root.mnt->mnt_root != root.dentry)
		goto out4; /* not a mountpoint */
	if (!mnt_has_parent(root_mnt))
		goto out4; /* not attached */
	root_mp = root_mnt->mnt_mp;
	if (new.mnt->mnt_root != new.dentry)
		goto out4; /* not a mountpoint */
	if (!mnt_has_parent(new_mnt))
		goto out4; /* not attached */
	/* make sure we can reach put_old from new_root */
	if (!is_path_reachable(old_mnt, old.dentry, &new))
		goto out4;
	/* make certain new is below the root */
	if (!is_path_reachable(new_mnt, new.dentry, &root))
		goto out4;
	root_mp->m_count++; /* pin it so it won't go away */
3219 lock_mount_hash();
3220 detach_mnt(new_mnt, &parent_path);
3221 detach_mnt(root_mnt, &root_parent);
3222 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
3223 new_mnt->mnt.mnt_flags |= MNT_LOCKED;
3224 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
3225 }
	/* mount old root on put_old */
	attach_mnt(root_mnt, old_mnt, old_mp);
	/* mount new_root on / */
	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
	touch_mnt_namespace(current->nsproxy->mnt_ns);
	/* A moved mount should not expire automatically */
3232 list_del_init(&new_mnt->mnt_expire);
3233 put_mountpoint(root_mp);
3234 unlock_mount_hash();
3235 chroot_fs_refs(&root, &new);
3236 error = 0;
3237out4:
3238 unlock_mount(old_mp);
3239 if (!error) {
3240 path_put(&root_parent);
3241 path_put(&parent_path);
3242 }
3243out3:
3244 path_put(&root);
3245out2:
3246 path_put(&old);
3247out1:
3248 path_put(&new);
3249out0:
3250 return error;
3251}
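
/*
 * Typical userspace sequence (illustrative; see pivot_root(2)) when
 * switching away from an initramfs-style root:
 *
 *	chdir("/new_root");
 *	pivot_root(".", "put_old");
 *	chroot(".");
 *	umount2("/put_old", MNT_DETACH);
 */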
3252
3253static void __init init_mount_tree(void)
3254{
3255 struct vfsmount *mnt;
3256 struct mnt_namespace *ns;
3257 struct path root;
3258 struct file_system_type *type;
3259
3260 type = get_fs_type("rootfs");
3261 if (!type)
3262 panic("Can't find rootfs type");
3263 mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
3264 put_filesystem(type);
3265 if (IS_ERR(mnt))
3266 panic("Can't create rootfs");
3267
3268 ns = create_mnt_ns(mnt);
3269 if (IS_ERR(ns))
3270 panic("Can't allocate initial namespace");
3271
3272 init_task.nsproxy->mnt_ns = ns;
3273 get_mnt_ns(ns);
3274
3275 root.mnt = mnt;
3276 root.dentry = mnt->mnt_root;
3277 mnt->mnt_flags |= MNT_LOCKED;
3278
3279 set_fs_pwd(current->fs, &root);
3280 set_fs_root(current->fs, &root);
3281}
3282
3283void __init mnt_init(void)
3284{
3285 int err;
3286
3287 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
3288 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3289
3290 mount_hashtable = alloc_large_system_hash("Mount-cache",
3291 sizeof(struct hlist_head),
3292 mhash_entries, 19,
3293 HASH_ZERO,
3294 &m_hash_shift, &m_hash_mask, 0, 0);
3295 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
3296 sizeof(struct hlist_head),
3297 mphash_entries, 19,
3298 HASH_ZERO,
3299 &mp_hash_shift, &mp_hash_mask, 0, 0);
3300
3301 if (!mount_hashtable || !mountpoint_hashtable)
3302 panic("Failed to allocate mount hash table\n");
3303
3304 kernfs_init();
3305
3306 err = sysfs_init();
3307 if (err)
3308 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
3309 __func__, err);
3310 fs_kobj = kobject_create_and_add("fs", NULL);
3311 if (!fs_kobj)
3312 printk(KERN_WARNING "%s: kobj create error\n", __func__);
3313 init_rootfs();
3314 init_mount_tree();
3315}
3316
3317void put_mnt_ns(struct mnt_namespace *ns)
3318{
3319 if (!atomic_dec_and_test(&ns->count))
3320 return;
3321 drop_collected_mounts(&ns->root->mnt);
3322 free_mnt_ns(ns);
3323}
3324
3325struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
3326{
3327 struct vfsmount *mnt;
3328 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data);
3329 if (!IS_ERR(mnt)) {
		/*
		 * This is a long-term mount; don't release mnt until we
		 * unmount it, just before the filesystem is unregistered.
		 */
3334 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
3335 }
3336 return mnt;
3337}
3338EXPORT_SYMBOL_GPL(kern_mount_data);
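
/*
 * Most in-kernel users reach this through the kern_mount(type) wrapper,
 * which passes NULL data; pseudo filesystems such as pipefs pin their
 * superblock this way for the lifetime of the kernel (or module).
 */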
3339
3340void kern_unmount(struct vfsmount *mnt)
3341{
	/* release long term mount so mount point can be released */
3343 if (!IS_ERR_OR_NULL(mnt)) {
3344 real_mount(mnt)->mnt_ns = NULL;
3345 synchronize_rcu();
3346 mntput(mnt);
3347 }
3348}
3349EXPORT_SYMBOL(kern_unmount);
3350
3351bool our_mnt(struct vfsmount *mnt)
3352{
3353 return check_mnt(real_mount(mnt));
3354}
3355
3356bool current_chrooted(void)
3357{
	/* Does the current process have a non-standard root? */
3359 struct path ns_root;
3360 struct path fs_root;
3361 bool chrooted;
3362
	/* Find the namespace root */
	ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
3365 ns_root.dentry = ns_root.mnt->mnt_root;
3366 path_get(&ns_root);
3367 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
3368 ;
3369
3370 get_fs_root(current->fs, &fs_root);
3371
3372 chrooted = !path_equal(&fs_root, &ns_root);
3373
3374 path_put(&fs_root);
3375 path_put(&ns_root);
3376
3377 return chrooted;
3378}
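
/*
 * Example (illustrative): in a task whose fs root coincides with its
 * mount namespace's root this returns false; after chroot("/jail") the
 * two differ and it returns true.  The follow_down_one() loop above
 * ensures that a root that is merely overmounted does not count as a
 * chroot.
 */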
3379
3380static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
3381 int *new_mnt_flags)
3382{
3383 int new_flags = *new_mnt_flags;
3384 struct mount *mnt;
3385 bool visible = false;
3386
3387 down_read(&namespace_sem);
3388 list_for_each_entry(mnt, &ns->list, mnt_list) {
3389 struct mount *child;
3390 int mnt_flags;
3391
3392 if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
3393 continue;
3394
		/* This mount is not fully visible if its root directory
		 * is not the root directory of the filesystem.
		 */
3398 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
3399 continue;
3400
		/* A local view of the mount flags */
		mnt_flags = mnt->mnt.mnt_flags;

		/* Don't miss readonly hidden in the superblock flags */
		if (sb_rdonly(mnt->mnt.mnt_sb))
			mnt_flags |= MNT_LOCK_READONLY;
3407
		/* Verify the mount flags are equal to or more permissive
		 * than the proposed new mount.
		 */
3411 if ((mnt_flags & MNT_LOCK_READONLY) &&
3412 !(new_flags & MNT_READONLY))
3413 continue;
3414 if ((mnt_flags & MNT_LOCK_ATIME) &&
3415 ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
3416 continue;
3417
		/* This mount is not fully visible if there are any
		 * locked child mounts that cover anything except for
		 * empty directories.
		 */
		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
			struct inode *inode = child->mnt_mountpoint->d_inode;
			/* Only worry about locked mounts */
			if (!(child->mnt.mnt_flags & MNT_LOCKED))
				continue;
			/* Is the directory permanently empty? */
			if (!is_empty_dir_inode(inode))
				goto next;
		}
		/* Preserve the locked attributes */
		*new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY |
					       MNT_LOCK_ATIME);
3434 visible = true;
3435 goto found;
3436 next: ;
3437 }
3438found:
3439 up_read(&namespace_sem);
3440 return visible;
3441}
3442
3443static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
3444{
3445 const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
3446 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
3447 unsigned long s_iflags;
3448
3449 if (ns->user_ns == &init_user_ns)
3450 return false;
3451
	/* Can this filesystem be too revealing? */
3453 s_iflags = mnt->mnt_sb->s_iflags;
3454 if (!(s_iflags & SB_I_USERNS_VISIBLE))
3455 return false;
3456
3457 if ((s_iflags & required_iflags) != required_iflags) {
3458 WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
3459 required_iflags);
3460 return true;
3461 }
3462
3463 return !mnt_already_visible(ns, mnt, new_mnt_flags);
3464}
3465
3466bool mnt_may_suid(struct vfsmount *mnt)
3467{
	/*
	 * Foreign mounts (accessed via fchdir or through /proc
	 * symlinks) are always treated as if they are nosuid.  This
	 * prevents namespaces from trusting potentially unsafe
	 * suid/sgid bits, file caps, or security labels that originate
	 * in other namespaces.
	 */
3475 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
3476 current_in_userns(mnt->mnt_sb->s_user_ns);
3477}
3478
3479static struct ns_common *mntns_get(struct task_struct *task)
3480{
3481 struct ns_common *ns = NULL;
3482 struct nsproxy *nsproxy;
3483
3484 task_lock(task);
3485 nsproxy = task->nsproxy;
3486 if (nsproxy) {
3487 ns = &nsproxy->mnt_ns->ns;
3488 get_mnt_ns(to_mnt_ns(ns));
3489 }
3490 task_unlock(task);
3491
3492 return ns;
3493}
3494
3495static void mntns_put(struct ns_common *ns)
3496{
3497 put_mnt_ns(to_mnt_ns(ns));
3498}
3499
3500static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
3501{
3502 struct fs_struct *fs = current->fs;
3503 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
3504 struct path root;
3505 int err;
3506
3507 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
3508 !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
3509 !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
3510 return -EPERM;
3511
3512 if (fs->users != 1)
3513 return -EINVAL;
3514
3515 get_mnt_ns(mnt_ns);
3516 old_mnt_ns = nsproxy->mnt_ns;
3517 nsproxy->mnt_ns = mnt_ns;
3518
	/* Find the root */
3520 err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
3521 "/", LOOKUP_DOWN, &root);
3522 if (err) {
		/* revert to old namespace */
3524 nsproxy->mnt_ns = old_mnt_ns;
3525 put_mnt_ns(mnt_ns);
3526 return err;
3527 }
3528
3529 put_mnt_ns(old_mnt_ns);
3530
	/* Update the pwd and root */
3532 set_fs_pwd(fs, &root);
3533 set_fs_root(fs, &root);
3534
3535 path_put(&root);
3536 return 0;
3537}
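
/*
 * This is what backs setns(2) for mount namespaces, e.g. (illustrative):
 *
 *	int fd = open("/proc/<pid>/ns/mnt", O_RDONLY);
 *	setns(fd, CLONE_NEWNS);
 *
 * The CAP_SYS_CHROOT and CAP_SYS_ADMIN checks mirror what the implied
 * chroot- and mount-like operations would each require on their own.
 */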
3538
3539static struct user_namespace *mntns_owner(struct ns_common *ns)
3540{
3541 return to_mnt_ns(ns)->user_ns;
3542}
3543
3544const struct proc_ns_operations mntns_operations = {
3545 .name = "mnt",
3546 .type = CLONE_NEWNS,
3547 .get = mntns_get,
3548 .put = mntns_put,
3549 .install = mntns_install,
3550 .owner = mntns_owner,
3551};
3552