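/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *	Released under GPL v2.
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */
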
#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/fs_struct.h>
#include <linux/fsnotify.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/bootmem.h>
#include <linux/task_work.h>
#include <linux/sched/task.h>

#include "pnode.h"
#include "internal.h"

/* Maximum number of mounts in a mount namespace */
unsigned int sysctl_mount_max __read_mostly = 100000;

static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
static unsigned int mp_hash_mask __read_mostly;
static unsigned int mp_hash_shift __read_mostly;

static __initdata unsigned long mhash_entries;
static int __init set_mhash_entries(char *str)
{
	if (!str)
		return 0;
	mhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mhash_entries=", set_mhash_entries);

static __initdata unsigned long mphash_entries;
static int __init set_mphash_entries(char *str)
{
	if (!str)
		return 0;
	mphash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mphash_entries=", set_mphash_entries);

static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
static DEFINE_SPINLOCK(mnt_id_lock);
static int mnt_id_start = 0;
static int mnt_group_start = 1;

static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);

/* /sys/fs */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);

/*
 * vfsmount lock may be taken for read to prevent changes to the
 * vfsmount hash, ie. during mountpoint lookups or walking back
 * up the tree.
 *
 * It should be taken for write in all cases where the vfsmount
 * lock is not taken.
 */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);

static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> m_hash_shift);
	return &mount_hashtable[tmp & m_hash_mask];
}

static inline struct hlist_head *mp_hash(struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> mp_hash_shift);
	return &mountpoint_hashtable[tmp & mp_hash_mask];
}

static int mnt_alloc_id(struct mount *mnt)
{
	int res;

retry:
	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
	spin_lock(&mnt_id_lock);
	res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
	if (!res)
		mnt_id_start = mnt->mnt_id + 1;
	spin_unlock(&mnt_id_lock);
	if (res == -EAGAIN)
		goto retry;

	return res;
}

static void mnt_free_id(struct mount *mnt)
{
	int id = mnt->mnt_id;
	spin_lock(&mnt_id_lock);
	ida_remove(&mnt_id_ida, id);
	if (mnt_id_start > id)
		mnt_id_start = id;
	spin_unlock(&mnt_id_lock);
}

/*
 * Allocate a new peer group ID
 *
 * mnt_group_ida is protected by namespace_sem
 */
static int mnt_alloc_group_id(struct mount *mnt)
{
	int res;

	if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
		return -ENOMEM;

	res = ida_get_new_above(&mnt_group_ida,
				mnt_group_start,
				&mnt->mnt_group_id);
	if (!res)
		mnt_group_start = mnt->mnt_group_id + 1;

	return res;
}

/*
 * Release a peer group ID
 */
void mnt_release_group_id(struct mount *mnt)
{
	int id = mnt->mnt_group_id;
	ida_remove(&mnt_group_ida, id);
	if (mnt_group_start > id)
		mnt_group_start = id;
	mnt->mnt_group_id = 0;
}

/*
 * vfsmount lock must be held for read
 */
static inline void mnt_add_count(struct mount *mnt, int n)
{
#ifdef CONFIG_SMP
	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
#else
	preempt_disable();
	mnt->mnt_count += n;
	preempt_enable();
#endif
}

/*
 * vfsmount lock must be held for write
 */
unsigned int mnt_get_count(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
	}

	return count;
#else
	return mnt->mnt_count;
#endif
}

static void drop_mountpoint(struct fs_pin *p)
{
	struct mount *m = container_of(p, struct mount, mnt_umount);
	dput(m->mnt_ex_mountpoint);
	pin_remove(p);
	mntput(&m->mnt);
}

static struct mount *alloc_vfsmnt(const char *name)
{
	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

#ifdef CONFIG_SMP
		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
		if (!mnt->mnt_pcp)
			goto out_free_devname;

		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else
		mnt->mnt_count = 1;
		mnt->mnt_writers = 0;
#endif

		INIT_HLIST_NODE(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
		INIT_HLIST_NODE(&mnt->mnt_mp_list);
		INIT_LIST_HEAD(&mnt->mnt_umounting);
		init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
	}
	return mnt;

#ifdef CONFIG_SMP
out_free_devname:
	kfree_const(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}

/*
 * __mnt_is_readonly: check whether a mount is read-only
 * @mnt: the mount to check for its write status
 *
 * This shouldn't be called directly - use the mnt_is_readonly()
 * wrapper instead, which adds the ordering required against remount.
 */
int __mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_flags & MNT_READONLY)
		return 1;
	if (sb_rdonly(mnt->mnt_sb))
		return 1;
	return 0;
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

static inline void mnt_inc_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers++;
#endif
}

static inline void mnt_dec_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers--;
#endif
}

static unsigned int mnt_get_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
	}

	return count;
#else
	return mnt->mnt_writers;
#endif
}

static int mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_sb->s_readonly_remount)
		return 1;
	/* Order wrt setting s_flags/s_readonly_remount in do_remount() */
	smp_rmb();
	return __mnt_is_readonly(mnt);
}

/*
 * Most r/o & frozen checks on a fs are for operations that take discrete
 * amounts of time, like a write() or unlink().  We must keep track of when
 * those operations start (for permission checks) and when they end, so that
 * we can determine when writes are able to occur to a filesystem.
 */
/**
 * __mnt_want_write - get write access to a mount without freeze protection
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed
 * to it, and makes sure that writes are allowed (mnt is read-write) before
 * returning success. This operation does not protect against filesystem
 * being frozen. When the write operation is finished, __mnt_drop_write()
 * must be called. This is effectively a refcount.
 */
int __mnt_want_write(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int ret = 0;

	preempt_disable();
	mnt_inc_writers(mnt);
	/*
	 * The store to mnt_inc_writers must be visible before we pass
	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
	 * incremented count after it has set MNT_WRITE_HOLD.
	 */
	smp_mb();
	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
		cpu_relax();
	/*
	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
	 * be set to match its requirements. So we must not load that until
	 * MNT_WRITE_HOLD is cleared.
	 */
	smp_rmb();
	if (mnt_is_readonly(m)) {
		mnt_dec_writers(mnt);
		ret = -EROFS;
	}
	preempt_enable();

	return ret;
}

/**
 * mnt_want_write - get write access to a mount
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed
 * to it, and makes sure that writes are allowed (mount is read-write,
 * filesystem is not frozen) before returning success.  When the write
 * operation is finished, mnt_drop_write() must be called.  This is
 * effectively a refcount.
 */
int mnt_want_write(struct vfsmount *m)
{
	int ret;

	sb_start_write(m->mnt_sb);
	ret = __mnt_want_write(m);
	if (ret)
		sb_end_write(m->mnt_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
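
/*
 * Illustrative sketch (not code from this file): callers bracket
 * filesystem modifications with the mnt_want_write()/mnt_drop_write()
 * pair; the helper called in between is an arbitrary assumption here:
 *
 *	err = mnt_want_write(path->mnt);
 *	if (err)
 *		return err;
 *	err = some_fs_modifying_helper(path);
 *	mnt_drop_write(path->mnt);
 *	return err;
 */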

/**
 * mnt_clone_write - get write access to a mount
 * @mnt: the mount on which to take a write
 *
 * This is effectively like mnt_want_write, except
 * it must only be used to take an extra write reference
 * on a mountpoint that we already know has a write reference
 * on it. This allows some optimisation.
 *
 * After finished, mnt_drop_write must be called as usual to
 * drop the reference.
 */
int mnt_clone_write(struct vfsmount *mnt)
{
	/* superblock may be r/o */
	if (__mnt_is_readonly(mnt))
		return -EROFS;
	preempt_disable();
	mnt_inc_writers(real_mount(mnt));
	preempt_enable();
	return 0;
}
EXPORT_SYMBOL_GPL(mnt_clone_write);

/**
 * __mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like __mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already
 */
int __mnt_want_write_file(struct file *file)
{
	if (!(file->f_mode & FMODE_WRITER))
		return __mnt_want_write(file->f_path.mnt);
	else
		return mnt_clone_write(file->f_path.mnt);
}

/**
 * mnt_want_write_file_path - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already
 */
int mnt_want_write_file_path(struct file *file)
{
	int ret;

	sb_start_write(file->f_path.mnt->mnt_sb);
	ret = __mnt_want_write_file(file);
	if (ret)
		sb_end_write(file->f_path.mnt->mnt_sb);
	return ret;
}

static inline int may_write_real(struct file *file)
{
	struct dentry *dentry = file->f_path.dentry;
	struct dentry *upperdentry;

	/* Writable file? */
	if (file->f_mode & FMODE_WRITER)
		return 0;

	/* Not overlayfs? */
	if (likely(!(dentry->d_flags & DCACHE_OP_REAL)))
		return 0;

	/* File refers to upper, writable layer? */
	upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
	if (upperdentry &&
	    (file_inode(file) == d_inode(upperdentry) ||
	     file_inode(file) == d_inode(dentry)))
		return 0;

	/* Lower layer: can't write to real file, sorry... */
	return -EPERM;
}

/**
 * mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already.
 * Unlike mnt_want_write_file_path(), it refuses to write through
 * an overlay to the underlying (real) file.
 */
int mnt_want_write_file(struct file *file)
{
	int ret;

	ret = may_write_real(file);
	if (!ret) {
		sb_start_write(file_inode(file)->i_sb);
		ret = __mnt_want_write_file(file);
		if (ret)
			sb_end_write(file_inode(file)->i_sb);
	}
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);

/**
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done
 * performing writes to it.  Must be matched with
 * __mnt_want_write() call above.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	preempt_disable();
	mnt_dec_writers(real_mount(mnt));
	preempt_enable();
}

/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it
 * and also allows filesystem to be frozen again.  Must be matched with
 * mnt_want_write() call above.
 */
void mnt_drop_write(struct vfsmount *mnt)
{
	__mnt_drop_write(mnt);
	sb_end_write(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

void __mnt_drop_write_file(struct file *file)
{
	__mnt_drop_write(file->f_path.mnt);
}

void mnt_drop_write_file_path(struct file *file)
{
	mnt_drop_write(file->f_path.mnt);
}

void mnt_drop_write_file(struct file *file)
{
	__mnt_drop_write(file->f_path.mnt);
	sb_end_write(file_inode(file)->i_sb);
}
EXPORT_SYMBOL(mnt_drop_write_file);

static int mnt_make_readonly(struct mount *mnt)
{
	int ret = 0;

	lock_mount_hash();
	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
	/*
	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
	 * should be visible before we do.
	 */
	smp_mb();

	/*
	 * With writers on hold, if this value is zero, then there are
	 * definitely no active writers (although held writers may
	 * subsequently increment the count, they'll have to wait, and
	 * decrement it after seeing MNT_READONLY).
	 *
	 * It is OK to have a counter incremented on one CPU and decremented
	 * on another: the sum will add up correctly.  The danger would be
	 * summing a counter and then adding to one we've already summed,
	 * but that cannot happen because writers spin on MNT_WRITE_HOLD
	 * before incrementing while we hold the lock here.
	 */
	if (mnt_get_writers(mnt) > 0)
		ret = -EBUSY;
	else
		mnt->mnt.mnt_flags |= MNT_READONLY;
	/*
	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so
	 * writers that become unheld will see MNT_READONLY.
	 */
	smp_wmb();
	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	unlock_mount_hash();
	return ret;
}

static void __mnt_unmake_readonly(struct mount *mnt)
{
	lock_mount_hash();
	mnt->mnt.mnt_flags &= ~MNT_READONLY;
	unlock_mount_hash();
}

int sb_prepare_remount_readonly(struct super_block *sb)
{
	struct mount *mnt;
	int err = 0;

	/* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
	if (atomic_long_read(&sb->s_remove_count))
		return -EBUSY;

	lock_mount_hash();
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
			mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
			smp_mb();
			if (mnt_get_writers(mnt) > 0) {
				err = -EBUSY;
				break;
			}
		}
	}
	if (!err && atomic_long_read(&sb->s_remove_count))
		err = -EBUSY;

	if (!err) {
		sb->s_readonly_remount = 1;
		smp_wmb();
	}
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	}
	unlock_mount_hash();

	return err;
}

static void free_vfsmnt(struct mount *mnt)
{
	kfree_const(mnt->mnt_devname);
#ifdef CONFIG_SMP
	free_percpu(mnt->mnt_pcp);
#endif
	kmem_cache_free(mnt_cache, mnt);
}

static void delayed_free_vfsmnt(struct rcu_head *head)
{
	free_vfsmnt(container_of(head, struct mount, mnt_rcu));
}

/* call under rcu_read_lock */
int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
	struct mount *mnt;
	if (read_seqretry(&mount_lock, seq))
		return 1;
	if (bastard == NULL)
		return 0;
	mnt = real_mount(bastard);
	mnt_add_count(mnt, 1);
	if (likely(!read_seqretry(&mount_lock, seq)))
		return 0;
	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
		mnt_add_count(mnt, -1);
		return 1;
	}
	return -1;
}

/* call under rcu_read_lock */
bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
	int res = __legitimize_mnt(bastard, seq);
	if (likely(!res))
		return true;
	if (unlikely(res < 0)) {
		rcu_read_unlock();
		mntput(bastard);
		rcu_read_lock();
	}
	return false;
}

/*
 * find the first mount at @dentry on vfsmount @mnt.
 * call under rcu_read_lock()
 */
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct hlist_head *head = m_hash(mnt, dentry);
	struct mount *p;

	hlist_for_each_entry_rcu(p, head, mnt_hash)
		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
			return p;
	return NULL;
}

/*
 * lookup_mnt - Return the first child mount mounted at path
 *
 * "First" means first mounted chronologically.  If you create the
 * following mounts:
 *
 * mount /dev/sda1 /mnt
 * mount /dev/sda2 /mnt
 * mount /dev/sda3 /mnt
 *
 * Then lookup_mnt() on the base /mnt dentry in the root mount will
 * return successively the root dentry and vfsmount of /dev/sda1, then
 * /dev/sda2, then /dev/sda3, then NULL.
 *
 * lookup_mnt takes a reference to the found vfsmount.
 */
struct vfsmount *lookup_mnt(const struct path *path)
{
	struct mount *child_mnt;
	struct vfsmount *m;
	unsigned seq;

	rcu_read_lock();
	do {
		seq = read_seqbegin(&mount_lock);
		child_mnt = __lookup_mnt(path->mnt, path->dentry);
		m = child_mnt ? &child_mnt->mnt : NULL;
	} while (!legitimize_mnt(m, seq));
	rcu_read_unlock();
	return m;
}
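
/*
 * Illustrative sketch (not code from this file): the seqlock retry loop
 * above is the usual pattern for lockless mount lookups.  A caller that
 * wants the mount covering a path can simply do:
 *
 *	struct vfsmount *child = lookup_mnt(&path);
 *	if (child) {
 *		... use child ...
 *		mntput(child);	// lookup_mnt took a reference
 *	}
 */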

/*
 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
 *                         current mount namespace.
 *
 * The common case is dentries are not mountpoints at all and that
 * test is handled inline.  For the slow case when we are actually
 * dealing with a mountpoint of some kind, walk through all of the
 * mounts in the current mount namespace and test to see if the dentry
 * is a mountpoint.
 *
 * The mount_hashtable is not usable in the context because we
 * need to identify all mounts that may be in the current mount
 * namespace not just a mount that happens to have some specified
 * parent mount.
 */
bool __is_local_mountpoint(struct dentry *dentry)
{
	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
	struct mount *mnt;
	bool is_covered = false;

	if (!d_mountpoint(dentry))
		goto out;

	down_read(&namespace_sem);
	list_for_each_entry(mnt, &ns->list, mnt_list) {
		is_covered = (mnt->mnt_mountpoint == dentry);
		if (is_covered)
			break;
	}
	up_read(&namespace_sem);
out:
	return is_covered;
}

static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
{
	struct hlist_head *chain = mp_hash(dentry);
	struct mountpoint *mp;

	hlist_for_each_entry(mp, chain, m_hash) {
		if (mp->m_dentry == dentry) {
			/* might be worth a WARN_ON() */
			if (d_unlinked(dentry))
				return ERR_PTR(-ENOENT);
			mp->m_count++;
			return mp;
		}
	}
	return NULL;
}

static struct mountpoint *get_mountpoint(struct dentry *dentry)
{
	struct mountpoint *mp, *new = NULL;
	int ret;

	if (d_mountpoint(dentry)) {
mountpoint:
		read_seqlock_excl(&mount_lock);
		mp = lookup_mountpoint(dentry);
		read_sequnlock_excl(&mount_lock);
		if (mp)
			goto done;
	}

	if (!new)
		new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
	if (!new)
		return ERR_PTR(-ENOMEM);

	/* Exactly one process may set d_mounted */
	ret = d_set_mounted(dentry);

	/* Someone else set d_mounted? */
	if (ret == -EBUSY)
		goto mountpoint;

	/* The dentry is not available as a mountpoint? */
	mp = ERR_PTR(ret);
	if (ret)
		goto done;

	/* Add the new mountpoint to the hash table */
	read_seqlock_excl(&mount_lock);
	new->m_dentry = dentry;
	new->m_count = 1;
	hlist_add_head(&new->m_hash, mp_hash(dentry));
	INIT_HLIST_HEAD(&new->m_list);
	read_sequnlock_excl(&mount_lock);

	mp = new;
	new = NULL;
done:
	kfree(new);
	return mp;
}

static void put_mountpoint(struct mountpoint *mp)
{
	if (!--mp->m_count) {
		struct dentry *dentry = mp->m_dentry;
		BUG_ON(!hlist_empty(&mp->m_list));
		spin_lock(&dentry->d_lock);
		dentry->d_flags &= ~DCACHE_MOUNTED;
		spin_unlock(&dentry->d_lock);
		hlist_del(&mp->m_hash);
		kfree(mp);
	}
}

static inline int check_mnt(struct mount *mnt)
{
	return mnt->mnt_ns == current->nsproxy->mnt_ns;
}

/*
 * vfsmount lock must be held for write
 */
static void touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns) {
		ns->event = ++event;
		wake_up_interruptible(&ns->poll);
	}
}

/*
 * vfsmount lock must be held for write
 */
static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns && ns->event != event) {
		ns->event = event;
		wake_up_interruptible(&ns->poll);
	}
}

/*
 * vfsmount lock must be held for write
 */
static void unhash_mnt(struct mount *mnt)
{
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	list_del_init(&mnt->mnt_child);
	hlist_del_init_rcu(&mnt->mnt_hash);
	hlist_del_init(&mnt->mnt_mp_list);
	put_mountpoint(mnt->mnt_mp);
	mnt->mnt_mp = NULL;
}

/*
 * vfsmount lock must be held for write
 */
static void detach_mnt(struct mount *mnt, struct path *old_path)
{
	old_path->dentry = mnt->mnt_mountpoint;
	old_path->mnt = &mnt->mnt_parent->mnt;
	unhash_mnt(mnt);
}

/*
 * vfsmount lock must be held for write
 */
static void umount_mnt(struct mount *mnt)
{
	/* old mountpoint will be dropped when we can do that */
	mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
	unhash_mnt(mnt);
}

/*
 * vfsmount lock must be held for write
 */
void mnt_set_mountpoint(struct mount *mnt,
			struct mountpoint *mp,
			struct mount *child_mnt)
{
	mp->m_count++;
	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
	child_mnt->mnt_mountpoint = dget(mp->m_dentry);
	child_mnt->mnt_parent = mnt;
	child_mnt->mnt_mp = mp;
	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}

static void __attach_mnt(struct mount *mnt, struct mount *parent)
{
	hlist_add_head_rcu(&mnt->mnt_hash,
			   m_hash(&parent->mnt, mnt->mnt_mountpoint));
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
}

/*
 * vfsmount lock must be held for write
 */
static void attach_mnt(struct mount *mnt,
			struct mount *parent,
			struct mountpoint *mp)
{
	mnt_set_mountpoint(parent, mp, mnt);
	__attach_mnt(mnt, parent);
}

void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
{
	struct mountpoint *old_mp = mnt->mnt_mp;
	struct dentry *old_mountpoint = mnt->mnt_mountpoint;
	struct mount *old_parent = mnt->mnt_parent;

	list_del_init(&mnt->mnt_child);
	hlist_del_init(&mnt->mnt_mp_list);
	hlist_del_init_rcu(&mnt->mnt_hash);

	attach_mnt(mnt, parent, mp);

	put_mountpoint(old_mp);

	/*
	 * Safely avoid even the suggestion this code might sleep or
	 * lock the mount hash by taking advantage of the knowledge that
	 * mnt_change_mountpoint will not release the final reference
	 * to a mountpoint.
	 *
	 * During mounting, the mount passed in as the parent mount will
	 * continue to use the old mountpoint and during unmounting, the
	 * old mountpoint will continue to exist until namespace_unlock,
	 * which happens well after mnt_change_mountpoint.
	 */
	spin_lock(&old_mountpoint->d_lock);
	old_mountpoint->d_lockref.count--;
	spin_unlock(&old_mountpoint->d_lock);

	mnt_add_count(old_parent, -1);
}

/*
 * vfsmount lock must be held for write
 */
static void commit_tree(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m;
	LIST_HEAD(head);
	struct mnt_namespace *n = parent->mnt_ns;

	BUG_ON(parent == mnt);

	list_add_tail(&head, &mnt->mnt_list);
	list_for_each_entry(m, &head, mnt_list)
		m->mnt_ns = n;

	list_splice(&head, n->list.prev);

	n->mounts += n->pending_mounts;
	n->pending_mounts = 0;

	__attach_mnt(mnt, parent);
	touch_mnt_namespace(n);
}

static struct mount *next_mnt(struct mount *p, struct mount *root)
{
	struct list_head *next = p->mnt_mounts.next;
	if (next == &p->mnt_mounts) {
		while (1) {
			if (p == root)
				return NULL;
			next = p->mnt_child.next;
			if (next != &p->mnt_parent->mnt_mounts)
				break;
			p = p->mnt_parent;
		}
	}
	return list_entry(next, struct mount, mnt_child);
}

static struct mount *skip_mnt_tree(struct mount *p)
{
	struct list_head *prev = p->mnt_mounts.prev;
	while (prev != &p->mnt_mounts) {
		p = list_entry(prev, struct mount, mnt_child);
		prev = p->mnt_mounts.prev;
	}
	return p;
}

struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct mount *mnt;
	struct dentry *root;

	if (!type)
		return ERR_PTR(-ENODEV);

	mnt = alloc_vfsmnt(name);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flags & SB_KERNMOUNT)
		mnt->mnt.mnt_flags = MNT_INTERNAL;

	root = mount_fs(type, flags, name, data);
	if (IS_ERR(root)) {
		mnt_free_id(mnt);
		free_vfsmnt(mnt);
		return ERR_CAST(root);
	}

	mnt->mnt.mnt_root = root;
	mnt->mnt.mnt_sb = root->d_sb;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
	unlock_mount_hash();
	return &mnt->mnt;
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);
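
/*
 * Illustrative sketch (assumed caller, not code from this file): in-kernel
 * users typically mount a filesystem they registered like this:
 *
 *	struct vfsmount *mnt;
 *
 *	mnt = vfs_kern_mount(&my_fs_type, SB_KERNMOUNT, my_fs_type.name, NULL);
 *	if (IS_ERR(mnt))
 *		return PTR_ERR(mnt);
 *
 * "my_fs_type" is a hypothetical file_system_type used only for the example.
 */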

struct vfsmount *
vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
	     const char *name, void *data)
{
	/* Until it is worked out how to pass the user namespace
	 * through from the parent mount to the submount don't support
	 * unprivileged mounts with submounts.
	 */
	if (mountpoint->d_sb->s_user_ns != &init_user_ns)
		return ERR_PTR(-EPERM);

	return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
}
EXPORT_SYMBOL_GPL(vfs_submount);

static struct mount *clone_mnt(struct mount *old, struct dentry *root,
					int flag)
{
	struct super_block *sb = old->mnt.mnt_sb;
	struct mount *mnt;
	int err;

	mnt = alloc_vfsmnt(old->mnt_devname);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
		mnt->mnt_group_id = 0; /* not a peer of original */
	else
		mnt->mnt_group_id = old->mnt_group_id;

	if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
		err = mnt_alloc_group_id(mnt);
		if (err)
			goto out_free;
	}

	mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
	/* Don't allow unprivileged users to change mount flags */
	if (flag & CL_UNPRIVILEGED) {
		mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;

		if (mnt->mnt.mnt_flags & MNT_READONLY)
			mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;

		if (mnt->mnt.mnt_flags & MNT_NODEV)
			mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;

		if (mnt->mnt.mnt_flags & MNT_NOSUID)
			mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;

		if (mnt->mnt.mnt_flags & MNT_NOEXEC)
			mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
	}

	/* Don't allow unprivileged users to reveal what is under a mount */
	if ((flag & CL_UNPRIVILEGED) &&
	    (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
		mnt->mnt.mnt_flags |= MNT_LOCKED;

	atomic_inc(&sb->s_active);
	mnt->mnt.mnt_sb = sb;
	mnt->mnt.mnt_root = dget(root);
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
	unlock_mount_hash();

	if ((flag & CL_SLAVE) ||
	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
		list_add(&mnt->mnt_slave, &old->mnt_slave_list);
		mnt->mnt_master = old;
		CLEAR_MNT_SHARED(mnt);
	} else if (!(flag & CL_PRIVATE)) {
		if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
			list_add(&mnt->mnt_share, &old->mnt_share);
		if (IS_MNT_SLAVE(old))
			list_add(&mnt->mnt_slave, &old->mnt_slave);
		mnt->mnt_master = old->mnt_master;
	} else {
		CLEAR_MNT_SHARED(mnt);
	}
	if (flag & CL_MAKE_SHARED)
		set_mnt_shared(mnt);

	/* stick the duplicate mount on the same expiry list
	 * as the original if that was on one */
	if (flag & CL_EXPIRE) {
		if (!list_empty(&old->mnt_expire))
			list_add(&mnt->mnt_expire, &old->mnt_expire);
	}

	return mnt;

 out_free:
	mnt_free_id(mnt);
	free_vfsmnt(mnt);
	return ERR_PTR(err);
}

static void cleanup_mnt(struct mount *mnt)
{
	/*
	 * A non-zero writer count here probably indicates that somebody
	 * messed up a mnt_want/drop_write() pair.  If this happens, the
	 * filesystem was probably unable to make r/w->r/o transitions.
	 * The locking used to deal with mnt_count decrement provides
	 * barriers, so mnt_get_writers() below is safe.
	 */
	WARN_ON(mnt_get_writers(mnt));
	if (unlikely(mnt->mnt_pins.first))
		mnt_pin_kill(mnt);
	fsnotify_vfsmount_delete(&mnt->mnt);
	dput(mnt->mnt.mnt_root);
	deactivate_super(mnt->mnt.mnt_sb);
	mnt_free_id(mnt);
	call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
}

static void __cleanup_mnt(struct rcu_head *head)
{
	cleanup_mnt(container_of(head, struct mount, mnt_rcu));
}

static LLIST_HEAD(delayed_mntput_list);
static void delayed_mntput(struct work_struct *unused)
{
	struct llist_node *node = llist_del_all(&delayed_mntput_list);
	struct mount *m, *t;

	llist_for_each_entry_safe(m, t, node, mnt_llist)
		cleanup_mnt(m);
}
static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);

static void mntput_no_expire(struct mount *mnt)
{
	rcu_read_lock();
	mnt_add_count(mnt, -1);
	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
		rcu_read_unlock();
		return;
	}
	lock_mount_hash();
	if (mnt_get_count(mnt)) {
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	mnt->mnt.mnt_flags |= MNT_DOOMED;
	rcu_read_unlock();

	list_del(&mnt->mnt_instance);

	if (unlikely(!list_empty(&mnt->mnt_mounts))) {
		struct mount *p, *tmp;
		list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
			umount_mnt(p);
		}
	}
	unlock_mount_hash();

	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
		struct task_struct *task = current;
		if (likely(!(task->flags & PF_KTHREAD))) {
			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
			if (!task_work_add(task, &mnt->mnt_rcu, true))
				return;
		}
		if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
			schedule_delayed_work(&delayed_mntput_work, 1);
		return;
	}
	cleanup_mnt(mnt);
}

void mntput(struct vfsmount *mnt)
{
	if (mnt) {
		struct mount *m = real_mount(mnt);
		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
		if (unlikely(m->mnt_expiry_mark))
			m->mnt_expiry_mark = 0;
		mntput_no_expire(m);
	}
}
EXPORT_SYMBOL(mntput);

struct vfsmount *mntget(struct vfsmount *mnt)
{
	if (mnt)
		mnt_add_count(real_mount(mnt), 1);
	return mnt;
}
EXPORT_SYMBOL(mntget);
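
/*
 * Illustrative sketch (not code from this file): mntget()/mntput() are
 * the reference-counting pair for vfsmounts; any code that stashes a
 * vfsmount pointer takes and drops a reference around the use:
 *
 *	struct vfsmount *m = mntget(path->mnt);
 *	... use m after path may have been released ...
 *	mntput(m);
 */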

/* path_is_mountpoint() - Check if path is a mount in the current
 *                        namespace.
 *
 * d_mountpoint() can only be used reliably to establish if a dentry is
 * not mounted in any namespace and that common case is handled inline.
 * d_mountpoint() isn't aware of the possibility there may be multiple
 * mounts using a given dentry in a different namespace. This function
 * checks if the passed in path is a mountpoint rather than the dentry
 * alone.
 */
bool path_is_mountpoint(const struct path *path)
{
	unsigned seq;
	bool res;

	if (!d_mountpoint(path->dentry))
		return false;

	rcu_read_lock();
	do {
		seq = read_seqbegin(&mount_lock);
		res = __path_is_mountpoint(path);
	} while (read_seqretry(&mount_lock, seq));
	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL(path_is_mountpoint);

struct vfsmount *mnt_clone_internal(const struct path *path)
{
	struct mount *p;
	p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
	if (IS_ERR(p))
		return ERR_CAST(p);
	p->mnt.mnt_flags |= MNT_INTERNAL;
	return &p->mnt;
}

#ifdef CONFIG_PROC_FS
/* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct proc_mounts *p = m->private;

	down_read(&namespace_sem);
	if (p->cached_event == p->ns->event) {
		void *v = p->cached_mount;
		if (*pos == p->cached_index)
			return v;
		if (*pos == p->cached_index + 1) {
			v = seq_list_next(v, &p->ns->list, &p->cached_index);
			return p->cached_mount = v;
		}
	}

	p->cached_event = p->ns->event;
	p->cached_mount = seq_list_start(&p->ns->list, *pos);
	p->cached_index = *pos;
	return p->cached_mount;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_mounts *p = m->private;

	p->cached_mount = seq_list_next(v, &p->ns->list, pos);
	p->cached_index = *pos;
	return p->cached_mount;
}

static void m_stop(struct seq_file *m, void *v)
{
	up_read(&namespace_sem);
}

static int m_show(struct seq_file *m, void *v)
{
	struct proc_mounts *p = m->private;
	struct mount *r = list_entry(v, struct mount, mnt_list);
	return p->show(m, &r->mnt);
}

const struct seq_operations mounts_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= m_show,
};
#endif  /* CONFIG_PROC_FS */

/**
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
int may_umount_tree(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int actual_refs = 0;
	int minimum_refs = 0;
	struct mount *p;
	BUG_ON(!m);

	/* write lock needed for mnt_get_count */
	lock_mount_hash();
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		actual_refs += mnt_get_count(p);
		minimum_refs += 2;
	}
	unlock_mount_hash();

	if (actual_refs > minimum_refs)
		return 0;

	return 1;
}

EXPORT_SYMBOL(may_umount_tree);

/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
int may_umount(struct vfsmount *mnt)
{
	int ret = 1;
	down_read(&namespace_sem);
	lock_mount_hash();
	if (propagate_mount_busy(real_mount(mnt), 2))
		ret = 0;
	unlock_mount_hash();
	up_read(&namespace_sem);
	return ret;
}

EXPORT_SYMBOL(may_umount);

static HLIST_HEAD(unmounted);	/* protected by namespace_sem */

static void namespace_unlock(void)
{
	struct hlist_head head;

	hlist_move_list(&unmounted, &head);

	up_write(&namespace_sem);

	if (likely(hlist_empty(&head)))
		return;

	synchronize_rcu();

	group_pin_kill(&head);
}

static inline void namespace_lock(void)
{
	down_write(&namespace_sem);
}

enum umount_tree_flags {
	UMOUNT_SYNC = 1,
	UMOUNT_PROPAGATE = 2,
	UMOUNT_CONNECTED = 4,
};

static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
{
	/* Leaving mounts connected is only valid for lazy umounts */
	if (how & UMOUNT_SYNC)
		return true;

	/* A mount without a parent has nothing to be connected to */
	if (!mnt_has_parent(mnt))
		return true;

	/* Because the reference counting rules change when mounts are
	 * unmounted and connected, umounted mounts may only be connected
	 * to mounts whose parents are unmounted.
	 */
	if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
		return true;

	/* Has it been requested that the mount remain connected? */
	if (how & UMOUNT_CONNECTED)
		return false;

	/* Is the mount locked such that it needs to remain connected? */
	if (IS_MNT_LOCKED(mnt))
		return false;

	/* By default disconnect the mount */
	return true;
}

/*
 * mount_lock must be held
 * namespace_sem must be held for write
 */
static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
{
	LIST_HEAD(tmp_list);
	struct mount *p;

	if (how & UMOUNT_PROPAGATE)
		propagate_mount_unlock(mnt);

	/* Gather the mounts to umount */
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		p->mnt.mnt_flags |= MNT_UMOUNT;
		list_move(&p->mnt_list, &tmp_list);
	}

	/* Hide the mounts from mnt_mounts */
	list_for_each_entry(p, &tmp_list, mnt_list) {
		list_del_init(&p->mnt_child);
	}

	/* Add propagated mounts to the tmp_list */
	if (how & UMOUNT_PROPAGATE)
		propagate_umount(&tmp_list);

	while (!list_empty(&tmp_list)) {
		struct mnt_namespace *ns;
		bool disconnect;
		p = list_first_entry(&tmp_list, struct mount, mnt_list);
		list_del_init(&p->mnt_expire);
		list_del_init(&p->mnt_list);
		ns = p->mnt_ns;
		if (ns) {
			ns->mounts--;
			__touch_mnt_namespace(ns);
		}
		p->mnt_ns = NULL;
		if (how & UMOUNT_SYNC)
			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;

		disconnect = disconnect_mount(p, how);

		pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
				 disconnect ? &unmounted : NULL);
		if (mnt_has_parent(p)) {
			mnt_add_count(p->mnt_parent, -1);
			if (!disconnect) {
				/* Don't forget about p */
				list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
			} else {
				umount_mnt(p);
			}
		}
		change_mnt_propagation(p, MS_PRIVATE);
	}
}

static void shrink_submounts(struct mount *mnt);

static int do_umount(struct mount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt.mnt_sb;
	int retval;

	retval = security_sb_umount(&mnt->mnt, flags);
	if (retval)
		return retval;

	/*
	 * Allow userspace to request a mountpoint be expired rather than
	 * unmounting unconditionally. Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
	if (flags & MNT_EXPIRE) {
		if (&mnt->mnt == current->fs->root.mnt ||
		    flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		/*
		 * probably don't strictly need the lock here if we examined
		 * all race cases, but it's a slowpath.
		 */
		lock_mount_hash();
		if (mnt_get_count(mnt) != 2) {
			unlock_mount_hash();
			return -EBUSY;
		}
		unlock_mount_hash();

		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things. In the Unix tradition of
	 * 'Gee thats tricky lets do it in userspace' the umount_begin
	 * might fail to complete on the first run through as other tasks
	 * must return, and the like. Thats for the mount program to worry
	 * about for the moment.
	 */
	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
		sb->s_op->umount_begin(sb);
	}

	/*
	 * No sense to grab the lock for this test, but test itself looks
	 * somewhat bogus. Suggestions for better replacement?
	 * Ho-hum... In principle, we might treat that as umount + switch
	 * to rootfs. GC would eventually take care of the old vfsmount.
	 * Actually it makes sense, especially if rootfs would contain a
	 * /reboot - static binary that would close all descriptors and
	 * call reboot(9). Then init(8) could umount root and exec /reboot.
	 */
	if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		down_write(&sb->s_umount);
		if (!sb_rdonly(sb))
			retval = do_remount_sb(sb, SB_RDONLY, NULL, 0);
		up_write(&sb->s_umount);
		return retval;
	}

	namespace_lock();
	lock_mount_hash();
	event++;

	if (flags & MNT_DETACH) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, UMOUNT_PROPAGATE);
		retval = 0;
	} else {
		shrink_submounts(mnt);
		retval = -EBUSY;
		if (!propagate_mount_busy(mnt, 2)) {
			if (!list_empty(&mnt->mnt_list))
				umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
			retval = 0;
		}
	}
	unlock_mount_hash();
	namespace_unlock();
	return retval;
}

/*
 * __detach_mounts - lazily unmount all mounts on the specified dentry
 *
 * During unlink, rmdir, and d_drop it is possible to loose the path
 * to an existing mountpoint, and wind up leaking the mount.
 * detach_mounts allows lazily unmounting those mounts instead of
 * leaking them.
 *
 * The caller may hold dentry->d_inode->i_mutex.
 */
void __detach_mounts(struct dentry *dentry)
{
	struct mountpoint *mp;
	struct mount *mnt;

	namespace_lock();
	lock_mount_hash();
	mp = lookup_mountpoint(dentry);
	if (IS_ERR_OR_NULL(mp))
		goto out_unlock;

	event++;
	while (!hlist_empty(&mp->m_list)) {
		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
		if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
			hlist_add_head(&mnt->mnt_umount.s_list, &unmounted);
			umount_mnt(mnt);
		}
		else umount_tree(mnt, UMOUNT_CONNECTED);
	}
	put_mountpoint(mp);
out_unlock:
	unlock_mount_hash();
	namespace_unlock();
}

/*
 * Is the caller allowed to modify his namespace?
 */
static inline bool may_mount(void)
{
	return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}

static inline bool may_mandlock(void)
{
#ifndef	CONFIG_MANDATORY_FILE_LOCKING
	return false;
#endif
	return capable(CAP_SYS_ADMIN);
}

/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes.
 */
SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	struct path path;
	struct mount *mnt;
	int retval;
	int lookup_flags = 0;

	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
		return -EINVAL;

	if (!may_mount())
		return -EPERM;

	if (!(flags & UMOUNT_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;

	retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
	if (retval)
		goto out;
	mnt = real_mount(path.mnt);
	retval = -EINVAL;
	if (path.dentry != path.mnt->mnt_root)
		goto dput_and_out;
	if (!check_mnt(mnt))
		goto dput_and_out;
	if (mnt->mnt.mnt_flags & MNT_LOCKED)
		goto dput_and_out;
	retval = -EPERM;
	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
		goto dput_and_out;

	retval = do_umount(mnt, flags);
dput_and_out:
	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
	dput(path.dentry);
	mntput_no_expire(mnt);
out:
	return retval;
}
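
/*
 * Illustrative sketch (userspace side, not code from this file): a lazy
 * unmount request such as
 *
 *	umount2("/mnt/data", MNT_DETACH);
 *
 * arrives here with flags == MNT_DETACH and ends up in do_umount(),
 * which detaches the tree without waiting for it to become unused.
 */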

#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 *	The 2.0 compatible umount. No flags.
 */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return sys_umount(name, 0);
}

#endif

static bool is_mnt_ns_file(struct dentry *dentry)
{
	/* Is this a proxy for a mount namespace? */
	return dentry->d_op == &ns_dentry_operations &&
	       dentry->d_fsdata == &mntns_operations;
}

struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
	return container_of(ns, struct mnt_namespace, ns);
}

static bool mnt_ns_loop(struct dentry *dentry)
{
	/* Could bind mounting the mount namespace inode cause a
	 * mount namespace loop?
	 */
	struct mnt_namespace *mnt_ns;
	if (!is_mnt_ns_file(dentry))
		return false;

	mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}

struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
					int flag)
{
	struct mount *res, *p, *q, *r, *parent;

	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
		return ERR_PTR(-EINVAL);

	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		struct mount *s;
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			if (!(flag & CL_COPY_UNBINDABLE) &&
			    IS_MNT_UNBINDABLE(s)) {
				s = skip_mnt_tree(s);
				continue;
			}
			if (!(flag & CL_COPY_MNT_NS_FILE) &&
			    is_mnt_ns_file(s->mnt.mnt_root)) {
				s = skip_mnt_tree(s);
				continue;
			}
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			parent = q;
			q = clone_mnt(p, p->mnt.mnt_root, flag);
			if (IS_ERR(q))
				goto out;
			lock_mount_hash();
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, parent, p->mnt_mp);
			unlock_mount_hash();
		}
	}
	return res;
out:
	if (res) {
		lock_mount_hash();
		umount_tree(res, UMOUNT_SYNC);
		unlock_mount_hash();
	}
	return q;
}

/* Caller should check returned pointer for errors */

struct vfsmount *collect_mounts(const struct path *path)
{
	struct mount *tree;
	namespace_lock();
	if (!check_mnt(real_mount(path->mnt)))
		tree = ERR_PTR(-EINVAL);
	else
		tree = copy_tree(real_mount(path->mnt), path->dentry,
				 CL_COPY_ALL | CL_PRIVATE);
	namespace_unlock();
	if (IS_ERR(tree))
		return ERR_CAST(tree);
	return &tree->mnt;
}

void drop_collected_mounts(struct vfsmount *mnt)
{
	namespace_lock();
	lock_mount_hash();
	umount_tree(real_mount(mnt), UMOUNT_SYNC);
	unlock_mount_hash();
	namespace_unlock();
}

/**
 * clone_private_mount - create a private clone of a path
 *
 * This creates a new vfsmount, which will be the clone of @path.  The new
 * will not be attached anywhere in the namespace and will be private (i.e.
 * changes to the originating mount won't be propagated into this).
 *
 * Release with mntput().
 */
struct vfsmount *clone_private_mount(const struct path *path)
{
	struct mount *old_mnt = real_mount(path->mnt);
	struct mount *new_mnt;

	if (IS_MNT_UNBINDABLE(old_mnt))
		return ERR_PTR(-EINVAL);

	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
	if (IS_ERR(new_mnt))
		return ERR_CAST(new_mnt);

	return &new_mnt->mnt;
}
EXPORT_SYMBOL_GPL(clone_private_mount);

int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
		   struct vfsmount *root)
{
	struct mount *mnt;
	int res = f(root, arg);
	if (res)
		return res;
	list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
		res = f(&mnt->mnt, arg);
		if (res)
			return res;
	}
	return 0;
}

static void cleanup_group_ids(struct mount *mnt, struct mount *end)
{
	struct mount *p;

	for (p = mnt; p != end; p = next_mnt(p, mnt)) {
		if (p->mnt_group_id && !IS_MNT_SHARED(p))
			mnt_release_group_id(p);
	}
}

static int invent_group_ids(struct mount *mnt, bool recurse)
{
	struct mount *p;

	for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
		if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
			int err = mnt_alloc_group_id(p);
			if (err) {
				cleanup_group_ids(mnt, p);
				return err;
			}
		}
	}

	return 0;
}

int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
{
	unsigned int max = READ_ONCE(sysctl_mount_max);
	unsigned int mounts = 0, old, pending, sum;
	struct mount *p;

	for (p = mnt; p; p = next_mnt(p, mnt))
		mounts++;

	old = ns->mounts;
	pending = ns->pending_mounts;
	sum = old + pending;
	if ((old > sum) ||
	    (pending > sum) ||
	    (max < sum) ||
	    (mounts > (max - sum)))
		return -ENOSPC;

	ns->pending_mounts = pending + mounts;
	return 0;
}
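
/*
 * Worked example for the checks above (illustrative numbers): with
 * sysctl_mount_max == 100000, old == 99990 and pending == 5, sum is
 * 99995; attaching a tree of 10 mounts fails because 10 > 100000 - 99995.
 * The "old > sum" and "pending > sum" tests catch unsigned overflow of
 * old + pending itself.
 */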
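
/*
 * attach_recursive_mnt - attach a source mount tree at a destination
 *
 * @source_mnt:  mount tree to be attached
 * @dest_mnt:    mount at which @source_mnt is attached
 * @dest_mp:     mountpoint dentry of the attachment point
 * @parent_path: if non-NULL, this is a move: detach @source_mnt from its
 *               current parent and store the old parent/mountpoint here
 *
 * Condensed summary of the propagation semantics (the full bind/move
 * decision tables live in Documentation/filesystems/sharedsubtree.txt):
 * attaching under a shared destination propagates the new mount to every
 * member of the destination's peer group and to its slaves, and the
 * attached tree is marked shared; attaching under a non-shared
 * destination keeps the source's existing sharing type.  Moves follow
 * the same rules, and unbindable sources are rejected by the callers.
 */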
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			struct path *parent_path)
{
	HLIST_HEAD(tree_list);
	struct mnt_namespace *ns = dest_mnt->mnt_ns;
	struct mountpoint *smp;
	struct mount *child, *p;
	struct hlist_node *n;
	int err;

	/* Preallocate a mountpoint in case the new mounts need
	 * to be tucked under other mounts.
	 */
	smp = get_mountpoint(source_mnt->mnt.mnt_root);
	if (IS_ERR(smp))
		return PTR_ERR(smp);

	/* Is there space to add these mounts to the mount namespace? */
	if (!parent_path) {
		err = count_mounts(ns, source_mnt);
		if (err)
			goto out;
	}

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		lock_mount_hash();
		if (err)
			goto out_cleanup_ids;
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	} else {
		lock_mount_hash();
	}
	if (parent_path) {
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt);
	}

	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
		struct mount *q;
		hlist_del_init(&child->mnt_hash);
		q = __lookup_mnt(&child->mnt_parent->mnt,
				 child->mnt_mountpoint);
		if (q)
			mnt_change_mountpoint(child, smp, q);
		commit_tree(child);
	}
	put_mountpoint(smp);
	unlock_mount_hash();

	return 0;

 out_cleanup_ids:
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		child->mnt_parent->mnt_ns->pending_mounts = 0;
		umount_tree(child, UMOUNT_SYNC);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
 out:
	ns->pending_mounts = 0;

	read_seqlock_excl(&mount_lock);
	put_mountpoint(smp);
	read_sequnlock_excl(&mount_lock);

	return err;
}

static struct mountpoint *lock_mount(struct path *path)
{
	struct vfsmount *mnt;
	struct dentry *dentry = path->dentry;
retry:
	inode_lock(dentry->d_inode);
	if (unlikely(cant_mount(dentry))) {
		inode_unlock(dentry->d_inode);
		return ERR_PTR(-ENOENT);
	}
	namespace_lock();
	mnt = lookup_mnt(path);
	if (likely(!mnt)) {
		struct mountpoint *mp = get_mountpoint(dentry);
		if (IS_ERR(mp)) {
			namespace_unlock();
			inode_unlock(dentry->d_inode);
			return mp;
		}
		return mp;
	}
	namespace_unlock();
	inode_unlock(path->dentry->d_inode);
	path_put(path);
	path->mnt = mnt;
	dentry = path->dentry = dget(mnt->mnt_root);
	goto retry;
}

static void unlock_mount(struct mountpoint *where)
{
	struct dentry *dentry = where->m_dentry;

	read_seqlock_excl(&mount_lock);
	put_mountpoint(where);
	read_sequnlock_excl(&mount_lock);

	namespace_unlock();
	inode_unlock(dentry->d_inode);
}
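
/*
 * Illustrative sketch (not code from this file): every attachment path in
 * this file brackets the actual tree surgery with this pair, e.g.:
 *
 *	struct mountpoint *mp = lock_mount(path);
 *	if (IS_ERR(mp))
 *		return PTR_ERR(mp);
 *	... graft or move mounts at mp ...
 *	unlock_mount(mp);
 *
 * lock_mount() chases to the top of any mount stack at the path, so the
 * returned mountpoint is always the currently visible one.
 */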

static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
{
	if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
		return -EINVAL;

	if (d_is_dir(mp->m_dentry) !=
	      d_is_dir(mnt->mnt.mnt_root))
		return -ENOTDIR;

	return attach_recursive_mnt(mnt, p, mp, NULL);
}

/*
 * Sanity check the flags to change_mnt_propagation.
 */
static int flags_to_propagation_type(int ms_flags)
{
	int type = ms_flags & ~(MS_REC | MS_SILENT);

	/* Fail if any non-propagation flags are set */
	if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		return 0;
	/* Only one propagation flag should be set */
	if (!is_power_of_2(type))
		return 0;
	return type;
}
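
/*
 * Illustrative mapping (not code from this file): a userspace call like
 *
 *	mount(NULL, "/mnt", NULL, MS_SHARED | MS_REC, NULL);
 *
 * reaches do_change_type() below with ms_flags containing MS_SHARED and
 * MS_REC; flags_to_propagation_type() strips MS_REC and returns MS_SHARED.
 */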

/*
 * recursively change the type of the mountpoint.
 */
static int do_change_type(struct path *path, int ms_flags)
{
	struct mount *m;
	struct mount *mnt = real_mount(path->mnt);
	int recurse = ms_flags & MS_REC;
	int type;
	int err = 0;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	type = flags_to_propagation_type(ms_flags);
	if (!type)
		return -EINVAL;

	namespace_lock();
	if (type == MS_SHARED) {
		err = invent_group_ids(mnt, recurse);
		if (err)
			goto out_unlock;
	}

	lock_mount_hash();
	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
		change_mnt_propagation(m, type);
	unlock_mount_hash();

 out_unlock:
	namespace_unlock();
	return err;
}

static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	struct mount *child;
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (!is_subdir(child->mnt_mountpoint, dentry))
			continue;

		if (child->mnt.mnt_flags & MNT_LOCKED)
			return true;
	}
	return false;
}

/*
 * do loopback mount.
 */
static int do_loopback(struct path *path, const char *old_name,
				int recurse)
{
	struct path old_path;
	struct mount *mnt = NULL, *old, *parent;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
	if (err)
		return err;

	err = -EINVAL;
	if (mnt_ns_loop(old_path.dentry))
		goto out;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	parent = real_mount(path->mnt);

	err = -EINVAL;
	if (IS_MNT_UNBINDABLE(old))
		goto out2;

	if (!check_mnt(parent))
		goto out2;

	if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
		goto out2;

	if (!recurse && has_locked_children(old, old_path.dentry))
		goto out2;

	if (recurse)
		mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
	else
		mnt = clone_mnt(old, old_path.dentry, 0);

	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto out2;
	}

	mnt->mnt.mnt_flags &= ~MNT_LOCKED;

	err = graft_tree(mnt, parent, mp);
	if (err) {
		lock_mount_hash();
		umount_tree(mnt, UMOUNT_SYNC);
		unlock_mount_hash();
	}
out2:
	unlock_mount(mp);
out:
	path_put(&old_path);
	return err;
}

static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
{
	int error = 0;
	int readonly_request = 0;

	if (ms_flags & MS_RDONLY)
		readonly_request = 1;
	if (readonly_request == __mnt_is_readonly(mnt))
		return 0;

	if (readonly_request)
		error = mnt_make_readonly(real_mount(mnt));
	else
		__mnt_unmake_readonly(real_mount(mnt));
	return error;
}

/*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tell us about it.
 */
static int do_remount(struct path *path, int ms_flags, int sb_flags,
		      int mnt_flags, void *data)
{
	int err;
	struct super_block *sb = path->mnt->mnt_sb;
	struct mount *mnt = real_mount(path->mnt);

	if (!check_mnt(mnt))
		return -EINVAL;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	/* Don't allow changing of locked mnt flags.
	 *
	 * No locks need to be held here while testing the various
	 * MNT_LOCK flags because those flags can never be cleared
	 * once they are set.
	 */
	if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
	    !(mnt_flags & MNT_READONLY)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
	    !(mnt_flags & MNT_NODEV)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
	    !(mnt_flags & MNT_NOSUID)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
	    !(mnt_flags & MNT_NOEXEC)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
	    ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
		return -EPERM;
	}

	err = security_sb_remount(sb, data);
	if (err)
		return err;

	down_write(&sb->s_umount);
	if (ms_flags & MS_BIND)
		err = change_mount_flags(path->mnt, ms_flags);
	else if (!capable(CAP_SYS_ADMIN))
		err = -EPERM;
	else
		err = do_remount_sb(sb, sb_flags, data, 0);
	if (!err) {
		lock_mount_hash();
		mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
		mnt->mnt.mnt_flags = mnt_flags;
		touch_mnt_namespace(mnt->mnt_ns);
		unlock_mount_hash();
	}
	up_write(&sb->s_umount);
	return err;
}

static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *p;
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		if (IS_MNT_UNBINDABLE(p))
			return 1;
	}
	return 0;
}

static int do_move_mount(struct path *path, const char *old_name)
{
	struct path old_path, parent_path;
	struct mount *p;
	struct mount *old;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	if (err)
		return err;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	p = real_mount(path->mnt);

	err = -EINVAL;
	if (!check_mnt(p) || !check_mnt(old))
		goto out1;

	if (old->mnt.mnt_flags & MNT_LOCKED)
		goto out1;

	err = -EINVAL;
	if (old_path.dentry != old_path.mnt->mnt_root)
		goto out1;

	if (!mnt_has_parent(old))
		goto out1;

	if (d_is_dir(path->dentry) !=
	      d_is_dir(old_path.dentry))
		goto out1;
	/*
	 * Don't move a mount residing in a shared parent.
	 */
	if (IS_MNT_SHARED(old->mnt_parent))
		goto out1;
	/*
	 * Don't move a mount tree containing unbindable mounts to a
	 * destination mount which is shared.
	 */
	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
		goto out1;
	err = -ELOOP;
	for (; mnt_has_parent(p); p = p->mnt_parent)
		if (p == old)
			goto out1;

	err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
	if (err)
		goto out1;

	/* if the mount is moved, it should no longer be expired
	 * automatically */
	list_del_init(&old->mnt_expire);
out1:
	unlock_mount(mp);
out:
	if (!err)
		path_put(&parent_path);
	path_put(&old_path);
	return err;
}

static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
{
	int err;
	const char *subtype = strchr(fstype, '.');
	if (subtype) {
		subtype++;
		err = -EINVAL;
		if (!subtype[0])
			goto err;
	} else
		subtype = "";

	mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
	err = -ENOMEM;
	if (!mnt->mnt_sb->s_subtype)
		goto err;
	return mnt;

 err:
	mntput(mnt);
	return ERR_PTR(err);
}

/*
 * add a mount into a namespace's mount tree
 */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
	struct mountpoint *mp;
	struct mount *parent;
	int err;

	mnt_flags &= ~MNT_INTERNAL_FLAGS;

	mp = lock_mount(path);
	if (IS_ERR(mp))
		return PTR_ERR(mp);

	parent = real_mount(path->mnt);
	err = -EINVAL;
	if (unlikely(!check_mnt(parent))) {
		/* that's acceptable only for automounts done in private ns */
		if (!(mnt_flags & MNT_SHRINKABLE))
			goto unlock;
		/* ... and for those we'd better have mountpoint still alive */
		if (!parent->mnt_ns)
			goto unlock;
	}

	/* Refuse the same filesystem on the same mount point */
	err = -EBUSY;
	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
	    path->mnt->mnt_root == path->dentry)
		goto unlock;

	err = -EINVAL;
	if (d_is_symlink(newmnt->mnt.mnt_root))
		goto unlock;

	newmnt->mnt.mnt_flags = mnt_flags;
	err = graft_tree(newmnt, parent, mp);

unlock:
	unlock_mount(mp);
	return err;
}

static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags);

/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
			int mnt_flags, const char *name, void *data)
{
	struct file_system_type *type;
	struct vfsmount *mnt;
	int err;

	if (!fstype)
		return -EINVAL;

	type = get_fs_type(fstype);
	if (!type)
		return -ENODEV;

	mnt = vfs_kern_mount(type, sb_flags, name, data);
	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
	    !mnt->mnt_sb->s_subtype)
		mnt = fs_set_subtype(mnt, fstype);

	put_filesystem(type);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	if (mount_too_revealing(mnt, &mnt_flags)) {
		mntput(mnt);
		return -EPERM;
	}

	err = do_add_mount(real_mount(mnt), path, mnt_flags);
	if (err)
		mntput(mnt);
	return err;
}

int finish_automount(struct vfsmount *m, struct path *path)
{
	struct mount *mnt = real_mount(m);
	int err;
	/* The new mount record should have at least 2 refs to prevent it being
	 * expired before we get a chance to add it
	 */
	BUG_ON(mnt_get_count(mnt) < 2);

	if (m->mnt_sb == path->mnt->mnt_sb &&
	    m->mnt_root == path->dentry) {
		err = -ELOOP;
		goto fail;
	}

	err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
	if (!err)
		return 0;
fail:
	/* remove m from any expiration list it may be on */
	if (!list_empty(&mnt->mnt_expire)) {
		namespace_lock();
		list_del_init(&mnt->mnt_expire);
		namespace_unlock();
	}
	/* drop both references taken for the automount */
	mntput(m);
	mntput(m);
	return err;
}

/**
 * mnt_set_expiry - Put a mount on an expiration list
 * @mnt: The mount to put on the list.
 * @expiry_list: The list to put the mount on.
 */
void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
{
	namespace_lock();

	list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);

	namespace_unlock();
}
EXPORT_SYMBOL(mnt_set_expiry);

/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here
 */
void mark_mounts_for_expiry(struct list_head *mounts)
{
	struct mount *mnt, *next;
	LIST_HEAD(graveyard);

	if (list_empty(mounts))
		return;

	namespace_lock();
	lock_mount_hash();

	/* extract from the expiration list every vfsmount that matches the
	 * following criteria:
	 * - only referenced by its parent vfsmount
	 * - still marked for expiry (marked on the last call here; marks are
	 *   cleared by mntput())
	 */
	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
			propagate_mount_busy(mnt, 1))
			continue;
		list_move(&mnt->mnt_expire, &graveyard);
	}
	while (!list_empty(&graveyard)) {
		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
		touch_mnt_namespace(mnt->mnt_ns);
		umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
	}
	unlock_mount_hash();
	namespace_unlock();
}

EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);

/*
 * Ripoff of 'select_parent()'
 *
 * search the list of submounts for a given mountpoint, and move any
 * shrinkable submounts to the 'graveyard' list.
 */
static int select_submounts(struct mount *parent, struct list_head *graveyard)
{
	struct mount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct mount *mnt = list_entry(tmp, struct mount, mnt_child);

		next = tmp->next;
		if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
			continue;
		/*
		 * Descend a level if the mnt_mounts list is non-empty.
		 */
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		if (!propagate_mount_busy(mnt, 1)) {
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search
	 */
	if (this_parent != parent) {
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}

/*
 * process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint
 *
 * mount_lock must be held for write
 */
static void shrink_submounts(struct mount *mnt)
{
	LIST_HEAD(graveyard);
	struct mount *m;

	/* extract submounts of 'mountpoint' from the expiration list */
	while (select_submounts(mnt, &graveyard)) {
		while (!list_empty(&graveyard)) {
			m = list_first_entry(&graveyard, struct mount,
						mnt_expire);
			touch_mnt_namespace(m->mnt_ns);
			umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
		}
	}
}

/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault.  But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy.
 */
static long exact_copy_from_user(void *to, const void __user * from,
				 unsigned long n)
{
	char *t = to;
	const char __user *f = from;
	char c;

	if (!access_ok(VERIFY_READ, from, n))
		return n;

	while (n) {
		if (__get_user(c, f)) {
			memset(t, 0, n);
			break;
		}
		*t++ = c;
		f++;
		n--;
	}
	return n;
}

void *copy_mount_options(const void __user * data)
{
	int i;
	unsigned long size;
	char *copy;

	if (!data)
		return NULL;

	copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!copy)
		return ERR_PTR(-ENOMEM);

	/* We only care that *some* data at the address the user
	 * gave us is valid.  Just in case, we'll zero
	 * the remainder of the page.
	 */
	/* copy_from_user cannot cross TASK_SIZE ! */
	size = TASK_SIZE - (unsigned long)data;
	if (size > PAGE_SIZE)
		size = PAGE_SIZE;

	i = size - exact_copy_from_user(copy, data, size);
	if (!i) {
		kfree(copy);
		return ERR_PTR(-EFAULT);
	}
	if (i != PAGE_SIZE)
		memset(copy + i, 0, PAGE_SIZE - i);
	return copy;
}

char *copy_mount_string(const void __user *data)
{
	return data ? strndup_user(data, PAGE_SIZE) : NULL;
}

/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */
long do_mount(const char *dev_name, const char __user *dir_name,
	      const char *type_page, unsigned long flags, void *data_page)
{
	struct path path;
	unsigned int mnt_flags = 0, sb_flags;
	int retval = 0;

	/* Discard magic */
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

	/* Basic sanity checks */
	if (data_page)
		((char *)data_page)[PAGE_SIZE - 1] = 0;

	if (flags & MS_NOUSER)
		return -EINVAL;

	/* ... and get the mountpoint */
	retval = user_path(dir_name, &path);
	if (retval)
		return retval;

	retval = security_sb_mount(dev_name, &path,
				   type_page, flags, data_page);
	if (!retval && !may_mount())
		retval = -EPERM;
	if (!retval && (flags & SB_MANDLOCK) && !may_mandlock())
		retval = -EPERM;
	if (retval)
		goto dput_out;

	/* Default to relatime unless overriden */
	if (!(flags & MS_NOATIME))
		mnt_flags |= MNT_RELATIME;

	/* Separate the per-mountpoint flags */
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	if (flags & MS_NOATIME)
		mnt_flags |= MNT_NOATIME;
	if (flags & MS_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;
	if (flags & MS_STRICTATIME)
		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
	if (flags & SB_RDONLY)
		mnt_flags |= MNT_READONLY;

	/* The default atime for remount is preservation of existing values */
	if ((flags & MS_REMOUNT) &&
	    ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
		       MS_STRICTATIME)) == 0)) {
		mnt_flags &= ~MNT_ATIME_MASK;
		mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
	}

	sb_flags = flags & (SB_RDONLY |
			    SB_SYNCHRONOUS |
			    SB_MANDLOCK |
			    SB_DIRSYNC |
			    SB_SILENT |
			    SB_POSIXACL |
			    SB_I_VERSION);

	if (flags & MS_REMOUNT)
		retval = do_remount(&path, flags, sb_flags, mnt_flags,
				    data_page);
	else if (flags & MS_BIND)
		retval = do_loopback(&path, dev_name, flags & MS_REC);
	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		retval = do_change_type(&path, flags);
	else if (flags & MS_MOVE)
		retval = do_move_mount(&path, dev_name);
	else
		retval = do_new_mount(&path, type_page, sb_flags, mnt_flags,
				      dev_name, data_page);
dput_out:
	path_put(&path);
	return retval;
}

static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
{
	return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
}

static void dec_mnt_namespaces(struct ucounts *ucounts)
{
	dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
}

static void free_mnt_ns(struct mnt_namespace *ns)
{
	ns_free_inum(&ns->ns);
	dec_mnt_namespaces(ns->ucounts);
	put_user_ns(ns->user_ns);
	kfree(ns);
}

/*
 * Assign a sequence number so we can detect when we attempt to bind
 * mount a reference to an older mount namespace into the current
 * mount namespace, preventing reference counting loops.  A 64bit
 * number incrementing at 10Ghz will take 12,427 years to wrap which
 * is effectively never, so we can ignore the possibility.
 */
static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);

static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
{
	struct mnt_namespace *new_ns;
	struct ucounts *ucounts;
	int ret;

	ucounts = inc_mnt_namespaces(user_ns);
	if (!ucounts)
		return ERR_PTR(-ENOSPC);

	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
	if (!new_ns) {
		dec_mnt_namespaces(ucounts);
		return ERR_PTR(-ENOMEM);
	}
	ret = ns_alloc_inum(&new_ns->ns);
	if (ret) {
		kfree(new_ns);
		dec_mnt_namespaces(ucounts);
		return ERR_PTR(ret);
	}
	new_ns->ns.ops = &mntns_operations;
	new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
	atomic_set(&new_ns->count, 1);
	new_ns->root = NULL;
	INIT_LIST_HEAD(&new_ns->list);
	init_waitqueue_head(&new_ns->poll);
	new_ns->event = 0;
	new_ns->user_ns = get_user_ns(user_ns);
	new_ns->ucounts = ucounts;
	new_ns->mounts = 0;
	new_ns->pending_mounts = 0;
	return new_ns;
}

__latent_entropy
struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
		struct user_namespace *user_ns, struct fs_struct *new_fs)
{
	struct mnt_namespace *new_ns;
	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
	struct mount *p, *q;
	struct mount *old;
	struct mount *new;
	int copy_flags;

	BUG_ON(!ns);

	if (likely(!(flags & CLONE_NEWNS))) {
		get_mnt_ns(ns);
		return ns;
	}

	old = ns->root;

	new_ns = alloc_mnt_ns(user_ns);
	if (IS_ERR(new_ns))
		return new_ns;

	namespace_lock();
	/* First pass: copy the tree topology */
	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
	if (user_ns != ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		namespace_unlock();
		free_mnt_ns(new_ns);
		return ERR_CAST(new);
	}
	new_ns->root = new;
	list_add_tail(&new_ns->list, &new->mnt_list);

	/*
	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
	 * as belonging to new namespace.  We have already acquired a private
	 * fs_struct, so tsk->fs->lock is not needed.
	 */
	p = old;
	q = new;
	while (p) {
		q->mnt_ns = new_ns;
		new_ns->mounts++;
		if (new_fs) {
			if (&p->mnt == new_fs->root.mnt) {
				new_fs->root.mnt = mntget(&q->mnt);
				rootmnt = &p->mnt;
			}
			if (&p->mnt == new_fs->pwd.mnt) {
				new_fs->pwd.mnt = mntget(&q->mnt);
				pwdmnt = &p->mnt;
			}
		}
		p = next_mnt(p, old);
		q = next_mnt(q, new);
		if (!q)
			break;
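		/*
		 * The copy may have skipped some mounts that the old tree
		 * still has, so advance p until it lines up with q again.
		 */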
		while (p->mnt.mnt_root != q->mnt.mnt_root)
			p = next_mnt(p, old);
	}
	namespace_unlock();

	if (rootmnt)
		mntput(rootmnt);
	if (pwdmnt)
		mntput(pwdmnt);

	return new_ns;
}

/*
 * create_mnt_ns - create a private mount namespace with @m as its root,
 * consuming the caller's reference to @m (it is dropped on failure too).
 */
static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
{
	struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
	if (!IS_ERR(new_ns)) {
		struct mount *mnt = real_mount(m);
		mnt->mnt_ns = new_ns;
		new_ns->root = mnt;
		new_ns->mounts++;
		list_add(&mnt->mnt_list, &new_ns->list);
	} else {
		mntput(m);
	}
	return new_ns;
}

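/*
 * mount_subtree - look up @name below an unattached mount and return its
 * dentry, trading the vfsmount reference for an active superblock
 * reference with ->s_umount held.  A temporary namespace is set up so the
 * lookup (which may trigger automounts) operates on an attached tree, and
 * is torn down again afterwards.
 */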
struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
{
	struct mnt_namespace *ns;
	struct super_block *s;
	struct path path;
	int err;

	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		return ERR_CAST(ns);

	err = vfs_path_lookup(mnt->mnt_root, mnt,
			name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);

	put_mnt_ns(ns);

	if (err)
		return ERR_PTR(err);

	/* trade a vfsmount reference for active sb one */
	s = path.mnt->mnt_sb;
	atomic_inc(&s->s_active);
	mntput(path.mnt);
	/* lock the sucker */
	down_write(&s->s_umount);
	/* ... and return the root of (sub)tree on it */
	return path.dentry;
}
EXPORT_SYMBOL(mount_subtree);

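/*
 * mount(2) entry point: copy the type, device name and option block in
 * from userspace and hand off to do_mount().  dir_name stays a user
 * pointer and is resolved in do_mount().
 */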
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	int ret;
	char *kernel_type;
	char *kernel_dev;
	void *options;

	kernel_type = copy_mount_string(type);
	ret = PTR_ERR(kernel_type);
	if (IS_ERR(kernel_type))
		goto out_type;

	kernel_dev = copy_mount_string(dev_name);
	ret = PTR_ERR(kernel_dev);
	if (IS_ERR(kernel_dev))
		goto out_dev;

	options = copy_mount_options(data);
	ret = PTR_ERR(options);
	if (IS_ERR(options))
		goto out_data;

	ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);

	kfree(options);
out_data:
	kfree(kernel_dev);
out_dev:
	kfree(kernel_type);
out_type:
	return ret;
}

/*
 * Return true if path is reachable from root
 *
 * namespace_sem and mount_lock are both held by callers.
 */
bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
			 const struct path *root)
{
	while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
		dentry = mnt->mnt_mountpoint;
		mnt = mnt->mnt_parent;
	}
	return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
}

bool path_is_under(const struct path *path1, const struct path *path2)
{
	bool res;
	read_seqlock_excl(&mount_lock);
	res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
	read_sequnlock_excl(&mount_lock);
	return res;
}
EXPORT_SYMBOL(path_is_under);

/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root the new root file system of the current process, and sets
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root. The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root. No other
 * file system may be mounted on put_old. After all, new_root is a mountpoint.
 *
 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
 * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
 * in this situation.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
 *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
 *    first.
 */
SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
		const char __user *, put_old)
{
	struct path new, old, parent_path, root_parent, root;
	struct mount *new_mnt, *root_mnt, *old_mnt;
	struct mountpoint *old_mp, *root_mp;
	int error;

	if (!may_mount())
		return -EPERM;

	error = user_path_dir(new_root, &new);
	if (error)
		goto out0;

	error = user_path_dir(put_old, &old);
	if (error)
		goto out1;

	error = security_sb_pivotroot(&old, &new);
	if (error)
		goto out2;

	get_fs_root(current->fs, &root);
	old_mp = lock_mount(&old);
	error = PTR_ERR(old_mp);
	if (IS_ERR(old_mp))
		goto out3;

	error = -EINVAL;
	new_mnt = real_mount(new.mnt);
	root_mnt = real_mount(root.mnt);
	old_mnt = real_mount(old.mnt);
	if (IS_MNT_SHARED(old_mnt) ||
	    IS_MNT_SHARED(new_mnt->mnt_parent) ||
	    IS_MNT_SHARED(root_mnt->mnt_parent))
		goto out4;
	if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
		goto out4;
	if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
		goto out4;
	error = -ENOENT;
	if (d_unlinked(new.dentry))
		goto out4;
	error = -EBUSY;
	if (new_mnt == root_mnt || old_mnt == root_mnt)
		goto out4; /* loop, on the same file system */
	error = -EINVAL;
	if (root.mnt->mnt_root != root.dentry)
		goto out4; /* not a mountpoint */
	if (!mnt_has_parent(root_mnt))
		goto out4; /* not attached */
	root_mp = root_mnt->mnt_mp;
	if (new.mnt->mnt_root != new.dentry)
		goto out4; /* not a mountpoint */
	if (!mnt_has_parent(new_mnt))
		goto out4; /* not attached */
	/* make sure we can reach put_old from new_root */
	if (!is_path_reachable(old_mnt, old.dentry, &new))
		goto out4;
	/* make certain new is below the root */
	if (!is_path_reachable(new_mnt, new.dentry, &root))
		goto out4;
	root_mp->m_count++; /* pin it so it won't go away */
	lock_mount_hash();
	detach_mnt(new_mnt, &parent_path);
	detach_mnt(root_mnt, &root_parent);
	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
		new_mnt->mnt.mnt_flags |= MNT_LOCKED;
		root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
	}
	/* mount old root on put_old */
	attach_mnt(root_mnt, old_mnt, old_mp);
	/* mount new_root on / */
	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
	touch_mnt_namespace(current->nsproxy->mnt_ns);
	/* A moved mount should not expire automatically */
	list_del_init(&new_mnt->mnt_expire);
	put_mountpoint(root_mp);
	unlock_mount_hash();
	chroot_fs_refs(&root, &new);
	error = 0;
out4:
	unlock_mount(old_mp);
	if (!error) {
		path_put(&root_parent);
		path_put(&parent_path);
	}
out3:
	path_put(&root);
out2:
	path_put(&old);
out1:
	path_put(&new);
out0:
	return error;
}

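/*
 * Create the initial rootfs mount and the first mount namespace, and
 * attach them to the init task as its root and cwd.
 */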
static void __init init_mount_tree(void)
{
	struct vfsmount *mnt;
	struct mnt_namespace *ns;
	struct path root;
	struct file_system_type *type;

	type = get_fs_type("rootfs");
	if (!type)
		panic("Can't find rootfs type");
	mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
	put_filesystem(type);
	if (IS_ERR(mnt))
		panic("Can't create rootfs");

	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		panic("Can't allocate initial namespace");

	init_task.nsproxy->mnt_ns = ns;
	get_mnt_ns(ns);

	root.mnt = mnt;
	root.dentry = mnt->mnt_root;
	mnt->mnt_flags |= MNT_LOCKED;

	set_fs_pwd(current->fs, &root);
	set_fs_root(current->fs, &root);
}

void __init mnt_init(void)
{
	int err;

	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	mount_hashtable = alloc_large_system_hash("Mount-cache",
				sizeof(struct hlist_head),
				mhash_entries, 19,
				HASH_ZERO,
				&m_hash_shift, &m_hash_mask, 0, 0);
	mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
				sizeof(struct hlist_head),
				mphash_entries, 19,
				HASH_ZERO,
				&mp_hash_shift, &mp_hash_mask, 0, 0);

	if (!mount_hashtable || !mountpoint_hashtable)
		panic("Failed to allocate mount hash table\n");

	kernfs_init();

	err = sysfs_init();
	if (err)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, err);
	fs_kobj = kobject_create_and_add("fs", NULL);
	if (!fs_kobj)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);
	init_rootfs();
	init_mount_tree();
}

void put_mnt_ns(struct mnt_namespace *ns)
{
	if (!atomic_dec_and_test(&ns->count))
		return;
	drop_collected_mounts(&ns->root->mnt);
	free_mnt_ns(ns);
}

struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
{
	struct vfsmount *mnt;
	mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data);
	if (!IS_ERR(mnt)) {
		/*
		 * This is a long-term mount: don't release it until the
		 * matching kern_unmount(), just before the filesystem is
		 * unregistered.
		 */
		real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
	}
	return mnt;
}
EXPORT_SYMBOL_GPL(kern_mount_data);

void kern_unmount(struct vfsmount *mnt)
{
	/* release long term mount so mount point can be released */
	if (!IS_ERR_OR_NULL(mnt)) {
		real_mount(mnt)->mnt_ns = NULL;
		synchronize_rcu();
		mntput(mnt);
	}
}
EXPORT_SYMBOL(kern_unmount);

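/* Does the vfsmount belong to the caller's mount namespace? */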
bool our_mnt(struct vfsmount *mnt)
{
	return check_mnt(real_mount(mnt));
}

bool current_chrooted(void)
{
	/* Does the current process have a non-standard root? */
	struct path ns_root;
	struct path fs_root;
	bool chrooted;

	/* Find the namespace root */
	ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
	ns_root.dentry = ns_root.mnt->mnt_root;
	path_get(&ns_root);
	while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
		;

	get_fs_root(current->fs, &fs_root);

	chrooted = !path_equal(&fs_root, &ns_root);

	path_put(&fs_root);
	path_put(&ns_root);

	return chrooted;
}

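/*
 * Check whether the new mount's filesystem is already fully visible in
 * the caller's mount namespace with flags at least as restrictive as the
 * proposed mount; if so, propagate the locked flag bits to the caller.
 */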
static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
				int *new_mnt_flags)
{
	int new_flags = *new_mnt_flags;
	struct mount *mnt;
	bool visible = false;

	down_read(&namespace_sem);
	list_for_each_entry(mnt, &ns->list, mnt_list) {
		struct mount *child;
		int mnt_flags;

		if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
			continue;

		/* This mount is not fully visible if its root directory
		 * is not the root directory of the filesystem.
		 */
		if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
			continue;

		/* A local view of the mount flags */
		mnt_flags = mnt->mnt.mnt_flags;

		/* Don't miss readonly hidden in the superblock flags */
		if (sb_rdonly(mnt->mnt.mnt_sb))
			mnt_flags |= MNT_LOCK_READONLY;

		/* Verify the mount flags are equal to or more permissive
		 * than the proposed new mount.
		 */
		if ((mnt_flags & MNT_LOCK_READONLY) &&
		    !(new_flags & MNT_READONLY))
			continue;
		if ((mnt_flags & MNT_LOCK_ATIME) &&
		    ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
			continue;

		/* This mount is not fully visible if there are any
		 * locked child mounts that cover anything except for
		 * empty directories.
		 */
		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
			struct inode *inode = child->mnt_mountpoint->d_inode;

			/* Only worry about locked mounts */
			if (!(child->mnt.mnt_flags & MNT_LOCKED))
				continue;

			/* Is the directory permanently empty? */
			if (!is_empty_dir_inode(inode))
				goto next;
		}
		/* Preserve the locked attributes */
		*new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
					       MNT_LOCK_ATIME);
		visible = true;
		goto found;
	next:	;
	}
found:
	up_read(&namespace_sem);
	return visible;
}

static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
{
	const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
	unsigned long s_iflags;

	if (ns->user_ns == &init_user_ns)
		return false;

	/* Only filesystems flagged SB_I_USERNS_VISIBLE demand prior visibility */
	s_iflags = mnt->mnt_sb->s_iflags;
	if (!(s_iflags & SB_I_USERNS_VISIBLE))
		return false;

	if ((s_iflags & required_iflags) != required_iflags) {
		WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
			  required_iflags);
		return true;
	}

	return !mnt_already_visible(ns, mnt, new_mnt_flags);
}

bool mnt_may_suid(struct vfsmount *mnt)
{
	/*
	 * Foreign mounts (accessed via fchdir or through /proc
	 * symlinks) are always treated as if they are nosuid.  This
	 * prevents namespaces from trusting potentially unsafe
	 * suid/sgid bits, file caps, or security labels that originate
	 * in other namespaces.
	 */
	return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
	       current_in_userns(mnt->mnt_sb->s_user_ns);
}

static struct ns_common *mntns_get(struct task_struct *task)
{
	struct ns_common *ns = NULL;
	struct nsproxy *nsproxy;

	task_lock(task);
	nsproxy = task->nsproxy;
	if (nsproxy) {
		ns = &nsproxy->mnt_ns->ns;
		get_mnt_ns(to_mnt_ns(ns));
	}
	task_unlock(task);

	return ns;
}

static void mntns_put(struct ns_common *ns)
{
	put_mnt_ns(to_mnt_ns(ns));
}

static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct fs_struct *fs = current->fs;
	struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
	struct path root;
	int err;

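	/*
	 * Switching namespaces requires privilege over the target
	 * namespace's owner and chroot-level privilege in the caller's
	 * own user namespace.
	 */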
	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
	    !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
		return -EPERM;

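	/* Refuse if the fs_struct is shared; other tasks would be dragged along. */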
	if (fs->users != 1)
		return -EINVAL;

	get_mnt_ns(mnt_ns);
	old_mnt_ns = nsproxy->mnt_ns;
	nsproxy->mnt_ns = mnt_ns;

	/* Find the root */
	err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
				"/", LOOKUP_DOWN, &root);
	if (err) {
		/* revert to old namespace */
		nsproxy->mnt_ns = old_mnt_ns;
		put_mnt_ns(mnt_ns);
		return err;
	}

	put_mnt_ns(old_mnt_ns);

	/* Update the pwd and root */
	set_fs_pwd(fs, &root);
	set_fs_root(fs, &root);

	path_put(&root);
	return 0;
}

static struct user_namespace *mntns_owner(struct ns_common *ns)
{
	return to_mnt_ns(ns)->user_ns;
}

const struct proc_ns_operations mntns_operations = {
	.name		= "mnt",
	.type		= CLONE_NEWNS,
	.get		= mntns_get,
	.put		= mntns_put,
	.install	= mntns_install,
	.owner		= mntns_owner,
};