/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *	Released under GPL v2.
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */
#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/idr.h>
#include <linux/init.h>		/* init_rootfs */
#include <linux/fs_struct.h>	/* get_fs_root et.al. */
#include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/bootmem.h>
#include "pnode.h"
#include "internal.h"

static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
static unsigned int mp_hash_mask __read_mostly;
static unsigned int mp_hash_shift __read_mostly;

static __initdata unsigned long mhash_entries;
static int __init set_mhash_entries(char *str)
{
	if (!str)
		return 0;
	mhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mhash_entries=", set_mhash_entries);

static __initdata unsigned long mphash_entries;
static int __init set_mphash_entries(char *str)
{
	if (!str)
		return 0;
	mphash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mphash_entries=", set_mphash_entries);

static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
static DEFINE_SPINLOCK(mnt_id_lock);
static int mnt_id_start = 0;
static int mnt_group_start = 1;

static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);

/* /sys/fs */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);

/*
 * mount_lock may be taken for read to prevent changes to the mount hash,
 * i.e. during mountpoint lookups or when walking back up the mount tree.
 *
 * It must be taken for write whenever the mount hash or the
 * parent/mountpoint links of a mount are changed.
 */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> m_hash_shift);
	return &mount_hashtable[tmp & m_hash_mask];
}

static inline struct hlist_head *mp_hash(struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> mp_hash_shift);
	return &mountpoint_hashtable[tmp & mp_hash_mask];
}

/*
 * allocation is serialized by namespace_sem, but we need the spinlock to
 * serialize with freeing.
 */
static int mnt_alloc_id(struct mount *mnt)
{
	int res;

retry:
	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
	spin_lock(&mnt_id_lock);
	res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
	if (!res)
		mnt_id_start = mnt->mnt_id + 1;
	spin_unlock(&mnt_id_lock);
	if (res == -EAGAIN)
		goto retry;

	return res;
}

static void mnt_free_id(struct mount *mnt)
{
	int id = mnt->mnt_id;
	spin_lock(&mnt_id_lock);
	ida_remove(&mnt_id_ida, id);
	if (mnt_id_start > id)
		mnt_id_start = id;
	spin_unlock(&mnt_id_lock);
}

/*
 * Allocate a new peer group ID
 *
 * mnt_group_ida is protected by namespace_sem
 */
static int mnt_alloc_group_id(struct mount *mnt)
{
	int res;

	if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
		return -ENOMEM;

	res = ida_get_new_above(&mnt_group_ida,
				mnt_group_start,
				&mnt->mnt_group_id);
	if (!res)
		mnt_group_start = mnt->mnt_group_id + 1;

	return res;
}

/*
 * Release a peer group ID
 */
void mnt_release_group_id(struct mount *mnt)
{
	int id = mnt->mnt_group_id;
	ida_remove(&mnt_group_ida, id);
	if (mnt_group_start > id)
		mnt_group_start = id;
	mnt->mnt_group_id = 0;
}

/*
 * vfsmount lock must be held for read
 */
static inline void mnt_add_count(struct mount *mnt, int n)
{
#ifdef CONFIG_SMP
	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
#else
	preempt_disable();
	mnt->mnt_count += n;
	preempt_enable();
#endif
}

/*
 * vfsmount lock must be held for write
 */
unsigned int mnt_get_count(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
	}

	return count;
#else
	return mnt->mnt_count;
#endif
}

static struct mount *alloc_vfsmnt(const char *name)
{
	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

#ifdef CONFIG_SMP
		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
		if (!mnt->mnt_pcp)
			goto out_free_devname;

		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else
		mnt->mnt_count = 1;
		mnt->mnt_writers = 0;
#endif

		INIT_HLIST_NODE(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
#ifdef CONFIG_FSNOTIFY
		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
#endif
	}
	return mnt;

#ifdef CONFIG_SMP
out_free_devname:
	kfree(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}

/*
 * Most r/o checks on a fs are for operations that take discrete amounts of
 * time, like a write() or unlink().  We must keep track of when those
 * operations start (for permission checks) and when they end, so that we
 * can determine when writes are able to occur to a filesystem.
 */
/*
 * __mnt_is_readonly: check whether a mount is read-only
 * @mnt: the mount to check for its write status
 *
 * This shouldn't be used directly ... it only tells whether a write would
 * be allowed right now; use mnt_want_write() and friends, which also take
 * the per-mount write count, to actually start one.
 */
int __mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_flags & MNT_READONLY)
		return 1;
	if (mnt->mnt_sb->s_flags & MS_RDONLY)
		return 1;
	return 0;
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

static inline void mnt_inc_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers++;
#endif
}

static inline void mnt_dec_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers--;
#endif
}

static unsigned int mnt_get_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
	}

	return count;
#else
	return mnt->mnt_writers;
#endif
}

static int mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_sb->s_readonly_remount)
		return 1;
	/* Order wrt setting s_flags/s_readonly_remount in do_remount() */
	smp_rmb();
	return __mnt_is_readonly(mnt);
}

/*
 * Most r/o & frozen checks on a fs are for operations that take discrete
 * amounts of time, like a write() or unlink().  We must keep track of when
 * those operations start (for permission checks) and when they end, so that
 * we can determine when writes are able to occur to a filesystem.
 */
/**
 * __mnt_want_write - get write access to a mount without freeze protection
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed
 * to it, and makes sure that writes are allowed (mnt is read-write) before
 * returning success. This operation does not protect against filesystem
 * being frozen. When the write operation is finished, __mnt_drop_write()
 * must be called. This is effectively a refcount.
 */
int __mnt_want_write(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int ret = 0;

	preempt_disable();
	mnt_inc_writers(mnt);
	/*
	 * The store to mnt_inc_writers must be visible before we pass
	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
	 * incremented count after it has set MNT_WRITE_HOLD.
	 */
	smp_mb();
	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
		cpu_relax();
	/*
	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
	 * be set to match its requirements. So we must not load that until
	 * MNT_WRITE_HOLD is cleared.
	 */
	smp_rmb();
	if (mnt_is_readonly(m)) {
		mnt_dec_writers(mnt);
		ret = -EROFS;
	}
	preempt_enable();

	return ret;
}

/**
 * mnt_want_write - get write access to a mount
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed
 * to it, and makes sure that writes are allowed (mount is read-write,
 * filesystem is not frozen) before returning success.  When the write
 * operation is finished, mnt_drop_write() must be called.  This is
 * effectively a refcount.
 */
int mnt_want_write(struct vfsmount *m)
{
	int ret;

	sb_start_write(m->mnt_sb);
	ret = __mnt_want_write(m);
	if (ret)
		sb_end_write(m->mnt_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);

/**
 * mnt_clone_write - get write access to a mount
 * @mnt: the mount on which to take a write
 *
 * This is effectively like mnt_want_write, except it must only be used to
 * take an extra write reference inside a context where we know that a
 * write reference already exists.  Because a write is already in progress,
 * writes cannot be blocked until this one is done.
 *
 * After finished, mnt_drop_write must be called as usual to drop the
 * reference.
 */
int mnt_clone_write(struct vfsmount *mnt)
{
	/* superblock may be r/o */
	if (__mnt_is_readonly(mnt))
		return -EROFS;
	preempt_disable();
	mnt_inc_writers(real_mount(mnt));
	preempt_enable();
	return 0;
}
EXPORT_SYMBOL_GPL(mnt_clone_write);

/**
 * __mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like __mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already
 */
int __mnt_want_write_file(struct file *file)
{
	if (!(file->f_mode & FMODE_WRITER))
		return __mnt_want_write(file->f_path.mnt);
	else
		return mnt_clone_write(file->f_path.mnt);
}

/**
 * mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already
 */
int mnt_want_write_file(struct file *file)
{
	int ret;

	sb_start_write(file->f_path.mnt->mnt_sb);
	ret = __mnt_want_write_file(file);
	if (ret)
		sb_end_write(file->f_path.mnt->mnt_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);

/**
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done
 * performing writes to it.  Must be matched with
 * __mnt_want_write() call above.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	preempt_disable();
	mnt_dec_writers(real_mount(mnt));
	preempt_enable();
}

/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it
 * and also allows filesystem to be frozen again.  Must be matched with
 * mnt_want_write() call above.
 */
void mnt_drop_write(struct vfsmount *mnt)
{
	__mnt_drop_write(mnt);
	sb_end_write(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

void __mnt_drop_write_file(struct file *file)
{
	__mnt_drop_write(file->f_path.mnt);
}

void mnt_drop_write_file(struct file *file)
{
	mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(mnt_drop_write_file);
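
/*
 * Typical caller pattern (sketch; cf. the VFS write paths):
 *
 *	err = mnt_want_write(path->mnt);
 *	if (err)
 *		return err;
 *	...modify the filesystem...
 *	mnt_drop_write(path->mnt);
 *
 * Every successful *_want_write() must be paired with the matching
 * *_drop_write() once the write is done.
 */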

static int mnt_make_readonly(struct mount *mnt)
{
	int ret = 0;

	lock_mount_hash();
	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
	/*
	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
	 * should be visible before we do.
	 */
	smp_mb();

	/*
	 * With writers on hold, if this value is zero, then there are
	 * definitely no active writers (although held writers may subsequently
	 * increment the count, they'll have to wait, and decrement it after
	 * seeing MNT_READONLY).
	 *
	 * It is OK to have counter incremented on one CPU and decremented on
	 * another: the sum will add up correctly.  MNT_WRITE_HOLD keeps the
	 * sum stable while we walk the per-cpu counters, so a total of zero
	 * really does mean there are no writers in flight.
	 */
	if (mnt_get_writers(mnt) > 0)
		ret = -EBUSY;
	else
		mnt->mnt.mnt_flags |= MNT_READONLY;
	/*
	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
	 * that become unheld will see MNT_READONLY.
	 */
	smp_wmb();
	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	unlock_mount_hash();
	return ret;
}

static void __mnt_unmake_readonly(struct mount *mnt)
{
	lock_mount_hash();
	mnt->mnt.mnt_flags &= ~MNT_READONLY;
	unlock_mount_hash();
}

int sb_prepare_remount_readonly(struct super_block *sb)
{
	struct mount *mnt;
	int err = 0;

	/* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
	if (atomic_long_read(&sb->s_remove_count))
		return -EBUSY;

	lock_mount_hash();
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
			mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
			smp_mb();
			if (mnt_get_writers(mnt) > 0) {
				err = -EBUSY;
				break;
			}
		}
	}
	if (!err && atomic_long_read(&sb->s_remove_count))
		err = -EBUSY;

	if (!err) {
		sb->s_readonly_remount = 1;
		smp_wmb();
	}
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	}
	unlock_mount_hash();

	return err;
}

static void free_vfsmnt(struct mount *mnt)
{
	kfree(mnt->mnt_devname);
#ifdef CONFIG_SMP
	free_percpu(mnt->mnt_pcp);
#endif
	kmem_cache_free(mnt_cache, mnt);
}

static void delayed_free_vfsmnt(struct rcu_head *head)
{
	free_vfsmnt(container_of(head, struct mount, mnt_rcu));
}

/* call under rcu_read_lock */
bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
	struct mount *mnt;
	if (read_seqretry(&mount_lock, seq))
		return false;
	if (bastard == NULL)
		return true;
	mnt = real_mount(bastard);
	mnt_add_count(mnt, 1);
	if (likely(!read_seqretry(&mount_lock, seq)))
		return true;
	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
		mnt_add_count(mnt, -1);
		return false;
	}
	rcu_read_unlock();
	mntput(bastard);
	rcu_read_lock();
	return false;
}

/*
 * find the first mount at @dentry on vfsmount @mnt.
 * call under rcu_read_lock()
 */
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct hlist_head *head = m_hash(mnt, dentry);
	struct mount *p;

	hlist_for_each_entry_rcu(p, head, mnt_hash)
		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
			return p;
	return NULL;
}

/*
 * find the last mount at @dentry on vfsmount @mnt.
 * mount_lock must be held.
 */
struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
{
	struct mount *p, *res;
	res = p = __lookup_mnt(mnt, dentry);
	if (!p)
		goto out;
	hlist_for_each_entry_continue(p, mnt_hash) {
		if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
			break;
		res = p;
	}
out:
	return res;
}

/*
 * lookup_mnt - Return the first child mount mounted at path
 *
 * "First" means first mounted chronologically.  If you create the
 * following mounts:
 *
 * mount /dev/sda1 /mnt
 * mount /dev/sda2 /mnt
 * mount /dev/sda3 /mnt
 *
 * Then lookup_mnt() on the base /mnt dentry in the root mount will
 * return successively the root dentry and vfsmount of /dev/sda1, then
 * /dev/sda2, then /dev/sda3, then NULL.
 *
 * lookup_mnt takes a reference to the found vfsmount.
 */
struct vfsmount *lookup_mnt(struct path *path)
{
	struct mount *child_mnt;
	struct vfsmount *m;
	unsigned seq;

	rcu_read_lock();
	do {
		seq = read_seqbegin(&mount_lock);
		child_mnt = __lookup_mnt(path->mnt, path->dentry);
		m = child_mnt ? &child_mnt->mnt : NULL;
	} while (!legitimize_mnt(m, seq));
	rcu_read_unlock();
	return m;
}

static struct mountpoint *new_mountpoint(struct dentry *dentry)
{
	struct hlist_head *chain = mp_hash(dentry);
	struct mountpoint *mp;
	int ret;

	hlist_for_each_entry(mp, chain, m_hash) {
		if (mp->m_dentry == dentry) {
			/* might be worth a WARN_ON() */
			if (d_unlinked(dentry))
				return ERR_PTR(-ENOENT);
			mp->m_count++;
			return mp;
		}
	}

	mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
	if (!mp)
		return ERR_PTR(-ENOMEM);

	ret = d_set_mounted(dentry);
	if (ret) {
		kfree(mp);
		return ERR_PTR(ret);
	}

	mp->m_dentry = dentry;
	mp->m_count = 1;
	hlist_add_head(&mp->m_hash, chain);
	return mp;
}

static void put_mountpoint(struct mountpoint *mp)
{
	if (!--mp->m_count) {
		struct dentry *dentry = mp->m_dentry;
		spin_lock(&dentry->d_lock);
		dentry->d_flags &= ~DCACHE_MOUNTED;
		spin_unlock(&dentry->d_lock);
		hlist_del(&mp->m_hash);
		kfree(mp);
	}
}

static inline int check_mnt(struct mount *mnt)
{
	return mnt->mnt_ns == current->nsproxy->mnt_ns;
}

/*
 * vfsmount lock must be held for write
 */
static void touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns) {
		ns->event = ++event;
		wake_up_interruptible(&ns->poll);
	}
}

/*
 * vfsmount lock must be held for write
 */
static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns && ns->event != event) {
		ns->event = event;
		wake_up_interruptible(&ns->poll);
	}
}

/*
 * vfsmount lock must be held for write
 */
static void detach_mnt(struct mount *mnt, struct path *old_path)
{
	old_path->dentry = mnt->mnt_mountpoint;
	old_path->mnt = &mnt->mnt_parent->mnt;
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	list_del_init(&mnt->mnt_child);
	hlist_del_init_rcu(&mnt->mnt_hash);
	put_mountpoint(mnt->mnt_mp);
	mnt->mnt_mp = NULL;
}

/*
 * vfsmount lock must be held for write
 */
void mnt_set_mountpoint(struct mount *mnt,
			struct mountpoint *mp,
			struct mount *child_mnt)
{
	mp->m_count++;
	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
	child_mnt->mnt_mountpoint = dget(mp->m_dentry);
	child_mnt->mnt_parent = mnt;
	child_mnt->mnt_mp = mp;
}

/*
 * vfsmount lock must be held for write
 */
static void attach_mnt(struct mount *mnt,
			struct mount *parent,
			struct mountpoint *mp)
{
	mnt_set_mountpoint(parent, mp, mnt);
	hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
}

static void attach_shadowed(struct mount *mnt,
			struct mount *parent,
			struct mount *shadows)
{
	if (shadows) {
		hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
		list_add(&mnt->mnt_child, &shadows->mnt_child);
	} else {
		hlist_add_head_rcu(&mnt->mnt_hash,
				m_hash(&parent->mnt, mnt->mnt_mountpoint));
		list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
	}
}

/*
 * vfsmount lock must be held for write
 */
static void commit_tree(struct mount *mnt, struct mount *shadows)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m;
	LIST_HEAD(head);
	struct mnt_namespace *n = parent->mnt_ns;

	BUG_ON(parent == mnt);

	list_add_tail(&head, &mnt->mnt_list);
	list_for_each_entry(m, &head, mnt_list)
		m->mnt_ns = n;

	list_splice(&head, n->list.prev);

	attach_shadowed(mnt, parent, shadows);
	touch_mnt_namespace(n);
}

static struct mount *next_mnt(struct mount *p, struct mount *root)
{
	struct list_head *next = p->mnt_mounts.next;
	if (next == &p->mnt_mounts) {
		while (1) {
			if (p == root)
				return NULL;
			next = p->mnt_child.next;
			if (next != &p->mnt_parent->mnt_mounts)
				break;
			p = p->mnt_parent;
		}
	}
	return list_entry(next, struct mount, mnt_child);
}

static struct mount *skip_mnt_tree(struct mount *p)
{
	struct list_head *prev = p->mnt_mounts.prev;
	while (prev != &p->mnt_mounts) {
		p = list_entry(prev, struct mount, mnt_child);
		prev = p->mnt_mounts.prev;
	}
	return p;
}

struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct mount *mnt;
	struct dentry *root;

	if (!type)
		return ERR_PTR(-ENODEV);

	mnt = alloc_vfsmnt(name);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flags & MS_KERNMOUNT)
		mnt->mnt.mnt_flags = MNT_INTERNAL;

	root = mount_fs(type, flags, name, data);
	if (IS_ERR(root)) {
		mnt_free_id(mnt);
		free_vfsmnt(mnt);
		return ERR_CAST(root);
	}

	mnt->mnt.mnt_root = root;
	mnt->mnt.mnt_sb = root->d_sb;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
	unlock_mount_hash();
	return &mnt->mnt;
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);

static struct mount *clone_mnt(struct mount *old, struct dentry *root,
					int flag)
{
	struct super_block *sb = old->mnt.mnt_sb;
	struct mount *mnt;
	int err;

	mnt = alloc_vfsmnt(old->mnt_devname);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
		mnt->mnt_group_id = 0; /* not a peer of original */
	else
		mnt->mnt_group_id = old->mnt_group_id;

	if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
		err = mnt_alloc_group_id(mnt);
		if (err)
			goto out_free;
	}

	mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
	/* Don't allow unprivileged users to change mount flags */
	if (flag & CL_UNPRIVILEGED) {
		mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;

		if (mnt->mnt.mnt_flags & MNT_READONLY)
			mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;

		if (mnt->mnt.mnt_flags & MNT_NODEV)
			mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;

		if (mnt->mnt.mnt_flags & MNT_NOSUID)
			mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;

		if (mnt->mnt.mnt_flags & MNT_NOEXEC)
			mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
	}

	/* Don't allow unprivileged users to reveal what is under a mount */
	if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
		mnt->mnt.mnt_flags |= MNT_LOCKED;

	atomic_inc(&sb->s_active);
	mnt->mnt.mnt_sb = sb;
	mnt->mnt.mnt_root = dget(root);
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
	unlock_mount_hash();

	if ((flag & CL_SLAVE) ||
	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
		list_add(&mnt->mnt_slave, &old->mnt_slave_list);
		mnt->mnt_master = old;
		CLEAR_MNT_SHARED(mnt);
	} else if (!(flag & CL_PRIVATE)) {
		if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
			list_add(&mnt->mnt_share, &old->mnt_share);
		if (IS_MNT_SLAVE(old))
			list_add(&mnt->mnt_slave, &old->mnt_slave);
		mnt->mnt_master = old->mnt_master;
	}
	if (flag & CL_MAKE_SHARED)
		set_mnt_shared(mnt);

	/* stick the duplicate mount on the same expiry list
	 * as the original if that was on one */
	if (flag & CL_EXPIRE) {
		if (!list_empty(&old->mnt_expire))
			list_add(&mnt->mnt_expire, &old->mnt_expire);
	}

	return mnt;

 out_free:
	mnt_free_id(mnt);
	free_vfsmnt(mnt);
	return ERR_PTR(err);
}

static void mntput_no_expire(struct mount *mnt)
{
	rcu_read_lock();
	mnt_add_count(mnt, -1);
	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
		rcu_read_unlock();
		return;
	}
	lock_mount_hash();
	if (mnt_get_count(mnt)) {
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	mnt->mnt.mnt_flags |= MNT_DOOMED;
	rcu_read_unlock();

	list_del(&mnt->mnt_instance);
	unlock_mount_hash();

	/*
	 * A non-zero writer count here probably indicates that somebody
	 * messed up a mnt_want/drop_write() pair.  If this happens, the
	 * filesystem was probably unable to make r/w->r/o transitions.
	 * The locking used to deal with mnt_count decrement provides
	 * barriers, so mnt_get_writers() below is safe.
	 */
	WARN_ON(mnt_get_writers(mnt));
	if (unlikely(mnt->mnt_pins.first))
		mnt_pin_kill(mnt);
	fsnotify_vfsmount_delete(&mnt->mnt);
	dput(mnt->mnt.mnt_root);
	deactivate_super(mnt->mnt.mnt_sb);
	mnt_free_id(mnt);
	call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
}

void mntput(struct vfsmount *mnt)
{
	if (mnt) {
		struct mount *m = real_mount(mnt);
		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
		if (unlikely(m->mnt_expiry_mark))
			m->mnt_expiry_mark = 0;
		mntput_no_expire(m);
	}
}
EXPORT_SYMBOL(mntput);

struct vfsmount *mntget(struct vfsmount *mnt)
{
	if (mnt)
		mnt_add_count(real_mount(mnt), 1);
	return mnt;
}
EXPORT_SYMBOL(mntget);

struct vfsmount *mnt_clone_internal(struct path *path)
{
	struct mount *p;
	p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
	if (IS_ERR(p))
		return ERR_CAST(p);
	p->mnt.mnt_flags |= MNT_INTERNAL;
	return &p->mnt;
}

static inline void mangle(struct seq_file *m, const char *s)
{
	seq_escape(m, s, " \t\n\\");
}

/*
 * Simple .show_options callback for filesystems which don't want to
 * implement more complex mount option showing.
 *
 * See also save_mount_options().
 */
int generic_show_options(struct seq_file *m, struct dentry *root)
{
	const char *options;

	rcu_read_lock();
	options = rcu_dereference(root->d_sb->s_options);

	if (options != NULL && options[0]) {
		seq_putc(m, ',');
		mangle(m, options);
	}
	rcu_read_unlock();

	return 0;
}
EXPORT_SYMBOL(generic_show_options);

/*
 * If filesystem uses generic_show_options(), this function should be
 * called from the fill_super() callback.
 *
 * The .remount_fs callback usually needs to be handled in a special
 * way, to make sure, that previous options are not overwritten if the
 * remount fails.
 *
 * Also note, that if the filesystem's .remount_fs function doesn't
 * reset all options to their default value, but changes only newly
 * given options, then the displayed options will not reflect reality
 * any more.
 */
void save_mount_options(struct super_block *sb, char *options)
{
	BUG_ON(sb->s_options);
	rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
}
EXPORT_SYMBOL(save_mount_options);

void replace_mount_options(struct super_block *sb, char *options)
{
	char *old = sb->s_options;
	rcu_assign_pointer(sb->s_options, options);
	if (old) {
		synchronize_rcu();
		kfree(old);
	}
}
EXPORT_SYMBOL(replace_mount_options);

#ifdef CONFIG_PROC_FS
/* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct proc_mounts *p = proc_mounts(m);

	down_read(&namespace_sem);
	if (p->cached_event == p->ns->event) {
		void *v = p->cached_mount;
		if (*pos == p->cached_index)
			return v;
		if (*pos == p->cached_index + 1) {
			v = seq_list_next(v, &p->ns->list, &p->cached_index);
			return p->cached_mount = v;
		}
	}

	p->cached_event = p->ns->event;
	p->cached_mount = seq_list_start(&p->ns->list, *pos);
	p->cached_index = *pos;
	return p->cached_mount;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_mounts *p = proc_mounts(m);

	p->cached_mount = seq_list_next(v, &p->ns->list, pos);
	p->cached_index = *pos;
	return p->cached_mount;
}

static void m_stop(struct seq_file *m, void *v)
{
	up_read(&namespace_sem);
}

static int m_show(struct seq_file *m, void *v)
{
	struct proc_mounts *p = proc_mounts(m);
	struct mount *r = list_entry(v, struct mount, mnt_list);
	return p->show(m, &r->mnt);
}

const struct seq_operations mounts_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= m_show,
};
#endif  /* CONFIG_PROC_FS */

/**
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
int may_umount_tree(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int actual_refs = 0;
	int minimum_refs = 0;
	struct mount *p;
	BUG_ON(!m);

	/* write lock needed for mnt_get_count */
	lock_mount_hash();
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		actual_refs += mnt_get_count(p);
		minimum_refs += 2;
	}
	unlock_mount_hash();

	if (actual_refs > minimum_refs)
		return 0;

	return 1;
}

EXPORT_SYMBOL(may_umount_tree);

/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
int may_umount(struct vfsmount *mnt)
{
	int ret = 1;
	down_read(&namespace_sem);
	lock_mount_hash();
	if (propagate_mount_busy(real_mount(mnt), 2))
		ret = 0;
	unlock_mount_hash();
	up_read(&namespace_sem);
	return ret;
}

EXPORT_SYMBOL(may_umount);

static HLIST_HEAD(unmounted);	/* protected by namespace_sem */

static void namespace_unlock(void)
{
	struct mount *mnt;
	struct hlist_head head = unmounted;

	if (likely(hlist_empty(&head))) {
		up_write(&namespace_sem);
		return;
	}

	head.first->pprev = &head.first;
	INIT_HLIST_HEAD(&unmounted);

	/* undo decrements we'd done in umount_tree() */
	hlist_for_each_entry(mnt, &head, mnt_hash)
		if (mnt->mnt_ex_mountpoint.mnt)
			mntget(mnt->mnt_ex_mountpoint.mnt);

	up_write(&namespace_sem);

	synchronize_rcu();

	while (!hlist_empty(&head)) {
		mnt = hlist_entry(head.first, struct mount, mnt_hash);
		hlist_del_init(&mnt->mnt_hash);
		if (mnt->mnt_ex_mountpoint.mnt)
			path_put(&mnt->mnt_ex_mountpoint);
		mntput(&mnt->mnt);
	}
}

static inline void namespace_lock(void)
{
	down_write(&namespace_sem);
}
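
/*
 * Locking convention visible throughout this file: where both locks are
 * needed, namespace_sem (namespace_lock()) is always taken before
 * mount_lock (lock_mount_hash()) and released in the opposite order;
 * see do_umount() and mark_mounts_for_expiry() below for the pattern.
 */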

/*
 * mount_lock must be held
 * namespace_sem must be held for write
 * how = 0 => just this tree, don't propagate
 * how = 1 => propagate; we know that nobody else has reference to any victims
 * how = 2 => lazy umount
 */
void umount_tree(struct mount *mnt, int how)
{
	HLIST_HEAD(tmp_list);
	struct mount *p;
	struct mount *last = NULL;

	for (p = mnt; p; p = next_mnt(p, mnt)) {
		hlist_del_init_rcu(&p->mnt_hash);
		hlist_add_head(&p->mnt_hash, &tmp_list);
	}

	hlist_for_each_entry(p, &tmp_list, mnt_hash)
		list_del_init(&p->mnt_child);

	if (how)
		propagate_umount(&tmp_list);

	hlist_for_each_entry(p, &tmp_list, mnt_hash) {
		list_del_init(&p->mnt_expire);
		list_del_init(&p->mnt_list);
		__touch_mnt_namespace(p->mnt_ns);
		p->mnt_ns = NULL;
		if (how < 2)
			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
		if (mnt_has_parent(p)) {
			put_mountpoint(p->mnt_mp);
			mnt_add_count(p->mnt_parent, -1);
			/* move the reference to mountpoint into ->mnt_ex_mountpoint */
			p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint;
			p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt;
			p->mnt_mountpoint = p->mnt.mnt_root;
			p->mnt_parent = p;
			p->mnt_mp = NULL;
		}
		change_mnt_propagation(p, MS_PRIVATE);
		last = p;
	}
	if (last) {
		last->mnt_hash.next = unmounted.first;
		unmounted.first = tmp_list.first;
		unmounted.first->pprev = &unmounted.first;
	}
}

static void shrink_submounts(struct mount *mnt);

static int do_umount(struct mount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt.mnt_sb;
	int retval;

	retval = security_sb_umount(&mnt->mnt, flags);
	if (retval)
		return retval;

	/*
	 * Allow userspace to request a mountpoint be expired rather than
	 * unmounting unconditionally. Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
	if (flags & MNT_EXPIRE) {
		if (&mnt->mnt == current->fs->root.mnt ||
		    flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		/*
		 * probably don't strictly need the lock here if we examined
		 * all race cases, but it's a slowpath.
		 */
		lock_mount_hash();
		if (mnt_get_count(mnt) != 2) {
			unlock_mount_hash();
			return -EBUSY;
		}
		unlock_mount_hash();

		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things. In the Unix tradition of
	 * 'Gee thats tricky, lets do it in userspace' the umount_begin
	 * might fail to complete on the first run through as other tasks
	 * must return, and the like. Thats for the mount program to worry
	 * about for the moment.
	 */
	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
		sb->s_op->umount_begin(sb);
	}

	/*
	 * No sense to grab the lock for this test, but test itself looks
	 * somewhat bogus. Suggestions for better replacement?
	 * Ho-hum... In principle, we might treat that as umount + switch
	 * to rootfs. GC would eventually take care of the old vfsmount.
	 * Actually it makes sense, especially if rootfs would contain a
	 * /reboot - static binary that would close all descriptors and
	 * call reboot(9). Then init(8) could umount root and exec /reboot.
	 */
	if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
		down_write(&sb->s_umount);
		if (!(sb->s_flags & MS_RDONLY))
			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
		up_write(&sb->s_umount);
		return retval;
	}

	namespace_lock();
	lock_mount_hash();
	event++;

	if (flags & MNT_DETACH) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, 2);
		retval = 0;
	} else {
		shrink_submounts(mnt);
		retval = -EBUSY;
		if (!propagate_mount_busy(mnt, 2)) {
			if (!list_empty(&mnt->mnt_list))
				umount_tree(mnt, 1);
			retval = 0;
		}
	}
	unlock_mount_hash();
	namespace_unlock();
	return retval;
}

/*
 * Is the caller allowed to modify his namespace?
 */
static inline bool may_mount(void)
{
	return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}

/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes. Our API is identical to OSF/1 to avoid making a mess of AIX
 */
SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	struct path path;
	struct mount *mnt;
	int retval;
	int lookup_flags = 0;

	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
		return -EINVAL;

	if (!may_mount())
		return -EPERM;

	if (!(flags & UMOUNT_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;

	retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
	if (retval)
		goto out;
	mnt = real_mount(path.mnt);
	retval = -EINVAL;
	if (path.dentry != path.mnt->mnt_root)
		goto dput_and_out;
	if (!check_mnt(mnt))
		goto dput_and_out;
	if (mnt->mnt.mnt_flags & MNT_LOCKED)
		goto dput_and_out;

	retval = do_umount(mnt, flags);
dput_and_out:
	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
	dput(path.dentry);
	mntput_no_expire(mnt);
out:
	return retval;
}

#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 *	The 2.0 compatible umount. No flags.
 */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return sys_umount(name, 0);
}

#endif

static bool is_mnt_ns_file(struct dentry *dentry)
{
	/* Is this a proxy for a mount namespace? */
	struct inode *inode = dentry->d_inode;
	struct proc_ns *ei;

	if (!proc_ns_inode(inode))
		return false;

	ei = get_proc_ns(inode);
	if (ei->ns_ops != &mntns_operations)
		return false;

	return true;
}

static bool mnt_ns_loop(struct dentry *dentry)
{
	/* Could bind mounting the mount namespace inode cause a
	 * mount namespace loop?
	 */
	struct mnt_namespace *mnt_ns;
	if (!is_mnt_ns_file(dentry))
		return false;

	mnt_ns = get_proc_ns(dentry->d_inode)->ns;
	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}

struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
					int flag)
{
	struct mount *res, *p, *q, *r, *parent;

	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
		return ERR_PTR(-EINVAL);

	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt.mnt_flags &= ~MNT_LOCKED;
	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		struct mount *s;
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			struct mount *t = NULL;
			if (!(flag & CL_COPY_UNBINDABLE) &&
			    IS_MNT_UNBINDABLE(s)) {
				s = skip_mnt_tree(s);
				continue;
			}
			if (!(flag & CL_COPY_MNT_NS_FILE) &&
			    is_mnt_ns_file(s->mnt.mnt_root)) {
				s = skip_mnt_tree(s);
				continue;
			}
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			parent = q;
			q = clone_mnt(p, p->mnt.mnt_root, flag);
			if (IS_ERR(q))
				goto out;
			lock_mount_hash();
			list_add_tail(&q->mnt_list, &res->mnt_list);
			mnt_set_mountpoint(parent, p->mnt_mp, q);
			if (!list_empty(&parent->mnt_mounts)) {
				t = list_last_entry(&parent->mnt_mounts,
					struct mount, mnt_child);
				if (t->mnt_mp != p->mnt_mp)
					t = NULL;
			}
			attach_shadowed(q, parent, t);
			unlock_mount_hash();
		}
	}
	return res;
out:
	if (res) {
		lock_mount_hash();
		umount_tree(res, 0);
		unlock_mount_hash();
	}
	return q;
}

/* Caller should check returned pointer for errors */

struct vfsmount *collect_mounts(struct path *path)
{
	struct mount *tree;
	namespace_lock();
	tree = copy_tree(real_mount(path->mnt), path->dentry,
			 CL_COPY_ALL | CL_PRIVATE);
	namespace_unlock();
	if (IS_ERR(tree))
		return ERR_CAST(tree);
	return &tree->mnt;
}

void drop_collected_mounts(struct vfsmount *mnt)
{
	namespace_lock();
	lock_mount_hash();
	umount_tree(real_mount(mnt), 0);
	unlock_mount_hash();
	namespace_unlock();
}

int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
		   struct vfsmount *root)
{
	struct mount *mnt;
	int res = f(root, arg);
	if (res)
		return res;
	list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
		res = f(&mnt->mnt, arg);
		if (res)
			return res;
	}
	return 0;
}

static void cleanup_group_ids(struct mount *mnt, struct mount *end)
{
	struct mount *p;

	for (p = mnt; p != end; p = next_mnt(p, mnt)) {
		if (p->mnt_group_id && !IS_MNT_SHARED(p))
			mnt_release_group_id(p);
	}
}

static int invent_group_ids(struct mount *mnt, bool recurse)
{
	struct mount *p;

	for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
		if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
			int err = mnt_alloc_group_id(p);
			if (err) {
				cleanup_group_ids(mnt, p);
				return err;
			}
		}
	}

	return 0;
}

/*
 *  @source_mnt : mount tree to be attached
 *  @dest_mnt   : mount that @source_mnt will be mounted on
 *  @dest_mp    : the mountpoint @source_mnt will be mounted at
 *  @parent_path: if non-null, detach the source_mnt from its parent and
 *		  store the parent mount and mountpoint dentry there
 *		  (done when source_mnt is moved)
 *
 *  In outline, the table below gives the semantics when a source mount
 *  of a given propagation type is bind-attached to a destination mount
 *  of a given type (see Documentation/filesystems/sharedsubtree.txt for
 *  the full version):
 *
 * ---------------------------------------------------------------------
 * | source-->| shared      |  private   |  slave       | unbindable   |
 * | dest     |             |            |              |              |
 * |   v      |             |            |              |              |
 * |*******************************************************************|
 * |  shared  | shared (++) | shared (+) | shared (+++) |  invalid     |
 * |non-shared| shared (+)  | private    | slave (*)    |  invalid     |
 * *********************************************************************
 *
 * A bind operation clones the source mount and mounts the clone on the
 * destination mount.
 *
 * (++)  the cloned mount is propagated to all the mounts in the
 *	 propagation tree of the destination mount and the cloned mount
 *	 is added to the peer group of the source mount.
 * (+)   the cloned mount is mounted on the destination mount and is
 *	 added to the peer group of the source mount.
 * (+++) the mount is propagated to all the mounts in the propagation
 *	 tree of the destination mount and the cloned mount is made a
 *	 slave of the same master as the source mount.
 * (*)   the cloned mount is made a slave of the same master as the
 *	 source mount.
 *
 * The table for a move operation is analogous, with the source moved
 * (not cloned) onto the destination.
 *
 * if the source mount is a tree, the operations explained above are
 * applied to each mount in the tree.
 * Must be called without spinlocks held, because this function can sleep
 * in allocations.
 */
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			struct path *parent_path)
{
	HLIST_HEAD(tree_list);
	struct mount *child, *p;
	struct hlist_node *n;
	int err;

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		lock_mount_hash();
		if (err)
			goto out_cleanup_ids;
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	} else {
		lock_mount_hash();
	}
	if (parent_path) {
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt, NULL);
	}

	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
		struct mount *q;
		hlist_del_init(&child->mnt_hash);
		q = __lookup_mnt_last(&child->mnt_parent->mnt,
				      child->mnt_mountpoint);
		commit_tree(child, q);
	}
	unlock_mount_hash();

	return 0;

 out_cleanup_ids:
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		umount_tree(child, 0);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
 out:
	return err;
}

static struct mountpoint *lock_mount(struct path *path)
{
	struct vfsmount *mnt;
	struct dentry *dentry = path->dentry;
retry:
	mutex_lock(&dentry->d_inode->i_mutex);
	if (unlikely(cant_mount(dentry))) {
		mutex_unlock(&dentry->d_inode->i_mutex);
		return ERR_PTR(-ENOENT);
	}
	namespace_lock();
	mnt = lookup_mnt(path);
	if (likely(!mnt)) {
		struct mountpoint *mp = new_mountpoint(dentry);
		if (IS_ERR(mp)) {
			namespace_unlock();
			mutex_unlock(&dentry->d_inode->i_mutex);
			return mp;
		}
		return mp;
	}
	namespace_unlock();
	mutex_unlock(&path->dentry->d_inode->i_mutex);
	path_put(path);
	path->mnt = mnt;
	dentry = path->dentry = dget(mnt->mnt_root);
	goto retry;
}

static void unlock_mount(struct mountpoint *where)
{
	struct dentry *dentry = where->m_dentry;
	put_mountpoint(where);
	namespace_unlock();
	mutex_unlock(&dentry->d_inode->i_mutex);
}

static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
{
	if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
		return -EINVAL;

	if (S_ISDIR(mp->m_dentry->d_inode->i_mode) !=
	      S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
		return -ENOTDIR;

	return attach_recursive_mnt(mnt, p, mp, NULL);
}

/*
 * Sanity check the flags to change_mnt_propagation.
 */
static int flags_to_propagation_type(int flags)
{
	int type = flags & ~(MS_REC | MS_SILENT);

	/* Fail if any non-propagation flags are set */
	if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		return 0;
	/* Only one propagation flag should be set */
	if (!is_power_of_2(type))
		return 0;
	return type;
}
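
/*
 * For example, MS_SHARED | MS_REC from mount(2) reduces to MS_SHARED here,
 * while a combination of two propagation types such as MS_SHARED | MS_SLAVE
 * is not a power of two and yields 0, which do_change_type() below rejects
 * with -EINVAL.
 */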

/*
 * recursively change the type of the mountpoint.
 */
static int do_change_type(struct path *path, int flag)
{
	struct mount *m;
	struct mount *mnt = real_mount(path->mnt);
	int recurse = flag & MS_REC;
	int type;
	int err = 0;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	type = flags_to_propagation_type(flag);
	if (!type)
		return -EINVAL;

	namespace_lock();
	if (type == MS_SHARED) {
		err = invent_group_ids(mnt, recurse);
		if (err)
			goto out_unlock;
	}

	lock_mount_hash();
	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
		change_mnt_propagation(m, type);
	unlock_mount_hash();

 out_unlock:
	namespace_unlock();
	return err;
}

static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	struct mount *child;
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (!is_subdir(child->mnt_mountpoint, dentry))
			continue;

		if (child->mnt.mnt_flags & MNT_LOCKED)
			return true;
	}
	return false;
}

/*
 * do loopback mount.
 */
static int do_loopback(struct path *path, const char *old_name,
				int recurse)
{
	struct path old_path;
	struct mount *mnt = NULL, *old, *parent;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
	if (err)
		return err;

	err = -EINVAL;
	if (mnt_ns_loop(old_path.dentry))
		goto out;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	parent = real_mount(path->mnt);

	err = -EINVAL;
	if (IS_MNT_UNBINDABLE(old))
		goto out2;

	if (!check_mnt(parent) || !check_mnt(old))
		goto out2;

	if (!recurse && has_locked_children(old, old_path.dentry))
		goto out2;

	if (recurse)
		mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
	else
		mnt = clone_mnt(old, old_path.dentry, 0);

	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto out2;
	}

	mnt->mnt.mnt_flags &= ~MNT_LOCKED;

	err = graft_tree(mnt, parent, mp);
	if (err) {
		lock_mount_hash();
		umount_tree(mnt, 0);
		unlock_mount_hash();
	}
out2:
	unlock_mount(mp);
out:
	path_put(&old_path);
	return err;
}

static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
{
	int error = 0;
	int readonly_request = 0;

	if (ms_flags & MS_RDONLY)
		readonly_request = 1;
	if (readonly_request == __mnt_is_readonly(mnt))
		return 0;

	if (readonly_request)
		error = mnt_make_readonly(real_mount(mnt));
	else
		__mnt_unmake_readonly(real_mount(mnt));
	return error;
}

/*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tough luck.
 */
static int do_remount(struct path *path, int flags, int mnt_flags,
		      void *data)
{
	int err;
	struct super_block *sb = path->mnt->mnt_sb;
	struct mount *mnt = real_mount(path->mnt);

	if (!check_mnt(mnt))
		return -EINVAL;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	/* Don't allow changing of locked mnt flags.
	 *
	 * No locks need to be held here while testing the various
	 * MNT_LOCK flags because those flags can never be cleared
	 * once they are set.
	 */
	if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
	    !(mnt_flags & MNT_READONLY)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
	    !(mnt_flags & MNT_NODEV)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
	    !(mnt_flags & MNT_NOSUID)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
	    !(mnt_flags & MNT_NOEXEC)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
	    ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
		return -EPERM;
	}

	err = security_sb_remount(sb, data);
	if (err)
		return err;

	down_write(&sb->s_umount);
	if (flags & MS_BIND)
		err = change_mount_flags(path->mnt, flags);
	else if (!capable(CAP_SYS_ADMIN))
		err = -EPERM;
	else
		err = do_remount_sb(sb, flags, data, 0);
	if (!err) {
		lock_mount_hash();
		mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
		mnt->mnt.mnt_flags = mnt_flags;
		touch_mnt_namespace(mnt->mnt_ns);
		unlock_mount_hash();
	}
	up_write(&sb->s_umount);
	return err;
}

static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *p;
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		if (IS_MNT_UNBINDABLE(p))
			return 1;
	}
	return 0;
}

static int do_move_mount(struct path *path, const char *old_name)
{
	struct path old_path, parent_path;
	struct mount *p;
	struct mount *old;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	if (err)
		return err;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	p = real_mount(path->mnt);

	err = -EINVAL;
	if (!check_mnt(p) || !check_mnt(old))
		goto out1;

	if (old->mnt.mnt_flags & MNT_LOCKED)
		goto out1;

	err = -EINVAL;
	if (old_path.dentry != old_path.mnt->mnt_root)
		goto out1;

	if (!mnt_has_parent(old))
		goto out1;

	if (S_ISDIR(path->dentry->d_inode->i_mode) !=
	      S_ISDIR(old_path.dentry->d_inode->i_mode))
		goto out1;
	/*
	 * Don't move a mount residing in a shared parent.
	 */
	if (IS_MNT_SHARED(old->mnt_parent))
		goto out1;
	/*
	 * Don't move a mount tree containing unbindable mounts to a destination
	 * mount which is shared.
	 */
	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
		goto out1;
	err = -ELOOP;
	for (; mnt_has_parent(p); p = p->mnt_parent)
		if (p == old)
			goto out1;

	err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
	if (err)
		goto out1;

	/* if the mount is moved, it should no longer be expire
	 * automatically */
	list_del_init(&old->mnt_expire);
out1:
	unlock_mount(mp);
out:
	if (!err)
		path_put(&parent_path);
	path_put(&old_path);
	return err;
}

static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
{
	int err;
	const char *subtype = strchr(fstype, '.');
	if (subtype) {
		subtype++;
		err = -EINVAL;
		if (!subtype[0])
			goto err;
	} else
		subtype = "";

	mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
	err = -ENOMEM;
	if (!mnt->mnt_sb->s_subtype)
		goto err;
	return mnt;

 err:
	mntput(mnt);
	return ERR_PTR(err);
}

/*
 * add a mount into a namespace's mount tree
 */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
	struct mountpoint *mp;
	struct mount *parent;
	int err;

	mnt_flags &= ~MNT_INTERNAL_FLAGS;

	mp = lock_mount(path);
	if (IS_ERR(mp))
		return PTR_ERR(mp);

	parent = real_mount(path->mnt);
	err = -EINVAL;
	if (unlikely(!check_mnt(parent))) {
		/* that's acceptable only for automounts done in private ns */
		if (!(mnt_flags & MNT_SHRINKABLE))
			goto unlock;
		/* ... and for those we'd better have mountpoint still alive */
		if (!parent->mnt_ns)
			goto unlock;
	}

	/* Refuse the same filesystem on the same mount point */
	err = -EBUSY;
	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
	    path->mnt->mnt_root == path->dentry)
		goto unlock;

	err = -EINVAL;
	if (S_ISLNK(newmnt->mnt.mnt_root->d_inode->i_mode))
		goto unlock;

	newmnt->mnt.mnt_flags = mnt_flags;
	err = graft_tree(newmnt, parent, mp);

unlock:
	unlock_mount(mp);
	return err;
}

/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
static int do_new_mount(struct path *path, const char *fstype, int flags,
			int mnt_flags, const char *name, void *data)
{
	struct file_system_type *type;
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	struct vfsmount *mnt;
	int err;

	if (!fstype)
		return -EINVAL;

	type = get_fs_type(fstype);
	if (!type)
		return -ENODEV;

	if (user_ns != &init_user_ns) {
		if (!(type->fs_flags & FS_USERNS_MOUNT)) {
			put_filesystem(type);
			return -EPERM;
		}
		/* Only in special cases allow devices from mounts
		 * created outside the initial user namespace
		 */
		if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
			flags |= MS_NODEV;
			mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
		}
	}

	mnt = vfs_kern_mount(type, flags, name, data);
	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
	    !mnt->mnt_sb->s_subtype)
		mnt = fs_set_subtype(mnt, fstype);

	put_filesystem(type);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	err = do_add_mount(real_mount(mnt), path, mnt_flags);
	if (err)
		mntput(mnt);
	return err;
}

int finish_automount(struct vfsmount *m, struct path *path)
{
	struct mount *mnt = real_mount(m);
	int err;

	/* The new mount record should have at least 2 refs to prevent it being
	 * expired before we get a chance to add it
	 */
	BUG_ON(mnt_get_count(mnt) < 2);

	if (m->mnt_sb == path->mnt->mnt_sb &&
	    m->mnt_root == path->dentry) {
		err = -ELOOP;
		goto fail;
	}

	err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
	if (!err)
		return 0;
fail:
	/* remove m from any expiration list it may be on */
	if (!list_empty(&mnt->mnt_expire)) {
		namespace_lock();
		list_del_init(&mnt->mnt_expire);
		namespace_unlock();
	}
	mntput(m);
	mntput(m);
	return err;
}

/**
 * mnt_set_expiry - Put a mount on an expiration list
 * @mnt: The mount to put on the list.
 * @expiry_list: The list to put the mount on.
 */
void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
{
	namespace_lock();

	list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);

	namespace_unlock();
}
EXPORT_SYMBOL(mnt_set_expiry);
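
/*
 * Typical use (sketch): network filesystems such as NFS put each
 * automounted submount on a private expiry list with mnt_set_expiry()
 * and arrange for a periodic worker to call mark_mounts_for_expiry()
 * on that list; a submount still unused on the second pass gets
 * unmounted.
 */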

/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here
 */
void mark_mounts_for_expiry(struct list_head *mounts)
{
	struct mount *mnt, *next;
	LIST_HEAD(graveyard);

	if (list_empty(mounts))
		return;

	namespace_lock();
	lock_mount_hash();

	/* extract from the expiration list every vfsmount that matches the
	 * following criteria:
	 * - only referenced by its parent vfsmount
	 * - still marked for expiry (marked on the last call here; marks are
	 *   cleared by mntput())
	 */
	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
			propagate_mount_busy(mnt, 1))
			continue;
		list_move(&mnt->mnt_expire, &graveyard);
	}
	while (!list_empty(&graveyard)) {
		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
		touch_mnt_namespace(mnt->mnt_ns);
		umount_tree(mnt, 1);
	}
	unlock_mount_hash();
	namespace_unlock();
}

EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);

/*
 * Ripoff of 'select_parent()'
 *
 * search the list of submounts for a given mountpoint, and move any
 * shrinkable submounts to the 'graveyard' list.
 */
static int select_submounts(struct mount *parent, struct list_head *graveyard)
{
	struct mount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct mount *mnt = list_entry(tmp, struct mount, mnt_child);

		next = tmp->next;
		if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
			continue;
		/*
		 * Descend a level if the d_mounts list is non-empty.
		 */
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		if (!propagate_mount_busy(mnt, 1)) {
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search
	 */
	if (this_parent != parent) {
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}

/*
 * process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint
 *
 * mount_lock must be held for write
 */
static void shrink_submounts(struct mount *mnt)
{
	LIST_HEAD(graveyard);
	struct mount *m;

	/* extract submounts of 'mountpoint' from the expiration list */
	while (select_submounts(mnt, &graveyard)) {
		while (!list_empty(&graveyard)) {
			m = list_first_entry(&graveyard, struct mount,
						mnt_expire);
			touch_mnt_namespace(m->mnt_ns);
			umount_tree(m, 1);
		}
	}
}

/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault.  But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy.
 */
static long exact_copy_from_user(void *to, const void __user * from,
				 unsigned long n)
{
	char *t = to;
	const char __user *f = from;
	char c;

	if (!access_ok(VERIFY_READ, from, n))
		return n;

	while (n) {
		if (__get_user(c, f)) {
			memset(t, 0, n);
			break;
		}
		*t++ = c;
		f++;
		n--;
	}
	return n;
}

int copy_mount_options(const void __user * data, unsigned long *where)
{
	int i;
	unsigned long page;
	unsigned long size;

	*where = 0;
	if (!data)
		return 0;

	if (!(page = __get_free_page(GFP_KERNEL)))
		return -ENOMEM;

	/* We only care that *some* data at the address the user
	 * gave us is valid.  Just in case, we'll zero
	 * the remainder of the page.
	 */
	/* copy_from_user cannot cross TASK_SIZE ! */
	size = TASK_SIZE - (unsigned long)data;
	if (size > PAGE_SIZE)
		size = PAGE_SIZE;

	i = size - exact_copy_from_user((void *)page, data, size);
	if (!i) {
		free_page(page);
		return -EFAULT;
	}
	if (i != PAGE_SIZE)
		memset((char *)page + i, 0, PAGE_SIZE - i);
	*where = page;
	return 0;
}

int copy_mount_string(const void __user *data, char **where)
{
	char *tmp;

	if (!data) {
		*where = NULL;
		return 0;
	}

	tmp = strndup_user(data, PAGE_SIZE);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	*where = tmp;
	return 0;
}

/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */
long do_mount(const char *dev_name, const char *dir_name,
		const char *type_page, unsigned long flags, void *data_page)
{
	struct path path;
	int retval = 0;
	int mnt_flags = 0;

	/* Discard magic */
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

	/* Basic sanity checks */
	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
		return -EINVAL;

	if (data_page)
		((char *)data_page)[PAGE_SIZE - 1] = 0;

	/* ... and get the mountpoint */
	retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
	if (retval)
		return retval;

	retval = security_sb_mount(dev_name, &path,
				   type_page, flags, data_page);
	if (!retval && !may_mount())
		retval = -EPERM;
	if (retval)
		goto dput_out;

	/* Default to relatime unless overriden */
	if (!(flags & MS_NOATIME))
		mnt_flags |= MNT_RELATIME;

	/* Separate the per-mountpoint flags */
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	if (flags & MS_NOATIME)
		mnt_flags |= MNT_NOATIME;
	if (flags & MS_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;
	if (flags & MS_STRICTATIME)
		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
	if (flags & MS_RDONLY)
		mnt_flags |= MNT_READONLY;

	/* The default atime for remount is preservation */
	if ((flags & MS_REMOUNT) &&
	    ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
		       MS_STRICTATIME)) == 0)) {
		mnt_flags &= ~MNT_ATIME_MASK;
		mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
	}

	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
		   MS_STRICTATIME);

	if (flags & MS_REMOUNT)
		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
				    data_page);
	else if (flags & MS_BIND)
		retval = do_loopback(&path, dev_name, flags & MS_REC);
	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		retval = do_change_type(&path, flags);
	else if (flags & MS_MOVE)
		retval = do_move_mount(&path, dev_name);
	else
		retval = do_new_mount(&path, type_page, flags, mnt_flags,
				      dev_name, data_page);
dput_out:
	path_put(&path);
	return retval;
}

static void free_mnt_ns(struct mnt_namespace *ns)
{
	proc_free_inum(ns->proc_inum);
	put_user_ns(ns->user_ns);
	kfree(ns);
}

/*
 * Assign a sequence number so we can detect when we attempt to bind
 * mount a reference to an older mount namespace into the current
 * mount namespace, preventing reference counting loops.  A 64bit
 * number incrementing at 10Ghz will take 12,427 years to wrap which
 * is effectively never, so we can ignore the possibility.
 */
static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);

static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
{
	struct mnt_namespace *new_ns;
	int ret;

	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
	if (!new_ns)
		return ERR_PTR(-ENOMEM);
	ret = proc_alloc_inum(&new_ns->proc_inum);
	if (ret) {
		kfree(new_ns);
		return ERR_PTR(ret);
	}
	new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
	atomic_set(&new_ns->count, 1);
	new_ns->root = NULL;
	INIT_LIST_HEAD(&new_ns->list);
	init_waitqueue_head(&new_ns->poll);
	new_ns->event = 0;
	new_ns->user_ns = get_user_ns(user_ns);
	return new_ns;
}

struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
		struct user_namespace *user_ns, struct fs_struct *new_fs)
{
	struct mnt_namespace *new_ns;
	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
	struct mount *p, *q;
	struct mount *old;
	struct mount *new;
	int copy_flags;

	BUG_ON(!ns);

	if (likely(!(flags & CLONE_NEWNS))) {
		get_mnt_ns(ns);
		return ns;
	}

	old = ns->root;

	new_ns = alloc_mnt_ns(user_ns);
	if (IS_ERR(new_ns))
		return new_ns;

	namespace_lock();
	/* First pass: copy the tree topology */
	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
	if (user_ns != ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		namespace_unlock();
		free_mnt_ns(new_ns);
		return ERR_CAST(new);
	}
	new_ns->root = new;
	list_add_tail(&new_ns->list, &new->mnt_list);

	/*
	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
	 * as belonging to new namespace.  We have already acquired a private
	 * fs_struct, so tsk->fs->lock is not needed.
	 */
	p = old;
	q = new;
	while (p) {
		q->mnt_ns = new_ns;
		if (new_fs) {
			if (&p->mnt == new_fs->root.mnt) {
				new_fs->root.mnt = mntget(&q->mnt);
				rootmnt = &p->mnt;
			}
			if (&p->mnt == new_fs->pwd.mnt) {
				new_fs->pwd.mnt = mntget(&q->mnt);
				pwdmnt = &p->mnt;
			}
		}
		p = next_mnt(p, old);
		q = next_mnt(q, new);
		if (!q)
			break;
		while (p->mnt.mnt_root != q->mnt.mnt_root)
			p = next_mnt(p, old);
	}
	namespace_unlock();

	if (rootmnt)
		mntput(rootmnt);
	if (pwdmnt)
		mntput(pwdmnt);

	return new_ns;
}

/**
 * create_mnt_ns - creates a private namespace and adds a root filesystem
 * @m: pointer to the new private filesystem mount
 */
static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
{
	struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
	if (!IS_ERR(new_ns)) {
		struct mount *mnt = real_mount(m);
		mnt->mnt_ns = new_ns;
		new_ns->root = mnt;
		list_add(&mnt->mnt_list, &new_ns->list);
	} else {
		mntput(m);
	}
	return new_ns;
}

struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
{
	struct mnt_namespace *ns;
	struct super_block *s;
	struct path path;
	int err;

	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		return ERR_CAST(ns);

	err = vfs_path_lookup(mnt->mnt_root, mnt,
			name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);

	put_mnt_ns(ns);

	if (err)
		return ERR_PTR(err);

	/* trade a vfsmount reference for active sb one */
	s = path.mnt->mnt_sb;
	atomic_inc(&s->s_active);
	mntput(path.mnt);
	/* lock the sucker */
	down_write(&s->s_umount);
	/* ... and return the root of (sub)tree on it */
	return path.dentry;
}
EXPORT_SYMBOL(mount_subtree);

SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	int ret;
	char *kernel_type;
	struct filename *kernel_dir;
	char *kernel_dev;
	unsigned long data_page;

	ret = copy_mount_string(type, &kernel_type);
	if (ret < 0)
		goto out_type;

	kernel_dir = getname(dir_name);
	if (IS_ERR(kernel_dir)) {
		ret = PTR_ERR(kernel_dir);
		goto out_dir;
	}

	ret = copy_mount_string(dev_name, &kernel_dev);
	if (ret < 0)
		goto out_dev;

	ret = copy_mount_options(data, &data_page);
	if (ret < 0)
		goto out_data;

	ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags,
		(void *) data_page);

	free_page(data_page);
out_data:
	kfree(kernel_dev);
out_dev:
	putname(kernel_dir);
out_dir:
	kfree(kernel_type);
out_type:
	return ret;
}

/*
 * Return true if path is reachable from root
 *
 * namespace_sem or mount_lock is held
 */
bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
			 const struct path *root)
{
	while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
		dentry = mnt->mnt_mountpoint;
		mnt = mnt->mnt_parent;
	}
	return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
}

int path_is_under(struct path *path1, struct path *path2)
{
	int res;
	read_seqlock_excl(&mount_lock);
	res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
	read_sequnlock_excl(&mount_lock);
	return res;
}
EXPORT_SYMBOL(path_is_under);

/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root as the new root file system of the current process, and sets
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root. The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root. No other
 * file system may be mounted on put_old. After all, new_root is a mountpoint.
 *
 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
 * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
 * in this situation.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
 *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
 *    first.
 */
SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
		const char __user *, put_old)
{
	struct path new, old, parent_path, root_parent, root;
	struct mount *new_mnt, *root_mnt, *old_mnt;
	struct mountpoint *old_mp, *root_mp;
	int error;

	if (!may_mount())
		return -EPERM;

	error = user_path_dir(new_root, &new);
	if (error)
		goto out0;

	error = user_path_dir(put_old, &old);
	if (error)
		goto out1;

	error = security_sb_pivotroot(&old, &new);
	if (error)
		goto out2;

	get_fs_root(current->fs, &root);
	old_mp = lock_mount(&old);
	error = PTR_ERR(old_mp);
	if (IS_ERR(old_mp))
		goto out3;

	error = -EINVAL;
	new_mnt = real_mount(new.mnt);
	root_mnt = real_mount(root.mnt);
	old_mnt = real_mount(old.mnt);
	if (IS_MNT_SHARED(old_mnt) ||
		IS_MNT_SHARED(new_mnt->mnt_parent) ||
		IS_MNT_SHARED(root_mnt->mnt_parent))
		goto out4;
	if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
		goto out4;
	if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
		goto out4;
	error = -ENOENT;
	if (d_unlinked(new.dentry))
		goto out4;
	error = -EBUSY;
	if (new_mnt == root_mnt || old_mnt == root_mnt)
		goto out4; /* loop, on the same file system  */
	error = -EINVAL;
	if (root.mnt->mnt_root != root.dentry)
		goto out4; /* not a mountpoint */
	if (!mnt_has_parent(root_mnt))
		goto out4; /* not attached */
	root_mp = root_mnt->mnt_mp;
	if (new.mnt->mnt_root != new.dentry)
		goto out4; /* not a mountpoint */
	if (!mnt_has_parent(new_mnt))
		goto out4; /* not attached */
	/* make sure we can reach put_old from new_root */
	if (!is_path_reachable(old_mnt, old.dentry, &new))
		goto out4;
	root_mp->m_count++; /* pin it so it won't go away */
	lock_mount_hash();
	detach_mnt(new_mnt, &parent_path);
	detach_mnt(root_mnt, &root_parent);
	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
		new_mnt->mnt.mnt_flags |= MNT_LOCKED;
		root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
	}
	/* mount old root on put_old */
	attach_mnt(root_mnt, old_mnt, old_mp);
	/* mount new_root on / */
	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
	touch_mnt_namespace(current->nsproxy->mnt_ns);
	unlock_mount_hash();
	chroot_fs_refs(&root, &new);
	put_mountpoint(root_mp);
	error = 0;
out4:
	unlock_mount(old_mp);
	if (!error) {
		path_put(&root_parent);
		path_put(&parent_path);
	}
out3:
	path_put(&root);
out2:
	path_put(&old);
out1:
	path_put(&new);
out0:
	return error;
}
2855
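/*
 * init_mount_tree - create the initial mount namespace around a freshly
 * mounted rootfs and point the first task's root and cwd at it.
 */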
static void __init init_mount_tree(void)
{
	struct vfsmount *mnt;
	struct mnt_namespace *ns;
	struct path root;
	struct file_system_type *type;

	type = get_fs_type("rootfs");
	if (!type)
		panic("Can't find rootfs type");
	mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
	put_filesystem(type);
	if (IS_ERR(mnt))
		panic("Can't create rootfs");

	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		panic("Can't allocate initial namespace");

	init_task.nsproxy->mnt_ns = ns;
	get_mnt_ns(ns);

	root.mnt = mnt;
	root.dentry = mnt->mnt_root;

	set_fs_pwd(current->fs, &root);
	set_fs_root(current->fs, &root);
}

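/*
 * mnt_init - early boot initialisation: create the mount cache and the
 * mount/mountpoint hash tables, bring up kernfs and sysfs, then mount
 * rootfs via init_mount_tree().
 */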
void __init mnt_init(void)
{
	unsigned u;
	int err;

	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	mount_hashtable = alloc_large_system_hash("Mount-cache",
				sizeof(struct hlist_head),
				mhash_entries, 19,
				0,
				&m_hash_shift, &m_hash_mask, 0, 0);
	mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
				sizeof(struct hlist_head),
				mphash_entries, 19,
				0,
				&mp_hash_shift, &mp_hash_mask, 0, 0);

	if (!mount_hashtable || !mountpoint_hashtable)
		panic("Failed to allocate mount hash table\n");

	for (u = 0; u <= m_hash_mask; u++)
		INIT_HLIST_HEAD(&mount_hashtable[u]);
	for (u = 0; u <= mp_hash_mask; u++)
		INIT_HLIST_HEAD(&mountpoint_hashtable[u]);

	kernfs_init();

	err = sysfs_init();
	if (err)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, err);
	fs_kobj = kobject_create_and_add("fs", NULL);
	if (!fs_kobj)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);
	init_rootfs();
	init_mount_tree();
}

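/*
 * put_mnt_ns - drop a reference on a mount namespace.  The last reference
 * releases every mount collected in the namespace and frees it.
 */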
void put_mnt_ns(struct mnt_namespace *ns)
{
	if (!atomic_dec_and_test(&ns->count))
		return;
	drop_collected_mounts(&ns->root->mnt);
	free_mnt_ns(ns);
}

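/*
 * kern_mount_data - mount a filesystem for internal kernel use.  Such a
 * mount is marked MNT_NS_INTERNAL and never appears in any user-visible
 * mount namespace.
 */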
struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
{
	struct vfsmount *mnt;
	mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
	if (!IS_ERR(mnt)) {
		/*
		 * it is a longterm mount, don't release mnt until
		 * we unmount before file sys is unregistered
		 */
		real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
	}
	return mnt;
}
EXPORT_SYMBOL_GPL(kern_mount_data);

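/*
 * kern_unmount - release a long-term kernel-internal mount.  ->mnt_ns is
 * cleared and an RCU grace period waited out so that concurrent lockless
 * users are done with the mount before the final mntput().
 */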
void kern_unmount(struct vfsmount *mnt)
{
	if (!IS_ERR_OR_NULL(mnt)) {
		real_mount(mnt)->mnt_ns = NULL;
		synchronize_rcu();
		mntput(mnt);
	}
}
EXPORT_SYMBOL(kern_unmount);

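/*
 * our_mnt - does @mnt belong to the caller's mount namespace?
 */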
bool our_mnt(struct vfsmount *mnt)
{
	return check_mnt(real_mount(mnt));
}

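/*
 * current_chrooted - report whether the current process appears chrooted,
 * i.e. whether its root differs from the root of its mount namespace.
 */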
bool current_chrooted(void)
{
	struct path ns_root;
	struct path fs_root;
	bool chrooted;

	/* Find the namespace root */
	ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
	ns_root.dentry = ns_root.mnt->mnt_root;
	path_get(&ns_root);
	while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
		;

	get_fs_root(current->fs, &fs_root);

	chrooted = !path_equal(&fs_root, &ns_root);

	path_put(&fs_root);
	path_put(&ns_root);

	return chrooted;
}

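/*
 * fs_fully_visible - check whether some mount of @type in the caller's
 * mount namespace is fully visible, i.e. none of its child mounts cover
 * anything but empty directories.
 */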
bool fs_fully_visible(struct file_system_type *type)
{
	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
	struct mount *mnt;
	bool visible = false;

	if (unlikely(!ns))
		return false;

	down_read(&namespace_sem);
	list_for_each_entry(mnt, &ns->list, mnt_list) {
		struct mount *child;
		if (mnt->mnt.mnt_sb->s_type != type)
			continue;

		/* This mount is not fully visible if there are any child
		 * mounts that cover anything except for empty directories.
		 */
		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
			struct inode *inode = child->mnt_mountpoint->d_inode;
			if (!S_ISDIR(inode->i_mode))
				goto next;
			if (inode->i_nlink > 2)
				goto next;
		}
		visible = true;
		goto found;
	next:	;
	}
found:
	up_read(&namespace_sem);
	return visible;
}

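/*
 * The proc_ns operations below back /proc/<pid>/ns/mnt: they grab and
 * release a task's mount namespace, switch to one via setns(2), and
 * report its inode number.
 */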
static void *mntns_get(struct task_struct *task)
{
	struct mnt_namespace *ns = NULL;
	struct nsproxy *nsproxy;

	task_lock(task);
	nsproxy = task->nsproxy;
	if (nsproxy) {
		ns = nsproxy->mnt_ns;
		get_mnt_ns(ns);
	}
	task_unlock(task);

	return ns;
}

static void mntns_put(void *ns)
{
	put_mnt_ns(ns);
}

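/*
 * mntns_install - make the caller's nsproxy use @ns, for setns(2).
 * Requires CAP_SYS_ADMIN over the target namespace plus CAP_SYS_CHROOT
 * and CAP_SYS_ADMIN in the caller's user namespace, and an fs_struct
 * that is not shared, since root and pwd are rewritten below.
 */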
static int mntns_install(struct nsproxy *nsproxy, void *ns)
{
	struct fs_struct *fs = current->fs;
	struct mnt_namespace *mnt_ns = ns;
	struct path root;

	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
	    !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
		return -EPERM;

	if (fs->users != 1)
		return -EINVAL;

	get_mnt_ns(mnt_ns);
	put_mnt_ns(nsproxy->mnt_ns);
	nsproxy->mnt_ns = mnt_ns;

	/* Find the root */
	root.mnt = &mnt_ns->root->mnt;
	root.dentry = mnt_ns->root->mnt.mnt_root;
	path_get(&root);
	while (d_mountpoint(root.dentry) && follow_down_one(&root))
		;

	/* Update the pwd and root */
	set_fs_pwd(fs, &root);
	set_fs_root(fs, &root);

	path_put(&root);
	return 0;
}

static unsigned int mntns_inum(void *ns)
{
	struct mnt_namespace *mnt_ns = ns;
	return mnt_ns->proc_inum;
}

const struct proc_ns_operations mntns_operations = {
	.name		= "mnt",
	.type		= CLONE_NEWNS,
	.get		= mntns_get,
	.put		= mntns_put,
	.install	= mntns_install,
	.inum		= mntns_inum,
};