/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 * Released under GPL v2.
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */
11#include <linux/syscalls.h>
12#include <linux/export.h>
13#include <linux/capability.h>
14#include <linux/mnt_namespace.h>
15#include <linux/user_namespace.h>
16#include <linux/namei.h>
17#include <linux/security.h>
18#include <linux/idr.h>
19#include <linux/init.h>
20#include <linux/fs_struct.h>
21#include <linux/fsnotify.h>
22#include <linux/uaccess.h>
23#include <linux/proc_ns.h>
24#include <linux/magic.h>
25#include <linux/bootmem.h>
26#include <linux/task_work.h>
27#include "pnode.h"
28#include "internal.h"
29
30static unsigned int m_hash_mask __read_mostly;
31static unsigned int m_hash_shift __read_mostly;
32static unsigned int mp_hash_mask __read_mostly;
33static unsigned int mp_hash_shift __read_mostly;
34
35static __initdata unsigned long mhash_entries;
36static int __init set_mhash_entries(char *str)
37{
38 if (!str)
39 return 0;
40 mhash_entries = simple_strtoul(str, &str, 0);
41 return 1;
42}
43__setup("mhash_entries=", set_mhash_entries);
44
45static __initdata unsigned long mphash_entries;
46static int __init set_mphash_entries(char *str)
47{
48 if (!str)
49 return 0;
50 mphash_entries = simple_strtoul(str, &str, 0);
51 return 1;
52}
53__setup("mphash_entries=", set_mphash_entries);
54
55static u64 event;
56static DEFINE_IDA(mnt_id_ida);
57static DEFINE_IDA(mnt_group_ida);
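/* mnt_id_lock protects mnt_id_ida and the mnt_id_start allocation hint */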
58static DEFINE_SPINLOCK(mnt_id_lock);
59static int mnt_id_start = 0;
60static int mnt_group_start = 1;
61
62static struct hlist_head *mount_hashtable __read_mostly;
63static struct hlist_head *mountpoint_hashtable __read_mostly;
64static struct kmem_cache *mnt_cache __read_mostly;
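/* namespace_sem serializes changes to the mount tree across all mount namespaces */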
65static DECLARE_RWSEM(namespace_sem);
66
67
68struct kobject *fs_kobj;
69EXPORT_SYMBOL_GPL(fs_kobj);
70
/*
 * mount_lock is a global seqlock that protects the mount hash table,
 * the mount tree topology and the counters embedded in struct mount.
 * Lockless readers sample the sequence count and revalidate it with
 * __legitimize_mnt(); writers take it as a spinlock via
 * lock_mount_hash()/unlock_mount_hash().
 */
79__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
80
81static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
82{
83 unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
84 tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
85 tmp = tmp + (tmp >> m_hash_shift);
86 return &mount_hashtable[tmp & m_hash_mask];
87}
88
89static inline struct hlist_head *mp_hash(struct dentry *dentry)
90{
91 unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
92 tmp = tmp + (tmp >> mp_hash_shift);
93 return &mountpoint_hashtable[tmp & mp_hash_mask];
94}
95
/*
 * allocation is serialized by namespace_sem, but we need the spinlock to
 * serialize with freeing.
 */
100static int mnt_alloc_id(struct mount *mnt)
101{
102 int res;
103
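 /* ida_get_new_above() returns -EAGAIN when the IDA needs another preallocation pass */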
104retry:
105 ida_pre_get(&mnt_id_ida, GFP_KERNEL);
106 spin_lock(&mnt_id_lock);
107 res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
108 if (!res)
109 mnt_id_start = mnt->mnt_id + 1;
110 spin_unlock(&mnt_id_lock);
111 if (res == -EAGAIN)
112 goto retry;
113
114 return res;
115}
116
117static void mnt_free_id(struct mount *mnt)
118{
119 int id = mnt->mnt_id;
120 spin_lock(&mnt_id_lock);
121 ida_remove(&mnt_id_ida, id);
122 if (mnt_id_start > id)
123 mnt_id_start = id;
124 spin_unlock(&mnt_id_lock);
125}
126
/*
 * Allocate a new peer group ID
 *
 * mnt_group_ida is protected by namespace_sem
 */
132static int mnt_alloc_group_id(struct mount *mnt)
133{
134 int res;
135
136 if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
137 return -ENOMEM;
138
139 res = ida_get_new_above(&mnt_group_ida,
140 mnt_group_start,
141 &mnt->mnt_group_id);
142 if (!res)
143 mnt_group_start = mnt->mnt_group_id + 1;
144
145 return res;
146}
147
/*
 * Release a peer group ID
 */
151void mnt_release_group_id(struct mount *mnt)
152{
153 int id = mnt->mnt_group_id;
154 ida_remove(&mnt_group_ida, id);
155 if (mnt_group_start > id)
156 mnt_group_start = id;
157 mnt->mnt_group_id = 0;
158}
159
/*
 * vfsmount lock must be held for read
 */
163static inline void mnt_add_count(struct mount *mnt, int n)
164{
165#ifdef CONFIG_SMP
166 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
167#else
168 preempt_disable();
169 mnt->mnt_count += n;
170 preempt_enable();
171#endif
172}
173
/*
 * vfsmount lock must be held for write
 */
177unsigned int mnt_get_count(struct mount *mnt)
178{
179#ifdef CONFIG_SMP
180 unsigned int count = 0;
181 int cpu;
182
183 for_each_possible_cpu(cpu) {
184 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
185 }
186
187 return count;
188#else
189 return mnt->mnt_count;
190#endif
191}
192
193static void drop_mountpoint(struct fs_pin *p)
194{
195 struct mount *m = container_of(p, struct mount, mnt_umount);
196 dput(m->mnt_ex_mountpoint);
197 pin_remove(p);
198 mntput(&m->mnt);
199}
200
201static struct mount *alloc_vfsmnt(const char *name)
202{
203 struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
204 if (mnt) {
205 int err;
206
207 err = mnt_alloc_id(mnt);
208 if (err)
209 goto out_free_cache;
210
211 if (name) {
212 mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL);
213 if (!mnt->mnt_devname)
214 goto out_free_id;
215 }
216
217#ifdef CONFIG_SMP
218 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
219 if (!mnt->mnt_pcp)
220 goto out_free_devname;
221
222 this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
223#else
224 mnt->mnt_count = 1;
225 mnt->mnt_writers = 0;
226#endif
227
228 INIT_HLIST_NODE(&mnt->mnt_hash);
229 INIT_LIST_HEAD(&mnt->mnt_child);
230 INIT_LIST_HEAD(&mnt->mnt_mounts);
231 INIT_LIST_HEAD(&mnt->mnt_list);
232 INIT_LIST_HEAD(&mnt->mnt_expire);
233 INIT_LIST_HEAD(&mnt->mnt_share);
234 INIT_LIST_HEAD(&mnt->mnt_slave_list);
235 INIT_LIST_HEAD(&mnt->mnt_slave);
236 INIT_HLIST_NODE(&mnt->mnt_mp_list);
237#ifdef CONFIG_FSNOTIFY
238 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
239#endif
240 init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
241 }
242 return mnt;
243
244#ifdef CONFIG_SMP
245out_free_devname:
246 kfree_const(mnt->mnt_devname);
247#endif
248out_free_id:
249 mnt_free_id(mnt);
250out_free_cache:
251 kmem_cache_free(mnt_cache, mnt);
252 return NULL;
253}
254
/*
 * Most r/o checks on a mount point are done on the mount point's
 * flags, not the superblock's.  r/o bind mounts rely on this.
 *
 * Write access to a mount is tracked with a per-mount (per-cpu on SMP)
 * writer count: mnt_want_write()/mnt_drop_write() bracket every write.
 * A mount can only be switched read-only once its writer count has
 * drained to zero; MNT_WRITE_HOLD is used to park new writers while
 * that check is made (see mnt_make_readonly() below).
 */
274int __mnt_is_readonly(struct vfsmount *mnt)
275{
276 if (mnt->mnt_flags & MNT_READONLY)
277 return 1;
278 if (mnt->mnt_sb->s_flags & MS_RDONLY)
279 return 1;
280 return 0;
281}
282EXPORT_SYMBOL_GPL(__mnt_is_readonly);
283
284static inline void mnt_inc_writers(struct mount *mnt)
285{
286#ifdef CONFIG_SMP
287 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
288#else
289 mnt->mnt_writers++;
290#endif
291}
292
293static inline void mnt_dec_writers(struct mount *mnt)
294{
295#ifdef CONFIG_SMP
296 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
297#else
298 mnt->mnt_writers--;
299#endif
300}
301
302static unsigned int mnt_get_writers(struct mount *mnt)
303{
304#ifdef CONFIG_SMP
305 unsigned int count = 0;
306 int cpu;
307
308 for_each_possible_cpu(cpu) {
309 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
310 }
311
312 return count;
313#else
314 return mnt->mnt_writers;
315#endif
316}
317
318static int mnt_is_readonly(struct vfsmount *mnt)
319{
320 if (mnt->mnt_sb->s_readonly_remount)
321 return 1;
322
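 /* Order against the setting of s_readonly_remount in sb_prepare_remount_readonly() */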
323 smp_rmb();
324 return __mnt_is_readonly(mnt);
325}
326
/**
 * __mnt_want_write - get write access to a mount without freeze protection
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed
 * to it, and makes sure that writes are allowed (mount is read-write) before
 * returning success. This operation does not protect against the filesystem
 * being frozen. When the write operation is finished, __mnt_drop_write()
 * must be called. This is effectively a refcount.
 */
343int __mnt_want_write(struct vfsmount *m)
344{
345 struct mount *mnt = real_mount(m);
346 int ret = 0;
347
348 preempt_disable();
349 mnt_inc_writers(mnt);
350
 /*
  * The store to mnt_inc_writers must be visible before we pass
  * the MNT_WRITE_HOLD loop below, so that the slowpath can see our
  * incremented count after it has set MNT_WRITE_HOLD.
  */
355 smp_mb();
356 while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
357 cpu_relax();
358
 /*
  * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
  * be set to match its requirements. So we must not load that until
  * MNT_WRITE_HOLD is cleared.
  */
363 smp_rmb();
364 if (mnt_is_readonly(m)) {
365 mnt_dec_writers(mnt);
366 ret = -EROFS;
367 }
368 preempt_enable();
369
370 return ret;
371}
372
/**
 * mnt_want_write - get write access to a mount
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed
 * to it, and makes sure that writes are allowed (mount is read-write,
 * filesystem is not frozen) before returning success. When the write
 * operation is finished, mnt_drop_write() must be called. This is
 * effectively a refcount.
 */
382int mnt_want_write(struct vfsmount *m)
383{
384 int ret;
385
386 sb_start_write(m->mnt_sb);
387 ret = __mnt_want_write(m);
388 if (ret)
389 sb_end_write(m->mnt_sb);
390 return ret;
391}
392EXPORT_SYMBOL_GPL(mnt_want_write);
393
/**
 * mnt_clone_write - get write access to a mount
 * @mnt: the mount on which to take a write
 *
 * This is effectively like mnt_want_write, except
 * it must only be used to take an extra write reference
 * on a mountpoint that we already know has a write reference
 * on it. This allows some optimisation.
 *
 * After finished, mnt_drop_write must be called as usual to
 * drop the reference.
 */
406int mnt_clone_write(struct vfsmount *mnt)
407{
408
409 if (__mnt_is_readonly(mnt))
410 return -EROFS;
411 preempt_disable();
412 mnt_inc_writers(real_mount(mnt));
413 preempt_enable();
414 return 0;
415}
416EXPORT_SYMBOL_GPL(mnt_clone_write);
417
/**
 * __mnt_want_write_file - get write access to a file's mount
 * @file: the file whose mount on which to take a write
 *
 * This is like __mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already
 */
425int __mnt_want_write_file(struct file *file)
426{
427 if (!(file->f_mode & FMODE_WRITER))
428 return __mnt_want_write(file->f_path.mnt);
429 else
430 return mnt_clone_write(file->f_path.mnt);
431}
432
/**
 * mnt_want_write_file - get write access to a file's mount
 * @file: the file whose mount on which to take a write
 *
 * This is like mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already
 */
440int mnt_want_write_file(struct file *file)
441{
442 int ret;
443
444 sb_start_write(file->f_path.mnt->mnt_sb);
445 ret = __mnt_want_write_file(file);
446 if (ret)
447 sb_end_write(file->f_path.mnt->mnt_sb);
448 return ret;
449}
450EXPORT_SYMBOL_GPL(mnt_want_write_file);
451
/**
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done
 * performing writes to it.  Must be matched with
 * __mnt_want_write() call above.
 */
460void __mnt_drop_write(struct vfsmount *mnt)
461{
462 preempt_disable();
463 mnt_dec_writers(real_mount(mnt));
464 preempt_enable();
465}
466
/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it
 * and also allows the filesystem to be frozen again.  Must be matched with
 * mnt_want_write() call above.
 */
475void mnt_drop_write(struct vfsmount *mnt)
476{
477 __mnt_drop_write(mnt);
478 sb_end_write(mnt->mnt_sb);
479}
480EXPORT_SYMBOL_GPL(mnt_drop_write);
481
482void __mnt_drop_write_file(struct file *file)
483{
484 __mnt_drop_write(file->f_path.mnt);
485}
486
487void mnt_drop_write_file(struct file *file)
488{
489 mnt_drop_write(file->f_path.mnt);
490}
491EXPORT_SYMBOL(mnt_drop_write_file);
492
493static int mnt_make_readonly(struct mount *mnt)
494{
495 int ret = 0;
496
497 lock_mount_hash();
498 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
 /*
  * After storing MNT_WRITE_HOLD, we'll read the counters. This store
  * should be visible before we do.
  */
503 smp_mb();

 /*
  * With writers on hold, if this value is zero, then there are
  * definitely no active writers (although held writers may subsequently
  * increment the count, they'll have to wait, and decrement it after
  * seeing MNT_READONLY).
  *
  * It is OK to have the counter incremented on one CPU and decremented
  * on another: the sum still adds up correctly, and mount_lock excludes
  * any other read-only transition while we look at it.
  */
521 if (mnt_get_writers(mnt) > 0)
522 ret = -EBUSY;
523 else
524 mnt->mnt.mnt_flags |= MNT_READONLY;
525
 /*
  * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
  * that become unheld will see MNT_READONLY.
  */
529 smp_wmb();
530 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
531 unlock_mount_hash();
532 return ret;
533}
534
535static void __mnt_unmake_readonly(struct mount *mnt)
536{
537 lock_mount_hash();
538 mnt->mnt.mnt_flags &= ~MNT_READONLY;
539 unlock_mount_hash();
540}
541
542int sb_prepare_remount_readonly(struct super_block *sb)
543{
544 struct mount *mnt;
545 int err = 0;
546
547
548 if (atomic_long_read(&sb->s_remove_count))
549 return -EBUSY;
550
551 lock_mount_hash();
552 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
553 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
554 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
555 smp_mb();
556 if (mnt_get_writers(mnt) > 0) {
557 err = -EBUSY;
558 break;
559 }
560 }
561 }
562 if (!err && atomic_long_read(&sb->s_remove_count))
563 err = -EBUSY;
564
565 if (!err) {
566 sb->s_readonly_remount = 1;
567 smp_wmb();
568 }
569 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
570 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
571 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
572 }
573 unlock_mount_hash();
574
575 return err;
576}
577
578static void free_vfsmnt(struct mount *mnt)
579{
580 kfree_const(mnt->mnt_devname);
581#ifdef CONFIG_SMP
582 free_percpu(mnt->mnt_pcp);
583#endif
584 kmem_cache_free(mnt_cache, mnt);
585}
586
587static void delayed_free_vfsmnt(struct rcu_head *head)
588{
589 free_vfsmnt(container_of(head, struct mount, mnt_rcu));
590}
591
/* call under rcu_read_lock */
593int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
594{
595 struct mount *mnt;
596 if (read_seqretry(&mount_lock, seq))
597 return 1;
598 if (bastard == NULL)
599 return 0;
600 mnt = real_mount(bastard);
601 mnt_add_count(mnt, 1);
602 if (likely(!read_seqretry(&mount_lock, seq)))
603 return 0;
604 if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
605 mnt_add_count(mnt, -1);
606 return 1;
607 }
608 return -1;
609}
610
/* call under rcu_read_lock */
612bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
613{
614 int res = __legitimize_mnt(bastard, seq);
615 if (likely(!res))
616 return true;
617 if (unlikely(res < 0)) {
618 rcu_read_unlock();
619 mntput(bastard);
620 rcu_read_lock();
621 }
622 return false;
623}
624
/*
 * find the first mount at @dentry on vfsmount @mnt.
 * call under rcu_read_lock()
 */
629struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
630{
631 struct hlist_head *head = m_hash(mnt, dentry);
632 struct mount *p;
633
634 hlist_for_each_entry_rcu(p, head, mnt_hash)
635 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
636 return p;
637 return NULL;
638}
639
/*
 * find the last mount at @dentry on vfsmount @mnt.
 * mount_lock must be held.
 */
644struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
645{
646 struct mount *p, *res = NULL;
647 p = __lookup_mnt(mnt, dentry);
648 if (!p)
649 goto out;
650 if (!(p->mnt.mnt_flags & MNT_UMOUNT))
651 res = p;
652 hlist_for_each_entry_continue(p, mnt_hash) {
653 if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
654 break;
655 if (!(p->mnt.mnt_flags & MNT_UMOUNT))
656 res = p;
657 }
658out:
659 return res;
660}
661
/*
 * lookup_mnt - Return the first child mount mounted at path
 *
 * "First" means first mounted chronologically.  If you create the
 * following mounts:
 *
 * mount /dev/sda1 /mnt
 * mount /dev/sda2 /mnt
 * mount /dev/sda3 /mnt
 *
 * Then lookup_mnt() on the base /mnt dentry in the root mount will
 * return successively the root dentry and vfsmount of /dev/sda1, then
 * /dev/sda2, then /dev/sda3, then NULL.
 *
 * lookup_mnt takes a reference to the found vfsmount.
 */
678struct vfsmount *lookup_mnt(struct path *path)
679{
680 struct mount *child_mnt;
681 struct vfsmount *m;
682 unsigned seq;
683
684 rcu_read_lock();
685 do {
686 seq = read_seqbegin(&mount_lock);
687 child_mnt = __lookup_mnt(path->mnt, path->dentry);
688 m = child_mnt ? &child_mnt->mnt : NULL;
689 } while (!legitimize_mnt(m, seq));
690 rcu_read_unlock();
691 return m;
692}
693
/*
 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
 *                         current mount namespace.
 *
 * The common case is that dentries are not mountpoints at all and that
 * test is handled inline.  For the slow case when we are actually
 * dealing with a mountpoint of some kind, walk through all of the
 * mounts in the current mount namespace and test to see if the dentry
 * is a mountpoint.
 *
 * The mount list is protected by the namespace_sem.
 */
709bool __is_local_mountpoint(struct dentry *dentry)
710{
711 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
712 struct mount *mnt;
713 bool is_covered = false;
714
715 if (!d_mountpoint(dentry))
716 goto out;
717
718 down_read(&namespace_sem);
719 list_for_each_entry(mnt, &ns->list, mnt_list) {
720 is_covered = (mnt->mnt_mountpoint == dentry);
721 if (is_covered)
722 break;
723 }
724 up_read(&namespace_sem);
725out:
726 return is_covered;
727}
728
729static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
730{
731 struct hlist_head *chain = mp_hash(dentry);
732 struct mountpoint *mp;
733
734 hlist_for_each_entry(mp, chain, m_hash) {
735 if (mp->m_dentry == dentry) {
736
737 if (d_unlinked(dentry))
738 return ERR_PTR(-ENOENT);
739 mp->m_count++;
740 return mp;
741 }
742 }
743 return NULL;
744}
745
746static struct mountpoint *new_mountpoint(struct dentry *dentry)
747{
748 struct hlist_head *chain = mp_hash(dentry);
749 struct mountpoint *mp;
750 int ret;
751
752 mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
753 if (!mp)
754 return ERR_PTR(-ENOMEM);
755
756 ret = d_set_mounted(dentry);
757 if (ret) {
758 kfree(mp);
759 return ERR_PTR(ret);
760 }
761
762 mp->m_dentry = dentry;
763 mp->m_count = 1;
764 hlist_add_head(&mp->m_hash, chain);
765 INIT_HLIST_HEAD(&mp->m_list);
766 return mp;
767}
768
769static void put_mountpoint(struct mountpoint *mp)
770{
771 if (!--mp->m_count) {
772 struct dentry *dentry = mp->m_dentry;
773 BUG_ON(!hlist_empty(&mp->m_list));
774 spin_lock(&dentry->d_lock);
775 dentry->d_flags &= ~DCACHE_MOUNTED;
776 spin_unlock(&dentry->d_lock);
777 hlist_del(&mp->m_hash);
778 kfree(mp);
779 }
780}
781
782static inline int check_mnt(struct mount *mnt)
783{
784 return mnt->mnt_ns == current->nsproxy->mnt_ns;
785}
786
787
788
789
790static void touch_mnt_namespace(struct mnt_namespace *ns)
791{
792 if (ns) {
793 ns->event = ++event;
794 wake_up_interruptible(&ns->poll);
795 }
796}
797
798
799
800
801static void __touch_mnt_namespace(struct mnt_namespace *ns)
802{
803 if (ns && ns->event != event) {
804 ns->event = event;
805 wake_up_interruptible(&ns->poll);
806 }
807}
808
/*
 * vfsmount lock must be held for write
 */
812static void unhash_mnt(struct mount *mnt)
813{
814 mnt->mnt_parent = mnt;
815 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
816 list_del_init(&mnt->mnt_child);
817 hlist_del_init_rcu(&mnt->mnt_hash);
818 hlist_del_init(&mnt->mnt_mp_list);
819 put_mountpoint(mnt->mnt_mp);
820 mnt->mnt_mp = NULL;
821}
822
823
824
825
826static void detach_mnt(struct mount *mnt, struct path *old_path)
827{
828 old_path->dentry = mnt->mnt_mountpoint;
829 old_path->mnt = &mnt->mnt_parent->mnt;
830 unhash_mnt(mnt);
831}
832
833
834
835
836static void umount_mnt(struct mount *mnt)
837{
838
839 mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
840 unhash_mnt(mnt);
841}
842
843
844
845
846void mnt_set_mountpoint(struct mount *mnt,
847 struct mountpoint *mp,
848 struct mount *child_mnt)
849{
850 mp->m_count++;
851 mnt_add_count(mnt, 1);
852 child_mnt->mnt_mountpoint = dget(mp->m_dentry);
853 child_mnt->mnt_parent = mnt;
854 child_mnt->mnt_mp = mp;
855 hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
856}
857
858
859
860
861static void attach_mnt(struct mount *mnt,
862 struct mount *parent,
863 struct mountpoint *mp)
864{
865 mnt_set_mountpoint(parent, mp, mnt);
866 hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
867 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
868}
869
870static void attach_shadowed(struct mount *mnt,
871 struct mount *parent,
872 struct mount *shadows)
873{
874 if (shadows) {
875 hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
876 list_add(&mnt->mnt_child, &shadows->mnt_child);
877 } else {
878 hlist_add_head_rcu(&mnt->mnt_hash,
879 m_hash(&parent->mnt, mnt->mnt_mountpoint));
880 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
881 }
882}
883
/*
 * vfsmount lock must be held for write
 */
887static void commit_tree(struct mount *mnt, struct mount *shadows)
888{
889 struct mount *parent = mnt->mnt_parent;
890 struct mount *m;
891 LIST_HEAD(head);
892 struct mnt_namespace *n = parent->mnt_ns;
893
894 BUG_ON(parent == mnt);
895
896 list_add_tail(&head, &mnt->mnt_list);
897 list_for_each_entry(m, &head, mnt_list)
898 m->mnt_ns = n;
899
900 list_splice(&head, n->list.prev);
901
902 attach_shadowed(mnt, parent, shadows);
903 touch_mnt_namespace(n);
904}
905
906static struct mount *next_mnt(struct mount *p, struct mount *root)
907{
908 struct list_head *next = p->mnt_mounts.next;
909 if (next == &p->mnt_mounts) {
910 while (1) {
911 if (p == root)
912 return NULL;
913 next = p->mnt_child.next;
914 if (next != &p->mnt_parent->mnt_mounts)
915 break;
916 p = p->mnt_parent;
917 }
918 }
919 return list_entry(next, struct mount, mnt_child);
920}
921
922static struct mount *skip_mnt_tree(struct mount *p)
923{
924 struct list_head *prev = p->mnt_mounts.prev;
925 while (prev != &p->mnt_mounts) {
926 p = list_entry(prev, struct mount, mnt_child);
927 prev = p->mnt_mounts.prev;
928 }
929 return p;
930}
931
932struct vfsmount *
933vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
934{
935 struct mount *mnt;
936 struct dentry *root;
937
938 if (!type)
939 return ERR_PTR(-ENODEV);
940
941 mnt = alloc_vfsmnt(name);
942 if (!mnt)
943 return ERR_PTR(-ENOMEM);
944
945 if (flags & MS_KERNMOUNT)
946 mnt->mnt.mnt_flags = MNT_INTERNAL;
947
948 root = mount_fs(type, flags, name, data);
949 if (IS_ERR(root)) {
950 mnt_free_id(mnt);
951 free_vfsmnt(mnt);
952 return ERR_CAST(root);
953 }
954
955 mnt->mnt.mnt_root = root;
956 mnt->mnt.mnt_sb = root->d_sb;
957 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
958 mnt->mnt_parent = mnt;
959 lock_mount_hash();
960 list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
961 unlock_mount_hash();
962 return &mnt->mnt;
963}
964EXPORT_SYMBOL_GPL(vfs_kern_mount);
965
966static struct mount *clone_mnt(struct mount *old, struct dentry *root,
967 int flag)
968{
969 struct super_block *sb = old->mnt.mnt_sb;
970 struct mount *mnt;
971 int err;
972
973 mnt = alloc_vfsmnt(old->mnt_devname);
974 if (!mnt)
975 return ERR_PTR(-ENOMEM);
976
977 if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
978 mnt->mnt_group_id = 0;
979 else
980 mnt->mnt_group_id = old->mnt_group_id;
981
982 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
983 err = mnt_alloc_group_id(mnt);
984 if (err)
985 goto out_free;
986 }
987
988 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
989
990 if (flag & CL_UNPRIVILEGED) {
991 mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
992
993 if (mnt->mnt.mnt_flags & MNT_READONLY)
994 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
995
996 if (mnt->mnt.mnt_flags & MNT_NODEV)
997 mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
998
999 if (mnt->mnt.mnt_flags & MNT_NOSUID)
1000 mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
1001
1002 if (mnt->mnt.mnt_flags & MNT_NOEXEC)
1003 mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
1004 }
1005
1006
1007 if ((flag & CL_UNPRIVILEGED) &&
1008 (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
1009 mnt->mnt.mnt_flags |= MNT_LOCKED;
1010
1011 atomic_inc(&sb->s_active);
1012 mnt->mnt.mnt_sb = sb;
1013 mnt->mnt.mnt_root = dget(root);
1014 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
1015 mnt->mnt_parent = mnt;
1016 lock_mount_hash();
1017 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
1018 unlock_mount_hash();
1019
1020 if ((flag & CL_SLAVE) ||
1021 ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
1022 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
1023 mnt->mnt_master = old;
1024 CLEAR_MNT_SHARED(mnt);
1025 } else if (!(flag & CL_PRIVATE)) {
1026 if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
1027 list_add(&mnt->mnt_share, &old->mnt_share);
1028 if (IS_MNT_SLAVE(old))
1029 list_add(&mnt->mnt_slave, &old->mnt_slave);
1030 mnt->mnt_master = old->mnt_master;
1031 }
1032 if (flag & CL_MAKE_SHARED)
1033 set_mnt_shared(mnt);
1034
1035
1036
1037 if (flag & CL_EXPIRE) {
1038 if (!list_empty(&old->mnt_expire))
1039 list_add(&mnt->mnt_expire, &old->mnt_expire);
1040 }
1041
1042 return mnt;
1043
1044 out_free:
1045 mnt_free_id(mnt);
1046 free_vfsmnt(mnt);
1047 return ERR_PTR(err);
1048}
1049
1050static void cleanup_mnt(struct mount *mnt)
1051{
 /*
  * A non-zero writer count here probably indicates that somebody
  * messed up a mnt_want/drop_write() pair; if that happens, the
  * filesystem was probably unable to make r/w->r/o transitions.
  */
 /*
  * The locking used to deal with mnt_count decrement provides barriers,
  * so mnt_get_writers() below is safe.
  */
1062 WARN_ON(mnt_get_writers(mnt));
1063 if (unlikely(mnt->mnt_pins.first))
1064 mnt_pin_kill(mnt);
1065 fsnotify_vfsmount_delete(&mnt->mnt);
1066 dput(mnt->mnt.mnt_root);
1067 deactivate_super(mnt->mnt.mnt_sb);
1068 mnt_free_id(mnt);
1069 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
1070}
1071
1072static void __cleanup_mnt(struct rcu_head *head)
1073{
1074 cleanup_mnt(container_of(head, struct mount, mnt_rcu));
1075}
1076
1077static LLIST_HEAD(delayed_mntput_list);
1078static void delayed_mntput(struct work_struct *unused)
1079{
1080 struct llist_node *node = llist_del_all(&delayed_mntput_list);
1081 struct llist_node *next;
1082
1083 for (; node; node = next) {
1084 next = llist_next(node);
1085 cleanup_mnt(llist_entry(node, struct mount, mnt_llist));
1086 }
1087}
1088static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
1089
1090static void mntput_no_expire(struct mount *mnt)
1091{
1092 rcu_read_lock();
1093 mnt_add_count(mnt, -1);
1094 if (likely(mnt->mnt_ns)) {
1095 rcu_read_unlock();
1096 return;
1097 }
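 /* slow path: the mount has left its namespace; re-check the count under mount_lock */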
1098 lock_mount_hash();
1099 if (mnt_get_count(mnt)) {
1100 rcu_read_unlock();
1101 unlock_mount_hash();
1102 return;
1103 }
1104 if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
1105 rcu_read_unlock();
1106 unlock_mount_hash();
1107 return;
1108 }
1109 mnt->mnt.mnt_flags |= MNT_DOOMED;
1110 rcu_read_unlock();
1111
1112 list_del(&mnt->mnt_instance);
1113
1114 if (unlikely(!list_empty(&mnt->mnt_mounts))) {
1115 struct mount *p, *tmp;
1116 list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
1117 umount_mnt(p);
1118 }
1119 }
1120 unlock_mount_hash();
1121
1122 if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
1123 struct task_struct *task = current;
1124 if (likely(!(task->flags & PF_KTHREAD))) {
1125 init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
1126 if (!task_work_add(task, &mnt->mnt_rcu, true))
1127 return;
1128 }
1129 if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
1130 schedule_delayed_work(&delayed_mntput_work, 1);
1131 return;
1132 }
1133 cleanup_mnt(mnt);
1134}
1135
1136void mntput(struct vfsmount *mnt)
1137{
1138 if (mnt) {
1139 struct mount *m = real_mount(mnt);
1140
1141 if (unlikely(m->mnt_expiry_mark))
1142 m->mnt_expiry_mark = 0;
1143 mntput_no_expire(m);
1144 }
1145}
1146EXPORT_SYMBOL(mntput);
1147
1148struct vfsmount *mntget(struct vfsmount *mnt)
1149{
1150 if (mnt)
1151 mnt_add_count(real_mount(mnt), 1);
1152 return mnt;
1153}
1154EXPORT_SYMBOL(mntget);
1155
1156struct vfsmount *mnt_clone_internal(struct path *path)
1157{
1158 struct mount *p;
1159 p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
1160 if (IS_ERR(p))
1161 return ERR_CAST(p);
1162 p->mnt.mnt_flags |= MNT_INTERNAL;
1163 return &p->mnt;
1164}
1165
1166static inline void mangle(struct seq_file *m, const char *s)
1167{
1168 seq_escape(m, s, " \t\n\\");
1169}
1170
/*
 * Simple .show_options callback for filesystems which don't want to
 * implement more complex mount option showing.
 *
 * See also save_mount_options().
 */
1177int generic_show_options(struct seq_file *m, struct dentry *root)
1178{
1179 const char *options;
1180
1181 rcu_read_lock();
1182 options = rcu_dereference(root->d_sb->s_options);
1183
1184 if (options != NULL && options[0]) {
1185 seq_putc(m, ',');
1186 mangle(m, options);
1187 }
1188 rcu_read_unlock();
1189
1190 return 0;
1191}
1192EXPORT_SYMBOL(generic_show_options);
1193
/*
 * If the filesystem uses generic_show_options(), this function should be
 * called from the fill_super() callback.
 *
 * The .remount_fs callback usually needs to be handled in a special
 * way, to make sure that previous options are not overwritten if the
 * remount fails.
 *
 * Also note that if the filesystem's .remount_fs function doesn't
 * reset all options to their default value, but changes only newly
 * given options, then the displayed options will not reflect reality
 * any more.
 */
1207void save_mount_options(struct super_block *sb, char *options)
1208{
1209 BUG_ON(sb->s_options);
1210 rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
1211}
1212EXPORT_SYMBOL(save_mount_options);
1213
1214void replace_mount_options(struct super_block *sb, char *options)
1215{
1216 char *old = sb->s_options;
1217 rcu_assign_pointer(sb->s_options, options);
1218 if (old) {
1219 synchronize_rcu();
1220 kfree(old);
1221 }
1222}
1223EXPORT_SYMBOL(replace_mount_options);
1224
1225#ifdef CONFIG_PROC_FS
1226
1227static void *m_start(struct seq_file *m, loff_t *pos)
1228{
1229 struct proc_mounts *p = m->private;
1230
1231 down_read(&namespace_sem);
1232 if (p->cached_event == p->ns->event) {
1233 void *v = p->cached_mount;
1234 if (*pos == p->cached_index)
1235 return v;
1236 if (*pos == p->cached_index + 1) {
1237 v = seq_list_next(v, &p->ns->list, &p->cached_index);
1238 return p->cached_mount = v;
1239 }
1240 }
1241
1242 p->cached_event = p->ns->event;
1243 p->cached_mount = seq_list_start(&p->ns->list, *pos);
1244 p->cached_index = *pos;
1245 return p->cached_mount;
1246}
1247
1248static void *m_next(struct seq_file *m, void *v, loff_t *pos)
1249{
1250 struct proc_mounts *p = m->private;
1251
1252 p->cached_mount = seq_list_next(v, &p->ns->list, pos);
1253 p->cached_index = *pos;
1254 return p->cached_mount;
1255}
1256
1257static void m_stop(struct seq_file *m, void *v)
1258{
1259 up_read(&namespace_sem);
1260}
1261
1262static int m_show(struct seq_file *m, void *v)
1263{
1264 struct proc_mounts *p = m->private;
1265 struct mount *r = list_entry(v, struct mount, mnt_list);
1266 return p->show(m, &r->mnt);
1267}
1268
1269const struct seq_operations mounts_op = {
1270 .start = m_start,
1271 .next = m_next,
1272 .stop = m_stop,
1273 .show = m_show,
1274};
1275#endif
1276
/**
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
1285int may_umount_tree(struct vfsmount *m)
1286{
1287 struct mount *mnt = real_mount(m);
1288 int actual_refs = 0;
1289 int minimum_refs = 0;
1290 struct mount *p;
1291 BUG_ON(!m);
1292
1293
1294 lock_mount_hash();
1295 for (p = mnt; p; p = next_mnt(p, mnt)) {
1296 actual_refs += mnt_get_count(p);
1297 minimum_refs += 2;
1298 }
1299 unlock_mount_hash();
1300
1301 if (actual_refs > minimum_refs)
1302 return 0;
1303
1304 return 1;
1305}
1306
1307EXPORT_SYMBOL(may_umount_tree);
1308
/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 */
1322int may_umount(struct vfsmount *mnt)
1323{
1324 int ret = 1;
1325 down_read(&namespace_sem);
1326 lock_mount_hash();
1327 if (propagate_mount_busy(real_mount(mnt), 2))
1328 ret = 0;
1329 unlock_mount_hash();
1330 up_read(&namespace_sem);
1331 return ret;
1332}
1333
1334EXPORT_SYMBOL(may_umount);
1335
1336static HLIST_HEAD(unmounted);
1337
1338static void namespace_unlock(void)
1339{
1340 struct hlist_head head;
1341
1342 hlist_move_list(&unmounted, &head);
1343
1344 up_write(&namespace_sem);
1345
1346 if (likely(hlist_empty(&head)))
1347 return;
1348
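 /* let any rcu-walk lookups that may still see these mounts finish before killing the pins */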
1349 synchronize_rcu();
1350
1351 group_pin_kill(&head);
1352}
1353
1354static inline void namespace_lock(void)
1355{
1356 down_write(&namespace_sem);
1357}
1358
1359enum umount_tree_flags {
1360 UMOUNT_SYNC = 1,
1361 UMOUNT_PROPAGATE = 2,
1362 UMOUNT_CONNECTED = 4,
1363};
1364
1365static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
1366{
 /* Leaving mounts connected is only valid for lazy umounts */
1368 if (how & UMOUNT_SYNC)
1369 return true;
1370
 /* A mount without a parent has nothing to be connected to */
1372 if (!mnt_has_parent(mnt))
1373 return true;
1374
 /* Because the reference counting rules change when mounts are
  * unmounted and connected, umounted mounts may not be
  * connected to mounted mounts.
  */
1379 if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
1380 return true;
1381
1382
1383 if (how & UMOUNT_CONNECTED)
1384 return false;
1385
 /* Is the mount locked such that it needs to remain connected? */
1387 if (IS_MNT_LOCKED(mnt))
1388 return false;
1389
1390
1391 return true;
1392}
1393
1394
/*
 * mount_lock must be held
 * namespace_sem must be held for write
 */
1398static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1399{
1400 LIST_HEAD(tmp_list);
1401 struct mount *p;
1402
1403 if (how & UMOUNT_PROPAGATE)
1404 propagate_mount_unlock(mnt);
1405

 /* Gather the mounts to umount */
1407 for (p = mnt; p; p = next_mnt(p, mnt)) {
1408 p->mnt.mnt_flags |= MNT_UMOUNT;
1409 list_move(&p->mnt_list, &tmp_list);
1410 }
1411
1412
1413 list_for_each_entry(p, &tmp_list, mnt_list) {
1414 list_del_init(&p->mnt_child);
1415 }
1416
 /* Add propagated mounts to the tmp_list */
1418 if (how & UMOUNT_PROPAGATE)
1419 propagate_umount(&tmp_list);
1420
1421 while (!list_empty(&tmp_list)) {
1422 bool disconnect;
1423 p = list_first_entry(&tmp_list, struct mount, mnt_list);
1424 list_del_init(&p->mnt_expire);
1425 list_del_init(&p->mnt_list);
1426 __touch_mnt_namespace(p->mnt_ns);
1427 p->mnt_ns = NULL;
1428 if (how & UMOUNT_SYNC)
1429 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
1430
1431 disconnect = disconnect_mount(p, how);
1432
1433 pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
1434 disconnect ? &unmounted : NULL);
1435 if (mnt_has_parent(p)) {
1436 mnt_add_count(p->mnt_parent, -1);
1437 if (!disconnect) {
1438
1439 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
1440 } else {
1441 umount_mnt(p);
1442 }
1443 }
1444 change_mnt_propagation(p, MS_PRIVATE);
1445 }
1446}
1447
1448static void shrink_submounts(struct mount *mnt);
1449
1450static int do_umount(struct mount *mnt, int flags)
1451{
1452 struct super_block *sb = mnt->mnt.mnt_sb;
1453 int retval;
1454
1455 retval = security_sb_umount(&mnt->mnt, flags);
1456 if (retval)
1457 return retval;
1458
 /*
  * Allow userspace to request a mountpoint be expired rather than
  * unmounting unconditionally. Unmount only happens if:
  *  (1) the mark is already set (the mark is cleared by mntput())
  *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
  */
1465 if (flags & MNT_EXPIRE) {
1466 if (&mnt->mnt == current->fs->root.mnt ||
1467 flags & (MNT_FORCE | MNT_DETACH))
1468 return -EINVAL;
1469
1470
1471
1472
1473
1474 lock_mount_hash();
1475 if (mnt_get_count(mnt) != 2) {
1476 unlock_mount_hash();
1477 return -EBUSY;
1478 }
1479 unlock_mount_hash();
1480
1481 if (!xchg(&mnt->mnt_expiry_mark, 1))
1482 return -EAGAIN;
1483 }
1484
 /*
  * If we may have to abort operations to get out of this
  * mount, and they will themselves hold resources we must
  * allow the fs to do things. In the Unix tradition of
  * 'Gee thats tricky, lets do it in userspace' the umount_begin
  * operation might fail, in which case we just keep waiting.
  */
1495 if (flags & MNT_FORCE && sb->s_op->umount_begin) {
1496 sb->s_op->umount_begin(sb);
1497 }
1498
 /*
  * No sense to grab the lock for this test, but test itself looks
  * somewhat bogus. Suggestions for better replacement?
  * Ho-hum... In principle, we might treat that as umount + switch
  * to rootfs. GC would eventually take care of the old vfsmount.
  * Actually it makes sense, especially if rootfs would contain a
  * /reboot - static binary that would close all descriptors and
  * call reboot(9). Then init(8) could umount root and exec /reboot.
  */
1508 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
 /*
  * Special case for "unmounting" root ...
  * we just try to remount it readonly.
  */
1513 if (!capable(CAP_SYS_ADMIN))
1514 return -EPERM;
1515 down_write(&sb->s_umount);
1516 if (!(sb->s_flags & MS_RDONLY))
1517 retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
1518 up_write(&sb->s_umount);
1519 return retval;
1520 }
1521
1522 namespace_lock();
1523 lock_mount_hash();
1524 event++;
1525
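 /* lazy umount: detach from the tree now, final teardown happens at the last mntput() */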
1526 if (flags & MNT_DETACH) {
1527 if (!list_empty(&mnt->mnt_list))
1528 umount_tree(mnt, UMOUNT_PROPAGATE);
1529 retval = 0;
1530 } else {
1531 shrink_submounts(mnt);
1532 retval = -EBUSY;
1533 if (!propagate_mount_busy(mnt, 2)) {
1534 if (!list_empty(&mnt->mnt_list))
1535 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
1536 retval = 0;
1537 }
1538 }
1539 unlock_mount_hash();
1540 namespace_unlock();
1541 return retval;
1542}
1543
/*
 * __detach_mounts - lazily unmount all mounts on the specified dentry
 *
 * During unlink, rmdir, and d_drop it is possible to lose the path
 * to an existing mountpoint, and wind up leaking the mount.
 * detach_mounts allows lazily unmounting those mounts instead of
 * leaking them.
 *
 * The caller may hold dentry->d_inode->i_mutex.
 */
1554void __detach_mounts(struct dentry *dentry)
1555{
1556 struct mountpoint *mp;
1557 struct mount *mnt;
1558
1559 namespace_lock();
1560 mp = lookup_mountpoint(dentry);
1561 if (IS_ERR_OR_NULL(mp))
1562 goto out_unlock;
1563
1564 lock_mount_hash();
1565 event++;
1566 while (!hlist_empty(&mp->m_list)) {
1567 mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
1568 if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
1569 hlist_add_head(&mnt->mnt_umount.s_list, &unmounted);
1570 umount_mnt(mnt);
1571 }
1572 else umount_tree(mnt, UMOUNT_CONNECTED);
1573 }
1574 unlock_mount_hash();
1575 put_mountpoint(mp);
1576out_unlock:
1577 namespace_unlock();
1578}
1579
/*
 * Is the caller allowed to modify his namespace?
 */
1583static inline bool may_mount(void)
1584{
1585 return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
1586}
1587
1588static inline bool may_mandlock(void)
1589{
1590#ifndef CONFIG_MANDATORY_FILE_LOCKING
1591 return false;
1592#endif
1593 return capable(CAP_SYS_ADMIN);
1594}
1595
/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
 */
1604SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1605{
1606 struct path path;
1607 struct mount *mnt;
1608 int retval;
1609 int lookup_flags = 0;
1610
1611 if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
1612 return -EINVAL;
1613
1614 if (!may_mount())
1615 return -EPERM;
1616
1617 if (!(flags & UMOUNT_NOFOLLOW))
1618 lookup_flags |= LOOKUP_FOLLOW;
1619
1620 retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
1621 if (retval)
1622 goto out;
1623 mnt = real_mount(path.mnt);
1624 retval = -EINVAL;
1625 if (path.dentry != path.mnt->mnt_root)
1626 goto dput_and_out;
1627 if (!check_mnt(mnt))
1628 goto dput_and_out;
1629 if (mnt->mnt.mnt_flags & MNT_LOCKED)
1630 goto dput_and_out;
1631 retval = -EPERM;
1632 if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
1633 goto dput_and_out;
1634
1635 retval = do_umount(mnt, flags);
1636dput_and_out:
 /* we mustn't call path_put() as that would clear mnt_expiry_mark */
1638 dput(path.dentry);
1639 mntput_no_expire(mnt);
1640out:
1641 return retval;
1642}
1643
1644#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 * The 2.0 compatible umount. No flags.
 */
1649SYSCALL_DEFINE1(oldumount, char __user *, name)
1650{
1651 return sys_umount(name, 0);
1652}
1653
1654#endif
1655
1656static bool is_mnt_ns_file(struct dentry *dentry)
1657{
 /* Is this a proxy for a mount namespace? */
1659 return dentry->d_op == &ns_dentry_operations &&
1660 dentry->d_fsdata == &mntns_operations;
1661}
1662
1663struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
1664{
1665 return container_of(ns, struct mnt_namespace, ns);
1666}
1667
1668static bool mnt_ns_loop(struct dentry *dentry)
1669{
 /* Could bind mounting the mount namespace inode cause a
  * mount namespace loop?
  */
1673 struct mnt_namespace *mnt_ns;
1674 if (!is_mnt_ns_file(dentry))
1675 return false;
1676
1677 mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
1678 return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
1679}
1680
1681struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1682 int flag)
1683{
1684 struct mount *res, *p, *q, *r, *parent;
1685
1686 if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
1687 return ERR_PTR(-EINVAL);
1688
1689 if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
1690 return ERR_PTR(-EINVAL);
1691
1692 res = q = clone_mnt(mnt, dentry, flag);
1693 if (IS_ERR(q))
1694 return q;
1695
1696 q->mnt_mountpoint = mnt->mnt_mountpoint;
1697
1698 p = mnt;
1699 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1700 struct mount *s;
1701 if (!is_subdir(r->mnt_mountpoint, dentry))
1702 continue;
1703
1704 for (s = r; s; s = next_mnt(s, r)) {
1705 struct mount *t = NULL;
1706 if (!(flag & CL_COPY_UNBINDABLE) &&
1707 IS_MNT_UNBINDABLE(s)) {
1708 s = skip_mnt_tree(s);
1709 continue;
1710 }
1711 if (!(flag & CL_COPY_MNT_NS_FILE) &&
1712 is_mnt_ns_file(s->mnt.mnt_root)) {
1713 s = skip_mnt_tree(s);
1714 continue;
1715 }
1716 while (p != s->mnt_parent) {
1717 p = p->mnt_parent;
1718 q = q->mnt_parent;
1719 }
1720 p = s;
1721 parent = q;
1722 q = clone_mnt(p, p->mnt.mnt_root, flag);
1723 if (IS_ERR(q))
1724 goto out;
1725 lock_mount_hash();
1726 list_add_tail(&q->mnt_list, &res->mnt_list);
1727 mnt_set_mountpoint(parent, p->mnt_mp, q);
1728 if (!list_empty(&parent->mnt_mounts)) {
1729 t = list_last_entry(&parent->mnt_mounts,
1730 struct mount, mnt_child);
1731 if (t->mnt_mp != p->mnt_mp)
1732 t = NULL;
1733 }
1734 attach_shadowed(q, parent, t);
1735 unlock_mount_hash();
1736 }
1737 }
1738 return res;
1739out:
1740 if (res) {
1741 lock_mount_hash();
1742 umount_tree(res, UMOUNT_SYNC);
1743 unlock_mount_hash();
1744 }
1745 return q;
1746}
1747
1748
/* Caller should check returned pointer for errors */
1750struct vfsmount *collect_mounts(struct path *path)
1751{
1752 struct mount *tree;
1753 namespace_lock();
1754 if (!check_mnt(real_mount(path->mnt)))
1755 tree = ERR_PTR(-EINVAL);
1756 else
1757 tree = copy_tree(real_mount(path->mnt), path->dentry,
1758 CL_COPY_ALL | CL_PRIVATE);
1759 namespace_unlock();
1760 if (IS_ERR(tree))
1761 return ERR_CAST(tree);
1762 return &tree->mnt;
1763}
1764
1765void drop_collected_mounts(struct vfsmount *mnt)
1766{
1767 namespace_lock();
1768 lock_mount_hash();
1769 umount_tree(real_mount(mnt), UMOUNT_SYNC);
1770 unlock_mount_hash();
1771 namespace_unlock();
1772}
1773
1774
/**
 * clone_private_mount - create a private clone of a path
 *
 * This creates a new vfsmount, which will be a clone of @path.  The new
 * mount will not be attached anywhere in the namespace and will be private
 * (i.e. changes to the originating mount won't be propagated into it).
 *
 * Release with mntput().
 */
1783struct vfsmount *clone_private_mount(struct path *path)
1784{
1785 struct mount *old_mnt = real_mount(path->mnt);
1786 struct mount *new_mnt;
1787
1788 if (IS_MNT_UNBINDABLE(old_mnt))
1789 return ERR_PTR(-EINVAL);
1790
1791 down_read(&namespace_sem);
1792 new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
1793 up_read(&namespace_sem);
1794 if (IS_ERR(new_mnt))
1795 return ERR_CAST(new_mnt);
1796
1797 return &new_mnt->mnt;
1798}
1799EXPORT_SYMBOL_GPL(clone_private_mount);
1800
1801int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1802 struct vfsmount *root)
1803{
1804 struct mount *mnt;
1805 int res = f(root, arg);
1806 if (res)
1807 return res;
1808 list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
1809 res = f(&mnt->mnt, arg);
1810 if (res)
1811 return res;
1812 }
1813 return 0;
1814}
1815
1816static void cleanup_group_ids(struct mount *mnt, struct mount *end)
1817{
1818 struct mount *p;
1819
1820 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
1821 if (p->mnt_group_id && !IS_MNT_SHARED(p))
1822 mnt_release_group_id(p);
1823 }
1824}
1825
1826static int invent_group_ids(struct mount *mnt, bool recurse)
1827{
1828 struct mount *p;
1829
1830 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
1831 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
1832 int err = mnt_alloc_group_id(p);
1833 if (err) {
1834 cleanup_group_ids(mnt, p);
1835 return err;
1836 }
1837 }
1838 }
1839
1840 return 0;
1841}
1842
/*
 * attach_recursive_mnt - attach a source mount tree under a destination
 *
 *  @source_mnt : mount tree to be attached
 *  @dest_mnt   : mount that @source_mnt will be mounted on
 *  @dest_mp    : the mountpoint @source_mnt will be attached at
 *  @parent_path: if non-null, detach @source_mnt from its parent first and
 *                store the old parent mount and mountpoint dentry there
 *                (used when moving an existing mount)
 *
 * If the destination mount is shared, the source tree is also propagated
 * to every member of the destination's peer group (and, transitively, to
 * their slaves).  Each propagated copy becomes a peer or a slave of the
 * source according to the propagation type of the mount it lands on, and
 * unbindable destinations receive no copy.  If the source tree itself is
 * shared, the copies end up in the source's peer group.
 *
 * Must be called without spinlocks held, because this function can sleep
 * in allocations.
 */
1906static int attach_recursive_mnt(struct mount *source_mnt,
1907 struct mount *dest_mnt,
1908 struct mountpoint *dest_mp,
1909 struct path *parent_path)
1910{
1911 HLIST_HEAD(tree_list);
1912 struct mount *child, *p;
1913 struct hlist_node *n;
1914 int err;
1915
1916 if (IS_MNT_SHARED(dest_mnt)) {
1917 err = invent_group_ids(source_mnt, true);
1918 if (err)
1919 goto out;
1920 err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
1921 lock_mount_hash();
1922 if (err)
1923 goto out_cleanup_ids;
1924 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
1925 set_mnt_shared(p);
1926 } else {
1927 lock_mount_hash();
1928 }
1929 if (parent_path) {
1930 detach_mnt(source_mnt, parent_path);
1931 attach_mnt(source_mnt, dest_mnt, dest_mp);
1932 touch_mnt_namespace(source_mnt->mnt_ns);
1933 } else {
1934 mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
1935 commit_tree(source_mnt, NULL);
1936 }
1937
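 /* commit the copies created by propagate_mnt() into their namespaces */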
1938 hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
1939 struct mount *q;
1940 hlist_del_init(&child->mnt_hash);
1941 q = __lookup_mnt_last(&child->mnt_parent->mnt,
1942 child->mnt_mountpoint);
1943 commit_tree(child, q);
1944 }
1945 unlock_mount_hash();
1946
1947 return 0;
1948
1949 out_cleanup_ids:
1950 while (!hlist_empty(&tree_list)) {
1951 child = hlist_entry(tree_list.first, struct mount, mnt_hash);
1952 umount_tree(child, UMOUNT_SYNC);
1953 }
1954 unlock_mount_hash();
1955 cleanup_group_ids(source_mnt, NULL);
1956 out:
1957 return err;
1958}
1959
1960static struct mountpoint *lock_mount(struct path *path)
1961{
1962 struct vfsmount *mnt;
1963 struct dentry *dentry = path->dentry;
1964retry:
1965 inode_lock(dentry->d_inode);
1966 if (unlikely(cant_mount(dentry))) {
1967 inode_unlock(dentry->d_inode);
1968 return ERR_PTR(-ENOENT);
1969 }
1970 namespace_lock();
1971 mnt = lookup_mnt(path);
1972 if (likely(!mnt)) {
1973 struct mountpoint *mp = lookup_mountpoint(dentry);
1974 if (!mp)
1975 mp = new_mountpoint(dentry);
1976 if (IS_ERR(mp)) {
1977 namespace_unlock();
1978 inode_unlock(dentry->d_inode);
1979 return mp;
1980 }
1981 return mp;
1982 }
1983 namespace_unlock();
1984 inode_unlock(path->dentry->d_inode);
1985 path_put(path);
1986 path->mnt = mnt;
1987 dentry = path->dentry = dget(mnt->mnt_root);
1988 goto retry;
1989}
1990
1991static void unlock_mount(struct mountpoint *where)
1992{
1993 struct dentry *dentry = where->m_dentry;
1994 put_mountpoint(where);
1995 namespace_unlock();
1996 inode_unlock(dentry->d_inode);
1997}
1998
1999static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
2000{
2001 if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
2002 return -EINVAL;
2003
2004 if (d_is_dir(mp->m_dentry) !=
2005 d_is_dir(mnt->mnt.mnt_root))
2006 return -ENOTDIR;
2007
2008 return attach_recursive_mnt(mnt, p, mp, NULL);
2009}
2010
/*
 * Sanity check the flags to change_mnt_propagation.
 */
2015static int flags_to_propagation_type(int flags)
2016{
2017 int type = flags & ~(MS_REC | MS_SILENT);
2018
 /* Fail if any non-propagation flags are set */
2020 if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2021 return 0;
2022
2023 if (!is_power_of_2(type))
2024 return 0;
2025 return type;
2026}
2027
/*
 * recursively change the type of the mountpoint.
 */
2031static int do_change_type(struct path *path, int flag)
2032{
2033 struct mount *m;
2034 struct mount *mnt = real_mount(path->mnt);
2035 int recurse = flag & MS_REC;
2036 int type;
2037 int err = 0;
2038
2039 if (path->dentry != path->mnt->mnt_root)
2040 return -EINVAL;
2041
2042 type = flags_to_propagation_type(flag);
2043 if (!type)
2044 return -EINVAL;
2045
2046 namespace_lock();
2047 if (type == MS_SHARED) {
2048 err = invent_group_ids(mnt, recurse);
2049 if (err)
2050 goto out_unlock;
2051 }
2052
2053 lock_mount_hash();
2054 for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
2055 change_mnt_propagation(m, type);
2056 unlock_mount_hash();
2057
2058 out_unlock:
2059 namespace_unlock();
2060 return err;
2061}
2062
2063static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
2064{
2065 struct mount *child;
2066 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
2067 if (!is_subdir(child->mnt_mountpoint, dentry))
2068 continue;
2069
2070 if (child->mnt.mnt_flags & MNT_LOCKED)
2071 return true;
2072 }
2073 return false;
2074}
2075
/*
 * do loopback mount.
 */
2079static int do_loopback(struct path *path, const char *old_name,
2080 int recurse)
2081{
2082 struct path old_path;
2083 struct mount *mnt = NULL, *old, *parent;
2084 struct mountpoint *mp;
2085 int err;
2086 if (!old_name || !*old_name)
2087 return -EINVAL;
2088 err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
2089 if (err)
2090 return err;
2091
2092 err = -EINVAL;
2093 if (mnt_ns_loop(old_path.dentry))
2094 goto out;
2095
2096 mp = lock_mount(path);
2097 err = PTR_ERR(mp);
2098 if (IS_ERR(mp))
2099 goto out;
2100
2101 old = real_mount(old_path.mnt);
2102 parent = real_mount(path->mnt);
2103
2104 err = -EINVAL;
2105 if (IS_MNT_UNBINDABLE(old))
2106 goto out2;
2107
2108 if (!check_mnt(parent))
2109 goto out2;
2110
2111 if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
2112 goto out2;
2113
2114 if (!recurse && has_locked_children(old, old_path.dentry))
2115 goto out2;
2116
2117 if (recurse)
2118 mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
2119 else
2120 mnt = clone_mnt(old, old_path.dentry, 0);
2121
2122 if (IS_ERR(mnt)) {
2123 err = PTR_ERR(mnt);
2124 goto out2;
2125 }
2126
2127 mnt->mnt.mnt_flags &= ~MNT_LOCKED;
2128
2129 err = graft_tree(mnt, parent, mp);
2130 if (err) {
2131 lock_mount_hash();
2132 umount_tree(mnt, UMOUNT_SYNC);
2133 unlock_mount_hash();
2134 }
2135out2:
2136 unlock_mount(mp);
2137out:
2138 path_put(&old_path);
2139 return err;
2140}
2141
2142static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
2143{
2144 int error = 0;
2145 int readonly_request = 0;
2146
2147 if (ms_flags & MS_RDONLY)
2148 readonly_request = 1;
2149 if (readonly_request == __mnt_is_readonly(mnt))
2150 return 0;
2151
2152 if (readonly_request)
2153 error = mnt_make_readonly(real_mount(mnt));
2154 else
2155 __mnt_unmake_readonly(real_mount(mnt));
2156 return error;
2157}
2158
/*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tell us about it.
 */
2164static int do_remount(struct path *path, int flags, int mnt_flags,
2165 void *data)
2166{
2167 int err;
2168 struct super_block *sb = path->mnt->mnt_sb;
2169 struct mount *mnt = real_mount(path->mnt);
2170
2171 if (!check_mnt(mnt))
2172 return -EINVAL;
2173
2174 if (path->dentry != path->mnt->mnt_root)
2175 return -EINVAL;
2176

 /* Don't allow changing of locked mnt flags.
  *
  * No locks need to be held here while testing the various MNT_LOCK
  * flags because those flags can never be cleared once they are set.
  */
2183 if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
2184 !(mnt_flags & MNT_READONLY)) {
2185 return -EPERM;
2186 }
2187 if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
2188 !(mnt_flags & MNT_NODEV)) {
2189 return -EPERM;
2190 }
2191 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
2192 !(mnt_flags & MNT_NOSUID)) {
2193 return -EPERM;
2194 }
2195 if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
2196 !(mnt_flags & MNT_NOEXEC)) {
2197 return -EPERM;
2198 }
2199 if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
2200 ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
2201 return -EPERM;
2202 }
2203
2204 err = security_sb_remount(sb, data);
2205 if (err)
2206 return err;
2207
2208 down_write(&sb->s_umount);
2209 if (flags & MS_BIND)
2210 err = change_mount_flags(path->mnt, flags);
2211 else if (!capable(CAP_SYS_ADMIN))
2212 err = -EPERM;
2213 else
2214 err = do_remount_sb(sb, flags, data, 0);
2215 if (!err) {
2216 lock_mount_hash();
2217 mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
2218 mnt->mnt.mnt_flags = mnt_flags;
2219 touch_mnt_namespace(mnt->mnt_ns);
2220 unlock_mount_hash();
2221 }
2222 up_write(&sb->s_umount);
2223 return err;
2224}
2225
2226static inline int tree_contains_unbindable(struct mount *mnt)
2227{
2228 struct mount *p;
2229 for (p = mnt; p; p = next_mnt(p, mnt)) {
2230 if (IS_MNT_UNBINDABLE(p))
2231 return 1;
2232 }
2233 return 0;
2234}
2235
2236static int do_move_mount(struct path *path, const char *old_name)
2237{
2238 struct path old_path, parent_path;
2239 struct mount *p;
2240 struct mount *old;
2241 struct mountpoint *mp;
2242 int err;
2243 if (!old_name || !*old_name)
2244 return -EINVAL;
2245 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
2246 if (err)
2247 return err;
2248
2249 mp = lock_mount(path);
2250 err = PTR_ERR(mp);
2251 if (IS_ERR(mp))
2252 goto out;
2253
2254 old = real_mount(old_path.mnt);
2255 p = real_mount(path->mnt);
2256
2257 err = -EINVAL;
2258 if (!check_mnt(p) || !check_mnt(old))
2259 goto out1;
2260
2261 if (old->mnt.mnt_flags & MNT_LOCKED)
2262 goto out1;
2263
2264 err = -EINVAL;
2265 if (old_path.dentry != old_path.mnt->mnt_root)
2266 goto out1;
2267
2268 if (!mnt_has_parent(old))
2269 goto out1;
2270
2271 if (d_is_dir(path->dentry) !=
2272 d_is_dir(old_path.dentry))
2273 goto out1;
2274
2275
2276
2277 if (IS_MNT_SHARED(old->mnt_parent))
2278 goto out1;
2279
2280
2281
2282
2283 if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
2284 goto out1;
2285 err = -ELOOP;
2286 for (; mnt_has_parent(p); p = p->mnt_parent)
2287 if (p == old)
2288 goto out1;
2289
2290 err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
2291 if (err)
2292 goto out1;
2293
2294
2295
2296 list_del_init(&old->mnt_expire);
2297out1:
2298 unlock_mount(mp);
2299out:
2300 if (!err)
2301 path_put(&parent_path);
2302 path_put(&old_path);
2303 return err;
2304}
2305
2306static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
2307{
2308 int err;
2309 const char *subtype = strchr(fstype, '.');
2310 if (subtype) {
2311 subtype++;
2312 err = -EINVAL;
2313 if (!subtype[0])
2314 goto err;
2315 } else
2316 subtype = "";
2317
2318 mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
2319 err = -ENOMEM;
2320 if (!mnt->mnt_sb->s_subtype)
2321 goto err;
2322 return mnt;
2323
2324 err:
2325 mntput(mnt);
2326 return ERR_PTR(err);
2327}
2328
/*
 * add a mount into a namespace's mount tree
 */
2332static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
2333{
2334 struct mountpoint *mp;
2335 struct mount *parent;
2336 int err;
2337
2338 mnt_flags &= ~MNT_INTERNAL_FLAGS;
2339
2340 mp = lock_mount(path);
2341 if (IS_ERR(mp))
2342 return PTR_ERR(mp);
2343
2344 parent = real_mount(path->mnt);
2345 err = -EINVAL;
2346 if (unlikely(!check_mnt(parent))) {
2347
2348 if (!(mnt_flags & MNT_SHRINKABLE))
2349 goto unlock;
2350
2351 if (!parent->mnt_ns)
2352 goto unlock;
2353 }
2354
 /* Refuse the same filesystem on the same mount point */
2356 err = -EBUSY;
2357 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
2358 path->mnt->mnt_root == path->dentry)
2359 goto unlock;
2360
2361 err = -EINVAL;
2362 if (d_is_symlink(newmnt->mnt.mnt_root))
2363 goto unlock;
2364
2365 newmnt->mnt.mnt_flags = mnt_flags;
2366 err = graft_tree(newmnt, parent, mp);
2367
2368unlock:
2369 unlock_mount(mp);
2370 return err;
2371}
2372
2373static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags);
2374
/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
2379static int do_new_mount(struct path *path, const char *fstype, int flags,
2380 int mnt_flags, const char *name, void *data)
2381{
2382 struct file_system_type *type;
2383 struct vfsmount *mnt;
2384 int err;
2385
2386 if (!fstype)
2387 return -EINVAL;
2388
2389 type = get_fs_type(fstype);
2390 if (!type)
2391 return -ENODEV;
2392
2393 mnt = vfs_kern_mount(type, flags, name, data);
2394 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
2395 !mnt->mnt_sb->s_subtype)
2396 mnt = fs_set_subtype(mnt, fstype);
2397
2398 put_filesystem(type);
2399 if (IS_ERR(mnt))
2400 return PTR_ERR(mnt);
2401
2402 if (mount_too_revealing(mnt, &mnt_flags)) {
2403 mntput(mnt);
2404 return -EPERM;
2405 }
2406
2407 err = do_add_mount(real_mount(mnt), path, mnt_flags);
2408 if (err)
2409 mntput(mnt);
2410 return err;
2411}
2412
2413int finish_automount(struct vfsmount *m, struct path *path)
2414{
2415 struct mount *mnt = real_mount(m);
2416 int err;
2417
 /* The new mount record should have at least 2 refs to prevent it being
  * expired before we get a chance to add it
  */
2420 BUG_ON(mnt_get_count(mnt) < 2);
2421
2422 if (m->mnt_sb == path->mnt->mnt_sb &&
2423 m->mnt_root == path->dentry) {
2424 err = -ELOOP;
2425 goto fail;
2426 }
2427
2428 err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
2429 if (!err)
2430 return 0;
2431fail:
 /* remove m from any expiration list it may be on */
2433 if (!list_empty(&mnt->mnt_expire)) {
2434 namespace_lock();
2435 list_del_init(&mnt->mnt_expire);
2436 namespace_unlock();
2437 }
2438 mntput(m);
2439 mntput(m);
2440 return err;
2441}
2442
/**
 * mnt_set_expiry - Put a mount on an expiration list
 * @mnt: The mount to put on the list.
 * @expiry_list: The list to put the mount on.
 */
2448void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
2449{
2450 namespace_lock();
2451
2452 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
2453
2454 namespace_unlock();
2455}
2456EXPORT_SYMBOL(mnt_set_expiry);
2457
/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here
 */
2463void mark_mounts_for_expiry(struct list_head *mounts)
2464{
2465 struct mount *mnt, *next;
2466 LIST_HEAD(graveyard);
2467
2468 if (list_empty(mounts))
2469 return;
2470
2471 namespace_lock();
2472 lock_mount_hash();
2473
 /* extract from the expiration list every vfsmount that matches the
  * following criteria:
  * - only referenced by its parent vfsmount
  * - still marked for expiry (marked on the last call here; marks are
  *   cleared by mntput())
  */
2480 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
2481 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
2482 propagate_mount_busy(mnt, 1))
2483 continue;
2484 list_move(&mnt->mnt_expire, &graveyard);
2485 }
2486 while (!list_empty(&graveyard)) {
2487 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
2488 touch_mnt_namespace(mnt->mnt_ns);
2489 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
2490 }
2491 unlock_mount_hash();
2492 namespace_unlock();
2493}
2494
2495EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
2496
/*
 * Ripoff of 'select_parent()'
 *
 * search the list of submounts for a given mountpoint, and move any
 * shrinkable submounts to the 'graveyard' list.
 */
2503static int select_submounts(struct mount *parent, struct list_head *graveyard)
2504{
2505 struct mount *this_parent = parent;
2506 struct list_head *next;
2507 int found = 0;
2508
2509repeat:
2510 next = this_parent->mnt_mounts.next;
2511resume:
2512 while (next != &this_parent->mnt_mounts) {
2513 struct list_head *tmp = next;
2514 struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
2515
2516 next = tmp->next;
2517 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
2518 continue;
2519
2520
2521
2522 if (!list_empty(&mnt->mnt_mounts)) {
2523 this_parent = mnt;
2524 goto repeat;
2525 }
2526
2527 if (!propagate_mount_busy(mnt, 1)) {
2528 list_move_tail(&mnt->mnt_expire, graveyard);
2529 found++;
2530 }
2531 }
2532
 /*
  * All done at this level ... ascend and resume the search
  */
2535 if (this_parent != parent) {
2536 next = this_parent->mnt_child.next;
2537 this_parent = this_parent->mnt_parent;
2538 goto resume;
2539 }
2540 return found;
2541}
2542
/*
 * process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint
 *
 * mount_lock must be held for write
 */
2549static void shrink_submounts(struct mount *mnt)
2550{
2551 LIST_HEAD(graveyard);
2552 struct mount *m;
2553
 /* extract submounts of 'mountpoint' from the expiration list */
2555 while (select_submounts(mnt, &graveyard)) {
2556 while (!list_empty(&graveyard)) {
2557 m = list_first_entry(&graveyard, struct mount,
2558 mnt_expire);
2559 touch_mnt_namespace(m->mnt_ns);
2560 umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
2561 }
2562 }
2563}
2564
/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault.  But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy.
 */
2571static long exact_copy_from_user(void *to, const void __user * from,
2572 unsigned long n)
2573{
2574 char *t = to;
2575 const char __user *f = from;
2576 char c;
2577
2578 if (!access_ok(VERIFY_READ, from, n))
2579 return n;
2580
2581 while (n) {
2582 if (__get_user(c, f)) {
2583 memset(t, 0, n);
2584 break;
2585 }
2586 *t++ = c;
2587 f++;
2588 n--;
2589 }
2590 return n;
2591}
2592
2593void *copy_mount_options(const void __user * data)
2594{
2595 int i;
2596 unsigned long size;
2597 char *copy;
2598
2599 if (!data)
2600 return NULL;
2601
2602 copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
2603 if (!copy)
2604 return ERR_PTR(-ENOMEM);
2605
 /* We only care that *some* data at the address the user
  * gave us is valid.  Just in case, we'll zero
  * the remainder of the page.
  */
 /* copy_from_user cannot cross TASK_SIZE ! */
2611 size = TASK_SIZE - (unsigned long)data;
2612 if (size > PAGE_SIZE)
2613 size = PAGE_SIZE;
2614
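 /* if nothing at all could be copied, the address was entirely bogus */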
2615 i = size - exact_copy_from_user(copy, data, size);
2616 if (!i) {
2617 kfree(copy);
2618 return ERR_PTR(-EFAULT);
2619 }
2620 if (i != PAGE_SIZE)
2621 memset(copy + i, 0, PAGE_SIZE - i);
2622 return copy;
2623}
2624
2625char *copy_mount_string(const void __user *data)
2626{
2627 return data ? strndup_user(data, PAGE_SIZE) : NULL;
2628}
2629
/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */
2644long do_mount(const char *dev_name, const char __user *dir_name,
2645 const char *type_page, unsigned long flags, void *data_page)
2646{
2647 struct path path;
2648 int retval = 0;
2649 int mnt_flags = 0;
2650
 /* Discard magic */
2652 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
2653 flags &= ~MS_MGC_MSK;
2654
2655
2656 if (data_page)
2657 ((char *)data_page)[PAGE_SIZE - 1] = 0;
2658
2659
2660 retval = user_path(dir_name, &path);
2661 if (retval)
2662 return retval;
2663
2664 retval = security_sb_mount(dev_name, &path,
2665 type_page, flags, data_page);
2666 if (!retval && !may_mount())
2667 retval = -EPERM;
2668 if (!retval && (flags & MS_MANDLOCK) && !may_mandlock())
2669 retval = -EPERM;
2670 if (retval)
2671 goto dput_out;
2672
2673
2674 if (!(flags & MS_NOATIME))
2675 mnt_flags |= MNT_RELATIME;
2676
2677
2678 if (flags & MS_NOSUID)
2679 mnt_flags |= MNT_NOSUID;
2680 if (flags & MS_NODEV)
2681 mnt_flags |= MNT_NODEV;
2682 if (flags & MS_NOEXEC)
2683 mnt_flags |= MNT_NOEXEC;
2684 if (flags & MS_NOATIME)
2685 mnt_flags |= MNT_NOATIME;
2686 if (flags & MS_NODIRATIME)
2687 mnt_flags |= MNT_NODIRATIME;
2688 if (flags & MS_STRICTATIME)
2689 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
2690 if (flags & MS_RDONLY)
2691 mnt_flags |= MNT_READONLY;
2692
 /* The default atime for remount is preservation of existing values */
2694 if ((flags & MS_REMOUNT) &&
2695 ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
2696 MS_STRICTATIME)) == 0)) {
2697 mnt_flags &= ~MNT_ATIME_MASK;
2698 mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
2699 }
2700
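 /* strip the flags that were consumed above or are only meaningful inside the VFS */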
2701 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
2702 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
2703 MS_STRICTATIME);
2704
2705 if (flags & MS_REMOUNT)
2706 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
2707 data_page);
2708 else if (flags & MS_BIND)
2709 retval = do_loopback(&path, dev_name, flags & MS_REC);
2710 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
2711 retval = do_change_type(&path, flags);
2712 else if (flags & MS_MOVE)
2713 retval = do_move_mount(&path, dev_name);
2714 else
2715 retval = do_new_mount(&path, type_page, flags, mnt_flags,
2716 dev_name, data_page);
2717dput_out:
2718 path_put(&path);
2719 return retval;
2720}
2721
2722static void free_mnt_ns(struct mnt_namespace *ns)
2723{
2724 ns_free_inum(&ns->ns);
2725 put_user_ns(ns->user_ns);
2726 kfree(ns);
2727}
2728
/*
 * Assign a sequence number so we can detect when we attempt to bind
 * mount a mount namespace into the mount tree, creating a loop.
 */
2736static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2737
2738static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2739{
2740 struct mnt_namespace *new_ns;
2741 int ret;
2742
2743 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2744 if (!new_ns)
2745 return ERR_PTR(-ENOMEM);
2746 ret = ns_alloc_inum(&new_ns->ns);
2747 if (ret) {
2748 kfree(new_ns);
2749 return ERR_PTR(ret);
2750 }
2751 new_ns->ns.ops = &mntns_operations;
2752 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
2753 atomic_set(&new_ns->count, 1);
2754 new_ns->root = NULL;
2755 INIT_LIST_HEAD(&new_ns->list);
2756 init_waitqueue_head(&new_ns->poll);
2757 new_ns->event = 0;
2758 new_ns->user_ns = get_user_ns(user_ns);
2759 return new_ns;
2760}
2761
2762struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2763 struct user_namespace *user_ns, struct fs_struct *new_fs)
2764{
2765 struct mnt_namespace *new_ns;
2766 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
2767 struct mount *p, *q;
2768 struct mount *old;
2769 struct mount *new;
2770 int copy_flags;
2771
2772 BUG_ON(!ns);
2773
2774 if (likely(!(flags & CLONE_NEWNS))) {
2775 get_mnt_ns(ns);
2776 return ns;
2777 }
2778
2779 old = ns->root;
2780
2781 new_ns = alloc_mnt_ns(user_ns);
2782 if (IS_ERR(new_ns))
2783 return new_ns;
2784
2785 namespace_lock();
2786
2787 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
2788 if (user_ns != ns->user_ns)
2789 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
2790 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2791 if (IS_ERR(new)) {
2792 namespace_unlock();
2793 free_mnt_ns(new_ns);
2794 return ERR_CAST(new);
2795 }
2796 new_ns->root = new;
2797 list_add_tail(&new_ns->list, &new->mnt_list);
2798
2799
2800
2801
2802
2803
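 /*
  * Second pass: walk the old and new trees in lockstep, pointing every
  * copied mount at the new namespace and switching new_fs's root and
  * pwd references over to the corresponding copies.
  */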
2804 p = old;
2805 q = new;
2806 while (p) {
2807 q->mnt_ns = new_ns;
2808 if (new_fs) {
2809 if (&p->mnt == new_fs->root.mnt) {
2810 new_fs->root.mnt = mntget(&q->mnt);
2811 rootmnt = &p->mnt;
2812 }
2813 if (&p->mnt == new_fs->pwd.mnt) {
2814 new_fs->pwd.mnt = mntget(&q->mnt);
2815 pwdmnt = &p->mnt;
2816 }
2817 }
2818 p = next_mnt(p, old);
2819 q = next_mnt(q, new);
2820 if (!q)
2821 break;
2822 while (p->mnt.mnt_root != q->mnt.mnt_root)
2823 p = next_mnt(p, old);
2824 }
2825 namespace_unlock();
2826
2827 if (rootmnt)
2828 mntput(rootmnt);
2829 if (pwdmnt)
2830 mntput(pwdmnt);
2831
2832 return new_ns;
2833}
2834
2835
2836
2837
2838
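/*
 * Create a private mount namespace (owned by init_user_ns) with the
 * given mount as its root.  On allocation failure the mount reference
 * is dropped.
 */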
2839static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2840{
2841 struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
2842 if (!IS_ERR(new_ns)) {
2843 struct mount *mnt = real_mount(m);
2844 mnt->mnt_ns = new_ns;
2845 new_ns->root = mnt;
2846 list_add(&mnt->mnt_list, &new_ns->list);
2847 } else {
2848 mntput(m);
2849 }
2850 return new_ns;
2851}
2852
2853struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
2854{
2855 struct mnt_namespace *ns;
2856 struct super_block *s;
2857 struct path path;
2858 int err;
2859
2860 ns = create_mnt_ns(mnt);
2861 if (IS_ERR(ns))
2862 return ERR_CAST(ns);
2863
2864 err = vfs_path_lookup(mnt->mnt_root, mnt,
2865 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
2866
2867 put_mnt_ns(ns);
2868
2869 if (err)
2870 return ERR_PTR(err);
2871
2872
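 /*
  * Trade the vfsmount reference obtained by the lookup for an active
  * superblock reference, then return the dentry with s_umount held.
  */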
2873 s = path.mnt->mnt_sb;
2874 atomic_inc(&s->s_active);
2875 mntput(path.mnt);
2876
2877 down_write(&s->s_umount);
2878
2879 return path.dentry;
2880}
2881EXPORT_SYMBOL(mount_subtree);
2882
2883SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
2884 char __user *, type, unsigned long, flags, void __user *, data)
2885{
2886 int ret;
2887 char *kernel_type;
2888 char *kernel_dev;
2889 void *options;
2890
2891 kernel_type = copy_mount_string(type);
2892 ret = PTR_ERR(kernel_type);
2893 if (IS_ERR(kernel_type))
2894 goto out_type;
2895
2896 kernel_dev = copy_mount_string(dev_name);
2897 ret = PTR_ERR(kernel_dev);
2898 if (IS_ERR(kernel_dev))
2899 goto out_dev;
2900
2901 options = copy_mount_options(data);
2902 ret = PTR_ERR(options);
2903 if (IS_ERR(options))
2904 goto out_data;
2905
2906 ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
2907
2908 kfree(options);
2909out_data:
2910 kfree(kernel_dev);
2911out_dev:
2912 kfree(kernel_type);
2913out_type:
2914 return ret;
2915}
2916
2917
2918
2919
2920
2921
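/*
 * Return true if the path (mnt, dentry) is reachable from root, i.e. it
 * lies somewhere beneath root in the mount tree.  Callers are expected
 * to hold namespace_sem or mount_lock (path_is_under() below takes the
 * latter).
 */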
2922bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
2923 const struct path *root)
2924{
2925 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
2926 dentry = mnt->mnt_mountpoint;
2927 mnt = mnt->mnt_parent;
2928 }
2929 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
2930}
2931
2932bool path_is_under(struct path *path1, struct path *path2)
2933{
2934 bool res;
2935 read_seqlock_excl(&mount_lock);
2936 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
2937 read_sequnlock_excl(&mount_lock);
2938 return res;
2939}
2940EXPORT_SYMBOL(path_is_under);
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
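/*
 * pivot_root(2): make new_root the root of the caller's mount namespace
 * and re-attach the old root underneath put_old.  new_root must be the
 * root dentry of an attached mount, put_old must be reachable from
 * new_root, new_root must be reachable from the current root, and
 * neither put_old nor the parents of new_root and the current root may
 * be shared mounts.  Tasks whose root or cwd pointed at the old root
 * are switched over by chroot_fs_refs().
 */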
2967SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2968 const char __user *, put_old)
2969{
2970 struct path new, old, parent_path, root_parent, root;
2971 struct mount *new_mnt, *root_mnt, *old_mnt;
2972 struct mountpoint *old_mp, *root_mp;
2973 int error;
2974
2975 if (!may_mount())
2976 return -EPERM;
2977
2978 error = user_path_dir(new_root, &new);
2979 if (error)
2980 goto out0;
2981
2982 error = user_path_dir(put_old, &old);
2983 if (error)
2984 goto out1;
2985
2986 error = security_sb_pivotroot(&old, &new);
2987 if (error)
2988 goto out2;
2989
2990 get_fs_root(current->fs, &root);
2991 old_mp = lock_mount(&old);
2992 error = PTR_ERR(old_mp);
2993 if (IS_ERR(old_mp))
2994 goto out3;
2995
2996 error = -EINVAL;
2997 new_mnt = real_mount(new.mnt);
2998 root_mnt = real_mount(root.mnt);
2999 old_mnt = real_mount(old.mnt);
3000 if (IS_MNT_SHARED(old_mnt) ||
3001 IS_MNT_SHARED(new_mnt->mnt_parent) ||
3002 IS_MNT_SHARED(root_mnt->mnt_parent))
3003 goto out4;
3004 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
3005 goto out4;
3006 if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
3007 goto out4;
3008 error = -ENOENT;
3009 if (d_unlinked(new.dentry))
3010 goto out4;
3011 error = -EBUSY;
3012 if (new_mnt == root_mnt || old_mnt == root_mnt)
3013 goto out4;
3014 error = -EINVAL;
3015 if (root.mnt->mnt_root != root.dentry)
3016 goto out4;
3017 if (!mnt_has_parent(root_mnt))
3018 goto out4;
3019 root_mp = root_mnt->mnt_mp;
3020 if (new.mnt->mnt_root != new.dentry)
3021 goto out4;
3022 if (!mnt_has_parent(new_mnt))
3023 goto out4;
3024
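 /* make sure put_old is reachable from new_root */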
3025 if (!is_path_reachable(old_mnt, old.dentry, &new))
3026 goto out4;
3027
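 /* make sure new_root itself is below the current root */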
3028 if (!is_path_reachable(new_mnt, new.dentry, &root))
3029 goto out4;
3030 root_mp->m_count++;
3031 lock_mount_hash();
3032 detach_mnt(new_mnt, &parent_path);
3033 detach_mnt(root_mnt, &root_parent);
3034 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
3035 new_mnt->mnt.mnt_flags |= MNT_LOCKED;
3036 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
3037 }
3038
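 /* mount the old root on put_old, and new_root where the old root was */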
3039 attach_mnt(root_mnt, old_mnt, old_mp);
3040
3041 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
3042 touch_mnt_namespace(current->nsproxy->mnt_ns);
3043
3044 list_del_init(&new_mnt->mnt_expire);
3045 unlock_mount_hash();
3046 chroot_fs_refs(&root, &new);
3047 put_mountpoint(root_mp);
3048 error = 0;
3049out4:
3050 unlock_mount(old_mp);
3051 if (!error) {
3052 path_put(&root_parent);
3053 path_put(&parent_path);
3054 }
3055out3:
3056 path_put(&root);
3057out2:
3058 path_put(&old);
3059out1:
3060 path_put(&new);
3061out0:
3062 return error;
3063}
3064
3065static void __init init_mount_tree(void)
3066{
3067 struct vfsmount *mnt;
3068 struct mnt_namespace *ns;
3069 struct path root;
3070 struct file_system_type *type;
3071
3072 type = get_fs_type("rootfs");
3073 if (!type)
3074 panic("Can't find rootfs type");
3075 mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
3076 put_filesystem(type);
3077 if (IS_ERR(mnt))
3078 panic("Can't create rootfs");
3079
3080 ns = create_mnt_ns(mnt);
3081 if (IS_ERR(ns))
3082 panic("Can't allocate initial namespace");
3083
3084 init_task.nsproxy->mnt_ns = ns;
3085 get_mnt_ns(ns);
3086
3087 root.mnt = mnt;
3088 root.dentry = mnt->mnt_root;
3089 mnt->mnt_flags |= MNT_LOCKED;
3090
3091 set_fs_pwd(current->fs, &root);
3092 set_fs_root(current->fs, &root);
3093}
3094
3095void __init mnt_init(void)
3096{
3097 unsigned u;
3098 int err;
3099
3100 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
3101 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
3102
3103 mount_hashtable = alloc_large_system_hash("Mount-cache",
3104 sizeof(struct hlist_head),
3105 mhash_entries, 19,
3106 0,
3107 &m_hash_shift, &m_hash_mask, 0, 0);
3108 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
3109 sizeof(struct hlist_head),
3110 mphash_entries, 19,
3111 0,
3112 &mp_hash_shift, &mp_hash_mask, 0, 0);
3113
3114 if (!mount_hashtable || !mountpoint_hashtable)
3115 panic("Failed to allocate mount hash table\n");
3116
3117 for (u = 0; u <= m_hash_mask; u++)
3118 INIT_HLIST_HEAD(&mount_hashtable[u]);
3119 for (u = 0; u <= mp_hash_mask; u++)
3120 INIT_HLIST_HEAD(&mountpoint_hashtable[u]);
3121
3122 kernfs_init();
3123
3124 err = sysfs_init();
3125 if (err)
3126 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
3127 __func__, err);
3128 fs_kobj = kobject_create_and_add("fs", NULL);
3129 if (!fs_kobj)
3130 printk(KERN_WARNING "%s: kobj create error\n", __func__);
3131 init_rootfs();
3132 init_mount_tree();
3133}
3134
3135void put_mnt_ns(struct mnt_namespace *ns)
3136{
3137 if (!atomic_dec_and_test(&ns->count))
3138 return;
3139 drop_collected_mounts(&ns->root->mnt);
3140 free_mnt_ns(ns);
3141}
3142
3143struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
3144{
3145 struct vfsmount *mnt;
3146 mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
3147 if (!IS_ERR(mnt)) {
3148
3149
3150
3151
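 /*
  * Mark this as a long-term internal mount (MNT_NS_INTERNAL); it stays
  * pinned until kern_unmount() is called.
  */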
3152 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
3153 }
3154 return mnt;
3155}
3156EXPORT_SYMBOL_GPL(kern_mount_data);
3157
3158void kern_unmount(struct vfsmount *mnt)
3159{
3160
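 /*
  * Release a long-term kernel mount: clear mnt_ns and wait out any RCU
  * readers before dropping the final reference.
  */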
3161 if (!IS_ERR_OR_NULL(mnt)) {
3162 real_mount(mnt)->mnt_ns = NULL;
3163 synchronize_rcu();
3164 mntput(mnt);
3165 }
3166}
3167EXPORT_SYMBOL(kern_unmount);
3168
3169bool our_mnt(struct vfsmount *mnt)
3170{
3171 return check_mnt(real_mount(mnt));
3172}
3173
3174bool current_chrooted(void)
3175{
3176
3177 struct path ns_root;
3178 struct path fs_root;
3179 bool chrooted;
3180
3181
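 /* Find the namespace root, skipping anything mounted on top of it */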
3182 ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
3183 ns_root.dentry = ns_root.mnt->mnt_root;
3184 path_get(&ns_root);
3185 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
3186 ;
3187
3188 get_fs_root(current->fs, &fs_root);
3189
3190 chrooted = !path_equal(&fs_root, &ns_root);
3191
3192 path_put(&fs_root);
3193 path_put(&ns_root);
3194
3195 return chrooted;
3196}
3197
3198static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new,
3199 int *new_mnt_flags)
3200{
3201 int new_flags = *new_mnt_flags;
3202 struct mount *mnt;
3203 bool visible = false;
3204
3205 down_read(&namespace_sem);
3206 list_for_each_entry(mnt, &ns->list, mnt_list) {
3207 struct mount *child;
3208 int mnt_flags;
3209
3210 if (mnt->mnt.mnt_sb->s_type != new->mnt_sb->s_type)
3211 continue;
3212
3213
3214
3215
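 /*
  * Only consider mounts of the filesystem's root; a bind mount of a
  * subdirectory does not expose the whole filesystem.
  */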
3216 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
3217 continue;
3218
3219
3220 mnt_flags = mnt->mnt.mnt_flags;
3221
3222
3223 if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY)
3224 mnt_flags |= MNT_LOCK_READONLY;
3225
3226
3227
3228
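 /*
  * The proposed mount must not be more permissive than this visible
  * one: a locked read-only flag may not be dropped, and a locked atime
  * mode must match exactly.
  */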
3229 if ((mnt_flags & MNT_LOCK_READONLY) &&
3230 !(new_flags & MNT_READONLY))
3231 continue;
3232 if ((mnt_flags & MNT_LOCK_ATIME) &&
3233 ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
3234 continue;
3235
3236
3237
3238
3239
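 /*
  * Check what is mounted on top of this candidate: a child that is
  * locked in place is only acceptable if it covers a permanently empty
  * directory, otherwise it hides part of the filesystem.
  */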
3240 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
3241 struct inode *inode = child->mnt_mountpoint->d_inode;
3242
3243 if (!(child->mnt.mnt_flags & MNT_LOCKED))
3244 continue;
3245
3246 if (!is_empty_dir_inode(inode))
3247 goto next;
3248 }
3249
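 /* Preserve the locked read-only and atime attributes on the new mount */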
3250 *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
3251 MNT_LOCK_ATIME);
3252 visible = true;
3253 goto found;
3254 next: ;
3255 }
3256found:
3257 up_read(&namespace_sem);
3258 return visible;
3259}
3260
3261static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags)
3262{
3263 const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
3264 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
3265 unsigned long s_iflags;
3266
3267 if (ns->user_ns == &init_user_ns)
3268 return false;
3269
3270
3271 s_iflags = mnt->mnt_sb->s_iflags;
3272 if (!(s_iflags & SB_I_USERNS_VISIBLE))
3273 return false;
3274
3275 if ((s_iflags & required_iflags) != required_iflags) {
3276 WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
3277 required_iflags);
3278 return true;
3279 }
3280
3281 return !mnt_already_visible(ns, mnt, new_mnt_flags);
3282}
3283
3284bool mnt_may_suid(struct vfsmount *mnt)
3285{
3286
3287
3288
3289
3290
3291
3292
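 /*
  * Foreign mounts (reached via fchdir(2) or /proc magic links, for
  * example) are treated as nosuid: suid/sgid bits, file capabilities
  * and security labels originating in other namespaces are not trusted.
  */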
3293 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
3294 current_in_userns(mnt->mnt_sb->s_user_ns);
3295}
3296
3297static struct ns_common *mntns_get(struct task_struct *task)
3298{
3299 struct ns_common *ns = NULL;
3300 struct nsproxy *nsproxy;
3301
3302 task_lock(task);
3303 nsproxy = task->nsproxy;
3304 if (nsproxy) {
3305 ns = &nsproxy->mnt_ns->ns;
3306 get_mnt_ns(to_mnt_ns(ns));
3307 }
3308 task_unlock(task);
3309
3310 return ns;
3311}
3312
3313static void mntns_put(struct ns_common *ns)
3314{
3315 put_mnt_ns(to_mnt_ns(ns));
3316}
3317
3318static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
3319{
3320 struct fs_struct *fs = current->fs;
3321 struct mnt_namespace *mnt_ns = to_mnt_ns(ns);
3322 struct path root;
3323
3324 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
3325 !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
3326 !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
3327 return -EPERM;
3328
3329 if (fs->users != 1)
3330 return -EINVAL;
3331
3332 get_mnt_ns(mnt_ns);
3333 put_mnt_ns(nsproxy->mnt_ns);
3334 nsproxy->mnt_ns = mnt_ns;
3335
3336
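 /* Find the root of the new namespace, skipping anything mounted on top of it */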
3337 root.mnt = &mnt_ns->root->mnt;
3338 root.dentry = mnt_ns->root->mnt.mnt_root;
3339 path_get(&root);
3340 while(d_mountpoint(root.dentry) && follow_down_one(&root))
3341 ;
3342
3343
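 /* Switch current's pwd and root over to the new namespace's root */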
3344 set_fs_pwd(fs, &root);
3345 set_fs_root(fs, &root);
3346
3347 path_put(&root);
3348 return 0;
3349}
3350
3351const struct proc_ns_operations mntns_operations = {
3352 .name = "mnt",
3353 .type = CLONE_NEWNS,
3354 .get = mntns_get,
3355 .put = mntns_put,
3356 .install = mntns_install,
3357};
3358