/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *	Released under GPL v2.
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */

#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/fs_struct.h>
#include <linux/fsnotify.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/bootmem.h>
#include <linux/task_work.h>
#include "pnode.h"
#include "internal.h"

static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
static unsigned int mp_hash_mask __read_mostly;
static unsigned int mp_hash_shift __read_mostly;

static __initdata unsigned long mhash_entries;
static int __init set_mhash_entries(char *str)
{
	if (!str)
		return 0;
	mhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mhash_entries=", set_mhash_entries);

static __initdata unsigned long mphash_entries;
static int __init set_mphash_entries(char *str)
{
	if (!str)
		return 0;
	mphash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mphash_entries=", set_mphash_entries);
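
/*
 * Illustrative note (added; not in the original source): both hash tables
 * can be sized explicitly from the kernel command line, e.g. booting with
 *
 *	mhash_entries=8192 mphash_entries=4096
 *
 * feeds the two __setup() handlers above.  When the parameters are absent,
 * the tables are sized automatically from available memory at init time.
 */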

static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
static DEFINE_SPINLOCK(mnt_id_lock);
static int mnt_id_start = 0;
static int mnt_group_start = 1;

static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);

/* /sys/fs */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);

/*
 * vfsmount lock may be taken for read to prevent changes to the
 * vfsmount hash, ie. during mountpoint lookups or walking back
 * up the tree.
 *
 * It should be taken for write in all cases where the vfsmount
 * lock protection is not required and modifying it, ie. any of the
 * operations which change the vfsmount hash.
 */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);

static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> m_hash_shift);
	return &mount_hashtable[tmp & m_hash_mask];
}

static inline struct hlist_head *mp_hash(struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> mp_hash_shift);
	return &mountpoint_hashtable[tmp & mp_hash_mask];
}
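
/*
 * Worked example (added): both hashes divide the pointers by
 * L1_CACHE_BYTES, discarding low bits that carry almost no entropy for
 * cache-aligned objects, then fold the upper bits back in.  With
 * m_hash_shift == 10 and tmp == 0x12345, the bucket selected is
 * (0x12345 + (0x12345 >> 10)) & m_hash_mask == (0x12345 + 0x48) & mask,
 * so the table index depends on both halves of the key.
 */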

/*
 * allocation is serialized by namespace_sem, but we need the spinlock to
 * serialize with freeing.
 */
static int mnt_alloc_id(struct mount *mnt)
{
	int res;

retry:
	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
	spin_lock(&mnt_id_lock);
	res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
	if (!res)
		mnt_id_start = mnt->mnt_id + 1;
	spin_unlock(&mnt_id_lock);
	if (res == -EAGAIN)
		goto retry;

	return res;
}

static void mnt_free_id(struct mount *mnt)
{
	int id = mnt->mnt_id;
	spin_lock(&mnt_id_lock);
	ida_remove(&mnt_id_ida, id);
	if (mnt_id_start > id)
		mnt_id_start = id;
	spin_unlock(&mnt_id_lock);
}

/*
 * Allocate a new peer group ID
 *
 * mnt_group_ida is protected by namespace_sem
 */
static int mnt_alloc_group_id(struct mount *mnt)
{
	int res;

	if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
		return -ENOMEM;

	res = ida_get_new_above(&mnt_group_ida,
				mnt_group_start,
				&mnt->mnt_group_id);
	if (!res)
		mnt_group_start = mnt->mnt_group_id + 1;

	return res;
}

/*
 * Release a peer group ID
 */
void mnt_release_group_id(struct mount *mnt)
{
	int id = mnt->mnt_group_id;
	ida_remove(&mnt_group_ida, id);
	if (mnt_group_start > id)
		mnt_group_start = id;
	mnt->mnt_group_id = 0;
}

/*
 * vfsmount lock must be held for read
 */
static inline void mnt_add_count(struct mount *mnt, int n)
{
#ifdef CONFIG_SMP
	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
#else
	preempt_disable();
	mnt->mnt_count += n;
	preempt_enable();
#endif
}

/*
 * vfsmount lock must be held for write
 */
unsigned int mnt_get_count(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
	}

	return count;
#else
	return mnt->mnt_count;
#endif
}
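
/*
 * Note (added): the mount refcount is striped across CPUs, so
 * mnt_add_count() stays a cheap local operation while mnt_get_count()
 * has to sum every CPU's slot.  Individual slots may be negative --
 * per-CPU counts of {3, -1, 0, 2} mean a true reference count of 4 --
 * which is why only the total is meaningful and why it is read under
 * the (write-held) mount lock.
 */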

static struct mount *alloc_vfsmnt(const char *name)
{
	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

#ifdef CONFIG_SMP
		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
		if (!mnt->mnt_pcp)
			goto out_free_devname;

		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else
		mnt->mnt_count = 1;
		mnt->mnt_writers = 0;
#endif

		INIT_HLIST_NODE(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
		INIT_HLIST_NODE(&mnt->mnt_mp_list);
#ifdef CONFIG_FSNOTIFY
		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
#endif
	}
	return mnt;

#ifdef CONFIG_SMP
out_free_devname:
	kfree(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}

/*
 * Most r/o checks on a vfs can be done at the mount level, and are not
 * writers on the mount -- but for a read-only filesystem, checking the
 * mount flags is not enough, and we also need to check the individual
 * superblock flags.
 */
int __mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_flags & MNT_READONLY)
		return 1;
	if (mnt->mnt_sb->s_flags & MS_RDONLY)
		return 1;
	return 0;
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);
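
/*
 * Usage sketch (added; hypothetical caller): code that merely wants to
 * fail fast before attempting a write can ask the mount directly,
 *
 *	if (__mnt_is_readonly(path->mnt))
 *		return -EROFS;
 *
 * but the authoritative gate is __mnt_want_write() below, which also
 * participates in the MNT_WRITE_HOLD handshake.
 */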

static inline void mnt_inc_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers++;
#endif
}

static inline void mnt_dec_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers--;
#endif
}

static unsigned int mnt_get_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
	}

	return count;
#else
	return mnt->mnt_writers;
#endif
}

static int mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_sb->s_readonly_remount)
		return 1;
	/* Order wrt setting s_readonly_remount in do_remount() */
	smp_rmb();
	return __mnt_is_readonly(mnt);
}

/*
 * __mnt_want_write - get write access to a mount without freeze protection
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed to
 * it, and makes sure that writes are allowed (mnt is read-write) before
 * returning success. This operation does not protect against filesystem being
 * frozen. When the write operation is finished, __mnt_drop_write() must be
 * called. This is effectively a refcount.
 */
int __mnt_want_write(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int ret = 0;

	preempt_disable();
	mnt_inc_writers(mnt);
	/*
	 * The store to mnt_inc_writers must be visible before we pass
	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
	 * incremented count after it has set MNT_WRITE_HOLD.
	 */
	smp_mb();
	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
		cpu_relax();
	/*
	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
	 * be set to match its requirements. So we must not load that until
	 * MNT_WRITE_HOLD is cleared.
	 */
	smp_rmb();
	if (mnt_is_readonly(m)) {
		mnt_dec_writers(mnt);
		ret = -EROFS;
	}
	preempt_enable();

	return ret;
}

/**
 * mnt_want_write - get write access to a mount
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed to
 * it, and makes sure that writes are allowed (mount is read-write, filesystem
 * is not frozen) before returning success.  When the write operation is
 * finished, mnt_drop_write() must be called.  This is effectively a refcount.
 */
int mnt_want_write(struct vfsmount *m)
{
	int ret;

	sb_start_write(m->mnt_sb);
	ret = __mnt_want_write(m);
	if (ret)
		sb_end_write(m->mnt_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
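
/*
 * Typical usage (added sketch): a caller about to dirty the filesystem
 * brackets the modification with the refcounted pair, e.g.
 *
 *	err = mnt_want_write(path->mnt);
 *	if (err)
 *		return err;
 *	err = vfs_unlink(dir, dentry, NULL);
 *	mnt_drop_write(path->mnt);
 *
 * which also provides freeze protection through sb_start_write().
 */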

/**
 * mnt_clone_write - get write access to a mount
 * @mnt: the mount on which to take a write
 *
 * This is effectively like mnt_want_write, except
 * it must only be used to take an extra write reference
 * on a mountpoint that we already know has a write reference
 * on it. This allows some optimisation.
 *
 * After finished, mnt_drop_write must be called as usual to
 * drop the reference.
 */
int mnt_clone_write(struct vfsmount *mnt)
{
	/* superblock may be r/o */
	if (__mnt_is_readonly(mnt))
		return -EROFS;
	preempt_disable();
	mnt_inc_writers(real_mount(mnt));
	preempt_enable();
	return 0;
}
EXPORT_SYMBOL_GPL(mnt_clone_write);

/**
 * __mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like __mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already
 */
int __mnt_want_write_file(struct file *file)
{
	if (!(file->f_mode & FMODE_WRITER))
		return __mnt_want_write(file->f_path.mnt);
	else
		return mnt_clone_write(file->f_path.mnt);
}

/**
 * mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already
 */
int mnt_want_write_file(struct file *file)
{
	int ret;

	sb_start_write(file->f_path.mnt->mnt_sb);
	ret = __mnt_want_write_file(file);
	if (ret)
		sb_end_write(file->f_path.mnt->mnt_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);
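
/*
 * Sketch (added): an ioctl handler that updates its backing inode would
 * typically do
 *
 *	ret = mnt_want_write_file(file);
 *	if (ret)
 *		return ret;
 *	... modify the inode ...
 *	mnt_drop_write_file(file);
 *
 * If the file was opened for write (FMODE_WRITER), a write reference is
 * already held and only the cheaper mnt_clone_write() path runs.
 */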

/**
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done
 * performing writes to it.  Must be matched with
 * __mnt_want_write() call above.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	preempt_disable();
	mnt_dec_writers(real_mount(mnt));
	preempt_enable();
}

/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it and
 * also allows filesystem to be frozen again.  Must be matched with
 * mnt_want_write() call above.
 */
void mnt_drop_write(struct vfsmount *mnt)
{
	__mnt_drop_write(mnt);
	sb_end_write(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

void __mnt_drop_write_file(struct file *file)
{
	__mnt_drop_write(file->f_path.mnt);
}

void mnt_drop_write_file(struct file *file)
{
	mnt_drop_write(file->f_path.mnt);
}
EXPORT_SYMBOL(mnt_drop_write_file);

static int mnt_make_readonly(struct mount *mnt)
{
	int ret = 0;

	lock_mount_hash();
	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
	/*
	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
	 * should be visible before we do.
	 */
	smp_mb();

	/*
	 * With writers on hold, if this value is zero, then there are
	 * definitely no active writers (although held writers may subsequently
	 * increment the count, they'll have to wait, and decrement it after
	 * seeing MNT_READONLY).
	 *
	 * It is OK to have counter incremented on one CPU and decremented on
	 * another: the sum will add up correctly. The danger would be when we
	 * sum up each counter, if we read a counter before it is incremented,
	 * but then read another CPU's count which it has been subsequently
	 * decremented from -- we would see more decrements than we should.
	 * MNT_WRITE_HOLD protects against this scenario, because
	 * mnt_want_write first increments count, then smp_mb, then spins on
	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
	 * we're counting up here.
	 */
	if (mnt_get_writers(mnt) > 0)
		ret = -EBUSY;
	else
		mnt->mnt.mnt_flags |= MNT_READONLY;
	/*
	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
	 * that become unheld will see MNT_READONLY.
	 */
	smp_wmb();
	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	unlock_mount_hash();
	return ret;
}
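
/*
 * Worked interleaving (added): suppose CPU0 runs mnt_make_readonly()
 * while CPU1 enters __mnt_want_write().
 *
 *	CPU1: mnt_inc_writers()		CPU0: set MNT_WRITE_HOLD
 *	CPU1: smp_mb()			CPU0: smp_mb()
 *	CPU1: spin on WRITE_HOLD	CPU0: mnt_get_writers() sees CPU1
 *
 * Here CPU0 fails with -EBUSY.  Had CPU1 incremented after the counters
 * were summed, it would spin until MNT_WRITE_HOLD clears and then observe
 * MNT_READONLY, backing out with -EROFS -- so no writer is ever missed.
 */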

static void __mnt_unmake_readonly(struct mount *mnt)
{
	lock_mount_hash();
	mnt->mnt.mnt_flags &= ~MNT_READONLY;
	unlock_mount_hash();
}

int sb_prepare_remount_readonly(struct super_block *sb)
{
	struct mount *mnt;
	int err = 0;

	/* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
	if (atomic_long_read(&sb->s_remove_count))
		return -EBUSY;

	lock_mount_hash();
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
			mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
			smp_mb();
			if (mnt_get_writers(mnt) > 0) {
				err = -EBUSY;
				break;
			}
		}
	}
	if (!err && atomic_long_read(&sb->s_remove_count))
		err = -EBUSY;

	if (!err) {
		sb->s_readonly_remount = 1;
		smp_wmb();
	}
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	}
	unlock_mount_hash();

	return err;
}

static void free_vfsmnt(struct mount *mnt)
{
	kfree(mnt->mnt_devname);
#ifdef CONFIG_SMP
	free_percpu(mnt->mnt_pcp);
#endif
	kmem_cache_free(mnt_cache, mnt);
}

static void delayed_free_vfsmnt(struct rcu_head *head)
{
	free_vfsmnt(container_of(head, struct mount, mnt_rcu));
}

/* call under rcu_read_lock */
bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
	struct mount *mnt;
	if (read_seqretry(&mount_lock, seq))
		return false;
	if (bastard == NULL)
		return true;
	mnt = real_mount(bastard);
	mnt_add_count(mnt, 1);
	if (likely(!read_seqretry(&mount_lock, seq)))
		return true;
	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
		mnt_add_count(mnt, -1);
		return false;
	}
	rcu_read_unlock();
	mntput(bastard);
	rcu_read_lock();
	return false;
}
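
/*
 * Note (added): legitimize_mnt() is the reader half of a seqlock retry
 * scheme.  A reference grabbed under RCU is only trusted if mount_lock's
 * sequence count is unchanged; on a racing MNT_SYNC_UMOUNT the count is
 * quietly undone, otherwise a full mntput() is required, which may sleep,
 * hence the temporary rcu_read_unlock()/rcu_read_lock() dance.
 */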

/*
 * find the first mount at @dentry on vfsmount @mnt.
 * call under rcu_read_lock()
 */
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct hlist_head *head = m_hash(mnt, dentry);
	struct mount *p;

	hlist_for_each_entry_rcu(p, head, mnt_hash)
		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
			return p;
	return NULL;
}

/*
 * find the last mount at @dentry on vfsmount @mnt.
 * mount_lock must be held.
 */
struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
{
	struct mount *p, *res;
	res = p = __lookup_mnt(mnt, dentry);
	if (!p)
		goto out;
	hlist_for_each_entry_continue(p, mnt_hash) {
		if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
			break;
		res = p;
	}
out:
	return res;
}

/*
 * lookup_mnt - Return the first child mount mounted at path
 *
 * "First" means first mounted, rather than first found in the hash: if
 * several mounts are stacked on the same mountpoint, the one that was
 * mounted first is returned.
 *
 * If the path is not mounted on, returns NULL.
 *
 * "Mounted on" means the path itself, not a subpath of it.
 *
 * The returned vfsmount has a reference grabbed via legitimize_mnt();
 * the caller is responsible for dropping it.
 */
struct vfsmount *lookup_mnt(struct path *path)
{
	struct mount *child_mnt;
	struct vfsmount *m;
	unsigned seq;

	rcu_read_lock();
	do {
		seq = read_seqbegin(&mount_lock);
		child_mnt = __lookup_mnt(path->mnt, path->dentry);
		m = child_mnt ? &child_mnt->mnt : NULL;
	} while (!legitimize_mnt(m, seq));
	rcu_read_unlock();
	return m;
}
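
/*
 * Usage sketch (added): path walking uses this to step from a covered
 * dentry onto whatever is mounted there, e.g.
 *
 *	struct vfsmount *mounted = lookup_mnt(&path);
 *	if (mounted) {
 *		... follow into the child mount ...
 *		mntput(mounted);
 *	}
 *
 * The returned vfsmount carries a reference that the caller must drop.
 */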

/*
 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
 *                         current mount namespace.
 *
 * The common case is dentries are not mountpoints at all and that
 * test is handled inline.  For the slow case when we are actually
 * dealing with a mountpoint of some kind, walk through all of the
 * mounts in the current mount namespace and test to see if the dentry
 * is a mountpoint.
 *
 * The mount_hashtable is not usable in the context because we
 * need to identify all mounts that may be in the current mount
 * namespace not just a mount that happens to have some specified
 * parent mount.
 */
bool __is_local_mountpoint(struct dentry *dentry)
{
	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
	struct mount *mnt;
	bool is_covered = false;

	if (!d_mountpoint(dentry))
		goto out;

	down_read(&namespace_sem);
	list_for_each_entry(mnt, &ns->list, mnt_list) {
		is_covered = (mnt->mnt_mountpoint == dentry);
		if (is_covered)
			break;
	}
	up_read(&namespace_sem);
out:
	return is_covered;
}

static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
{
	struct hlist_head *chain = mp_hash(dentry);
	struct mountpoint *mp;

	hlist_for_each_entry(mp, chain, m_hash) {
		if (mp->m_dentry == dentry) {
			/* might be worth a WARN_ON() */
			if (d_unlinked(dentry))
				return ERR_PTR(-ENOENT);
			mp->m_count++;
			return mp;
		}
	}
	return NULL;
}

static struct mountpoint *new_mountpoint(struct dentry *dentry)
{
	struct hlist_head *chain = mp_hash(dentry);
	struct mountpoint *mp;
	int ret;

	mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
	if (!mp)
		return ERR_PTR(-ENOMEM);

	ret = d_set_mounted(dentry);
	if (ret) {
		kfree(mp);
		return ERR_PTR(ret);
	}

	mp->m_dentry = dentry;
	mp->m_count = 1;
	hlist_add_head(&mp->m_hash, chain);
	INIT_HLIST_HEAD(&mp->m_list);
	return mp;
}

static void put_mountpoint(struct mountpoint *mp)
{
	if (!--mp->m_count) {
		struct dentry *dentry = mp->m_dentry;
		BUG_ON(!hlist_empty(&mp->m_list));
		spin_lock(&dentry->d_lock);
		dentry->d_flags &= ~DCACHE_MOUNTED;
		spin_unlock(&dentry->d_lock);
		hlist_del(&mp->m_hash);
		kfree(mp);
	}
}

static inline int check_mnt(struct mount *mnt)
{
	return mnt->mnt_ns == current->nsproxy->mnt_ns;
}

/*
 * vfsmount lock must be held for write
 */
static void touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns) {
		ns->event = ++event;
		wake_up_interruptible(&ns->poll);
	}
}

/*
 * vfsmount lock must be held for write
 */
static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns && ns->event != event) {
		ns->event = event;
		wake_up_interruptible(&ns->poll);
	}
}

/*
 * vfsmount lock must be held for write
 */
static void detach_mnt(struct mount *mnt, struct path *old_path)
{
	old_path->dentry = mnt->mnt_mountpoint;
	old_path->mnt = &mnt->mnt_parent->mnt;
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	list_del_init(&mnt->mnt_child);
	hlist_del_init_rcu(&mnt->mnt_hash);
	hlist_del_init(&mnt->mnt_mp_list);
	put_mountpoint(mnt->mnt_mp);
	mnt->mnt_mp = NULL;
}

/*
 * vfsmount lock must be held for write
 */
void mnt_set_mountpoint(struct mount *mnt,
			struct mountpoint *mp,
			struct mount *child_mnt)
{
	mp->m_count++;
	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
	child_mnt->mnt_mountpoint = dget(mp->m_dentry);
	child_mnt->mnt_parent = mnt;
	child_mnt->mnt_mp = mp;
	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}

/*
 * vfsmount lock must be held for write
 */
static void attach_mnt(struct mount *mnt,
			struct mount *parent,
			struct mountpoint *mp)
{
	mnt_set_mountpoint(parent, mp, mnt);
	hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
}

static void attach_shadowed(struct mount *mnt,
			struct mount *parent,
			struct mount *shadows)
{
	if (shadows) {
		hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
		list_add(&mnt->mnt_child, &shadows->mnt_child);
	} else {
		hlist_add_head_rcu(&mnt->mnt_hash,
				m_hash(&parent->mnt, mnt->mnt_mountpoint));
		list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
	}
}

/*
 * vfsmount lock must be held for write
 */
static void commit_tree(struct mount *mnt, struct mount *shadows)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m;
	LIST_HEAD(head);
	struct mnt_namespace *n = parent->mnt_ns;

	BUG_ON(parent == mnt);

	list_add_tail(&head, &mnt->mnt_list);
	list_for_each_entry(m, &head, mnt_list)
		m->mnt_ns = n;

	list_splice(&head, n->list.prev);

	attach_shadowed(mnt, parent, shadows);
	touch_mnt_namespace(n);
}

static struct mount *next_mnt(struct mount *p, struct mount *root)
{
	struct list_head *next = p->mnt_mounts.next;
	if (next == &p->mnt_mounts) {
		while (1) {
			if (p == root)
				return NULL;
			next = p->mnt_child.next;
			if (next != &p->mnt_parent->mnt_mounts)
				break;
			p = p->mnt_parent;
		}
	}
	return list_entry(next, struct mount, mnt_child);
}

static struct mount *skip_mnt_tree(struct mount *p)
{
	struct list_head *prev = p->mnt_mounts.prev;
	while (prev != &p->mnt_mounts) {
		p = list_entry(prev, struct mount, mnt_child);
		prev = p->mnt_mounts.prev;
	}
	return p;
}

struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct mount *mnt;
	struct dentry *root;

	if (!type)
		return ERR_PTR(-ENODEV);

	mnt = alloc_vfsmnt(name);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flags & MS_KERNMOUNT)
		mnt->mnt.mnt_flags = MNT_INTERNAL;

	root = mount_fs(type, flags, name, data);
	if (IS_ERR(root)) {
		mnt_free_id(mnt);
		free_vfsmnt(mnt);
		return ERR_CAST(root);
	}

	mnt->mnt.mnt_root = root;
	mnt->mnt.mnt_sb = root->d_sb;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
	unlock_mount_hash();
	return &mnt->mnt;
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);

static struct mount *clone_mnt(struct mount *old, struct dentry *root,
					int flag)
{
	struct super_block *sb = old->mnt.mnt_sb;
	struct mount *mnt;
	int err;

	mnt = alloc_vfsmnt(old->mnt_devname);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
		mnt->mnt_group_id = 0; /* not a peer of original */
	else
		mnt->mnt_group_id = old->mnt_group_id;

	if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
		err = mnt_alloc_group_id(mnt);
		if (err)
			goto out_free;
	}

	mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
	/* Don't allow unprivileged users to change mount flags */
	if (flag & CL_UNPRIVILEGED) {
		mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;

		if (mnt->mnt.mnt_flags & MNT_READONLY)
			mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;

		if (mnt->mnt.mnt_flags & MNT_NODEV)
			mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;

		if (mnt->mnt.mnt_flags & MNT_NOSUID)
			mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;

		if (mnt->mnt.mnt_flags & MNT_NOEXEC)
			mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
	}

	/* Don't allow unprivileged users to reveal what is under a mount */
	if ((flag & CL_UNPRIVILEGED) &&
	    (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
		mnt->mnt.mnt_flags |= MNT_LOCKED;

	atomic_inc(&sb->s_active);
	mnt->mnt.mnt_sb = sb;
	mnt->mnt.mnt_root = dget(root);
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
	unlock_mount_hash();

	if ((flag & CL_SLAVE) ||
	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
		list_add(&mnt->mnt_slave, &old->mnt_slave_list);
		mnt->mnt_master = old;
		CLEAR_MNT_SHARED(mnt);
	} else if (!(flag & CL_PRIVATE)) {
		if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
			list_add(&mnt->mnt_share, &old->mnt_share);
		if (IS_MNT_SLAVE(old))
			list_add(&mnt->mnt_slave, &old->mnt_slave);
		mnt->mnt_master = old->mnt_master;
	}
	if (flag & CL_MAKE_SHARED)
		set_mnt_shared(mnt);

	/* stick the duplicate mount on the same expiry list
	 * as the original if that was on one */
	if (flag & CL_EXPIRE) {
		if (!list_empty(&old->mnt_expire))
			list_add(&mnt->mnt_expire, &old->mnt_expire);
	}

	return mnt;

 out_free:
	mnt_free_id(mnt);
	free_vfsmnt(mnt);
	return ERR_PTR(err);
}

static void cleanup_mnt(struct mount *mnt)
{
	/*
	 * This probably indicates that somebody messed
	 * up a mnt_want/drop_write() pair.  If this
	 * happens, the filesystem was probably unable
	 * to make r/w->r/o transitions.
	 */
	/*
	 * The locking used to deal with mnt_count decrement provides barriers,
	 * so mnt_get_writers() below is safe.
	 */
	WARN_ON(mnt_get_writers(mnt));
	if (unlikely(mnt->mnt_pins.first))
		mnt_pin_kill(mnt);
	fsnotify_vfsmount_delete(&mnt->mnt);
	dput(mnt->mnt.mnt_root);
	deactivate_super(mnt->mnt.mnt_sb);
	mnt_free_id(mnt);
	call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
}

static void __cleanup_mnt(struct rcu_head *head)
{
	cleanup_mnt(container_of(head, struct mount, mnt_rcu));
}

static LLIST_HEAD(delayed_mntput_list);
static void delayed_mntput(struct work_struct *unused)
{
	struct llist_node *node = llist_del_all(&delayed_mntput_list);
	struct llist_node *next;

	for (; node; node = next) {
		next = llist_next(node);
		cleanup_mnt(llist_entry(node, struct mount, mnt_llist));
	}
}
static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);

static void mntput_no_expire(struct mount *mnt)
{
	rcu_read_lock();
	mnt_add_count(mnt, -1);
	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
		rcu_read_unlock();
		return;
	}
	lock_mount_hash();
	if (mnt_get_count(mnt)) {
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	mnt->mnt.mnt_flags |= MNT_DOOMED;
	rcu_read_unlock();

	list_del(&mnt->mnt_instance);
	unlock_mount_hash();

	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
		struct task_struct *task = current;
		if (likely(!(task->flags & PF_KTHREAD))) {
			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
			if (!task_work_add(task, &mnt->mnt_rcu, true))
				return;
		}
		if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
			schedule_delayed_work(&delayed_mntput_work, 1);
		return;
	}
	cleanup_mnt(mnt);
}

void mntput(struct vfsmount *mnt)
{
	if (mnt) {
		struct mount *m = real_mount(mnt);
		/* avoid cacheline pingpong, hope gcc doesn't get smarter */
		if (unlikely(m->mnt_expiry_mark))
			m->mnt_expiry_mark = 0;
		mntput_no_expire(m);
	}
}
EXPORT_SYMBOL(mntput);

struct vfsmount *mntget(struct vfsmount *mnt)
{
	if (mnt)
		mnt_add_count(real_mount(mnt), 1);
	return mnt;
}
EXPORT_SYMBOL(mntget);
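
/*
 * Note (added): mntget()/mntput() are the reference pair for vfsmounts;
 * a caller stashing a mount for later use does
 *
 *	struct vfsmount *m = mntget(path->mnt);
 *	...
 *	mntput(m);
 *
 * mntput() also clears mnt_expiry_mark, which is what keeps an in-use
 * automounted mount from being reaped by mark_mounts_for_expiry().
 */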

struct vfsmount *mnt_clone_internal(struct path *path)
{
	struct mount *p;
	p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
	if (IS_ERR(p))
		return ERR_CAST(p);
	p->mnt.mnt_flags |= MNT_INTERNAL;
	return &p->mnt;
}

static inline void mangle(struct seq_file *m, const char *s)
{
	seq_escape(m, s, " \t\n\\");
}

/*
 * Simple .show_options callback for filesystems which don't want to
 * implement more complex mount option showing.
 *
 * See also save_mount_options().
 */
int generic_show_options(struct seq_file *m, struct dentry *root)
{
	const char *options;

	rcu_read_lock();
	options = rcu_dereference(root->d_sb->s_options);

	if (options != NULL && options[0]) {
		seq_putc(m, ',');
		mangle(m, options);
	}
	rcu_read_unlock();

	return 0;
}
EXPORT_SYMBOL(generic_show_options);

/*
 * If filesystem uses generic_show_options(), this function should be
 * called from the fill_super() callback.
 *
 * The .remount_fs callback usually needs to be handled in a special
 * way, to make sure, that previous options are not overwritten if the
 * remount fails.
 *
 * Also note, that if the filesystem's .remount_fs function doesn't
 * reset all options to their default value, but changes only newly
 * given options, then the displayed options will not reflect reality
 * any more.
 */
void save_mount_options(struct super_block *sb, char *options)
{
	BUG_ON(sb->s_options);
	rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
}
EXPORT_SYMBOL(save_mount_options);

void replace_mount_options(struct super_block *sb, char *options)
{
	char *old = sb->s_options;
	rcu_assign_pointer(sb->s_options, options);
	if (old) {
		synchronize_rcu();
		kfree(old);
	}
}
EXPORT_SYMBOL(replace_mount_options);

#ifdef CONFIG_PROC_FS
/* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct proc_mounts *p = proc_mounts(m);

	down_read(&namespace_sem);
	if (p->cached_event == p->ns->event) {
		void *v = p->cached_mount;
		if (*pos == p->cached_index)
			return v;
		if (*pos == p->cached_index + 1) {
			v = seq_list_next(v, &p->ns->list, &p->cached_index);
			return p->cached_mount = v;
		}
	}

	p->cached_event = p->ns->event;
	p->cached_mount = seq_list_start(&p->ns->list, *pos);
	p->cached_index = *pos;
	return p->cached_mount;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_mounts *p = proc_mounts(m);

	p->cached_mount = seq_list_next(v, &p->ns->list, pos);
	p->cached_index = *pos;
	return p->cached_mount;
}

static void m_stop(struct seq_file *m, void *v)
{
	up_read(&namespace_sem);
}

static int m_show(struct seq_file *m, void *v)
{
	struct proc_mounts *p = proc_mounts(m);
	struct mount *r = list_entry(v, struct mount, mnt_list);
	return p->show(m, &r->mnt);
}

const struct seq_operations mounts_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= m_show,
};
#endif  /* CONFIG_PROC_FS */

/**
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
int may_umount_tree(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int actual_refs = 0;
	int minimum_refs = 0;
	struct mount *p;
	BUG_ON(!m);

	/* write lock needed for mnt_get_count */
	lock_mount_hash();
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		actual_refs += mnt_get_count(p);
		minimum_refs += 2;
	}
	unlock_mount_hash();

	if (actual_refs > minimum_refs)
		return 0;

	return 1;
}

EXPORT_SYMBOL(may_umount_tree);

/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
int may_umount(struct vfsmount *mnt)
{
	int ret = 1;
	down_read(&namespace_sem);
	lock_mount_hash();
	if (propagate_mount_busy(real_mount(mnt), 2))
		ret = 0;
	unlock_mount_hash();
	up_read(&namespace_sem);
	return ret;
}

EXPORT_SYMBOL(may_umount);

static HLIST_HEAD(unmounted);	/* protected by namespace_sem */

static void namespace_unlock(void)
{
	struct mount *mnt;
	struct hlist_head head = unmounted;

	if (likely(hlist_empty(&head))) {
		up_write(&namespace_sem);
		return;
	}

	head.first->pprev = &head.first;
	INIT_HLIST_HEAD(&unmounted);

	/* undo decrements we'd done in umount_tree() */
	hlist_for_each_entry(mnt, &head, mnt_hash)
		if (mnt->mnt_ex_mountpoint.mnt)
			mntget(mnt->mnt_ex_mountpoint.mnt);

	up_write(&namespace_sem);

	synchronize_rcu();

	while (!hlist_empty(&head)) {
		mnt = hlist_entry(head.first, struct mount, mnt_hash);
		hlist_del_init(&mnt->mnt_hash);
		if (mnt->mnt_ex_mountpoint.mnt)
			path_put(&mnt->mnt_ex_mountpoint);
		mntput(&mnt->mnt);
	}
}

static inline void namespace_lock(void)
{
	down_write(&namespace_sem);
}

/*
 * mount_lock must be held
 * namespace_sem must be held for write
 * how = 0 => just this tree, don't propagate
 * how = 1 => propagate; we know that nobody else can mount
 * how = 2 => lazy umount
 */
void umount_tree(struct mount *mnt, int how)
{
	HLIST_HEAD(tmp_list);
	struct mount *p;
	struct mount *last = NULL;

	for (p = mnt; p; p = next_mnt(p, mnt)) {
		hlist_del_init_rcu(&p->mnt_hash);
		hlist_add_head(&p->mnt_hash, &tmp_list);
	}

	hlist_for_each_entry(p, &tmp_list, mnt_hash)
		list_del_init(&p->mnt_child);

	if (how)
		propagate_umount(&tmp_list);

	hlist_for_each_entry(p, &tmp_list, mnt_hash) {
		list_del_init(&p->mnt_expire);
		list_del_init(&p->mnt_list);
		__touch_mnt_namespace(p->mnt_ns);
		p->mnt_ns = NULL;
		if (how < 2)
			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
		if (mnt_has_parent(p)) {
			hlist_del_init(&p->mnt_mp_list);
			put_mountpoint(p->mnt_mp);
			mnt_add_count(p->mnt_parent, -1);
			/* old mountpoint will be dropped when we can do that */
			p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint;
			p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt;
			p->mnt_mountpoint = p->mnt.mnt_root;
			p->mnt_parent = p;
			p->mnt_mp = NULL;
		}
		change_mnt_propagation(p, MS_PRIVATE);
		last = p;
	}
	if (last) {
		last->mnt_hash.next = unmounted.first;
		if (unmounted.first)
			unmounted.first->pprev = &last->mnt_hash.next;
		unmounted.first = tmp_list.first;
		unmounted.first->pprev = &unmounted.first;
	}
}

static void shrink_submounts(struct mount *mnt);

static int do_umount(struct mount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt.mnt_sb;
	int retval;

	retval = security_sb_umount(&mnt->mnt, flags);
	if (retval)
		return retval;

	/*
	 * Allow userspace to request a mountpoint be expired rather than
	 * unmounting unconditionally. Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
	if (flags & MNT_EXPIRE) {
		if (&mnt->mnt == current->fs->root.mnt ||
		    flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		/*
		 * probably don't strictly need the lock here if we examined
		 * all race cases, but it's a slowpath.
		 */
		lock_mount_hash();
		if (mnt_get_count(mnt) != 2) {
			unlock_mount_hash();
			return -EBUSY;
		}
		unlock_mount_hash();

		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things. In the Unix tradition of
	 * 'Gee thats tricky lets do it in userspace' the umount_begin
	 * might fail to complete on the first run through as other tasks
	 * must return, and the like. Thats for the mount program to worry
	 * about for the moment.
	 */

	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
		sb->s_op->umount_begin(sb);
	}

	/*
	 * No sense to grab the lock for this test, but test itself looks
	 * somewhat bogus. Suggestions for better replacement?
	 * Ho-hum... In principle, we might treat that as umount + switch
	 * to rootfs. GC would eventually take care of the old vfsmount.
	 * Actually it makes sense, especially if rootfs would contain a
	 * /reboot - static binary that would close all descriptors and
	 * call reboot(9). Then init(8) could umount root and exec /reboot.
	 */
	if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		down_write(&sb->s_umount);
		if (!(sb->s_flags & MS_RDONLY))
			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
		up_write(&sb->s_umount);
		return retval;
	}

	namespace_lock();
	lock_mount_hash();
	event++;

	if (flags & MNT_DETACH) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, 2);
		retval = 0;
	} else {
		shrink_submounts(mnt);
		retval = -EBUSY;
		if (!propagate_mount_busy(mnt, 2)) {
			if (!list_empty(&mnt->mnt_list))
				umount_tree(mnt, 1);
			retval = 0;
		}
	}
	unlock_mount_hash();
	namespace_unlock();
	return retval;
}

/*
 * __detach_mounts - lazily unmount all mounts on the specified dentry
 *
 * During unlink, rmdir, and d_drop it is possible to loose the path
 * to an existing mountpoint, and wind up leaking the mount.
 * detach_mounts allows lazily unmounting those mounts instead of
 * leaking them.
 *
 * The caller may hold dentry->d_inode->i_mutex.
 */
void __detach_mounts(struct dentry *dentry)
{
	struct mountpoint *mp;
	struct mount *mnt;

	namespace_lock();
	mp = lookup_mountpoint(dentry);
	if (!mp)
		goto out_unlock;

	lock_mount_hash();
	while (!hlist_empty(&mp->m_list)) {
		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
		umount_tree(mnt, 2);
	}
	unlock_mount_hash();
	put_mountpoint(mp);
out_unlock:
	namespace_unlock();
}

/*
 * Is the caller allowed to modify his namespace?
 */
static inline bool may_mount(void)
{
	return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}

/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes.  Our API is identical to OSF/1 to avoid making a mess of AIX
 * or Tru64.
 */
SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	struct path path;
	struct mount *mnt;
	int retval;
	int lookup_flags = 0;

	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
		return -EINVAL;

	if (!may_mount())
		return -EPERM;

	if (!(flags & UMOUNT_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;

	retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
	if (retval)
		goto out;
	mnt = real_mount(path.mnt);
	retval = -EINVAL;
	if (path.dentry != path.mnt->mnt_root)
		goto dput_and_out;
	if (!check_mnt(mnt))
		goto dput_and_out;
	if (mnt->mnt.mnt_flags & MNT_LOCKED)
		goto dput_and_out;
	retval = -EPERM;
	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
		goto dput_and_out;

	retval = do_umount(mnt, flags);
dput_and_out:
	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
	dput(path.dentry);
	mntput_no_expire(mnt);
out:
	return retval;
}

#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 *	The 2.0 compatible umount. No flags.
 */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return sys_umount(name, 0);
}

#endif

static bool is_mnt_ns_file(struct dentry *dentry)
{
	/* Is this a proxy for a mount namespace? */
	return dentry->d_op == &ns_dentry_operations &&
	       dentry->d_fsdata == &mntns_operations;
}

struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
	return container_of(ns, struct mnt_namespace, ns);
}

static bool mnt_ns_loop(struct dentry *dentry)
{
	/* Could bind mounting the mount namespace inode cause a
	 * mount namespace loop?
	 */
	struct mnt_namespace *mnt_ns;
	if (!is_mnt_ns_file(dentry))
		return false;

	mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}

struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
					int flag)
{
	struct mount *res, *p, *q, *r, *parent;

	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
		return ERR_PTR(-EINVAL);

	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		struct mount *s;
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			struct mount *t = NULL;
			if (!(flag & CL_COPY_UNBINDABLE) &&
			    IS_MNT_UNBINDABLE(s)) {
				s = skip_mnt_tree(s);
				continue;
			}
			if (!(flag & CL_COPY_MNT_NS_FILE) &&
			    is_mnt_ns_file(s->mnt.mnt_root)) {
				s = skip_mnt_tree(s);
				continue;
			}
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			parent = q;
			q = clone_mnt(p, p->mnt.mnt_root, flag);
			if (IS_ERR(q))
				goto out;
			lock_mount_hash();
			list_add_tail(&q->mnt_list, &res->mnt_list);
			mnt_set_mountpoint(parent, p->mnt_mp, q);
			if (!list_empty(&parent->mnt_mounts)) {
				t = list_last_entry(&parent->mnt_mounts,
					struct mount, mnt_child);
				if (t->mnt_mp != p->mnt_mp)
					t = NULL;
			}
			attach_shadowed(q, parent, t);
			unlock_mount_hash();
		}
	}
	return res;
out:
	if (res) {
		lock_mount_hash();
		umount_tree(res, 0);
		unlock_mount_hash();
	}
	return q;
}

/* Caller should check returned pointer for errors */

struct vfsmount *collect_mounts(struct path *path)
{
	struct mount *tree;
	namespace_lock();
	tree = copy_tree(real_mount(path->mnt), path->dentry,
			 CL_COPY_ALL | CL_PRIVATE);
	namespace_unlock();
	if (IS_ERR(tree))
		return ERR_CAST(tree);
	return &tree->mnt;
}

void drop_collected_mounts(struct vfsmount *mnt)
{
	namespace_lock();
	lock_mount_hash();
	umount_tree(real_mount(mnt), 0);
	unlock_mount_hash();
	namespace_unlock();
}

/**
 * clone_private_mount - create a private clone of a path
 *
 * This creates a new vfsmount, which will be the clone of @path.  The new mount
 * will not be attached anywhere in the namespace and will be private (i.e.
 * changes to the originating mount won't be propagated into this).
 *
 * Release with mntput().
 */
struct vfsmount *clone_private_mount(struct path *path)
{
	struct mount *old_mnt = real_mount(path->mnt);
	struct mount *new_mnt;

	if (IS_MNT_UNBINDABLE(old_mnt))
		return ERR_PTR(-EINVAL);

	down_read(&namespace_sem);
	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
	up_read(&namespace_sem);
	if (IS_ERR(new_mnt))
		return ERR_CAST(new_mnt);

	return &new_mnt->mnt;
}
EXPORT_SYMBOL_GPL(clone_private_mount);

int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
		   struct vfsmount *root)
{
	struct mount *mnt;
	int res = f(root, arg);
	if (res)
		return res;
	list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
		res = f(&mnt->mnt, arg);
		if (res)
			return res;
	}
	return 0;
}

static void cleanup_group_ids(struct mount *mnt, struct mount *end)
{
	struct mount *p;

	for (p = mnt; p != end; p = next_mnt(p, mnt)) {
		if (p->mnt_group_id && !IS_MNT_SHARED(p))
			mnt_release_group_id(p);
	}
}

static int invent_group_ids(struct mount *mnt, bool recurse)
{
	struct mount *p;

	for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
		if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
			int err = mnt_alloc_group_id(p);
			if (err) {
				cleanup_group_ids(mnt, p);
				return err;
			}
		}
	}

	return 0;
}

/*
 * attach_recursive_mnt - attach a source mount tree to a destination
 * @source_mnt: mount tree to be attached
 * @dest_mnt:   mount where @source_mnt is attached
 * @dest_mp:    mountpoint dentry within @dest_mnt
 * @parent_path: if non-NULL, detach @source_mnt from its current parent
 *               first and return the old parent and mountpoint here
 *               (this is the move-mount case)
 *
 * If the destination mount is shared, the source tree is additionally
 * propagated to every mount in the destination's peer group (and,
 * transitively, to their slaves), and every mount so attached is made
 * shared as well; attaching to a non-shared destination simply grafts the
 * tree in place.  Group IDs are invented up front so the whole operation
 * can be unwound on failure.
 *
 * Must be called with namespace_sem held for write; takes the mount hash
 * lock internally.
 */
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			struct path *parent_path)
{
	HLIST_HEAD(tree_list);
	struct mount *child, *p;
	struct hlist_node *n;
	int err;

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		lock_mount_hash();
		if (err)
			goto out_cleanup_ids;
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	} else {
		lock_mount_hash();
	}
	if (parent_path) {
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt, NULL);
	}

	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
		struct mount *q;
		hlist_del_init(&child->mnt_hash);
		q = __lookup_mnt_last(&child->mnt_parent->mnt,
				      child->mnt_mountpoint);
		commit_tree(child, q);
	}
	unlock_mount_hash();

	return 0;

 out_cleanup_ids:
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		umount_tree(child, 0);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
 out:
	return err;
}

static struct mountpoint *lock_mount(struct path *path)
{
	struct vfsmount *mnt;
	struct dentry *dentry = path->dentry;
retry:
	mutex_lock(&dentry->d_inode->i_mutex);
	if (unlikely(cant_mount(dentry))) {
		mutex_unlock(&dentry->d_inode->i_mutex);
		return ERR_PTR(-ENOENT);
	}
	namespace_lock();
	mnt = lookup_mnt(path);
	if (likely(!mnt)) {
		struct mountpoint *mp = lookup_mountpoint(dentry);
		if (!mp)
			mp = new_mountpoint(dentry);
		if (IS_ERR(mp)) {
			namespace_unlock();
			mutex_unlock(&dentry->d_inode->i_mutex);
			return mp;
		}
		return mp;
	}
	namespace_unlock();
	mutex_unlock(&path->dentry->d_inode->i_mutex);
	path_put(path);
	path->mnt = mnt;
	dentry = path->dentry = dget(mnt->mnt_root);
	goto retry;
}

static void unlock_mount(struct mountpoint *where)
{
	struct dentry *dentry = where->m_dentry;
	put_mountpoint(where);
	namespace_unlock();
	mutex_unlock(&dentry->d_inode->i_mutex);
}

static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
{
	if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
		return -EINVAL;

	if (S_ISDIR(mp->m_dentry->d_inode->i_mode) !=
	      S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
		return -ENOTDIR;

	return attach_recursive_mnt(mnt, p, mp, NULL);
}

/*
 * Sanity check the flags to change_mnt_propagation.
 */
static int flags_to_propagation_type(int flags)
{
	int type = flags & ~(MS_REC | MS_SILENT);

	/* Fail if any non-propagation flags are set */
	if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		return 0;
	/* Only one propagation flag should be set */
	if (!is_power_of_2(type))
		return 0;
	return type;
}
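
/*
 * Worked example (added): MS_SHARED | MS_REC strips to MS_SHARED, a single
 * power-of-two bit, so the type is accepted (and applied recursively by
 * do_change_type() below).  MS_SHARED | MS_SLAVE leaves two bits set,
 * fails the is_power_of_2() test, and the caller returns -EINVAL.
 */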

/*
 * recursively change the type of the mountpoint.
 */
static int do_change_type(struct path *path, int flag)
{
	struct mount *m;
	struct mount *mnt = real_mount(path->mnt);
	int recurse = flag & MS_REC;
	int type;
	int err = 0;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	type = flags_to_propagation_type(flag);
	if (!type)
		return -EINVAL;

	namespace_lock();
	if (type == MS_SHARED) {
		err = invent_group_ids(mnt, recurse);
		if (err)
			goto out_unlock;
	}

	lock_mount_hash();
	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
		change_mnt_propagation(m, type);
	unlock_mount_hash();

 out_unlock:
	namespace_unlock();
	return err;
}

static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	struct mount *child;
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (!is_subdir(child->mnt_mountpoint, dentry))
			continue;

		if (child->mnt.mnt_flags & MNT_LOCKED)
			return true;
	}
	return false;
}

/*
 * do loopback mount.
 */
static int do_loopback(struct path *path, const char *old_name,
				int recurse)
{
	struct path old_path;
	struct mount *mnt = NULL, *old, *parent;
	struct mountpoint *mp;
	int err;

	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
	if (err)
		return err;

	err = -EINVAL;
	if (mnt_ns_loop(old_path.dentry))
		goto out;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	parent = real_mount(path->mnt);

	err = -EINVAL;
	if (IS_MNT_UNBINDABLE(old))
		goto out2;

	if (!check_mnt(parent))
		goto out2;

	if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
		goto out2;

	if (!recurse && has_locked_children(old, old_path.dentry))
		goto out2;

	if (recurse)
		mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
	else
		mnt = clone_mnt(old, old_path.dentry, 0);

	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto out2;
	}

	mnt->mnt.mnt_flags &= ~MNT_LOCKED;

	err = graft_tree(mnt, parent, mp);
	if (err) {
		lock_mount_hash();
		umount_tree(mnt, 0);
		unlock_mount_hash();
	}
out2:
	unlock_mount(mp);
out:
	path_put(&old_path);
	return err;
}

static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
{
	int error = 0;
	int readonly_request = 0;

	if (ms_flags & MS_RDONLY)
		readonly_request = 1;
	if (readonly_request == __mnt_is_readonly(mnt))
		return 0;

	if (readonly_request)
		error = mnt_make_readonly(real_mount(mnt));
	else
		__mnt_unmake_readonly(real_mount(mnt));
	return error;
}

/*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tough luck.
 */
static int do_remount(struct path *path, int flags, int mnt_flags,
		      void *data)
{
	int err;
	struct super_block *sb = path->mnt->mnt_sb;
	struct mount *mnt = real_mount(path->mnt);

	if (!check_mnt(mnt))
		return -EINVAL;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	/* Don't allow changing of locked mnt flags.
	 *
	 * No locks need to be held here while testing the various
	 * MNT_LOCK flags because those flags can never be cleared
	 * once they are set.
	 */
	if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
	    !(mnt_flags & MNT_READONLY)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
	    !(mnt_flags & MNT_NODEV)) {
		/* Was the nodev implicitly added in mount? */
		if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
		    !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
			mnt_flags |= MNT_NODEV;
		} else {
			return -EPERM;
		}
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
	    !(mnt_flags & MNT_NOSUID)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
	    !(mnt_flags & MNT_NOEXEC)) {
		return -EPERM;
	}
	if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
	    ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
		return -EPERM;
	}

	err = security_sb_remount(sb, data);
	if (err)
		return err;

	down_write(&sb->s_umount);
	if (flags & MS_BIND)
		err = change_mount_flags(path->mnt, flags);
	else if (!capable(CAP_SYS_ADMIN))
		err = -EPERM;
	else
		err = do_remount_sb(sb, flags, data, 0);
	if (!err) {
		lock_mount_hash();
		mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
		mnt->mnt.mnt_flags = mnt_flags;
		touch_mnt_namespace(mnt->mnt_ns);
		unlock_mount_hash();
	}
	up_write(&sb->s_umount);
	return err;
}

static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *p;
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		if (IS_MNT_UNBINDABLE(p))
			return 1;
	}
	return 0;
}

static int do_move_mount(struct path *path, const char *old_name)
{
	struct path old_path, parent_path;
	struct mount *p;
	struct mount *old;
	struct mountpoint *mp;
	int err;

	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	if (err)
		return err;

	mp = lock_mount(path);
	err = PTR_ERR(mp);
	if (IS_ERR(mp))
		goto out;

	old = real_mount(old_path.mnt);
	p = real_mount(path->mnt);

	err = -EINVAL;
	if (!check_mnt(p) || !check_mnt(old))
		goto out1;

	if (old->mnt.mnt_flags & MNT_LOCKED)
		goto out1;

	err = -EINVAL;
	if (old_path.dentry != old_path.mnt->mnt_root)
		goto out1;

	if (!mnt_has_parent(old))
		goto out1;

	if (S_ISDIR(path->dentry->d_inode->i_mode) !=
	      S_ISDIR(old_path.dentry->d_inode->i_mode))
		goto out1;
	/*
	 * Don't move a mount residing in a shared parent.
	 */
	if (IS_MNT_SHARED(old->mnt_parent))
		goto out1;
	/*
	 * Don't move a mount tree containing unbindable mounts to a
	 * destination mount which is shared.
	 */
	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
		goto out1;
	err = -ELOOP;
	for (; mnt_has_parent(p); p = p->mnt_parent)
		if (p == old)
			goto out1;

	err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
	if (err)
		goto out1;

	/* if the mount is moved, it should no longer be expire
	 * automatically */
	list_del_init(&old->mnt_expire);
out1:
	unlock_mount(mp);
out:
	if (!err)
		path_put(&parent_path);
	path_put(&old_path);
	return err;
}

static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
{
	int err;
	const char *subtype = strchr(fstype, '.');
	if (subtype) {
		subtype++;
		err = -EINVAL;
		if (!subtype[0])
			goto err;
	} else
		subtype = "";

	mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
	err = -ENOMEM;
	if (!mnt->mnt_sb->s_subtype)
		goto err;
	return mnt;

 err:
	mntput(mnt);
	return ERR_PTR(err);
}

/*
 * add a mount into a namespace's mount tree
 */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
	struct mountpoint *mp;
	struct mount *parent;
	int err;

	mnt_flags &= ~MNT_INTERNAL_FLAGS;

	mp = lock_mount(path);
	if (IS_ERR(mp))
		return PTR_ERR(mp);

	parent = real_mount(path->mnt);
	err = -EINVAL;
	if (unlikely(!check_mnt(parent))) {
		/* that's acceptable only for automounts done in private ns */
		if (!(mnt_flags & MNT_SHRINKABLE))
			goto unlock;
		/* ... and for those we'd better have mountpoint still alive */
		if (!parent->mnt_ns)
			goto unlock;
	}

	/* Refuse the same filesystem on the same mount point */
	err = -EBUSY;
	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
	    path->mnt->mnt_root == path->dentry)
		goto unlock;

	err = -EINVAL;
	if (S_ISLNK(newmnt->mnt.mnt_root->d_inode->i_mode))
		goto unlock;

	newmnt->mnt.mnt_flags = mnt_flags;
	err = graft_tree(newmnt, parent, mp);

unlock:
	unlock_mount(mp);
	return err;
}

/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
static int do_new_mount(struct path *path, const char *fstype, int flags,
			int mnt_flags, const char *name, void *data)
{
	struct file_system_type *type;
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	struct vfsmount *mnt;
	int err;

	if (!fstype)
		return -EINVAL;

	type = get_fs_type(fstype);
	if (!type)
		return -ENODEV;

	if (user_ns != &init_user_ns) {
		if (!(type->fs_flags & FS_USERNS_MOUNT)) {
			put_filesystem(type);
			return -EPERM;
		}
		/* Only in special cases allow devices from mounts
		 * created outside the initial user namespace.
		 */
		if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
			flags |= MS_NODEV;
			mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
		}
	}

	mnt = vfs_kern_mount(type, flags, name, data);
	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
	    !mnt->mnt_sb->s_subtype)
		mnt = fs_set_subtype(mnt, fstype);

	put_filesystem(type);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	err = do_add_mount(real_mount(mnt), path, mnt_flags);
	if (err)
		mntput(mnt);
	return err;
}

int finish_automount(struct vfsmount *m, struct path *path)
{
	struct mount *mnt = real_mount(m);
	int err;

	/* The new mount record should have at least 2 refs to prevent it being
	 * expired before we get a chance to add it
	 */
	BUG_ON(mnt_get_count(mnt) < 2);

	if (m->mnt_sb == path->mnt->mnt_sb &&
	    m->mnt_root == path->dentry) {
		err = -ELOOP;
		goto fail;
	}

	err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
	if (!err)
		return 0;
fail:
	/* remove m from any expiration list it may be on */
	if (!list_empty(&mnt->mnt_expire)) {
		namespace_lock();
		list_del_init(&mnt->mnt_expire);
		namespace_unlock();
	}
	mntput(m);
	mntput(m);
	return err;
}

/**
 * mnt_set_expiry - Put a mount on an expiration list
 * @mnt: The mount to put on the list.
 * @expiry_list: The list to put the mount on.
 */
void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
{
	namespace_lock();

	list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);

	namespace_unlock();
}
EXPORT_SYMBOL(mnt_set_expiry);

/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here
 */
void mark_mounts_for_expiry(struct list_head *mounts)
{
	struct mount *mnt, *next;
	LIST_HEAD(graveyard);

	if (list_empty(mounts))
		return;

	namespace_lock();
	lock_mount_hash();

	/* extract from the expiration list every vfsmount that matches the
	 * following criteria:
	 * - only referenced by its parent vfsmount
	 * - still marked for expiry (marked on the last call here; marks are
	 *   cleared by mntput())
	 */
	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
			propagate_mount_busy(mnt, 1))
			continue;
		list_move(&mnt->mnt_expire, &graveyard);
	}
	while (!list_empty(&graveyard)) {
		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
		touch_mnt_namespace(mnt->mnt_ns);
		umount_tree(mnt, 1);
	}
	unlock_mount_hash();
	namespace_unlock();
}

EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);

/*
 * search the list of submounts for a given mountpoint, and move any
 * shrinkable submounts to the 'graveyard' list.
 */
static int select_submounts(struct mount *parent, struct list_head *graveyard)
{
	struct mount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct mount *mnt = list_entry(tmp, struct mount, mnt_child);

		next = tmp->next;
		if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
			continue;
		/*
		 * Descend a level if the d_mounts list is non-empty.
		 */
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		if (!propagate_mount_busy(mnt, 1)) {
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search
	 */
	if (this_parent != parent) {
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}

/*
 * process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint
 *
 * mount_lock must be held for write
 */
static void shrink_submounts(struct mount *mnt)
{
	LIST_HEAD(graveyard);
	struct mount *m;

	/* extract submounts of 'mountpoint' from the expiration list */
	while (select_submounts(mnt, &graveyard)) {
		while (!list_empty(&graveyard)) {
			m = list_first_entry(&graveyard, struct mount,
						mnt_expire);
			touch_mnt_namespace(m->mnt_ns);
			umount_tree(m, 1);
		}
	}
}

/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault.  But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy.
 */
static long exact_copy_from_user(void *to, const void __user * from,
				 unsigned long n)
{
	char *t = to;
	const char __user *f = from;
	char c;

	if (!access_ok(VERIFY_READ, from, n))
		return n;

	while (n) {
		if (__get_user(c, f)) {
			memset(t, 0, n);
			break;
		}
		*t++ = c;
		f++;
		n--;
	}
	return n;
}

int copy_mount_options(const void __user * data, unsigned long *where)
{
	int i;
	unsigned long page;
	unsigned long size;

	*where = 0;
	if (!data)
		return 0;

	if (!(page = __get_free_page(GFP_KERNEL)))
		return -ENOMEM;

	/*
	 * We only care that *some* data at the address the user
	 * gave us is valid.  Just in case, we'll zero
	 * the remainder of the page.
	 */
	/* copy_from_user cannot cross TASK_SIZE ! */
	size = TASK_SIZE - (unsigned long)data;
	if (size > PAGE_SIZE)
		size = PAGE_SIZE;

	i = size - exact_copy_from_user((void *)page, data, size);
	if (!i) {
		free_page(page);
		return -EFAULT;
	}
	if (i != PAGE_SIZE)
		memset((char *)page + i, 0, PAGE_SIZE - i);
	*where = page;
	return 0;
}

char *copy_mount_string(const void __user *data)
{
	return data ? strndup_user(data, PAGE_SIZE) : NULL;
}

/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */
long do_mount(const char *dev_name, const char __user *dir_name,
		const char *type_page, unsigned long flags, void *data_page)
{
	struct path path;
	int retval = 0;
	int mnt_flags = 0;

	/* Discard magic */
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

	/* Basic sanity checks */
	if (data_page)
		((char *)data_page)[PAGE_SIZE - 1] = 0;

	/* ... and get the mountpoint */
	retval = user_path(dir_name, &path);
	if (retval)
		return retval;

	retval = security_sb_mount(dev_name, &path,
				   type_page, flags, data_page);
	if (!retval && !may_mount())
		retval = -EPERM;
	if (retval)
		goto dput_out;

	/* Default to relatime unless overriden */
	if (!(flags & MS_NOATIME))
		mnt_flags |= MNT_RELATIME;

	/* Separate the per-mountpoint flags */
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	if (flags & MS_NOATIME)
		mnt_flags |= MNT_NOATIME;
	if (flags & MS_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;
	if (flags & MS_STRICTATIME)
		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
	if (flags & MS_RDONLY)
		mnt_flags |= MNT_READONLY;

	/* The default atime for remount is preservation */
	if ((flags & MS_REMOUNT) &&
	    ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
		       MS_STRICTATIME)) == 0)) {
		mnt_flags &= ~MNT_ATIME_MASK;
		mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
	}

	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
		   MS_STRICTATIME);

	if (flags & MS_REMOUNT)
		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
				    data_page);
	else if (flags & MS_BIND)
		retval = do_loopback(&path, dev_name, flags & MS_REC);
	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		retval = do_change_type(&path, flags);
	else if (flags & MS_MOVE)
		retval = do_move_mount(&path, dev_name);
	else
		retval = do_new_mount(&path, type_page, flags, mnt_flags,
				      dev_name, data_page);
dput_out:
	path_put(&path);
	return retval;
}
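
/*
 * Illustration (added): a userspace call such as
 *
 *	mount("/dev/sda1", "/mnt", "ext4", MS_RDONLY | MS_NOSUID, NULL);
 *
 * arrives here with no magic to discard, picks up MNT_READONLY,
 * MNT_NOSUID and (by default) MNT_RELATIME, and since none of MS_REMOUNT,
 * MS_BIND, MS_MOVE or the propagation-type flags are set it falls through
 * to do_new_mount().
 */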
2650
2651static void free_mnt_ns(struct mnt_namespace *ns)
2652{
2653 ns_free_inum(&ns->ns);
2654 put_user_ns(ns->user_ns);
2655 kfree(ns);
2656}
2657
2658
2659
2660
2661
2662
2663
2664
2665static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
2666
2667static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
2668{
2669 struct mnt_namespace *new_ns;
2670 int ret;
2671
2672 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
2673 if (!new_ns)
2674 return ERR_PTR(-ENOMEM);
2675 ret = ns_alloc_inum(&new_ns->ns);
2676 if (ret) {
2677 kfree(new_ns);
2678 return ERR_PTR(ret);
2679 }
2680 new_ns->ns.ops = &mntns_operations;
2681 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
2682 atomic_set(&new_ns->count, 1);
2683 new_ns->root = NULL;
2684 INIT_LIST_HEAD(&new_ns->list);
2685 init_waitqueue_head(&new_ns->poll);
2686 new_ns->event = 0;
2687 new_ns->user_ns = get_user_ns(user_ns);
2688 return new_ns;
2689}

struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
		struct user_namespace *user_ns, struct fs_struct *new_fs)
{
	struct mnt_namespace *new_ns;
	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
	struct mount *p, *q;
	struct mount *old;
	struct mount *new;
	int copy_flags;

	BUG_ON(!ns);

	if (likely(!(flags & CLONE_NEWNS))) {
		get_mnt_ns(ns);
		return ns;
	}

	old = ns->root;

	new_ns = alloc_mnt_ns(user_ns);
	if (IS_ERR(new_ns))
		return new_ns;

	namespace_lock();
	/* First pass: copy the tree topology */
	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
	if (user_ns != ns->user_ns)
		copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		namespace_unlock();
		free_mnt_ns(new_ns);
		return ERR_CAST(new);
	}
	new_ns->root = new;
	list_add_tail(&new_ns->list, &new->mnt_list);

	/*
	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
	 * as belonging to new namespace.  We have already acquired a private
	 * fs_struct, so tsk->fs->lock is not needed.
	 */
	p = old;
	q = new;
	while (p) {
		q->mnt_ns = new_ns;
		if (new_fs) {
			if (&p->mnt == new_fs->root.mnt) {
				new_fs->root.mnt = mntget(&q->mnt);
				rootmnt = &p->mnt;
			}
			if (&p->mnt == new_fs->pwd.mnt) {
				new_fs->pwd.mnt = mntget(&q->mnt);
				pwdmnt = &p->mnt;
			}
		}
		p = next_mnt(p, old);
		q = next_mnt(q, new);
		if (!q)
			break;
		while (p->mnt.mnt_root != q->mnt.mnt_root)
			p = next_mnt(p, old);
	}
	namespace_unlock();

	if (rootmnt)
		mntput(rootmnt);
	if (pwdmnt)
		mntput(pwdmnt);

	return new_ns;
}
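/*
 * Editorial sketch (not part of the original source): copy_mnt_ns() runs
 * on the CLONE_NEWNS path, e.g. when userspace does
 *
 *	unshare(CLONE_NEWNS);
 *	mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);
 *
 * after which new mounts made by this process stay out of the parent
 * namespace (subject to each mount's propagation type).
 */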

/**
 * create_mnt_ns - creates a private namespace and adds a root filesystem
 * @m: pointer to the new root filesystem mountpoint
 */
static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
{
	struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);
	if (!IS_ERR(new_ns)) {
		struct mount *mnt = real_mount(m);
		mnt->mnt_ns = new_ns;
		new_ns->root = mnt;
		list_add(&mnt->mnt_list, &new_ns->list);
	} else {
		mntput(m);
	}
	return new_ns;
}

struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
{
	struct mnt_namespace *ns;
	struct super_block *s;
	struct path path;
	int err;

	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		return ERR_CAST(ns);

	err = vfs_path_lookup(mnt->mnt_root, mnt,
			name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);

	put_mnt_ns(ns);

	if (err)
		return ERR_PTR(err);

	/* trade a vfsmount reference for active sb one */
	s = path.mnt->mnt_sb;
	atomic_inc(&s->s_active);
	mntput(path.mnt);
	/* lock the superblock */
	down_write(&s->s_umount);
	/* ... and return the root of (sub)tree on it */
	return path.dentry;
}
EXPORT_SYMBOL(mount_subtree);
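/*
 * Editorial sketch (not part of the original source; names are made up):
 * a filesystem that mounts a whole tree but wants a subdirectory as the
 * root of the returned mount can do roughly
 *
 *	struct vfsmount *mnt = vfs_kern_mount(type, 0, name, data);
 *	struct dentry *root = mount_subtree(mnt, "/some/subdir");
 *
 * mount_subtree() consumes the vfsmount reference and hands back the
 * looked-up dentry with an active superblock reference and s_umount
 * held, as implemented above.
 */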

SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	int ret;
	char *kernel_type;
	char *kernel_dev;
	unsigned long data_page;

	kernel_type = copy_mount_string(type);
	ret = PTR_ERR(kernel_type);
	if (IS_ERR(kernel_type))
		goto out_type;

	kernel_dev = copy_mount_string(dev_name);
	ret = PTR_ERR(kernel_dev);
	if (IS_ERR(kernel_dev))
		goto out_dev;

	ret = copy_mount_options(data, &data_page);
	if (ret < 0)
		goto out_data;

	ret = do_mount(kernel_dev, dir_name, kernel_type, flags,
		       (void *) data_page);

	free_page(data_page);
out_data:
	kfree(kernel_dev);
out_dev:
	kfree(kernel_type);
out_type:
	return ret;
}
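/*
 * Editorial note (not part of the original source): type, dev and data
 * are copied into kernel memory here, while dir_name is handed to
 * do_mount() still as a user pointer and resolved there. The userspace
 * view of this entry point is the familiar
 *
 *	int mount(const char *source, const char *target,
 *		  const char *filesystemtype, unsigned long mountflags,
 *		  const void *data);
 */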

/*
 * Return true if path is reachable from root
 *
 * namespace_sem or mount_lock is held
 */
bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
			 const struct path *root)
{
	while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
		dentry = mnt->mnt_mountpoint;
		mnt = mnt->mnt_parent;
	}
	return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
}

int path_is_under(struct path *path1, struct path *path2)
{
	int res;
	read_seqlock_excl(&mount_lock);
	res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
	read_sequnlock_excl(&mount_lock);
	return res;
}
EXPORT_SYMBOL(path_is_under);
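/*
 * Editorial example (not part of the original source; path_to() is a
 * stand-in for any lookup that fills a struct path): with /srv/a/b on a
 * mount somewhere under /srv,
 *
 *	path_is_under(path_to("/srv/a/b"), path_to("/srv"))	-> 1
 *	path_is_under(path_to("/srv"), path_to("/srv/a/b"))	-> 0
 *
 * because is_path_reachable() climbs mount parents until it can compare
 * dentries within a single mount.
 */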

/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root the new root file system of the current process, and sets
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root. The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root. No other
 * file system may be mounted on put_old. After all, new_root is a mountpoint.
 *
 * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
 * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
 * in this situation.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
 *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
 *    first.
 */
SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
		const char __user *, put_old)
{
	struct path new, old, parent_path, root_parent, root;
	struct mount *new_mnt, *root_mnt, *old_mnt;
	struct mountpoint *old_mp, *root_mp;
	int error;

	if (!may_mount())
		return -EPERM;

	error = user_path_dir(new_root, &new);
	if (error)
		goto out0;

	error = user_path_dir(put_old, &old);
	if (error)
		goto out1;

	error = security_sb_pivotroot(&old, &new);
	if (error)
		goto out2;

	get_fs_root(current->fs, &root);
	old_mp = lock_mount(&old);
	error = PTR_ERR(old_mp);
	if (IS_ERR(old_mp))
		goto out3;

	error = -EINVAL;
	new_mnt = real_mount(new.mnt);
	root_mnt = real_mount(root.mnt);
	old_mnt = real_mount(old.mnt);
	if (IS_MNT_SHARED(old_mnt) ||
		IS_MNT_SHARED(new_mnt->mnt_parent) ||
		IS_MNT_SHARED(root_mnt->mnt_parent))
		goto out4;
	if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
		goto out4;
	if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
		goto out4;
	error = -ENOENT;
	if (d_unlinked(new.dentry))
		goto out4;
	error = -EBUSY;
	if (new_mnt == root_mnt || old_mnt == root_mnt)
		goto out4; /* loop, on the same file system */
	error = -EINVAL;
	if (root.mnt->mnt_root != root.dentry)
		goto out4; /* not a mountpoint */
	if (!mnt_has_parent(root_mnt))
		goto out4; /* not attached */
	root_mp = root_mnt->mnt_mp;
	if (new.mnt->mnt_root != new.dentry)
		goto out4; /* not a mountpoint */
	if (!mnt_has_parent(new_mnt))
		goto out4; /* not attached */
	/* make sure we can reach put_old from new_root */
	if (!is_path_reachable(old_mnt, old.dentry, &new))
		goto out4;
	/* make certain new is below the root */
	if (!is_path_reachable(new_mnt, new.dentry, &root))
		goto out4;
	root_mp->m_count++; /* pin it so it won't go away */
	lock_mount_hash();
	detach_mnt(new_mnt, &parent_path);
	detach_mnt(root_mnt, &root_parent);
	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
		new_mnt->mnt.mnt_flags |= MNT_LOCKED;
		root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
	}
	/* mount old root on put_old */
	attach_mnt(root_mnt, old_mnt, old_mp);
	/* mount new_root on / */
	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
	touch_mnt_namespace(current->nsproxy->mnt_ns);
	/* A moved mount should not expire automatically */
	list_del_init(&new_mnt->mnt_expire);
	unlock_mount_hash();
	chroot_fs_refs(&root, &new);
	put_mountpoint(root_mp);
	error = 0;
out4:
	unlock_mount(old_mp);
	if (!error) {
		path_put(&root_parent);
		path_put(&parent_path);
	}
out3:
	path_put(&root);
out2:
	path_put(&old);
out1:
	path_put(&new);
out0:
	return error;
}
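/*
 * Editorial usage sketch (not part of the original source): a container
 * runtime typically performs roughly
 *
 *	mount(new_root, new_root, NULL, MS_BIND, NULL);	// must be a mount point
 *	chdir(new_root);
 *	syscall(SYS_pivot_root, ".", "oldroot");	// "oldroot" pre-created
 *	umount2("/oldroot", MNT_DETACH);
 *
 * which exercises the reachability and mountpoint checks above.
 */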

static void __init init_mount_tree(void)
{
	struct vfsmount *mnt;
	struct mnt_namespace *ns;
	struct path root;
	struct file_system_type *type;

	type = get_fs_type("rootfs");
	if (!type)
		panic("Can't find rootfs type");
	mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
	put_filesystem(type);
	if (IS_ERR(mnt))
		panic("Can't create rootfs");

	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		panic("Can't allocate initial namespace");

	init_task.nsproxy->mnt_ns = ns;
	get_mnt_ns(ns);

	root.mnt = mnt;
	root.dentry = mnt->mnt_root;
	mnt->mnt_flags |= MNT_LOCKED;

	set_fs_pwd(current->fs, &root);
	set_fs_root(current->fs, &root);
}

void __init mnt_init(void)
{
	unsigned u;
	int err;

	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	mount_hashtable = alloc_large_system_hash("Mount-cache",
				sizeof(struct hlist_head),
				mhash_entries, 19,
				0,
				&m_hash_shift, &m_hash_mask, 0, 0);
	mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
				sizeof(struct hlist_head),
				mphash_entries, 19,
				0,
				&mp_hash_shift, &mp_hash_mask, 0, 0);

	if (!mount_hashtable || !mountpoint_hashtable)
		panic("Failed to allocate mount hash table\n");

	for (u = 0; u <= m_hash_mask; u++)
		INIT_HLIST_HEAD(&mount_hashtable[u]);
	for (u = 0; u <= mp_hash_mask; u++)
		INIT_HLIST_HEAD(&mountpoint_hashtable[u]);

	kernfs_init();

	err = sysfs_init();
	if (err)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, err);
	fs_kobj = kobject_create_and_add("fs", NULL);
	if (!fs_kobj)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);
	init_rootfs();
	init_mount_tree();
}

void put_mnt_ns(struct mnt_namespace *ns)
{
	if (!atomic_dec_and_test(&ns->count))
		return;
	drop_collected_mounts(&ns->root->mnt);
	free_mnt_ns(ns);
}

struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
{
	struct vfsmount *mnt;
	mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
	if (!IS_ERR(mnt)) {
		/*
		 * it is a longterm mount, don't release mnt until
		 * we unmount before file sys is unregistered
		 */
		real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
	}
	return mnt;
}
EXPORT_SYMBOL_GPL(kern_mount_data);
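/*
 * Editorial sketch (not part of the original source; my_fs_type is made
 * up): filesystems that need an internal, never-user-visible instance
 * usually go through the kern_mount() wrapper from <linux/fs.h>, which
 * is kern_mount_data(type, NULL):
 *
 *	static struct vfsmount *my_mnt;
 *	my_mnt = kern_mount(&my_fs_type);
 *	...
 *	kern_unmount(my_mnt);
 */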

void kern_unmount(struct vfsmount *mnt)
{
	/* release long term mount so mount point can be released */
	if (!IS_ERR_OR_NULL(mnt)) {
		real_mount(mnt)->mnt_ns = NULL;
		synchronize_rcu();	/* wait for concurrent mnt_ns readers */
		mntput(mnt);
	}
}
EXPORT_SYMBOL(kern_unmount);

bool our_mnt(struct vfsmount *mnt)
{
	return check_mnt(real_mount(mnt));
}

bool current_chrooted(void)
{
	/* Does the current process have a non-standard root */
	struct path ns_root;
	struct path fs_root;
	bool chrooted;

	/* Find the namespace root */
	ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
	ns_root.dentry = ns_root.mnt->mnt_root;
	path_get(&ns_root);
	while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
		;

	get_fs_root(current->fs, &fs_root);

	chrooted = !path_equal(&fs_root, &ns_root);

	path_put(&fs_root);
	path_put(&ns_root);

	return chrooted;
}

bool fs_fully_visible(struct file_system_type *type)
{
	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
	struct mount *mnt;
	bool visible = false;

	if (unlikely(!ns))
		return false;

	down_read(&namespace_sem);
	list_for_each_entry(mnt, &ns->list, mnt_list) {
		struct mount *child;
		if (mnt->mnt.mnt_sb->s_type != type)
			continue;

		/* This mount is not fully visible if there are any child
		 * mounts that cover anything except for empty directories.
		 */
		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
			struct inode *inode = child->mnt_mountpoint->d_inode;
			if (!S_ISDIR(inode->i_mode))
				goto next;
			if (inode->i_nlink > 2)
				goto next;
		}
		visible = true;
		goto found;
	next:	;
	}
found:
	up_read(&namespace_sem);
	return visible;
}
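/*
 * Editorial context (not part of the original source): this predicate is
 * consulted when an unprivileged user namespace tries to mount e.g. proc
 * or sysfs; the mount is only allowed if an instance is already fully
 * visible, so the new mount cannot unmask files hidden by over-mounts.
 */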

static struct ns_common *mntns_get(struct task_struct *task)
{
	struct ns_common *ns = NULL;
	struct nsproxy *nsproxy;

	task_lock(task);
	nsproxy = task->nsproxy;
	if (nsproxy) {
		ns = &nsproxy->mnt_ns->ns;
		get_mnt_ns(to_mnt_ns(ns));
	}
	task_unlock(task);

	return ns;
}

static void mntns_put(struct ns_common *ns)
{
	put_mnt_ns(to_mnt_ns(ns));
}

static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct fs_struct *fs = current->fs;
	struct mnt_namespace *mnt_ns = to_mnt_ns(ns);
	struct path root;

	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
	    !ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
		return -EPERM;

	if (fs->users != 1)
		return -EINVAL;

	get_mnt_ns(mnt_ns);
	put_mnt_ns(nsproxy->mnt_ns);
	nsproxy->mnt_ns = mnt_ns;

	/* Find the root */
	root.mnt    = &mnt_ns->root->mnt;
	root.dentry = mnt_ns->root->mnt.mnt_root;
	path_get(&root);
	while (d_mountpoint(root.dentry) && follow_down_one(&root))
		;

	/* Update the pwd and root */
	set_fs_pwd(fs, &root);
	set_fs_root(fs, &root);

	path_put(&root);
	return 0;
}

const struct proc_ns_operations mntns_operations = {
	.name		= "mnt",
	.type		= CLONE_NEWNS,
	.get		= mntns_get,
	.put		= mntns_put,
	.install	= mntns_install,
};
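/*
 * Editorial usage sketch (not part of the original source): these ops
 * back /proc/<pid>/ns/mnt, so userspace joins another task's mount
 * namespace with
 *
 *	int fd = open("/proc/1234/ns/mnt", O_RDONLY);
 *	setns(fd, CLONE_NEWNS);		// lands in mntns_install()
 *	close(fd);
 *
 * subject to the capability and fs_struct sharing checks above.
 */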