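/*
 * VFS mount handling: vfsmount allocation and the mount hash table,
 * mount propagation hooks, /proc/mounts output, and the mount(2),
 * umount(2) and pivot_root(2) entry points.
 */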
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/acct.h>
#include <linux/capability.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/seq_file.h>
#include <linux/mnt_namespace.h>
#include <linux/namei.h>
#include <linux/nsproxy.h>
#include <linux/security.h>
#include <linux/mount.h>
#include <linux/ramfs.h>
#include <linux/log2.h>
#include <linux/idr.h>
#include <linux/fs_struct.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include "pnode.h"
#include "internal.h"

#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
#define HASH_SIZE (1UL << HASH_SHIFT)

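/* serializes mount-tree changes and protects the mount hash table */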
__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);

static int event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
static int mnt_id_start = 0;
static int mnt_group_start = 1;

static struct list_head *mount_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static struct rw_semaphore namespace_sem;

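/* /sys/fs */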
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);

static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> HASH_SHIFT);
	return tmp & (HASH_SIZE - 1);
}

#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)

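/*
 * Allocate a unique mount ID.  ida_get_new_above() returns -EAGAIN once
 * its preallocated nodes are exhausted, so loop back to ida_pre_get()
 * until the allocation sticks.
 */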
static int mnt_alloc_id(struct vfsmount *mnt)
{
	int res;

retry:
	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
	spin_lock(&vfsmount_lock);
	res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
	if (!res)
		mnt_id_start = mnt->mnt_id + 1;
	spin_unlock(&vfsmount_lock);
	if (res == -EAGAIN)
		goto retry;

	return res;
}

static void mnt_free_id(struct vfsmount *mnt)
{
	int id = mnt->mnt_id;
	spin_lock(&vfsmount_lock);
	ida_remove(&mnt_id_ida, id);
	if (mnt_id_start > id)
		mnt_id_start = id;
	spin_unlock(&vfsmount_lock);
}

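/*
 * Allocate a new peer group ID.  mnt_group_ida is protected by
 * namespace_sem, so no extra locking is needed here.
 */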
static int mnt_alloc_group_id(struct vfsmount *mnt)
{
	int res;

	if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
		return -ENOMEM;

	res = ida_get_new_above(&mnt_group_ida,
				mnt_group_start,
				&mnt->mnt_group_id);
	if (!res)
		mnt_group_start = mnt->mnt_group_id + 1;

	return res;
}

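/*
 * Release a peer group ID.
 */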
void mnt_release_group_id(struct vfsmount *mnt)
{
	int id = mnt->mnt_group_id;
	ida_remove(&mnt_group_ida, id);
	if (mnt_group_start > id)
		mnt_group_start = id;
	mnt->mnt_group_id = 0;
}

struct vfsmount *alloc_vfsmnt(const char *name)
{
	struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

		atomic_set(&mnt->mnt_count, 1);
		INIT_LIST_HEAD(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
#ifdef CONFIG_SMP
		mnt->mnt_writers = alloc_percpu(int);
		if (!mnt->mnt_writers)
			goto out_free_devname;
#else
		mnt->mnt_writers = 0;
#endif
	}
	return mnt;

#ifdef CONFIG_SMP
out_free_devname:
	kfree(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}

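/*
 * A mount is writable only if neither the mount itself nor its
 * superblock is marked read-only.  This check is lockless and can race
 * with a concurrent remount; mnt_want_write() deals with that race.
 */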
int __mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_flags & MNT_READONLY)
		return 1;
	if (mnt->mnt_sb->s_flags & MS_RDONLY)
		return 1;
	return 0;
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

static inline void inc_mnt_writers(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
#else
	mnt->mnt_writers++;
#endif
}

static inline void dec_mnt_writers(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
	(*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
#else
	mnt->mnt_writers--;
#endif
}

static unsigned int count_mnt_writers(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += *per_cpu_ptr(mnt->mnt_writers, cpu);
	}

	return count;
#else
	return mnt->mnt_writers;
#endif
}

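/**
 * mnt_want_write - get write access to a mount
 * @mnt: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be
 * performed to it, and makes sure that writes are allowed before
 * returning success.  When the write operation is finished,
 * mnt_drop_write() must be called; the pair works like a per-mount
 * write refcount.
 *
 * Typical caller pattern (a sketch, not code from this file):
 *
 *	err = mnt_want_write(file->f_path.mnt);
 *	if (err)
 *		return err;
 *	...do the write...
 *	mnt_drop_write(file->f_path.mnt);
 */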
int mnt_want_write(struct vfsmount *mnt)
{
	int ret = 0;

	preempt_disable();
	inc_mnt_writers(mnt);
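	/*
	 * The store from inc_mnt_writers() must be visible before we test
	 * MNT_WRITE_HOLD below, so that the remount-ro slowpath can see
	 * our incremented count after it has set MNT_WRITE_HOLD.
	 */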
	smp_mb();
	while (mnt->mnt_flags & MNT_WRITE_HOLD)
		cpu_relax();
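	/*
	 * After the slowpath clears MNT_WRITE_HOLD, the readonly state of
	 * the mount is settled, so we must not load it until
	 * MNT_WRITE_HOLD is clear.
	 */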
	smp_rmb();
	if (__mnt_is_readonly(mnt)) {
		dec_mnt_writers(mnt);
		ret = -EROFS;
		goto out;
	}
out:
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);

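/**
 * mnt_clone_write - get write access to a mount
 * @mnt: the mount on which to take a write
 *
 * This is effectively like mnt_want_write, except it must only be used
 * to take an extra write reference on a mount for which the caller
 * already holds one, so the MNT_WRITE_HOLD handshake can be skipped.
 */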
int mnt_clone_write(struct vfsmount *mnt)
{
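	/* superblock may be r/o */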
	if (__mnt_is_readonly(mnt))
		return -EROFS;
	preempt_disable();
	inc_mnt_writers(mnt);
	preempt_enable();
	return 0;
}
EXPORT_SYMBOL_GPL(mnt_clone_write);

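/**
 * mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like mnt_want_write, but if the file is already open for
 * write it can take the cheaper mnt_clone_write() path.
 */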
int mnt_want_write_file(struct file *file)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
		return mnt_want_write(file->f_path.mnt);
	else
		return mnt_clone_write(file->f_path.mnt);
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);

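/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to
 * it.  Must be matched with a mnt_want_write() call above.
 */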
void mnt_drop_write(struct vfsmount *mnt)
{
	preempt_disable();
	dec_mnt_writers(mnt);
	preempt_enable();
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

static int mnt_make_readonly(struct vfsmount *mnt)
{
	int ret = 0;

	spin_lock(&vfsmount_lock);
	mnt->mnt_flags |= MNT_WRITE_HOLD;
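	/*
	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
	 * should be visible before we do.
	 */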
	smp_mb();

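	/*
	 * With writers on hold, if this value is zero, then there are
	 * definitely no active writers (although held writers may
	 * subsequently increment the count, they'll have to wait, and
	 * decrement it after seeing MNT_READONLY).
	 *
	 * It is OK to have the counter incremented on one CPU and
	 * decremented on another: the sum will add up correctly, and a
	 * fastpath writer that raced us either shows up in the count or is
	 * still spinning on MNT_WRITE_HOLD above, so it cannot be missed.
	 */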
	if (count_mnt_writers(mnt) > 0)
		ret = -EBUSY;
	else
		mnt->mnt_flags |= MNT_READONLY;
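	/*
	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so that
	 * writers unblocked below cannot miss it and do a fastpath write.
	 */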
	smp_wmb();
	mnt->mnt_flags &= ~MNT_WRITE_HOLD;
	spin_unlock(&vfsmount_lock);
	return ret;
}

static void __mnt_unmake_readonly(struct vfsmount *mnt)
{
	spin_lock(&vfsmount_lock);
	mnt->mnt_flags &= ~MNT_READONLY;
	spin_unlock(&vfsmount_lock);
}

void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
{
	mnt->mnt_sb = sb;
	mnt->mnt_root = dget(sb->s_root);
}

EXPORT_SYMBOL(simple_set_mnt);

void free_vfsmnt(struct vfsmount *mnt)
{
	kfree(mnt->mnt_devname);
	mnt_free_id(mnt);
#ifdef CONFIG_SMP
	free_percpu(mnt->mnt_writers);
#endif
	kmem_cache_free(mnt_cache, mnt);
}

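/*
 * find the first or last mount at @dentry on vfsmount @mnt depending on
 * @dir. If @dir is set return the first mount else return the last mount.
 */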
struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
			      int dir)
{
	struct list_head *head = mount_hashtable + hash(mnt, dentry);
	struct list_head *tmp = head;
	struct vfsmount *p, *found = NULL;

	for (;;) {
		tmp = dir ? tmp->next : tmp->prev;
		p = NULL;
		if (tmp == head)
			break;
		p = list_entry(tmp, struct vfsmount, mnt_hash);
		if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
			found = p;
			break;
		}
	}
	return found;
}

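/*
 * lookup_mnt increments the ref count before returning
 * the vfsmount struct.
 */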
struct vfsmount *lookup_mnt(struct path *path)
{
	struct vfsmount *child_mnt;
	spin_lock(&vfsmount_lock);
	if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
		mntget(child_mnt);
	spin_unlock(&vfsmount_lock);
	return child_mnt;
}

static inline int check_mnt(struct vfsmount *mnt)
{
	return mnt->mnt_ns == current->nsproxy->mnt_ns;
}

static void touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns) {
		ns->event = ++event;
		wake_up_interruptible(&ns->poll);
	}
}

static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns && ns->event != event) {
		ns->event = event;
		wake_up_interruptible(&ns->poll);
	}
}

static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
{
	old_path->dentry = mnt->mnt_mountpoint;
	old_path->mnt = mnt->mnt_parent;
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt_root;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_hash);
	old_path->dentry->d_mounted--;
}

void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
			struct vfsmount *child_mnt)
{
	child_mnt->mnt_parent = mntget(mnt);
	child_mnt->mnt_mountpoint = dget(dentry);
	dentry->d_mounted++;
}

static void attach_mnt(struct vfsmount *mnt, struct path *path)
{
	mnt_set_mountpoint(path->mnt, path->dentry, mnt);
	list_add_tail(&mnt->mnt_hash, mount_hashtable +
			hash(path->mnt, path->dentry));
	list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
}

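/*
 * the caller must hold vfsmount_lock
 */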
static void commit_tree(struct vfsmount *mnt)
{
	struct vfsmount *parent = mnt->mnt_parent;
	struct vfsmount *m;
	LIST_HEAD(head);
	struct mnt_namespace *n = parent->mnt_ns;

	BUG_ON(parent == mnt);

	list_add_tail(&head, &mnt->mnt_list);
	list_for_each_entry(m, &head, mnt_list)
		m->mnt_ns = n;
	list_splice(&head, n->list.prev);

	list_add_tail(&mnt->mnt_hash, mount_hashtable +
				hash(parent, mnt->mnt_mountpoint));
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
	touch_mnt_namespace(n);
}

static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
{
	struct list_head *next = p->mnt_mounts.next;
	if (next == &p->mnt_mounts) {
		while (1) {
			if (p == root)
				return NULL;
			next = p->mnt_child.next;
			if (next != &p->mnt_parent->mnt_mounts)
				break;
			p = p->mnt_parent;
		}
	}
	return list_entry(next, struct vfsmount, mnt_child);
}

static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
{
	struct list_head *prev = p->mnt_mounts.prev;
	while (prev != &p->mnt_mounts) {
		p = list_entry(prev, struct vfsmount, mnt_child);
		prev = p->mnt_mounts.prev;
	}
	return p;
}

static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
					int flag)
{
	struct super_block *sb = old->mnt_sb;
	struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);

	if (mnt) {
		if (flag & (CL_SLAVE | CL_PRIVATE))
			mnt->mnt_group_id = 0;
		else
			mnt->mnt_group_id = old->mnt_group_id;

		if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
			int err = mnt_alloc_group_id(mnt);
			if (err)
				goto out_free;
		}

		mnt->mnt_flags = old->mnt_flags;
		atomic_inc(&sb->s_active);
		mnt->mnt_sb = sb;
		mnt->mnt_root = dget(root);
		mnt->mnt_mountpoint = mnt->mnt_root;
		mnt->mnt_parent = mnt;

		if (flag & CL_SLAVE) {
			list_add(&mnt->mnt_slave, &old->mnt_slave_list);
			mnt->mnt_master = old;
			CLEAR_MNT_SHARED(mnt);
		} else if (!(flag & CL_PRIVATE)) {
			if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
				list_add(&mnt->mnt_share, &old->mnt_share);
			if (IS_MNT_SLAVE(old))
				list_add(&mnt->mnt_slave, &old->mnt_slave);
			mnt->mnt_master = old->mnt_master;
		}
		if (flag & CL_MAKE_SHARED)
			set_mnt_shared(mnt);
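		/*
		 * stick the duplicate mount on the same expiry list
		 * as the original if that was on one
		 */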
		if (flag & CL_EXPIRE) {
			if (!list_empty(&old->mnt_expire))
				list_add(&mnt->mnt_expire, &old->mnt_expire);
		}
	}
	return mnt;

 out_free:
	free_vfsmnt(mnt);
	return NULL;
}

static inline void __mntput(struct vfsmount *mnt)
{
	struct super_block *sb = mnt->mnt_sb;
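	/*
	 * A non-zero writer count here probably means somebody messed up a
	 * mnt_want/drop_write() pair; it would also have kept the
	 * filesystem from making r/w->r/o transitions.
	 */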
	WARN_ON(count_mnt_writers(mnt));
	dput(mnt->mnt_root);
	free_vfsmnt(mnt);
	deactivate_super(sb);
}

void mntput_no_expire(struct vfsmount *mnt)
{
repeat:
	if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
		if (likely(!mnt->mnt_pinned)) {
			spin_unlock(&vfsmount_lock);
			__mntput(mnt);
			return;
		}
		atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
		mnt->mnt_pinned = 0;
		spin_unlock(&vfsmount_lock);
		acct_auto_close_mnt(mnt);
		security_sb_umount_close(mnt);
		goto repeat;
	}
}

EXPORT_SYMBOL(mntput_no_expire);

void mnt_pin(struct vfsmount *mnt)
{
	spin_lock(&vfsmount_lock);
	mnt->mnt_pinned++;
	spin_unlock(&vfsmount_lock);
}

EXPORT_SYMBOL(mnt_pin);

void mnt_unpin(struct vfsmount *mnt)
{
	spin_lock(&vfsmount_lock);
	if (mnt->mnt_pinned) {
		atomic_inc(&mnt->mnt_count);
		mnt->mnt_pinned--;
	}
	spin_unlock(&vfsmount_lock);
}

EXPORT_SYMBOL(mnt_unpin);

static inline void mangle(struct seq_file *m, const char *s)
{
	seq_escape(m, s, " \t\n\\");
}

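/*
 * Simple .show_options callback for filesystems which don't want to
 * implement more complex mount option showing.
 *
 * See also save_mount_options().
 */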
int generic_show_options(struct seq_file *m, struct vfsmount *mnt)
{
	const char *options;

	rcu_read_lock();
	options = rcu_dereference(mnt->mnt_sb->s_options);

	if (options != NULL && options[0]) {
		seq_putc(m, ',');
		mangle(m, options);
	}
	rcu_read_unlock();

	return 0;
}
EXPORT_SYMBOL(generic_show_options);

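/*
 * If filesystem uses generic_show_options(), this function should be
 * called from the fill_super() callback.
 *
 * The .remount_fs callback usually needs to be handled in a special
 * way, to make sure, that previous options are not overwritten if the
 * remount fails.
 *
 * Also note, that if the filesystem's .remount_fs function doesn't
 * reset all options to their default value, but changes only newly
 * given options, then the displayed options will not reflect reality
 * any more.
 */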
void save_mount_options(struct super_block *sb, char *options)
{
	BUG_ON(sb->s_options);
	rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL));
}
EXPORT_SYMBOL(save_mount_options);

void replace_mount_options(struct super_block *sb, char *options)
{
	char *old = sb->s_options;
	rcu_assign_pointer(sb->s_options, options);
	if (old) {
		synchronize_rcu();
		kfree(old);
	}
}
EXPORT_SYMBOL(replace_mount_options);

#ifdef CONFIG_PROC_FS

static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct proc_mounts *p = m->private;

	down_read(&namespace_sem);
	return seq_list_start(&p->ns->list, *pos);
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_mounts *p = m->private;

	return seq_list_next(v, &p->ns->list, pos);
}

static void m_stop(struct seq_file *m, void *v)
{
	up_read(&namespace_sem);
}

struct proc_fs_info {
	int flag;
	const char *str;
};

static int show_sb_opts(struct seq_file *m, struct super_block *sb)
{
	static const struct proc_fs_info fs_info[] = {
		{ MS_SYNCHRONOUS, ",sync" },
		{ MS_DIRSYNC, ",dirsync" },
		{ MS_MANDLOCK, ",mand" },
		{ 0, NULL }
	};
	const struct proc_fs_info *fs_infop;

	for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
		if (sb->s_flags & fs_infop->flag)
			seq_puts(m, fs_infop->str);
	}

	return security_sb_show_options(m, sb);
}

static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
{
	static const struct proc_fs_info mnt_info[] = {
		{ MNT_NOSUID, ",nosuid" },
		{ MNT_NODEV, ",nodev" },
		{ MNT_NOEXEC, ",noexec" },
		{ MNT_NOATIME, ",noatime" },
		{ MNT_NODIRATIME, ",nodiratime" },
		{ MNT_RELATIME, ",relatime" },
		{ MNT_STRICTATIME, ",strictatime" },
		{ 0, NULL }
	};
	const struct proc_fs_info *fs_infop;

	for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
		if (mnt->mnt_flags & fs_infop->flag)
			seq_puts(m, fs_infop->str);
	}
}

static void show_type(struct seq_file *m, struct super_block *sb)
{
	mangle(m, sb->s_type->name);
	if (sb->s_subtype && sb->s_subtype[0]) {
		seq_putc(m, '.');
		mangle(m, sb->s_subtype);
	}
}

static int show_vfsmnt(struct seq_file *m, void *v)
{
	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
	int err = 0;
	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };

	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
	seq_putc(m, ' ');
	seq_path(m, &mnt_path, " \t\n\\");
	seq_putc(m, ' ');
	show_type(m, mnt->mnt_sb);
	seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
	err = show_sb_opts(m, mnt->mnt_sb);
	if (err)
		goto out;
	show_mnt_opts(m, mnt);
	if (mnt->mnt_sb->s_op->show_options)
		err = mnt->mnt_sb->s_op->show_options(m, mnt);
	seq_puts(m, " 0 0\n");
out:
	return err;
}

const struct seq_operations mounts_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_vfsmnt
};

static int show_mountinfo(struct seq_file *m, void *v)
{
	struct proc_mounts *p = m->private;
	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
	struct super_block *sb = mnt->mnt_sb;
	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
	struct path root = p->root;
	int err = 0;

	seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
		   MAJOR(sb->s_dev), MINOR(sb->s_dev));
	seq_dentry(m, mnt->mnt_root, " \t\n\\");
	seq_putc(m, ' ');
	seq_path_root(m, &mnt_path, &root, " \t\n\\");
	if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) {
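		/*
		 * Mountpoint is outside root, discard that one.  Ugly,
		 * but less ugly than trying to handle it in
		 * seq_path_root().
		 */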
		return SEQ_SKIP;
	}
	seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
	show_mnt_opts(m, mnt);

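	/* Tagged fields ("tag[:value]") */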
	if (IS_MNT_SHARED(mnt))
		seq_printf(m, " shared:%i", mnt->mnt_group_id);
	if (IS_MNT_SLAVE(mnt)) {
		int master = mnt->mnt_master->mnt_group_id;
		int dom = get_dominating_id(mnt, &p->root);
		seq_printf(m, " master:%i", master);
		if (dom && dom != master)
			seq_printf(m, " propagate_from:%i", dom);
	}
	if (IS_MNT_UNBINDABLE(mnt))
		seq_puts(m, " unbindable");

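	/* Filesystem specific data */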
	seq_puts(m, " - ");
	show_type(m, sb);
	seq_putc(m, ' ');
	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
	seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
	err = show_sb_opts(m, sb);
	if (err)
		goto out;
	if (sb->s_op->show_options)
		err = sb->s_op->show_options(m, mnt);
	seq_putc(m, '\n');
out:
	return err;
}

const struct seq_operations mountinfo_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_mountinfo,
};

static int show_vfsstat(struct seq_file *m, void *v)
{
	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
	int err = 0;

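	/* device */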
	if (mnt->mnt_devname) {
		seq_puts(m, "device ");
		mangle(m, mnt->mnt_devname);
	} else
		seq_puts(m, "no device");

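	/* mount point */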
	seq_puts(m, " mounted on ");
	seq_path(m, &mnt_path, " \t\n\\");
	seq_putc(m, ' ');

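	/* file system type */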
	seq_puts(m, "with fstype ");
	show_type(m, mnt->mnt_sb);

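	/* optional statistics */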
	if (mnt->mnt_sb->s_op->show_stats) {
		seq_putc(m, ' ');
		err = mnt->mnt_sb->s_op->show_stats(m, mnt);
	}

	seq_putc(m, '\n');
	return err;
}

const struct seq_operations mountstats_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_vfsstat,
};
#endif

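/**
 * may_umount_tree - check if a mount tree is busy
 * @mnt: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */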
int may_umount_tree(struct vfsmount *mnt)
{
	int actual_refs = 0;
	int minimum_refs = 0;
	struct vfsmount *p;

	spin_lock(&vfsmount_lock);
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		actual_refs += atomic_read(&p->mnt_count);
		minimum_refs += 2;
	}
	spin_unlock(&vfsmount_lock);

	if (actual_refs > minimum_refs)
		return 0;

	return 1;
}

EXPORT_SYMBOL(may_umount_tree);

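/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */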
int may_umount(struct vfsmount *mnt)
{
	int ret = 1;
	spin_lock(&vfsmount_lock);
	if (propagate_mount_busy(mnt, 2))
		ret = 0;
	spin_unlock(&vfsmount_lock);
	return ret;
}

EXPORT_SYMBOL(may_umount);

void release_mounts(struct list_head *head)
{
	struct vfsmount *mnt;
	while (!list_empty(head)) {
		mnt = list_first_entry(head, struct vfsmount, mnt_hash);
		list_del_init(&mnt->mnt_hash);
		if (mnt->mnt_parent != mnt) {
			struct dentry *dentry;
			struct vfsmount *m;
			spin_lock(&vfsmount_lock);
			dentry = mnt->mnt_mountpoint;
			m = mnt->mnt_parent;
			mnt->mnt_mountpoint = mnt->mnt_root;
			mnt->mnt_parent = mnt;
			m->mnt_ghosts--;
			spin_unlock(&vfsmount_lock);
			dput(dentry);
			mntput(m);
		}
		mntput(mnt);
	}
}

void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
{
	struct vfsmount *p;

	for (p = mnt; p; p = next_mnt(p, mnt))
		list_move(&p->mnt_hash, kill);

	if (propagate)
		propagate_umount(kill);

	list_for_each_entry(p, kill, mnt_hash) {
		list_del_init(&p->mnt_expire);
		list_del_init(&p->mnt_list);
		__touch_mnt_namespace(p->mnt_ns);
		p->mnt_ns = NULL;
		list_del_init(&p->mnt_child);
		if (p->mnt_parent != p) {
			p->mnt_parent->mnt_ghosts++;
			p->mnt_mountpoint->d_mounted--;
		}
		change_mnt_propagation(p, MS_PRIVATE);
	}
}

static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts);

static int do_umount(struct vfsmount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt_sb;
	int retval;
	LIST_HEAD(umount_list);

	retval = security_sb_umount(mnt, flags);
	if (retval)
		return retval;

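	/*
	 * Allow userspace to request a mountpoint be expired rather than
	 * unmounting unconditionally. Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */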
	if (flags & MNT_EXPIRE) {
		if (mnt == current->fs->root.mnt ||
		    flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		if (atomic_read(&mnt->mnt_count) != 2)
			return -EBUSY;

		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

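	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things. In the Unix tradition of
	 * 'Gee thats tricky lets do it in userspace' the umount_begin
	 * might fail to complete on the first run through as other tasks
	 * must return, and the like. Thats for the mount program to worry
	 * about for the moment.
	 */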
	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
		sb->s_op->umount_begin(sb);
	}

	if (mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
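		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */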
		down_write(&sb->s_umount);
		if (!(sb->s_flags & MS_RDONLY))
			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
		up_write(&sb->s_umount);
		return retval;
	}

	down_write(&namespace_sem);
	spin_lock(&vfsmount_lock);
	event++;

	if (!(flags & MNT_DETACH))
		shrink_submounts(mnt, &umount_list);

	retval = -EBUSY;
	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, 1, &umount_list);
		retval = 0;
	}
	spin_unlock(&vfsmount_lock);
	if (retval)
		security_sb_umount_busy(mnt);
	up_write(&namespace_sem);
	release_mounts(&umount_list);
	return retval;
}

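/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
 */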
SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	struct path path;
	int retval;

	retval = user_path(name, &path);
	if (retval)
		goto out;
	retval = -EINVAL;
	if (path.dentry != path.mnt->mnt_root)
		goto dput_and_out;
	if (!check_mnt(path.mnt))
		goto dput_and_out;

	retval = -EPERM;
	if (!capable(CAP_SYS_ADMIN))
		goto dput_and_out;

	retval = do_umount(path.mnt, flags);
dput_and_out:
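	/* we mustn't call path_put() as that would clear mnt_expiry_mark */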
	dput(path.dentry);
	mntput_no_expire(path.mnt);
out:
	return retval;
}

#ifdef __ARCH_WANT_SYS_OLDUMOUNT

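/*
 *	The 2.0 compatible umount. No flags.
 */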
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return sys_umount(name, 0);
}

#endif

static int mount_is_safe(struct path *path)
{
	if (capable(CAP_SYS_ADMIN))
		return 0;
	return -EPERM;
#ifdef notyet
	if (S_ISLNK(path->dentry->d_inode->i_mode))
		return -EPERM;
	if (path->dentry->d_inode->i_mode & S_ISVTX) {
		if (current_uid() != path->dentry->d_inode->i_uid)
			return -EPERM;
	}
	if (inode_permission(path->dentry->d_inode, MAY_WRITE))
		return -EPERM;
	return 0;
#endif
}

struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
					int flag)
{
	struct vfsmount *res, *p, *q, *r, *s;
	struct path path;

	if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
		return NULL;

	res = q = clone_mnt(mnt, dentry, flag);
	if (!q)
		goto Enomem;
	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
				s = skip_mnt_tree(s);
				continue;
			}
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			path.mnt = q;
			path.dentry = p->mnt_mountpoint;
			q = clone_mnt(p, p->mnt_root, flag);
			if (!q)
				goto Enomem;
			spin_lock(&vfsmount_lock);
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, &path);
			spin_unlock(&vfsmount_lock);
		}
	}
	return res;
Enomem:
	if (res) {
		LIST_HEAD(umount_list);
		spin_lock(&vfsmount_lock);
		umount_tree(res, 0, &umount_list);
		spin_unlock(&vfsmount_lock);
		release_mounts(&umount_list);
	}
	return NULL;
}

struct vfsmount *collect_mounts(struct path *path)
{
	struct vfsmount *tree;
	down_write(&namespace_sem);
	tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE);
	up_write(&namespace_sem);
	return tree;
}

void drop_collected_mounts(struct vfsmount *mnt)
{
	LIST_HEAD(umount_list);
	down_write(&namespace_sem);
	spin_lock(&vfsmount_lock);
	umount_tree(mnt, 0, &umount_list);
	spin_unlock(&vfsmount_lock);
	up_write(&namespace_sem);
	release_mounts(&umount_list);
}

static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
{
	struct vfsmount *p;

	for (p = mnt; p != end; p = next_mnt(p, mnt)) {
		if (p->mnt_group_id && !IS_MNT_SHARED(p))
			mnt_release_group_id(p);
	}
}

static int invent_group_ids(struct vfsmount *mnt, bool recurse)
{
	struct vfsmount *p;

	for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
		if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
			int err = mnt_alloc_group_id(p);
			if (err) {
				cleanup_group_ids(mnt, p);
				return err;
			}
		}
	}

	return 0;
}

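/*
 * @source_mnt : mount tree to be attached
 * @path       : place the mount tree @source_mnt is attached
 * @parent_path: if non-null, detach the source_mnt from its parent and
 *               store the parent mount and mountpoint dentry
 *               (done when source_mnt is moved)
 *
 * In short: when the destination mount is shared, the attached (or
 * moved) tree is propagated to all mounts in the destination's peer
 * group, and every mount in the attached tree is made shared; with a
 * private or slave destination the tree is attached only there.
 *
 * Should be called without spinlocks held, since this function can
 * sleep in allocations.
 */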
static int attach_recursive_mnt(struct vfsmount *source_mnt,
			struct path *path, struct path *parent_path)
{
	LIST_HEAD(tree_list);
	struct vfsmount *dest_mnt = path->mnt;
	struct dentry *dest_dentry = path->dentry;
	struct vfsmount *child, *p;
	int err;

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
	}
	err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
	if (err)
		goto out_cleanup_ids;

	if (IS_MNT_SHARED(dest_mnt)) {
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	}

	spin_lock(&vfsmount_lock);
	if (parent_path) {
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, path);
		touch_mnt_namespace(parent_path->mnt->mnt_ns);
	} else {
		mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
		commit_tree(source_mnt);
	}

	list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
		list_del_init(&child->mnt_hash);
		commit_tree(child);
	}
	spin_unlock(&vfsmount_lock);
	return 0;

 out_cleanup_ids:
	if (IS_MNT_SHARED(dest_mnt))
		cleanup_group_ids(source_mnt, NULL);
 out:
	return err;
}

static int graft_tree(struct vfsmount *mnt, struct path *path)
{
	int err;
	if (mnt->mnt_sb->s_flags & MS_NOUSER)
		return -EINVAL;

	if (S_ISDIR(path->dentry->d_inode->i_mode) !=
	      S_ISDIR(mnt->mnt_root->d_inode->i_mode))
		return -ENOTDIR;

	err = -ENOENT;
	mutex_lock(&path->dentry->d_inode->i_mutex);
	if (IS_DEADDIR(path->dentry->d_inode))
		goto out_unlock;

	err = security_sb_check_sb(mnt, path);
	if (err)
		goto out_unlock;

	err = -ENOENT;
	if (!d_unlinked(path->dentry))
		err = attach_recursive_mnt(mnt, path, NULL);
out_unlock:
	mutex_unlock(&path->dentry->d_inode->i_mutex);
	if (!err)
		security_sb_post_addmount(mnt, path);
	return err;
}

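/*
 * recursively change the type of the mountpoint.
 */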
static int do_change_type(struct path *path, int flag)
{
	struct vfsmount *m, *mnt = path->mnt;
	int recurse = flag & MS_REC;
	int type = flag & ~MS_REC;
	int err = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	down_write(&namespace_sem);
	if (type == MS_SHARED) {
		err = invent_group_ids(mnt, recurse);
		if (err)
			goto out_unlock;
	}

	spin_lock(&vfsmount_lock);
	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
		change_mnt_propagation(m, type);
	spin_unlock(&vfsmount_lock);

 out_unlock:
	up_write(&namespace_sem);
	return err;
}

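/*
 * do loopback mount.
 */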
static int do_loopback(struct path *path, char *old_name,
				int recurse)
{
	struct path old_path;
	struct vfsmount *mnt = NULL;
	int err = mount_is_safe(path);
	if (err)
		return err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	if (err)
		return err;

	down_write(&namespace_sem);
	err = -EINVAL;
	if (IS_MNT_UNBINDABLE(old_path.mnt))
		goto out;

	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
		goto out;

	err = -ENOMEM;
	if (recurse)
		mnt = copy_tree(old_path.mnt, old_path.dentry, 0);
	else
		mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);

	if (!mnt)
		goto out;

	err = graft_tree(mnt, path);
	if (err) {
		LIST_HEAD(umount_list);
		spin_lock(&vfsmount_lock);
		umount_tree(mnt, 0, &umount_list);
		spin_unlock(&vfsmount_lock);
		release_mounts(&umount_list);
	}

out:
	up_write(&namespace_sem);
	path_put(&old_path);
	return err;
}

static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
{
	int error = 0;
	int readonly_request = 0;

	if (ms_flags & MS_RDONLY)
		readonly_request = 1;
	if (readonly_request == __mnt_is_readonly(mnt))
		return 0;

	if (readonly_request)
		error = mnt_make_readonly(mnt);
	else
		__mnt_unmake_readonly(mnt);
	return error;
}

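/*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tough luck.
 */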
static int do_remount(struct path *path, int flags, int mnt_flags,
		      void *data)
{
	int err;
	struct super_block *sb = path->mnt->mnt_sb;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!check_mnt(path->mnt))
		return -EINVAL;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	down_write(&sb->s_umount);
	if (flags & MS_BIND)
		err = change_mount_flags(path->mnt, flags);
	else
		err = do_remount_sb(sb, flags, data, 0);
	if (!err)
		path->mnt->mnt_flags = mnt_flags;
	up_write(&sb->s_umount);
	if (!err) {
		security_sb_post_remount(path->mnt, flags, data);

		spin_lock(&vfsmount_lock);
		touch_mnt_namespace(path->mnt->mnt_ns);
		spin_unlock(&vfsmount_lock);
	}
	return err;
}

static inline int tree_contains_unbindable(struct vfsmount *mnt)
{
	struct vfsmount *p;
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		if (IS_MNT_UNBINDABLE(p))
			return 1;
	}
	return 0;
}

static int do_move_mount(struct path *path, char *old_name)
{
	struct path old_path, parent_path;
	struct vfsmount *p;
	int err = 0;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	if (err)
		return err;

	down_write(&namespace_sem);
	while (d_mountpoint(path->dentry) &&
	       follow_down(path))
		;
	err = -EINVAL;
	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
		goto out;

	err = -ENOENT;
	mutex_lock(&path->dentry->d_inode->i_mutex);
	if (IS_DEADDIR(path->dentry->d_inode))
		goto out1;

	if (d_unlinked(path->dentry))
		goto out1;

	err = -EINVAL;
	if (old_path.dentry != old_path.mnt->mnt_root)
		goto out1;

	if (old_path.mnt == old_path.mnt->mnt_parent)
		goto out1;

	if (S_ISDIR(path->dentry->d_inode->i_mode) !=
	      S_ISDIR(old_path.dentry->d_inode->i_mode))
		goto out1;
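	/*
	 * Don't move a mount residing in a shared parent.
	 */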
	if (old_path.mnt->mnt_parent &&
	    IS_MNT_SHARED(old_path.mnt->mnt_parent))
		goto out1;
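	/*
	 * Don't move a mount tree containing unbindable mounts to a
	 * destination mount which is shared.
	 */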
	if (IS_MNT_SHARED(path->mnt) &&
	    tree_contains_unbindable(old_path.mnt))
		goto out1;
	err = -ELOOP;
	for (p = path->mnt; p->mnt_parent != p; p = p->mnt_parent)
		if (p == old_path.mnt)
			goto out1;

	err = attach_recursive_mnt(old_path.mnt, path, &parent_path);
	if (err)
		goto out1;
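	/* if the mount is moved, it should no longer be expired
	 * automatically */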
	list_del_init(&old_path.mnt->mnt_expire);
out1:
	mutex_unlock(&path->dentry->d_inode->i_mutex);
out:
	up_write(&namespace_sem);
	if (!err)
		path_put(&parent_path);
	path_put(&old_path);
	return err;
}

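/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */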
static int do_new_mount(struct path *path, char *type, int flags,
			int mnt_flags, char *name, void *data)
{
	struct vfsmount *mnt;

	if (!type)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	lock_kernel();
	mnt = do_kern_mount(type, flags, name, data);
	unlock_kernel();
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	return do_add_mount(mnt, path, mnt_flags, NULL);
}

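/*
 * add a mount into a namespace's mount tree
 * - provide the option of adding the new mount to an expiration list
 */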
int do_add_mount(struct vfsmount *newmnt, struct path *path,
		 int mnt_flags, struct list_head *fslist)
{
	int err;

	down_write(&namespace_sem);
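	/* Something was mounted here while we slept */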
	while (d_mountpoint(path->dentry) &&
	       follow_down(path))
		;
	err = -EINVAL;
	if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
		goto unlock;
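	/* Refuse the same filesystem on the same mount point */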
	err = -EBUSY;
	if (path->mnt->mnt_sb == newmnt->mnt_sb &&
	    path->mnt->mnt_root == path->dentry)
		goto unlock;

	err = -EINVAL;
	if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
		goto unlock;

	newmnt->mnt_flags = mnt_flags;
	if ((err = graft_tree(newmnt, path)))
		goto unlock;

	if (fslist)
		list_add_tail(&newmnt->mnt_expire, fslist);

	up_write(&namespace_sem);
	return 0;

unlock:
	up_write(&namespace_sem);
	mntput(newmnt);
	return err;
}

EXPORT_SYMBOL_GPL(do_add_mount);

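/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and are expirable
 *
 * Typically called by the automounter: a mount is discarded here only if
 * it was already marked on the previous pass and is still unused.
 */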
void mark_mounts_for_expiry(struct list_head *mounts)
{
	struct vfsmount *mnt, *next;
	LIST_HEAD(graveyard);
	LIST_HEAD(umounts);

	if (list_empty(mounts))
		return;

	down_write(&namespace_sem);
	spin_lock(&vfsmount_lock);
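	/*
	 * extract from the expiration list every vfsmount that matches the
	 * following criteria:
	 * - only referenced by its parent vfsmount
	 * - still marked for expiry (marked on the last call here; marks are
	 *   cleared by mntput())
	 */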
	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
			propagate_mount_busy(mnt, 1))
			continue;
		list_move(&mnt->mnt_expire, &graveyard);
	}
	while (!list_empty(&graveyard)) {
		mnt = list_first_entry(&graveyard, struct vfsmount, mnt_expire);
		touch_mnt_namespace(mnt->mnt_ns);
		umount_tree(mnt, 1, &umounts);
	}
	spin_unlock(&vfsmount_lock);
	up_write(&namespace_sem);

	release_mounts(&umounts);
}

EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);

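/*
 * Ripoff of 'select_parent()' from dcache.c: walk the tree of shrinkable
 * submounts and move any that can be expired to the graveyard.
 */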
static int select_submounts(struct vfsmount *parent, struct list_head *graveyard)
{
	struct vfsmount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct vfsmount *mnt = list_entry(tmp, struct vfsmount, mnt_child);

		next = tmp->next;
		if (!(mnt->mnt_flags & MNT_SHRINKABLE))
			continue;
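		/*
		 * Descend a level if the d_mounts list is non-empty.
		 */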
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		if (!propagate_mount_busy(mnt, 1)) {
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}
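	/*
	 * All done at this level ... ascend and resume the search
	 */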
	if (this_parent != parent) {
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}

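/*
 * process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint
 */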
static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
{
	LIST_HEAD(graveyard);
	struct vfsmount *m;

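	/* extract submounts of 'mountpoint' from the expiration list */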
	while (select_submounts(mnt, &graveyard)) {
		while (!list_empty(&graveyard)) {
			m = list_first_entry(&graveyard, struct vfsmount,
						mnt_expire);
			touch_mnt_namespace(m->mnt_ns);
			umount_tree(m, 1, umounts);
		}
	}
}

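/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault.  But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy.
 */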
static long exact_copy_from_user(void *to, const void __user * from,
				 unsigned long n)
{
	char *t = to;
	const char __user *f = from;
	char c;

	if (!access_ok(VERIFY_READ, from, n))
		return n;

	while (n) {
		if (__get_user(c, f)) {
			memset(t, 0, n);
			break;
		}
		*t++ = c;
		f++;
		n--;
	}
	return n;
}

int copy_mount_options(const void __user * data, unsigned long *where)
{
	int i;
	unsigned long page;
	unsigned long size;

	*where = 0;
	if (!data)
		return 0;

	if (!(page = __get_free_page(GFP_KERNEL)))
		return -ENOMEM;

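	/*
	 * We only care that *some* data at the address the user
	 * gave us is valid.  Just in case, we'll zero
	 * the remainder of the page.
	 */
	/* copy_from_user cannot cross TASK_SIZE ! */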
	size = TASK_SIZE - (unsigned long)data;
	if (size > PAGE_SIZE)
		size = PAGE_SIZE;

	i = size - exact_copy_from_user((void *)page, data, size);
	if (!i) {
		free_page(page);
		return -EFAULT;
	}
	if (i != PAGE_SIZE)
		memset((char *)page + i, 0, PAGE_SIZE - i);
	*where = page;
	return 0;
}

int copy_mount_string(const void __user *data, char **where)
{
	char *tmp;

	if (!data) {
		*where = NULL;
		return 0;
	}

	tmp = strndup_user(data, PAGE_SIZE);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	*where = tmp;
	return 0;
}

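/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */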
long do_mount(char *dev_name, char *dir_name, char *type_page,
		  unsigned long flags, void *data_page)
{
	struct path path;
	int retval = 0;
	int mnt_flags = 0;

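	/* Discard magic */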
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

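	/* Basic sanity checks */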
	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
		return -EINVAL;

	if (data_page)
		((char *)data_page)[PAGE_SIZE - 1] = 0;

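	/* Default to relatime unless overridden */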
	if (!(flags & MS_NOATIME))
		mnt_flags |= MNT_RELATIME;

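	/* Separate the per-mountpoint flags */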
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	if (flags & MS_NOATIME)
		mnt_flags |= MNT_NOATIME;
	if (flags & MS_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;
	if (flags & MS_STRICTATIME)
		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
	if (flags & MS_RDONLY)
		mnt_flags |= MNT_READONLY;

	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_KERNMOUNT |
		   MS_STRICTATIME);

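	/* ... and get the mountpoint */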
	retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
	if (retval)
		return retval;

	retval = security_sb_mount(dev_name, &path,
				   type_page, flags, data_page);
	if (retval)
		goto dput_out;

	if (flags & MS_REMOUNT)
		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
				    data_page);
	else if (flags & MS_BIND)
		retval = do_loopback(&path, dev_name, flags & MS_REC);
	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		retval = do_change_type(&path, flags);
	else if (flags & MS_MOVE)
		retval = do_move_mount(&path, dev_name);
	else
		retval = do_new_mount(&path, type_page, flags, mnt_flags,
				      dev_name, data_page);
dput_out:
	path_put(&path);
	return retval;
}

static struct mnt_namespace *alloc_mnt_ns(void)
{
	struct mnt_namespace *new_ns;

	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
	if (!new_ns)
		return ERR_PTR(-ENOMEM);
	atomic_set(&new_ns->count, 1);
	new_ns->root = NULL;
	INIT_LIST_HEAD(&new_ns->list);
	init_waitqueue_head(&new_ns->poll);
	new_ns->event = 0;
	return new_ns;
}

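/*
 * Allocate a new namespace structure and populate it with contents
 * copied from the namespace of the passed in task structure.
 */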
static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
		struct fs_struct *fs)
{
	struct mnt_namespace *new_ns;
	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
	struct vfsmount *p, *q;

	new_ns = alloc_mnt_ns();
	if (IS_ERR(new_ns))
		return new_ns;

	down_write(&namespace_sem);

	new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root,
					CL_COPY_ALL | CL_EXPIRE);
	if (!new_ns->root) {
		up_write(&namespace_sem);
		kfree(new_ns);
		return ERR_PTR(-ENOMEM);
	}
	spin_lock(&vfsmount_lock);
	list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
	spin_unlock(&vfsmount_lock);
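	/*
	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
	 * as belonging to new namespace.  We have already acquired a private
	 * fs_struct, so tsk->fs->lock is not needed.
	 */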
	p = mnt_ns->root;
	q = new_ns->root;
	while (p) {
		q->mnt_ns = new_ns;
		if (fs) {
			if (p == fs->root.mnt) {
				rootmnt = p;
				fs->root.mnt = mntget(q);
			}
			if (p == fs->pwd.mnt) {
				pwdmnt = p;
				fs->pwd.mnt = mntget(q);
			}
		}
		p = next_mnt(p, mnt_ns->root);
		q = next_mnt(q, new_ns->root);
	}
	up_write(&namespace_sem);

	if (rootmnt)
		mntput(rootmnt);
	if (pwdmnt)
		mntput(pwdmnt);

	return new_ns;
}

struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
		struct fs_struct *new_fs)
{
	struct mnt_namespace *new_ns;

	BUG_ON(!ns);
	get_mnt_ns(ns);

	if (!(flags & CLONE_NEWNS))
		return ns;

	new_ns = dup_mnt_ns(ns, new_fs);

	put_mnt_ns(ns);
	return new_ns;
}

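/*
 * create_mnt_ns - creates a private namespace and adds a root filesystem
 * @mnt: pointer to the new root filesystem mountpoint
 */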
struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
{
	struct mnt_namespace *new_ns;

	new_ns = alloc_mnt_ns();
	if (!IS_ERR(new_ns)) {
		mnt->mnt_ns = new_ns;
		new_ns->root = mnt;
		list_add(&new_ns->list, &new_ns->root->mnt_list);
	}
	return new_ns;
}
EXPORT_SYMBOL(create_mnt_ns);

SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
		char __user *, type, unsigned long, flags, void __user *, data)
{
	int ret;
	char *kernel_type;
	char *kernel_dir;
	char *kernel_dev;
	unsigned long data_page;

	ret = copy_mount_string(type, &kernel_type);
	if (ret < 0)
		goto out_type;

	kernel_dir = getname(dir_name);
	if (IS_ERR(kernel_dir)) {
		ret = PTR_ERR(kernel_dir);
		goto out_dir;
	}

	ret = copy_mount_string(dev_name, &kernel_dev);
	if (ret < 0)
		goto out_dev;

	ret = copy_mount_options(data, &data_page);
	if (ret < 0)
		goto out_data;

	ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
		(void *) data_page);

	free_page(data_page);
out_data:
	kfree(kernel_dev);
out_dev:
	putname(kernel_dir);
out_dir:
	kfree(kernel_type);
out_type:
	return ret;
}

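/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory put_old,
 * makes new_root as the new root file system of the current process, and sets
 * root/cwd of all processes which had them on the current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root. The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the string
 * pointed to by put_old must yield the same directory as new_root. No other
 * file system may be mounted on put_old. After all, new_root is a mountpoint.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if something
 *    cared enough to change them, it's probably wrong to force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system, e.g.
 *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
 *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
 *    first.
 */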
SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
		const char __user *, put_old)
{
	struct vfsmount *tmp;
	struct path new, old, parent_path, root_parent, root;
	int error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	error = user_path_dir(new_root, &new);
	if (error)
		goto out0;
	error = -EINVAL;
	if (!check_mnt(new.mnt))
		goto out1;

	error = user_path_dir(put_old, &old);
	if (error)
		goto out1;

	error = security_sb_pivotroot(&old, &new);
	if (error) {
		path_put(&old);
		goto out1;
	}

	read_lock(&current->fs->lock);
	root = current->fs->root;
	path_get(&current->fs->root);
	read_unlock(&current->fs->lock);
	down_write(&namespace_sem);
	mutex_lock(&old.dentry->d_inode->i_mutex);
	error = -EINVAL;
	if (IS_MNT_SHARED(old.mnt) ||
		IS_MNT_SHARED(new.mnt->mnt_parent) ||
		IS_MNT_SHARED(root.mnt->mnt_parent))
		goto out2;
	if (!check_mnt(root.mnt))
		goto out2;
	error = -ENOENT;
	if (IS_DEADDIR(new.dentry->d_inode))
		goto out2;
	if (d_unlinked(new.dentry))
		goto out2;
	if (d_unlinked(old.dentry))
		goto out2;
	error = -EBUSY;
	if (new.mnt == root.mnt ||
	    old.mnt == root.mnt)
		goto out2; /* loop, on the same file system  */
	error = -EINVAL;
	if (root.mnt->mnt_root != root.dentry)
		goto out2; /* not a mountpoint */
	if (root.mnt->mnt_parent == root.mnt)
		goto out2; /* not attached */
	if (new.mnt->mnt_root != new.dentry)
		goto out2; /* not a mountpoint */
	if (new.mnt->mnt_parent == new.mnt)
		goto out2; /* not attached */
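	/* make sure we can reach put_old from new_root */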
	tmp = old.mnt;
	spin_lock(&vfsmount_lock);
	if (tmp != new.mnt) {
		for (;;) {
			if (tmp->mnt_parent == tmp)
				goto out3;
			if (tmp->mnt_parent == new.mnt)
				break;
			tmp = tmp->mnt_parent;
		}
		if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
			goto out3;
	} else if (!is_subdir(old.dentry, new.dentry))
		goto out3;
	detach_mnt(new.mnt, &parent_path);
	detach_mnt(root.mnt, &root_parent);
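	/* mount old root on put_old */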
	attach_mnt(root.mnt, &old);
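	/* mount new_root on / */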
	attach_mnt(new.mnt, &root_parent);
	touch_mnt_namespace(current->nsproxy->mnt_ns);
	spin_unlock(&vfsmount_lock);
	chroot_fs_refs(&root, &new);
	security_sb_post_pivotroot(&root, &new);
	error = 0;
	path_put(&root_parent);
	path_put(&parent_path);
out2:
	mutex_unlock(&old.dentry->d_inode->i_mutex);
	up_write(&namespace_sem);
	path_put(&root);
	path_put(&old);
out1:
	path_put(&new);
out0:
	return error;
out3:
	spin_unlock(&vfsmount_lock);
	goto out2;
}

static void __init init_mount_tree(void)
{
	struct vfsmount *mnt;
	struct mnt_namespace *ns;
	struct path root;

	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
	if (IS_ERR(mnt))
		panic("Can't create rootfs");
	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		panic("Can't allocate initial namespace");

	init_task.nsproxy->mnt_ns = ns;
	get_mnt_ns(ns);

	root.mnt = ns->root;
	root.dentry = ns->root->mnt_root;

	set_fs_pwd(current->fs, &root);
	set_fs_root(current->fs, &root);
}

void __init mnt_init(void)
{
	unsigned u;
	int err;

	init_rwsem(&namespace_sem);

	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);

	if (!mount_hashtable)
		panic("Failed to allocate mount hash table\n");

	printk("Mount-cache hash table entries: %lu\n", HASH_SIZE);

	for (u = 0; u < HASH_SIZE; u++)
		INIT_LIST_HEAD(&mount_hashtable[u]);

	err = sysfs_init();
	if (err)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, err);
	fs_kobj = kobject_create_and_add("fs", NULL);
	if (!fs_kobj)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);
	init_rootfs();
	init_mount_tree();
}

void put_mnt_ns(struct mnt_namespace *ns)
{
	struct vfsmount *root;
	LIST_HEAD(umount_list);

	if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock))
		return;
	root = ns->root;
	ns->root = NULL;
	spin_unlock(&vfsmount_lock);
	down_write(&namespace_sem);
	spin_lock(&vfsmount_lock);
	umount_tree(root, 0, &umount_list);
	spin_unlock(&vfsmount_lock);
	up_write(&namespace_sem);
	release_mounts(&umount_list);
	kfree(ns);
}
EXPORT_SYMBOL(put_mnt_ns);