/*
 *  linux/fs/block_dev.c
 *
 *  Block device inode handling: the bdev pseudo-filesystem, block device
 *  open/close, and block device direct I/O support.
 */
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/magic.h>
#include <linux/dax.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
#include <linux/badblocks.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
#include <linux/uaccess.h>
#include "internal.h"

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

static const struct address_space_operations def_blk_aops;

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}
EXPORT_SYMBOL(I_BDEV);

static void bdev_write_inode(struct block_device *bdev)
{
	struct inode *inode = bdev->bd_inode;
	int ret;

	spin_lock(&inode->i_lock);
	while (inode->i_state & I_DIRTY) {
		spin_unlock(&inode->i_lock);
		ret = write_inode_now(inode, true);
		if (ret) {
			char name[BDEVNAME_SIZE];
			pr_warn_ratelimited("VFS: Dirty inode writeback failed "
					    "for block device %s (err=%d).\n",
					    bdevname(bdev, name), ret);
		}
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
}

/* Kill _all_ buffers and pagecache, dirty or not.. */
void kill_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
		return;

	invalidate_bh_lrus();
	truncate_inode_pages(mapping, 0);
}
EXPORT_SYMBOL(kill_bdev);

/* Invalidate clean unused buffers and pagecache. */
void invalidate_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages) {
		invalidate_bh_lrus();
		lru_add_drain_all();	/* make sure all lru add caches are flushed */
		invalidate_mapping_pages(mapping, 0, -1);
	}
	/*
	 * Most of the time we don't need to flush the cleancache on the bdev,
	 * but for the strange corners, let's be cautious.
	 */
	cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);

static void set_init_blocksize(struct block_device *bdev)
{
	unsigned bsize = bdev_logical_block_size(bdev);
	loff_t size = i_size_read(bdev->bd_inode);

	while (bsize < PAGE_SIZE) {
		if (size & bsize)
			break;
		bsize <<= 1;
	}
	bdev->bd_block_size = bsize;
	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}

int set_blocksize(struct block_device *bdev, int size)
{
	/* Size must be a power of two, and between 512 and PAGE_SIZE */
	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
		return -EINVAL;

	/* Size cannot be smaller than the size supported by the device */
	if (size < bdev_logical_block_size(bdev))
		return -EINVAL;

	/* Don't change the size if it is same as current */
	if (bdev->bd_block_size != size) {
		sync_blockdev(bdev);
		bdev->bd_block_size = size;
		bdev->bd_inode->i_blkbits = blksize_bits(size);
		kill_bdev(bdev);
	}
	return 0;
}

EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
	if (set_blocksize(sb->s_bdev, size))
		return 0;
	/* If we get here, we know size is a power of two
	 * and its value is between 512 and PAGE_SIZE */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);
	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);

static int
blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

static struct inode *bdev_file_inode(struct file *file)
{
	return file->f_mapping->host;
}

static unsigned int dio_bio_write_op(struct kiocb *iocb)
{
	unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;

	/* avoid the need for a separate flush on O_DSYNC writes */
	if (iocb->ki_flags & IOCB_DSYNC)
		op |= REQ_FUA;
	return op;
}

#define DIO_INLINE_BIO_VECS 4

static void blkdev_bio_end_io_simple(struct bio *bio)
{
	struct task_struct *waiter = bio->bi_private;

	WRITE_ONCE(bio->bi_private, NULL);
	blk_wake_io_task(waiter);
}

static ssize_t
__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
		int nr_pages)
{
	struct file *file = iocb->ki_filp;
	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec;
	loff_t pos = iocb->ki_pos;
	bool should_dirty = false;
	struct bio bio;
	ssize_t ret;
	blk_qc_t qc;
	int i;
	struct bvec_iter_all iter_all;

	if ((pos | iov_iter_alignment(iter)) &
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	if (nr_pages <= DIO_INLINE_BIO_VECS)
		vecs = inline_vecs;
	else {
		vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
				     GFP_KERNEL);
		if (!vecs)
			return -ENOMEM;
	}

	bio_init(&bio, vecs, nr_pages);
	bio_set_dev(&bio, bdev);
	bio.bi_iter.bi_sector = pos >> 9;
	bio.bi_write_hint = iocb->ki_hint;
	bio.bi_private = current;
	bio.bi_end_io = blkdev_bio_end_io_simple;
	bio.bi_ioprio = iocb->ki_ioprio;

	ret = bio_iov_iter_get_pages(&bio, iter);
	if (unlikely(ret))
		goto out;
	ret = bio.bi_iter.bi_size;

	if (iov_iter_rw(iter) == READ) {
		bio.bi_opf = REQ_OP_READ;
		if (iter_is_iovec(iter))
			should_dirty = true;
	} else {
		bio.bi_opf = dio_bio_write_op(iocb);
		task_io_account_write(ret);
	}
	if (iocb->ki_flags & IOCB_HIPRI)
		bio_set_polled(&bio, iocb);

	qc = submit_bio(&bio);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(bio.bi_private))
			break;
		if (!(iocb->ki_flags & IOCB_HIPRI) ||
		    !blk_poll(bdev_get_queue(bdev), qc, true))
			io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	bio_for_each_segment_all(bvec, &bio, i, iter_all) {
		if (should_dirty && !PageCompound(bvec->bv_page))
			set_page_dirty_lock(bvec->bv_page);
		if (!bio_flagged(&bio, BIO_NO_PAGE_REF))
			put_page(bvec->bv_page);
	}

	if (unlikely(bio.bi_status))
		ret = blk_status_to_errno(bio.bi_status);

out:
	if (vecs != inline_vecs)
		kfree(vecs);

	bio_uninit(&bio);

	return ret;
}

struct blkdev_dio {
	union {
		struct kiocb		*iocb;
		struct task_struct	*waiter;
	};
	size_t			size;
	atomic_t		ref;
	bool			multi_bio : 1;
	bool			should_dirty : 1;
	bool			is_sync : 1;
	struct bio		bio;
};

static struct bio_set blkdev_dio_pool;

static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
{
	struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
	struct request_queue *q = bdev_get_queue(bdev);

	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
}

static void blkdev_bio_end_io(struct bio *bio)
{
	struct blkdev_dio *dio = bio->bi_private;
	bool should_dirty = dio->should_dirty;

	if (bio->bi_status && !dio->bio.bi_status)
		dio->bio.bi_status = bio->bi_status;

	if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
		if (!dio->is_sync) {
			struct kiocb *iocb = dio->iocb;
			ssize_t ret;

			if (likely(!dio->bio.bi_status)) {
				ret = dio->size;
				iocb->ki_pos += ret;
			} else {
				ret = blk_status_to_errno(dio->bio.bi_status);
			}

			dio->iocb->ki_complete(iocb, ret, 0);
			if (dio->multi_bio)
				bio_put(&dio->bio);
		} else {
			struct task_struct *waiter = dio->waiter;

			WRITE_ONCE(dio->waiter, NULL);
			blk_wake_io_task(waiter);
		}
	}

	if (should_dirty) {
		bio_check_pages_dirty(bio);
	} else {
		if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
			struct bvec_iter_all iter_all;
			struct bio_vec *bvec;
			int i;

			bio_for_each_segment_all(bvec, bio, i, iter_all)
				put_page(bvec->bv_page);
		}
		bio_put(bio);
	}
}

static ssize_t
__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = bdev_file_inode(file);
	struct block_device *bdev = I_BDEV(inode);
	struct blk_plug plug;
	struct blkdev_dio *dio;
	struct bio *bio;
	bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
	loff_t pos = iocb->ki_pos;
	blk_qc_t qc = BLK_QC_T_NONE;
	int ret = 0;

	if ((pos | iov_iter_alignment(iter)) &
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);

	dio = container_of(bio, struct blkdev_dio, bio);
	dio->is_sync = is_sync = is_sync_kiocb(iocb);
	if (dio->is_sync) {
		dio->waiter = current;
		bio_get(bio);
	} else {
		dio->iocb = iocb;
	}

	dio->size = 0;
	dio->multi_bio = false;
	dio->should_dirty = is_read && iter_is_iovec(iter);

	/*
	 * Don't plug for HIPRI/polled IO, as those should go straight
	 * to issue.
	 */
	if (!is_poll)
		blk_start_plug(&plug);

	for (;;) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = pos >> 9;
		bio->bi_write_hint = iocb->ki_hint;
		bio->bi_private = dio;
		bio->bi_end_io = blkdev_bio_end_io;
		bio->bi_ioprio = iocb->ki_ioprio;

		ret = bio_iov_iter_get_pages(bio, iter);
		if (unlikely(ret)) {
			bio->bi_status = BLK_STS_IOERR;
			bio_endio(bio);
			break;
		}

		if (is_read) {
			bio->bi_opf = REQ_OP_READ;
			if (dio->should_dirty)
				bio_set_pages_dirty(bio);
		} else {
			bio->bi_opf = dio_bio_write_op(iocb);
			task_io_account_write(bio->bi_iter.bi_size);
		}

		dio->size += bio->bi_iter.bi_size;
		pos += bio->bi_iter.bi_size;

		nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
		if (!nr_pages) {
			bool polled = false;

			if (iocb->ki_flags & IOCB_HIPRI) {
				bio_set_polled(bio, iocb);
				polled = true;
			}

			qc = submit_bio(bio);

			if (polled)
				WRITE_ONCE(iocb->ki_cookie, qc);
			break;
		}

		if (!dio->multi_bio) {
			/*
			 * AIO needs an extra reference to ensure the dio
			 * structure which is embedded into the first bio
			 * stays around.
			 */
			if (!is_sync)
				bio_get(bio);
			dio->multi_bio = true;
			atomic_set(&dio->ref, 2);
		} else {
			atomic_inc(&dio->ref);
		}

		submit_bio(bio);
		bio = bio_alloc(GFP_KERNEL, nr_pages);
	}

	if (!is_poll)
		blk_finish_plug(&plug);

	if (!is_sync)
		return -EIOCBQUEUED;

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(dio->waiter))
			break;

		if (!(iocb->ki_flags & IOCB_HIPRI) ||
		    !blk_poll(bdev_get_queue(bdev), qc, true))
			io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	if (!ret)
		ret = blk_status_to_errno(dio->bio.bi_status);
	if (likely(!ret))
		ret = dio->size;

	bio_put(&dio->bio);
	return ret;
}

static ssize_t
blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	int nr_pages;

	nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1);
	if (!nr_pages)
		return 0;
	if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
		return __blkdev_direct_IO_simple(iocb, iter, nr_pages);

	return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES));
}

static __init int blkdev_init(void)
{
	return bioset_init(&blkdev_dio_pool, 4,
				offsetof(struct blkdev_dio, bio),
				BIOSET_NEED_BVECS);
}
module_init(blkdev_init);

int __sync_blockdev(struct block_device *bdev, int wait)
{
	if (!bdev)
		return 0;
	if (!wait)
		return filemap_flush(bdev->bd_inode->i_mapping);
	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}

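/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping.  Does not take the superblock lock.
 */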
int sync_blockdev(struct block_device *bdev)
{
	return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);

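/*
 * Write out and wait upon all dirty data associated with this
 * device.  Filesystem data as well as the underlying block
 * device.  Takes the superblock lock.
 */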
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	if (sb) {
		int res = sync_filesystem(sb);
		drop_super(sb);
		return res;
	}
	return sync_blockdev(bdev);
}
EXPORT_SYMBOL(fsync_bdev);

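/**
 * freeze_bdev - lock a filesystem and force it into a consistent state
 * @bdev:	blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can unfreeze the frozen filesystem when multiple freeze
 * requests arrive simultaneously.  It counts up in freeze_bdev() and counts
 * down in thaw_bdev(); the filesystem is only thawed when it reaches 0.
 */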
struct super_block *freeze_bdev(struct block_device *bdev)
{
	struct super_block *sb;
	int error = 0;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (++bdev->bd_fsfreeze_count > 1) {
		/*
		 * We don't even need to grab a reference - the first call
		 * to freeze_bdev grabs an active reference and only the last
		 * thaw_bdev drops it.
		 */
		sb = get_super(bdev);
		if (sb)
			drop_super(sb);
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return sb;
	}

	sb = get_active_super(bdev);
	if (!sb)
		goto out;
	if (sb->s_op->freeze_super)
		error = sb->s_op->freeze_super(sb);
	else
		error = freeze_super(sb);
	if (error) {
		deactivate_super(sb);
		bdev->bd_fsfreeze_count--;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return ERR_PTR(error);
	}
	deactivate_super(sb);
 out:
	sync_blockdev(bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return sb;	/* thaw_bdev releases s->s_umount */
}
EXPORT_SYMBOL(freeze_bdev);

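/**
 * thaw_bdev - unlock filesystem
 * @bdev:	blockdevice to unlock
 * @sb:		associated superblock
 *
 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
 */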
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	int error = -EINVAL;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (!bdev->bd_fsfreeze_count)
		goto out;

	error = 0;
	if (--bdev->bd_fsfreeze_count > 0)
		goto out;

	if (!sb)
		goto out;

	if (sb->s_op->thaw_super)
		error = sb->s_op->thaw_super(sb);
	else
		error = thaw_super(sb);
	if (error)
		bdev->bd_fsfreeze_count++;
out:
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return error;
}
EXPORT_SYMBOL(thaw_bdev);

static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file * file, struct page * page)
{
	return block_read_full_page(page, blkdev_get_block);
}

static int blkdev_readpages(struct file *file, struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	return block_write_begin(mapping, pos, len, flags, pagep,
				 blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	unlock_page(page);
	put_page(page);

	return ret;
}

/*
 * private llseek:
 * for a block special file file_inode(file)->i_size is zero
 * so we compute the size by hand (just as in block_read/write above)
 */
static loff_t block_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t retval;

	inode_lock(bd_inode);
	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
	inode_unlock(bd_inode);
	return retval;
}

int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *bd_inode = bdev_file_inode(filp);
	struct block_device *bdev = I_BDEV(bd_inode);
	int error;

	error = file_write_and_wait_range(filp, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
	if (error == -EOPNOTSUPP)
		error = 0;

	return error;
}
EXPORT_SYMBOL(blkdev_fsync);

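/**
 * bdev_read_page() - Start reading a page from a block device
 * @bdev: The device to read the page from
 * @sector: The offset on the device to read the page to (need not be aligned)
 * @page: The page to read
 *
 * On entry, the page should be locked.  It will be unlocked when the page
 * has been read.  If the block driver implements rw_page synchronously,
 * that will be true on exit from this function, but it need not be.
 *
 * Errors returned by this function are usually "soft", eg out of memory, or
 * queue full; callers should try a different route to read this page rather
 * than propagate an error back up the stack.
 *
 * Return: negative errno if an error occurs, 0 if submission was successful.
 */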
int bdev_read_page(struct block_device *bdev, sector_t sector,
			struct page *page)
{
	const struct block_device_operations *ops = bdev->bd_disk->fops;
	int result = -EOPNOTSUPP;

	if (!ops->rw_page || bdev_get_integrity(bdev))
		return result;

	result = blk_queue_enter(bdev->bd_queue, 0);
	if (result)
		return result;
	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
			      REQ_OP_READ);
	blk_queue_exit(bdev->bd_queue);
	return result;
}
EXPORT_SYMBOL_GPL(bdev_read_page);

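/**
 * bdev_write_page() - Start writing a page to a block device
 * @bdev: The device to write the page to
 * @sector: The offset on the device to write the page to (need not be aligned)
 * @page: The page to write
 * @wbc: The writeback_control for the write
 *
 * On entry, the page should be locked and not currently under writeback.
 * On exit, if the write started successfully, the page will be unlocked and
 * under writeback.  If the write failed already (eg the driver failed to
 * queue the page to the device), the page will still be locked.  If the
 * caller is a ->writepage implementation, it will need to unlock the page.
 *
 * Errors returned by this function are usually "soft", eg out of memory, or
 * queue full; callers should try a different route to write this page rather
 * than propagate an error back up the stack.
 *
 * Return: negative errno if an error occurs, 0 if submission was successful.
 */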
int bdev_write_page(struct block_device *bdev, sector_t sector,
			struct page *page, struct writeback_control *wbc)
{
	int result;
	const struct block_device_operations *ops = bdev->bd_disk->fops;

	if (!ops->rw_page || bdev_get_integrity(bdev))
		return -EOPNOTSUPP;
	result = blk_queue_enter(bdev->bd_queue, 0);
	if (result)
		return result;

	set_page_writeback(page);
	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
			      REQ_OP_WRITE);
	if (result) {
		end_page_writeback(page);
	} else {
		clean_page_buffers(page);
		unlock_page(page);
	}
	blk_queue_exit(bdev->bd_queue);
	return result;
}
EXPORT_SYMBOL_GPL(bdev_write_page);

/*
 * pseudo-fs
 */

static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
static struct kmem_cache * bdev_cachep __read_mostly;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

static void bdev_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	struct bdev_inode *bdi = BDEV_I(inode);

	kmem_cache_free(bdev_cachep, bdi);
}

static void bdev_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, bdev_i_callback);
}

static void init_once(void *foo)
{
	struct bdev_inode *ei = (struct bdev_inode *) foo;
	struct block_device *bdev = &ei->bdev;

	memset(bdev, 0, sizeof(*bdev));
	mutex_init(&bdev->bd_mutex);
	INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
	INIT_LIST_HEAD(&bdev->bd_holder_disks);
#endif
	bdev->bd_bdi = &noop_backing_dev_info;
	inode_init_once(&ei->vfs_inode);
	/* Initialize mutex for freeze. */
	mutex_init(&bdev->bd_fsfreeze_mutex);
}

static void bdev_evict_inode(struct inode *inode)
{
	struct block_device *bdev = &BDEV_I(inode)->bdev;
	truncate_inode_pages_final(&inode->i_data);
	invalidate_inode_buffers(inode);
	clear_inode(inode);
	spin_lock(&bdev_lock);
	list_del_init(&bdev->bd_list);
	spin_unlock(&bdev_lock);
	/* Detach inode from wb early as bdi_put() may free bdi->wb */
	inode_detach_wb(inode);
	if (bdev->bd_bdi != &noop_backing_dev_info) {
		bdi_put(bdev->bd_bdi);
		bdev->bd_bdi = &noop_backing_dev_info;
	}
}

static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.destroy_inode = bdev_destroy_inode,
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};

static struct dentry *bd_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	struct dentry *dent;
	dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
	if (!IS_ERR(dent))
		dent->d_sb->s_iflags |= SB_I_CGROUPWB;
	return dent;
}

static struct file_system_type bd_type = {
	.name		= "bdev",
	.mount		= bd_mount,
	.kill_sb	= kill_anon_super,
};

struct super_block *blockdev_superblock __read_mostly;
EXPORT_SYMBOL_GPL(blockdev_superblock);

void __init bdev_cache_init(void)
{
	int err;
	static struct vfsmount *bd_mnt;

	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
			init_once);
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");
	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
}

/*
 * Most likely _very_ bad one - but then it's hardly critical for small
 * /dev and can be fixed when somebody will need really large one.
 * Keep in mind that it will be fed through icache hash function too.
 */
static inline unsigned long hash(dev_t dev)
{
	return MAJOR(dev)+MINOR(dev);
}

static int bdev_test(struct inode *inode, void *data)
{
	return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
}

static int bdev_set(struct inode *inode, void *data)
{
	BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
	return 0;
}

static LIST_HEAD(all_bdevs);

/*
 * If there is a bdev inode for this device, unhash it so that it gets evicted
 * as soon as last inode reference is dropped.
 */
void bdev_unhash_inode(dev_t dev)
{
	struct inode *inode;

	inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
	if (inode) {
		remove_inode_hash(inode);
		iput(inode);
	}
}

struct block_device *bdget(dev_t dev)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = iget5_locked(blockdev_superblock, hash(dev),
			bdev_test, bdev_set, &dev);

	if (!inode)
		return NULL;

	bdev = &BDEV_I(inode)->bdev;

	if (inode->i_state & I_NEW) {
		bdev->bd_contains = NULL;
		bdev->bd_super = NULL;
		bdev->bd_inode = inode;
		bdev->bd_block_size = i_blocksize(inode);
		bdev->bd_part_count = 0;
		bdev->bd_invalidated = 0;
		inode->i_mode = S_IFBLK;
		inode->i_rdev = dev;
		inode->i_bdev = bdev;
		inode->i_data.a_ops = &def_blk_aops;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		spin_lock(&bdev_lock);
		list_add(&bdev->bd_list, &all_bdevs);
		spin_unlock(&bdev_lock);
		unlock_new_inode(inode);
	}
	return bdev;
}

EXPORT_SYMBOL(bdget);

/**
 * bdgrab -- Grab a reference to an already referenced block device
 * @bdev:	Block device to grab a reference to.
 */
struct block_device *bdgrab(struct block_device *bdev)
{
	ihold(bdev->bd_inode);
	return bdev;
}
EXPORT_SYMBOL(bdgrab);

long nr_blockdev_pages(void)
{
	struct block_device *bdev;
	long ret = 0;
	spin_lock(&bdev_lock);
	list_for_each_entry(bdev, &all_bdevs, bd_list) {
		ret += bdev->bd_inode->i_mapping->nrpages;
	}
	spin_unlock(&bdev_lock);
	return ret;
}

void bdput(struct block_device *bdev)
{
	iput(bdev->bd_inode);
}

EXPORT_SYMBOL(bdput);

static struct block_device *bd_acquire(struct inode *inode)
{
	struct block_device *bdev;

	spin_lock(&bdev_lock);
	bdev = inode->i_bdev;
	if (bdev && !inode_unhashed(bdev->bd_inode)) {
		bdgrab(bdev);
		spin_unlock(&bdev_lock);
		return bdev;
	}
	spin_unlock(&bdev_lock);

	/*
	 * i_bdev references block device inode that was already shut down
	 * (corresponding device got removed).  Remove the reference and look
	 * up block device inode again just in case new device got
	 * reestablished under the same device number.
	 */
	if (bdev)
		bd_forget(inode);

	bdev = bdget(inode->i_rdev);
	if (bdev) {
		spin_lock(&bdev_lock);
		if (!inode->i_bdev) {
			/*
			 * We take an additional reference to bd_inode,
			 * and it's released in clear_inode() of inode.
			 * So, we can access it via ->i_mapping always
			 * without igrab().
			 */
			bdgrab(bdev);
			inode->i_bdev = bdev;
			inode->i_mapping = bdev->bd_inode->i_mapping;
		}
		spin_unlock(&bdev_lock);
	}
	return bdev;
}

/* Call when you free inode */
void bd_forget(struct inode *inode)
{
	struct block_device *bdev = NULL;

	spin_lock(&bdev_lock);
	if (!sb_is_blkdev_sb(inode->i_sb))
		bdev = inode->i_bdev;
	inode->i_bdev = NULL;
	inode->i_mapping = &inode->i_data;
	spin_unlock(&bdev_lock);

	if (bdev)
		bdput(bdev);
}

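/**
 * bd_may_claim - test whether a block device can be claimed
 * @bdev: block device of interest
 * @whole: whole block device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Test whether @bdev can be claimed by @holder.  The caller must hold
 * bdev_lock.
 *
 * RETURNS:
 * %true if @bdev can be claimed, %false otherwise.
 */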
static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
			 void *holder)
{
	if (bdev->bd_holder == holder)
		return true;	 /* already a holder */
	else if (bdev->bd_holder != NULL)
		return false;	 /* held by someone else */
	else if (whole == bdev)
		return true;	 /* is a whole device which isn't held */

	else if (whole->bd_holder == bd_may_claim)
		return true;	 /* is a partition of a device that is being partitioned */
	else if (whole->bd_holder != NULL)
		return false;	 /* is a partition of a held device */
	else
		return true;	 /* is a partition of an un-held device */
}

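/**
 * bd_prepare_to_claim - prepare to claim a block device
 * @bdev: block device of interest
 * @whole: the whole device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Prepare to claim @bdev.  This function fails if @bdev is already
 * claimed by another holder and waits if another claiming is in
 * progress.  This function doesn't actually claim.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).  Might release bdev_lock, sleep and regrab
 * it multiple times.
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */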
static int bd_prepare_to_claim(struct block_device *bdev,
			       struct block_device *whole, void *holder)
{
retry:
	/* if someone else claimed, fail */
	if (!bd_may_claim(bdev, whole, holder))
		return -EBUSY;

	/* if claiming is already in progress, wait for it to finish */
	if (whole->bd_claiming) {
		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
		DEFINE_WAIT(wait);

		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&bdev_lock);
		schedule();
		finish_wait(wq, &wait);
		spin_lock(&bdev_lock);
		goto retry;
	}

	/* yay, all mine */
	return 0;
}

static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
{
	struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);

	if (!disk)
		return NULL;
	/*
	 * Now that we hold gendisk reference we make sure bdev we looked up is
	 * not stale.  If it is, it means device got removed and created before
	 * we looked up gendisk and we fail open in such case.  Associating
	 * unhashed bdev with newly created gendisk could lead to two bdevs
	 * (and thus two independent caches) being associated with one device
	 * which is bad.
	 */
	if (inode_unhashed(bdev->bd_inode)) {
		put_disk_and_module(disk);
		return NULL;
	}
	return disk;
}

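/**
 * bd_start_claiming - start claiming a block device
 * @bdev: block device of interest
 * @holder: holder trying to claim @bdev
 *
 * @bdev is about to be opened exclusively.  Check @bdev can be opened
 * exclusively and mark that an exclusive open is in progress.  On
 * success the caller finishes or aborts the claim once the open
 * attempt completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to the block device containing @bdev on success, ERR_PTR()
 * value on failure.
 */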
static struct block_device *bd_start_claiming(struct block_device *bdev,
					      void *holder)
{
	struct gendisk *disk;
	struct block_device *whole;
	int partno, err;

	might_sleep();

	/*
	 * @bdev might not have been initialized properly yet, look up
	 * and grab the outer block device the hard way.
	 */
	disk = bdev_get_gendisk(bdev, &partno);
	if (!disk)
		return ERR_PTR(-ENXIO);

	/*
	 * Normally, @bdev should equal what's returned from bdget_disk()
	 * if partno is 0; however, some drivers (floppy) use multiple
	 * bdev's for the same physical device and @bdev may be one of the
	 * aliases.  Keep @bdev if partno is 0.  This means claimer
	 * tracking is broken for those devices but it has always been that
	 * way.
	 */
	if (partno)
		whole = bdget_disk(disk, 0);
	else
		whole = bdgrab(bdev);

	put_disk_and_module(disk);
	if (!whole)
		return ERR_PTR(-ENOMEM);

	/* prepare to claim, if successful, mark claiming in progress */
	spin_lock(&bdev_lock);

	err = bd_prepare_to_claim(bdev, whole, holder);
	if (err == 0) {
		whole->bd_claiming = holder;
		spin_unlock(&bdev_lock);
		return whole;
	} else {
		spin_unlock(&bdev_lock);
		bdput(whole);
		return ERR_PTR(err);
	}
}

#ifdef CONFIG_SYSFS
struct bd_holder_disk {
	struct list_head	list;
	struct gendisk		*disk;
	int			refcnt;
};

static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
						  struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
		if (holder->disk == disk)
			return holder;
	return NULL;
}

static int add_symlink(struct kobject *from, struct kobject *to)
{
	return sysfs_create_link(from, to, kobject_name(to));
}

static void del_symlink(struct kobject *from, struct kobject *to)
{
	sysfs_remove_link(from, kobject_name(to));
}

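/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * This function creates the following sysfs symlinks:
 *
 * - from "slaves" directory of the holder @disk to the claimed @bdev
 * - from "holders" directory of the @bdev to the holder @disk
 *
 * The caller must have claimed @bdev before calling this function and
 * ensure that both @bdev and @disk are valid during the creation and
 * lifetime of these symlinks.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */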
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;
	int ret = 0;

	mutex_lock(&bdev->bd_mutex);

	WARN_ON_ONCE(!bdev->bd_holder);

	/* FIXME: remove the following once add_disk() handles errors */
	if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
		goto out_unlock;

	holder = bd_find_holder_disk(bdev, disk);
	if (holder) {
		holder->refcnt++;
		goto out_unlock;
	}

	holder = kzalloc(sizeof(*holder), GFP_KERNEL);
	if (!holder) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	INIT_LIST_HEAD(&holder->list);
	holder->disk = disk;
	holder->refcnt = 1;

	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
	if (ret)
		goto out_free;

	ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
	if (ret)
		goto out_del;
	/*
	 * bdev could be deleted beneath us which would implicitly destroy
	 * the holder directory.  Hold on to it.
	 */
	kobject_get(bdev->bd_part->holder_dir);

	list_add(&holder->list, &bdev->bd_holder_disks);
	goto out_unlock;

out_del:
	del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
out_free:
	kfree(holder);
out_unlock:
	mutex_unlock(&bdev->bd_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);

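/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * CONTEXT:
 * Might sleep.
 */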
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	mutex_lock(&bdev->bd_mutex);

	holder = bd_find_holder_disk(bdev, disk);

	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
		del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
		del_symlink(bdev->bd_part->holder_dir,
			    &disk_to_dev(disk)->kobj);
		kobject_put(bdev->bd_part->holder_dir);
		list_del_init(&holder->list);
		kfree(holder);
	}

	mutex_unlock(&bdev->bd_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif

/*
 * flush_disk - invalidates all buffer-cache entries on a disk
 *
 * @bdev:	struct block device to be flushed
 * @kill_dirty: flag to guide handling of dirty inodes
 *
 * Invalidates all buffer-cache entries on a disk.  It should be called
 * when a disk has been changed -- either by a media change or online
 * resize.
 */
static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
	if (__invalidate_device(bdev, kill_dirty)) {
		printk(KERN_WARNING "VFS: busy inodes on changed media or "
		       "resized disk %s\n",
		       bdev->bd_disk ? bdev->bd_disk->disk_name : "");
	}

	if (!bdev->bd_disk)
		return;
	if (disk_part_scan_enabled(bdev->bd_disk))
		bdev->bd_invalidated = 1;
}

/**
 * check_disk_size_change - checks for disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 * @verbose: if %true log a message about a size change if there is any
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs.  When shrinking the bdev size, its all caches
 * are freed.
 */
void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
		bool verbose)
{
	loff_t disk_size, bdev_size;

	disk_size = (loff_t)get_capacity(disk) << 9;
	bdev_size = i_size_read(bdev->bd_inode);
	if (disk_size != bdev_size) {
		if (verbose) {
			printk(KERN_INFO
			       "%s: detected capacity change from %lld to %lld\n",
			       disk->disk_name, bdev_size, disk_size);
		}
		i_size_write(bdev->bd_inode, disk_size);
		if (bdev_size > disk_size)
			flush_disk(bdev, false);
	}
}

/**
 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
 * @disk: struct gendisk to be revalidated
 *
 * This routine is a wrapper for lower-level driver's revalidate_disk
 * call-backs.  It is used to do common pre and post operations needed
 * for all revalidate_disk operations.
 */
int revalidate_disk(struct gendisk *disk)
{
	struct block_device *bdev;
	int ret = 0;

	if (disk->fops->revalidate_disk)
		ret = disk->fops->revalidate_disk(disk);
	bdev = bdget_disk(disk, 0);
	if (!bdev)
		return ret;

	mutex_lock(&bdev->bd_mutex);
	check_disk_size_change(disk, bdev, ret == 0);
	bdev->bd_invalidated = 0;
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	return ret;
}
EXPORT_SYMBOL(revalidate_disk);

/*
 * This routine checks whether a removable media has been changed,
 * and invalidates all buffer-cache-entries in that case.  This
 * is a relatively slow routine, so we have to try to minimize using
 * it.  Thus it is called only upon a 'mount' or 'open'.
 */
int check_disk_change(struct block_device *bdev)
{
	struct gendisk *disk = bdev->bd_disk;
	const struct block_device_operations *bdops = disk->fops;
	unsigned int events;

	events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
				   DISK_EVENT_EJECT_REQUEST);
	if (!(events & DISK_EVENT_MEDIA_CHANGE))
		return 0;

	flush_disk(bdev, true);
	if (bdops->revalidate_disk)
		bdops->revalidate_disk(bdev->bd_disk);
	return 1;
}

EXPORT_SYMBOL(check_disk_change);

void bd_set_size(struct block_device *bdev, loff_t size)
{
	inode_lock(bdev->bd_inode);
	i_size_write(bdev->bd_inode, size);
	inode_unlock(bdev->bd_inode);
}
EXPORT_SYMBOL(bd_set_size);

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

/*
 * bd_mutex locking:
 *
 *  mutex_lock(part->bd_mutex)
 *    mutex_lock_nested(whole->bd_mutex, 1)
 */
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	int ret;
	int partno;
	int perm = 0;
	bool first_open = false;

	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;
	/* check device-cgroup permissions before the real open */
	if (!for_part) {
		ret = devcgroup_inode_permission(bdev->bd_inode, perm);
		if (ret != 0) {
			bdput(bdev);
			return ret;
		}
	}

 restart:

	ret = -ENXIO;
	disk = bdev_get_gendisk(bdev, &partno);
	if (!disk)
		goto out;

	disk_block_events(disk);
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		first_open = true;
		bdev->bd_disk = disk;
		bdev->bd_queue = disk->queue;
		bdev->bd_contains = bdev;
		bdev->bd_partno = partno;

		if (!partno) {
			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			ret = 0;
			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				if (ret == -ERESTARTSYS) {
					/* Lost a race with 'disk' being
					 * deleted, try again.  See md.c
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					bdev->bd_disk = NULL;
					bdev->bd_queue = NULL;
					mutex_unlock(&bdev->bd_mutex);
					disk_unblock_events(disk);
					put_disk_and_module(disk);
					goto restart;
				}
			}

			if (!ret) {
				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
				set_init_blocksize(bdev);
			}

			/*
			 * If the device is invalidated, rescan partition
			 * if open succeeded or failed with -ENOMEDIUM.
			 * The latter is necessary to prevent ghost
			 * partitions on a removed medium.
			 */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(disk, bdev);
			}

			if (ret)
				goto out_clear;
		} else {
			struct block_device *whole;
			whole = bdget_disk(disk, 0);
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
			set_init_blocksize(bdev);
		}

		if (bdev->bd_bdi == &noop_backing_dev_info)
			bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
	} else {
		if (bdev->bd_contains == bdev) {
			ret = 0;
			if (bdev->bd_disk->fops->open)
				ret = bdev->bd_disk->fops->open(bdev, mode);
			/* the same as first opener case, read comment there */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(bdev->bd_disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(bdev->bd_disk, bdev);
			}
			if (ret)
				goto out_unlock_bdev;
		}
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	/* only one opener holds refs to the module and disk */
	if (!first_open)
		put_disk_and_module(disk);
	return 0;

 out_clear:
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	bdev->bd_queue = NULL;
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	put_disk_and_module(disk);
 out:
	bdput(bdev);

	return ret;
}

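/**
 * blkdev_get - open a block device
 * @bdev: block_device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open @bdev with @mode.  If @mode includes %FMODE_EXCL, @bdev is
 * open with exclusive access.  Specifying %FMODE_EXCL with %NULL
 * @holder is invalid.  Exclusive opens may nest for the same @holder.
 *
 * On success, the reference count of @bdev is unchanged.  On failure,
 * @bdev is put.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */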
int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
	struct block_device *whole = NULL;
	int res;

	WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);

	if ((mode & FMODE_EXCL) && holder) {
		whole = bd_start_claiming(bdev, holder);
		if (IS_ERR(whole)) {
			bdput(bdev);
			return PTR_ERR(whole);
		}
	}

	res = __blkdev_get(bdev, mode, 0);

	if (whole) {
		struct gendisk *disk = whole->bd_disk;

		/* finish claiming */
		mutex_lock(&bdev->bd_mutex);
		spin_lock(&bdev_lock);

		if (!res) {
			BUG_ON(!bd_may_claim(bdev, whole, holder));
			/*
			 * Note that for a whole device bd_holders will be
			 * incremented twice, and bd_holder will be set to
			 * bd_may_claim before being set to holder.
			 */
			whole->bd_holders++;
			whole->bd_holder = bd_may_claim;
			bdev->bd_holders++;
			bdev->bd_holder = holder;
		}

		/* tell others that we're done */
		BUG_ON(whole->bd_claiming != holder);
		whole->bd_claiming = NULL;
		wake_up_bit(&whole->bd_claiming, 0);

		spin_unlock(&bdev_lock);

		/*
		 * Block event polling for write claims if requested.  Any
		 * write holder makes the write_holder state stick until
		 * all are released.  This is good enough and tracking
		 * individual writeable reference is too fragile given the
		 * way @mode is used in blkdev_get/put().
		 */
		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
		    (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
			bdev->bd_write_holder = true;
			disk_block_events(disk);
		}

		mutex_unlock(&bdev->bd_mutex);
		bdput(whole);
	}

	return res;
}
EXPORT_SYMBOL(blkdev_get);

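/**
 * blkdev_get_by_path - open a block device by name
 * @path: path to the block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by the device file at @path.  @mode
 * and @holder are identical to blkdev_get().
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */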
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
					void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = lookup_bdev(path);
	if (IS_ERR(bdev))
		return bdev;

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
		blkdev_put(bdev, mode);
		return ERR_PTR(-EACCES);
	}

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_path);

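/**
 * blkdev_get_by_dev - open a block device by device number
 * @dev: device number of block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by device number @dev.  @mode and
 * @holder are identical to blkdev_get().
 *
 * Use this only when all you are given is a device number; everything
 * else should use blkdev_get_by_path().
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */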
struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = bdget(dev);
	if (!bdev)
		return ERR_PTR(-ENOMEM);

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_dev);

static int blkdev_open(struct inode * inode, struct file * filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly.  Some mkfs
	 * binary needs it.
	 */
	filp->f_flags |= O_LARGEFILE;

	filp->f_mode |= FMODE_NOWAIT;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = bd_acquire(inode);
	if (bdev == NULL)
		return -ENOMEM;

	filp->f_mapping = bdev->bd_inode->i_mapping;
	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);

	return blkdev_get(bdev, filp->f_mode, filp);
}

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk = bdev->bd_disk;
	struct block_device *victim = NULL;

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (for_part)
		bdev->bd_part_count--;

	if (!--bdev->bd_openers) {
		WARN_ON_ONCE(bdev->bd_holders);
		sync_blockdev(bdev);
		kill_bdev(bdev);

		bdev_write_inode(bdev);
	}
	if (bdev->bd_contains == bdev) {
		if (disk->fops->release)
			disk->fops->release(disk, mode);
	}
	if (!bdev->bd_openers) {
		disk_put_part(bdev->bd_part);
		bdev->bd_part = NULL;
		bdev->bd_disk = NULL;
		if (bdev != bdev->bd_contains)
			victim = bdev->bd_contains;
		bdev->bd_contains = NULL;

		put_disk_and_module(disk);
	}
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	if (victim)
		__blkdev_put(victim, mode, 1);
}

void blkdev_put(struct block_device *bdev, fmode_t mode)
{
	mutex_lock(&bdev->bd_mutex);

	if (mode & FMODE_EXCL) {
		bool bdev_free;

		/*
		 * Release a claim on the device.  The holder fields
		 * are protected with bdev_lock.  bd_mutex is to
		 * synchronize disk_holder unlinking.
		 */
		spin_lock(&bdev_lock);

		WARN_ON_ONCE(--bdev->bd_holders < 0);
		WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);

		/* bd_contains might point to self, check in a separate step */
		if ((bdev_free = !bdev->bd_holders))
			bdev->bd_holder = NULL;
		if (!bdev->bd_contains->bd_holders)
			bdev->bd_contains->bd_holder = NULL;

		spin_unlock(&bdev_lock);

		/*
		 * If this was the last claim, remove holder link and
		 * unblock event polling if it was a write holder.
		 */
		if (bdev_free && bdev->bd_write_holder) {
			disk_unblock_events(bdev->bd_disk);
			bdev->bd_write_holder = false;
		}
	}

	/*
	 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
	 * event.  This is to ensure detection of media removal commanded
	 * from userland - e.g. eject(1).
	 */
	disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);

	mutex_unlock(&bdev->bd_mutex);

	__blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);

static int blkdev_close(struct inode * inode, struct file * filp)
{
	struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
	blkdev_put(bdev, filp->f_mode);
	return 0;
}

static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
	fmode_t mode = file->f_mode;

	/*
	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
	 * to update it before every ioctl.
	 */
	if (file->f_flags & O_NDELAY)
		mode |= FMODE_NDELAY;
	else
		mode &= ~FMODE_NDELAY;

	return blkdev_ioctl(bdev, mode, cmd, arg);
}

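/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */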
ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t size = i_size_read(bd_inode);
	struct blk_plug plug;
	ssize_t ret;

	if (bdev_read_only(I_BDEV(bd_inode)))
		return -EPERM;

	if (!iov_iter_count(from))
		return 0;

	if (iocb->ki_pos >= size)
		return -ENOSPC;

	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
		return -EOPNOTSUPP;

	iov_iter_truncate(from, size - iocb->ki_pos);

	blk_start_plug(&plug);
	ret = __generic_file_write_iter(iocb, from);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	blk_finish_plug(&plug);
	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_write_iter);

ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t size = i_size_read(bd_inode);
	loff_t pos = iocb->ki_pos;

	if (pos >= size)
		return 0;

	size -= pos;
	iov_iter_truncate(to, size);
	return generic_file_read_iter(iocb, to);
}
EXPORT_SYMBOL_GPL(blkdev_read_iter);

/*
 * Try to release a page associated with block device when the system
 * is under memory pressure.
 */
static int blkdev_releasepage(struct page *page, gfp_t wait)
{
	struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;

	if (super && super->s_op->bdev_try_to_free_page)
		return super->s_op->bdev_try_to_free_page(super, page, wait);

	return try_to_free_buffers(page);
}

static int blkdev_writepages(struct address_space *mapping,
			     struct writeback_control *wbc)
{
	return generic_writepages(mapping, wbc);
}

static const struct address_space_operations def_blk_aops = {
	.readpage	= blkdev_readpage,
	.readpages	= blkdev_readpages,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= blkdev_writepages,
	.releasepage	= blkdev_releasepage,
	.direct_IO	= blkdev_direct_IO,
	.migratepage	= buffer_migrate_page_norefs,
	.is_dirty_writeback = buffer_check_dirty_writeback,
};

#define	BLKDEV_FALLOC_FL_SUPPORTED					\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)

static long blkdev_fallocate(struct file *file, int mode, loff_t start,
			     loff_t len)
{
	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
	struct address_space *mapping;
	loff_t end = start + len - 1;
	loff_t isize;
	int error;

	/* Fail if we don't recognize the flags. */
	if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	/* Don't go off the end of the device. */
	isize = i_size_read(bdev->bd_inode);
	if (start >= isize)
		return -EINVAL;
	if (end >= isize) {
		if (mode & FALLOC_FL_KEEP_SIZE) {
			len = isize - start;
			end = start + len - 1;
		} else
			return -EINVAL;
	}

	/*
	 * Don't allow IO that isn't aligned to logical block size.
	 */
	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	/* Invalidate the page cache, including dirty pages. */
	mapping = bdev->bd_inode->i_mapping;
	truncate_inode_pages_range(mapping, start, end);

	switch (mode) {
	case FALLOC_FL_ZERO_RANGE:
	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
		error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, 0);
		break;
	default:
		return -EOPNOTSUPP;
	}
	if (error)
		return error;

	/*
	 * Invalidate again; if someone wandered in and dirtied a page,
	 * the caller will be given -EBUSY.  The third argument is
	 * inclusive, so the rounding here is safe.
	 */
	return invalidate_inode_pages2_range(mapping,
					     start >> PAGE_SHIFT,
					     end >> PAGE_SHIFT);
}

const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= block_llseek,
	.read_iter	= blkdev_read_iter,
	.write_iter	= blkdev_write_iter,
	.iopoll		= blkdev_iopoll,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= blkdev_fallocate,
};

int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
	int res;
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	res = blkdev_ioctl(bdev, 0, cmd, arg);
	set_fs(old_fs);
	return res;
}

EXPORT_SYMBOL(ioctl_by_bdev);

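/**
 * lookup_bdev  - lookup a struct block_device by name
 * @pathname:	special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
 * otherwise.
 */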
struct block_device *lookup_bdev(const char *pathname)
{
	struct block_device *bdev;
	struct inode *inode;
	struct path path;
	int error;

	if (!pathname || !*pathname)
		return ERR_PTR(-EINVAL);

	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (error)
		return ERR_PTR(error);

	inode = d_backing_inode(path.dentry);
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))
		goto fail;
	error = -EACCES;
	if (!may_open_dev(&path))
		goto fail;
	error = -ENOMEM;
	bdev = bd_acquire(inode);
	if (!bdev)
		goto fail;
out:
	path_put(&path);
	return bdev;
fail:
	bdev = ERR_PTR(error);
	goto out;
}
EXPORT_SYMBOL(lookup_bdev);

int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
	struct super_block *sb = get_super(bdev);
	int res = 0;

	if (sb) {
		/*
		 * No need to lock the super; get_super holds the read
		 * side of s_umount so the filesystem cannot go away
		 * under us (->put_super runs with the write lock held).
		 */
		shrink_dcache_sb(sb);
		res = invalidate_inodes(sb, kill_dirty);
		drop_super(sb);
	}
	invalidate_bdev(bdev);
	return res;
}
EXPORT_SYMBOL(__invalidate_device);

void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
{
	struct inode *inode, *old_inode = NULL;

	spin_lock(&blockdev_superblock->s_inode_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;
		struct block_device *bdev;

		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
		    mapping->nrpages == 0) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&blockdev_superblock->s_inode_list_lock);
		/*
		 * We hold a reference to 'inode' so it couldn't be freed,
		 * even if we drop the list lock.  And we must drop it,
		 * because iput() may need to take s_inode_list_lock, so we
		 * cannot hold it while calling iput() on the previous inode.
		 */
		iput(old_inode);
		old_inode = inode;
		bdev = I_BDEV(inode);

		mutex_lock(&bdev->bd_mutex);
		if (bdev->bd_openers)
			func(bdev, arg);
		mutex_unlock(&bdev->bd_mutex);

		spin_lock(&blockdev_superblock->s_inode_list_lock);
	}
	spin_unlock(&blockdev_superblock->s_inode_list_lock);
	iput(old_inode);
}