// SPDX-License-Identifier: GPL-2.0-only
/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
 *  Copyright (C) 2016 - 2020 Christoph Hellwig
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/magic.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
#include <linux/uaccess.h>
#include <linux/suspend.h>
#include "internal.h"

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

static const struct address_space_operations def_blk_aops;

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}
EXPORT_SYMBOL(I_BDEV);
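
/*
 * Example (illustrative, not part of this file): given a struct file
 * opened on a block device node, the underlying block_device can be
 * reached through the mapping's host inode:
 *
 *	struct inode *inode = file->f_mapping->host;
 *	struct block_device *bdev = I_BDEV(inode);
 *
 * This works because every bdev inode is embedded in a bdev_inode
 * right next to its struct block_device, as defined above.
 */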

static void bdev_write_inode(struct block_device *bdev)
{
	struct inode *inode = bdev->bd_inode;
	int ret;

	spin_lock(&inode->i_lock);
	while (inode->i_state & I_DIRTY) {
		spin_unlock(&inode->i_lock);
		ret = write_inode_now(inode, true);
		if (ret) {
			char name[BDEVNAME_SIZE];
			pr_warn_ratelimited("VFS: Dirty inode writeback failed "
				"for block device %s (err=%d).\n",
				bdevname(bdev, name), ret);
		}
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
}

/* Kill _all_ buffers and pagecache , dirty or not.. */
static void kill_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
		return;

	invalidate_bh_lrus();
	truncate_inode_pages(mapping, 0);
}

/* Invalidate clean unused buffers and pagecache. */
void invalidate_bdev(struct block_device *bdev)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;

	if (mapping->nrpages) {
		invalidate_bh_lrus();
		lru_add_drain_all();	/* make sure all lru add caches are flushed */
		invalidate_mapping_pages(mapping, 0, -1);
	}
	/* 99% of the time, we don't need to flush the cleancache on the bdev.
	 * But, for the strange corners, lets be cautious
	 */
	cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);

/*
 * Drop all buffers & page cache for given bdev range. This function bails
 * out preemptively if some exclusive claim has been made on the bdev.
 */
int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
			loff_t lstart, loff_t lend)
{
	struct block_device *claimed_bdev = NULL;
	int err;

	/*
	 * If we don't hold exclusive handle for the device, upgrade to it
	 * while we discard the buffer cache to avoid discarding new data
	 * from the process that is actually holding the device.
	 */
	if (!(mode & FMODE_EXCL)) {
		claimed_bdev = bdev->bd_contains;
		err = bd_prepare_to_claim(bdev, claimed_bdev,
					  truncate_bdev_range);
		if (err)
			return err;
	}
	truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
	if (claimed_bdev)
		bd_abort_claiming(bdev, claimed_bdev, truncate_bdev_range);
	return 0;
}
EXPORT_SYMBOL(truncate_bdev_range);

static void set_init_blocksize(struct block_device *bdev)
{
	bdev->bd_inode->i_blkbits = blksize_bits(bdev_logical_block_size(bdev));
}

int set_blocksize(struct block_device *bdev, int size)
{
	/* Size must be a power of two, and between 512 and PAGE_SIZE */
	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
		return -EINVAL;

	/* Size cannot be smaller than the size supported by the device */
	if (size < bdev_logical_block_size(bdev))
		return -EINVAL;

	/* Don't change the size if it is same as current */
	if (bdev->bd_inode->i_blkbits != blksize_bits(size)) {
		sync_blockdev(bdev);
		bdev->bd_inode->i_blkbits = blksize_bits(size);
		kill_bdev(bdev);
	}
	return 0;
}

EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
	if (set_blocksize(sb->s_bdev, size))
		return 0;
	/* If we get here, we know size is power of two
	 * and it's value is between 512 and PAGE_SIZE */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);
	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);
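
/*
 * Example (illustrative sketch): a filesystem's fill_super typically
 * starts by clamping its block size to what the device can address,
 * then switches to its preferred size once the on-disk superblock has
 * been read.  MYFS_DEFAULT_BSIZE is a hypothetical constant:
 *
 *	if (!sb_min_blocksize(sb, 512))
 *		return -EINVAL;
 *	(read the on-disk superblock, learn the real block size)
 *	if (!sb_set_blocksize(sb, MYFS_DEFAULT_BSIZE))
 *		return -EINVAL;
 *
 * Both helpers return the block size actually set, or 0 on failure.
 */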
static int
blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

static struct inode *bdev_file_inode(struct file *file)
{
	return file->f_mapping->host;
}

static unsigned int dio_bio_write_op(struct kiocb *iocb)
{
	unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;

	/* avoid the need for an I/O completion work item */
	if (iocb->ki_flags & IOCB_DSYNC)
		op |= REQ_FUA;
	return op;
}

#define DIO_INLINE_BIO_VECS 4

static void blkdev_bio_end_io_simple(struct bio *bio)
{
	struct task_struct *waiter = bio->bi_private;

	WRITE_ONCE(bio->bi_private, NULL);
	blk_wake_io_task(waiter);
}

static ssize_t
__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
		int nr_pages)
{
	struct file *file = iocb->ki_filp;
	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
	loff_t pos = iocb->ki_pos;
	bool should_dirty = false;
	struct bio bio;
	ssize_t ret;
	blk_qc_t qc;

	if ((pos | iov_iter_alignment(iter)) &
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	if (nr_pages <= DIO_INLINE_BIO_VECS)
		vecs = inline_vecs;
	else {
		vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
				     GFP_KERNEL);
		if (!vecs)
			return -ENOMEM;
	}

	bio_init(&bio, vecs, nr_pages);
	bio_set_dev(&bio, bdev);
	bio.bi_iter.bi_sector = pos >> 9;
	bio.bi_write_hint = iocb->ki_hint;
	bio.bi_private = current;
	bio.bi_end_io = blkdev_bio_end_io_simple;
	bio.bi_ioprio = iocb->ki_ioprio;

	ret = bio_iov_iter_get_pages(&bio, iter);
	if (unlikely(ret))
		goto out;
	ret = bio.bi_iter.bi_size;

	if (iov_iter_rw(iter) == READ) {
		bio.bi_opf = REQ_OP_READ;
		if (iter_is_iovec(iter))
			should_dirty = true;
	} else {
		bio.bi_opf = dio_bio_write_op(iocb);
		task_io_account_write(ret);
	}
	if (iocb->ki_flags & IOCB_HIPRI)
		bio_set_polled(&bio, iocb);

	qc = submit_bio(&bio);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(bio.bi_private))
			break;
		if (!(iocb->ki_flags & IOCB_HIPRI) ||
		    !blk_poll(bdev_get_queue(bdev), qc, true))
			blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	bio_release_pages(&bio, should_dirty);
	if (unlikely(bio.bi_status))
		ret = blk_status_to_errno(bio.bi_status);

out:
	if (vecs != inline_vecs)
		kfree(vecs);

	bio_uninit(&bio);

	return ret;
}

struct blkdev_dio {
	union {
		struct kiocb		*iocb;
		struct task_struct	*waiter;
	};
	size_t			size;
	atomic_t		ref;
	bool			multi_bio : 1;
	bool			should_dirty : 1;
	bool			is_sync : 1;
	struct bio		bio;
};

static struct bio_set blkdev_dio_pool;

static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
{
	struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
	struct request_queue *q = bdev_get_queue(bdev);

	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
}

static void blkdev_bio_end_io(struct bio *bio)
{
	struct blkdev_dio *dio = bio->bi_private;
	bool should_dirty = dio->should_dirty;

	if (bio->bi_status && !dio->bio.bi_status)
		dio->bio.bi_status = bio->bi_status;

	if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
		if (!dio->is_sync) {
			struct kiocb *iocb = dio->iocb;
			ssize_t ret;

			if (likely(!dio->bio.bi_status)) {
				ret = dio->size;
				iocb->ki_pos += ret;
			} else {
				ret = blk_status_to_errno(dio->bio.bi_status);
			}

			dio->iocb->ki_complete(iocb, ret, 0);
			if (dio->multi_bio)
				bio_put(&dio->bio);
		} else {
			struct task_struct *waiter = dio->waiter;

			WRITE_ONCE(dio->waiter, NULL);
			blk_wake_io_task(waiter);
		}
	}

	if (should_dirty) {
		bio_check_pages_dirty(bio);
	} else {
		bio_release_pages(bio, false);
		bio_put(bio);
	}
}

static ssize_t
__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = bdev_file_inode(file);
	struct block_device *bdev = I_BDEV(inode);
	struct blk_plug plug;
	struct blkdev_dio *dio;
	struct bio *bio;
	bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
	loff_t pos = iocb->ki_pos;
	blk_qc_t qc = BLK_QC_T_NONE;
	int ret = 0;

	if ((pos | iov_iter_alignment(iter)) &
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);

	dio = container_of(bio, struct blkdev_dio, bio);
	dio->is_sync = is_sync = is_sync_kiocb(iocb);
	if (dio->is_sync) {
		dio->waiter = current;
		bio_get(bio);
	} else {
		dio->iocb = iocb;
	}

	dio->size = 0;
	dio->multi_bio = false;
	dio->should_dirty = is_read && iter_is_iovec(iter);

	/*
	 * Don't plug for HIPRI/polled IO, as those should go straight
	 * to issue
	 */
	if (!is_poll)
		blk_start_plug(&plug);

	for (;;) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = pos >> 9;
		bio->bi_write_hint = iocb->ki_hint;
		bio->bi_private = dio;
		bio->bi_end_io = blkdev_bio_end_io;
		bio->bi_ioprio = iocb->ki_ioprio;

		ret = bio_iov_iter_get_pages(bio, iter);
		if (unlikely(ret)) {
			bio->bi_status = BLK_STS_IOERR;
			bio_endio(bio);
			break;
		}

		if (is_read) {
			bio->bi_opf = REQ_OP_READ;
			if (dio->should_dirty)
				bio_set_pages_dirty(bio);
		} else {
			bio->bi_opf = dio_bio_write_op(iocb);
			task_io_account_write(bio->bi_iter.bi_size);
		}

		dio->size += bio->bi_iter.bi_size;
		pos += bio->bi_iter.bi_size;

		nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
		if (!nr_pages) {
			bool polled = false;

			if (iocb->ki_flags & IOCB_HIPRI) {
				bio_set_polled(bio, iocb);
				polled = true;
			}

			qc = submit_bio(bio);

			if (polled)
				WRITE_ONCE(iocb->ki_cookie, qc);
			break;
		}

		if (!dio->multi_bio) {
			/*
			 * AIO needs an extra reference to ensure the dio
			 * structure which is embedded into the first bio
			 * stays around.
			 */
			if (!is_sync)
				bio_get(bio);
			dio->multi_bio = true;
			atomic_set(&dio->ref, 2);
		} else {
			atomic_inc(&dio->ref);
		}

		submit_bio(bio);
		bio = bio_alloc(GFP_KERNEL, nr_pages);
	}

	if (!is_poll)
		blk_finish_plug(&plug);

	if (!is_sync)
		return -EIOCBQUEUED;

	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(dio->waiter))
			break;

		if (!(iocb->ki_flags & IOCB_HIPRI) ||
		    !blk_poll(bdev_get_queue(bdev), qc, true))
			blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	if (!ret)
		ret = blk_status_to_errno(dio->bio.bi_status);
	if (likely(!ret))
		ret = dio->size;

	bio_put(&dio->bio);
	return ret;
}

static ssize_t
blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	int nr_pages;

	nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1);
	if (!nr_pages)
		return 0;
	if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
		return __blkdev_direct_IO_simple(iocb, iter, nr_pages);

	return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES));
}

static __init int blkdev_init(void)
{
	return bioset_init(&blkdev_dio_pool, 4,
				offsetof(struct blkdev_dio, bio),
				BIOSET_NEED_BVECS);
}
module_init(blkdev_init);

int __sync_blockdev(struct block_device *bdev, int wait)
{
	if (!bdev)
		return 0;
	if (!wait)
		return filemap_flush(bdev->bd_inode->i_mapping);
	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}

/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping.  Does not take the superblock lock.
 */
int sync_blockdev(struct block_device *bdev)
{
	return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);

/*
 * Write out and wait upon all dirty data associated with this
 * device.   Filesystem data as well as the underlying block
 * device.  Takes the superblock lock.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	if (sb) {
		int res = sync_filesystem(sb);
		drop_super(sb);
		return res;
	}
	return sync_blockdev(bdev);
}
EXPORT_SYMBOL(fsync_bdev);
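
/*
 * Usage note (illustrative): sync_blockdev() above only flushes the
 * page cache of the device itself, while fsync_bdev() also syncs a
 * filesystem mounted on it when one exists.  A driver preparing a
 * device-level snapshot would therefore prefer, as a sketch:
 *
 *	err = fsync_bdev(bdev);
 *
 * which degrades to a plain sync_blockdev() exactly when get_super()
 * finds no mounted filesystem, as the function body above shows.
 */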
/**
 * freeze_bdev  --  lock a filesystem and force it into a consistent state
 * @bdev:	blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can actually unfreeze the frozen filesystem when
 * multiple freeze requests arrive simultaneously. It counts up in
 * freeze_bdev() and counts down in thaw_bdev(). When it becomes 0,
 * thaw_bdev() actually unfreezes.
 */
struct super_block *freeze_bdev(struct block_device *bdev)
{
	struct super_block *sb;
	int error = 0;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (++bdev->bd_fsfreeze_count > 1) {
		/*
		 * We don't even need to grab a reference - the first call
		 * to freeze_bdev grabs an active reference instead; the
		 * second call then only adds a new reference.
		 */
		sb = get_super(bdev);
		if (sb)
			drop_super(sb);
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return sb;
	}

	sb = get_active_super(bdev);
	if (!sb)
		goto out;
	if (sb->s_op->freeze_super)
		error = sb->s_op->freeze_super(sb);
	else
		error = freeze_super(sb);
	if (error) {
		deactivate_super(sb);
		bdev->bd_fsfreeze_count--;
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return ERR_PTR(error);
	}
	deactivate_super(sb);
 out:
	sync_blockdev(bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return sb;	/* thaw_bdev releases s->s_umount */
}
EXPORT_SYMBOL(freeze_bdev);

/**
 * thaw_bdev  -- unlock filesystem
 * @bdev:	blockdevice to unlock
 * @sb:		associated superblock
 *
 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
 */
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	int error = -EINVAL;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (!bdev->bd_fsfreeze_count)
		goto out;

	error = 0;
	if (--bdev->bd_fsfreeze_count > 0)
		goto out;

	if (!sb)
		goto out;

	if (sb->s_op->thaw_super)
		error = sb->s_op->thaw_super(sb);
	else
		error = thaw_super(sb);
	if (error)
		bdev->bd_fsfreeze_count++;
out:
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return error;
}
EXPORT_SYMBOL(thaw_bdev);
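
/*
 * Example (illustrative sketch): freeze_bdev()/thaw_bdev() are meant
 * to bracket an operation that needs an on-disk consistent image,
 * e.g. taking a snapshot:
 *
 *	struct super_block *sb = freeze_bdev(bdev);
 *
 *	if (IS_ERR(sb))
 *		return PTR_ERR(sb);
 *	(take the snapshot)
 *	thaw_bdev(bdev, sb);
 *
 * Note that freeze_bdev() may also return NULL when no filesystem is
 * mounted; the freeze count is still taken, so thaw_bdev() must still
 * be called, with sb == NULL, in that case.
 */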
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file * file, struct page * page)
{
	return block_read_full_page(page, blkdev_get_block);
}

static void blkdev_readahead(struct readahead_control *rac)
{
	mpage_readahead(rac, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	return block_write_begin(mapping, pos, len, flags, pagep,
				 blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	unlock_page(page);
	put_page(page);

	return ret;
}

/*
 * private llseek:
 * for a block special file file_inode(file)->i_size is zero
 * so we compute the size by hand (just as in block_write_full_page() )
 */
static loff_t block_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t retval;

	inode_lock(bd_inode);
	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
	inode_unlock(bd_inode);
	return retval;
}

int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *bd_inode = bdev_file_inode(filp);
	struct block_device *bdev = I_BDEV(bd_inode);
	int error;

	error = file_write_and_wait_range(filp, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev, GFP_KERNEL);
	if (error == -EOPNOTSUPP)
		error = 0;

	return error;
}
EXPORT_SYMBOL(blkdev_fsync);

/**
 * bdev_read_page() - Start reading a page from a block device
 * @bdev: The device to read the page from
 * @sector: The offset on the device to read the page to (need not be aligned)
 * @page: The page to read
 *
 * On entry, the page should be locked.  It will be unlocked when the page
 * has been read.  If the block driver implements rw_page synchronously,
 * that will be true on exit from this function, but it need not be.
 *
 * Errors returned by this function are usually "soft", eg out of memory, or
 * queue full; callers should try a different route to read this page rather
 * than propagate an error back up the stack.
 *
 * Return: negative errno if an error occurs, 0 if submission was successful.
 */
int bdev_read_page(struct block_device *bdev, sector_t sector,
			struct page *page)
{
	const struct block_device_operations *ops = bdev->bd_disk->fops;
	int result = -EOPNOTSUPP;

	if (!ops->rw_page || bdev_get_integrity(bdev))
		return result;

	result = blk_queue_enter(bdev->bd_disk->queue, 0);
	if (result)
		return result;
	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
			      REQ_OP_READ);
	blk_queue_exit(bdev->bd_disk->queue);
	return result;
}

/**
 * bdev_write_page() - Start writing a page to a block device
 * @bdev: The device to write the page to
 * @sector: The offset on the device to write the page to (need not be aligned)
 * @page: The page to write
 * @wbc: The writeback_control for the write
 *
 * On entry, the page should be locked and not currently under writeback.
 * On exit, if the write started successfully, the page will be unlocked and
 * under writeback.  If the write failed already (eg the driver failed to
 * queue the page to the device), the page will still be locked.  If the
 * caller is a ->writepage implementation, it will need to unlock the page.
 *
 * Errors returned by this function are usually "soft", eg out of memory, or
 * queue full; callers should try a different route to write this page rather
 * than propagate an error back up the stack.
 *
 * Return: negative errno if an error occurs, 0 if submission was successful.
 */
int bdev_write_page(struct block_device *bdev, sector_t sector,
			struct page *page, struct writeback_control *wbc)
{
	int result;
	const struct block_device_operations *ops = bdev->bd_disk->fops;

	if (!ops->rw_page || bdev_get_integrity(bdev))
		return -EOPNOTSUPP;
	result = blk_queue_enter(bdev->bd_disk->queue, 0);
	if (result)
		return result;

	set_page_writeback(page);
	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
			      REQ_OP_WRITE);
	if (result) {
		end_page_writeback(page);
	} else {
		clean_page_buffers(page);
		unlock_page(page);
	}
	blk_queue_exit(bdev->bd_disk->queue);
	return result;
}
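
/*
 * Illustrative use of the ->rw_page fast path (modelled on how the
 * mpage and swap code use it): callers try bdev_read_page() or
 * bdev_write_page() first and fall back to a regular bio when the
 * driver does not implement rw_page or the attempt fails:
 *
 *	ret = bdev_read_page(bdev, sector, page);
 *	if (ret == -EOPNOTSUPP)
 *		(submit an ordinary REQ_OP_READ bio instead)
 *
 * The errors returned here are deliberately "soft" so that such a
 * fallback is always possible.
 */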

/*
 * pseudo-fs
 */

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
static struct kmem_cache * bdev_cachep __read_mostly;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

static void bdev_free_inode(struct inode *inode)
{
	kmem_cache_free(bdev_cachep, BDEV_I(inode));
}

static void init_once(void *foo)
{
	struct bdev_inode *ei = (struct bdev_inode *) foo;
	struct block_device *bdev = &ei->bdev;

	memset(bdev, 0, sizeof(*bdev));
	mutex_init(&bdev->bd_mutex);
#ifdef CONFIG_SYSFS
	INIT_LIST_HEAD(&bdev->bd_holder_disks);
#endif
	bdev->bd_bdi = &noop_backing_dev_info;
	inode_init_once(&ei->vfs_inode);
	/* Initialize mutex for freeze. */
	mutex_init(&bdev->bd_fsfreeze_mutex);
}

static void bdev_evict_inode(struct inode *inode)
{
	struct block_device *bdev = &BDEV_I(inode)->bdev;
	truncate_inode_pages_final(&inode->i_data);
	invalidate_inode_buffers(inode); /* is it needed here? */
	clear_inode(inode);
	/* Detach inode from wb early as bdi_put() may free bdi->wb */
	inode_detach_wb(inode);
	if (bdev->bd_bdi != &noop_backing_dev_info) {
		bdi_put(bdev->bd_bdi);
		bdev->bd_bdi = &noop_backing_dev_info;
	}
}

static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.free_inode = bdev_free_inode,
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};

static int bd_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
	if (!ctx)
		return -ENOMEM;
	fc->s_iflags |= SB_I_CGROUPWB;
	ctx->ops = &bdev_sops;
	return 0;
}

static struct file_system_type bd_type = {
	.name		= "bdev",
	.init_fs_context = bd_init_fs_context,
	.kill_sb	= kill_anon_super,
};

struct super_block *blockdev_superblock __read_mostly;
EXPORT_SYMBOL_GPL(blockdev_superblock);

void __init bdev_cache_init(void)
{
	int err;
	static struct vfsmount *bd_mnt;

	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
			init_once);
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");
	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
}

/*
 * Most likely _very_ bad one - but then it's hardly critical for small
 * /dev and can be fixed when somebody will need really large one.
 * Keep in mind that it will be fed through icache hash function too.
 */
static inline unsigned long hash(dev_t dev)
{
	return MAJOR(dev)+MINOR(dev);
}

static int bdev_test(struct inode *inode, void *data)
{
	return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
}

static int bdev_set(struct inode *inode, void *data)
{
	BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
	return 0;
}

static struct block_device *bdget(dev_t dev)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = iget5_locked(blockdev_superblock, hash(dev),
			bdev_test, bdev_set, &dev);

	if (!inode)
		return NULL;

	bdev = &BDEV_I(inode)->bdev;

	if (inode->i_state & I_NEW) {
		spin_lock_init(&bdev->bd_size_lock);
		bdev->bd_contains = NULL;
		bdev->bd_super = NULL;
		bdev->bd_inode = inode;
		bdev->bd_part_count = 0;
		inode->i_mode = S_IFBLK;
		inode->i_rdev = dev;
		inode->i_bdev = bdev;
		inode->i_data.a_ops = &def_blk_aops;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		unlock_new_inode(inode);
	}
	return bdev;
}

/**
 * bdgrab -- Grab a reference to an already referenced block device
 * @bdev:	Block device to grab a reference to.
 */
struct block_device *bdgrab(struct block_device *bdev)
{
	ihold(bdev->bd_inode);
	return bdev;
}
EXPORT_SYMBOL(bdgrab);
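
/*
 * Illustrative pairing: bdgrab() takes an extra reference on the bdev
 * inode of an already-referenced block device, and each such reference
 * must be dropped with bdput():
 *
 *	struct block_device *ref = bdgrab(bdev);
 *	(use ref without it going away)
 *	bdput(ref);
 *
 * Unlike bdget(), bdgrab() cannot sleep or fail, which is why it is
 * safe under spinlocks such as bdev_lock above.
 */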

struct block_device *bdget_part(struct hd_struct *part)
{
	return bdget(part_devt(part));
}

long nr_blockdev_pages(void)
{
	struct inode *inode;
	long ret = 0;

	spin_lock(&blockdev_superblock->s_inode_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
		ret += inode->i_mapping->nrpages;
	spin_unlock(&blockdev_superblock->s_inode_list_lock);

	return ret;
}

void bdput(struct block_device *bdev)
{
	iput(bdev->bd_inode);
}

EXPORT_SYMBOL(bdput);

static struct block_device *bd_acquire(struct inode *inode)
{
	struct block_device *bdev;

	spin_lock(&bdev_lock);
	bdev = inode->i_bdev;
	if (bdev && !inode_unhashed(bdev->bd_inode)) {
		bdgrab(bdev);
		spin_unlock(&bdev_lock);
		return bdev;
	}
	spin_unlock(&bdev_lock);

	/*
	 * i_bdev references block device inode that was already shut down
	 * (corresponding device got removed).  Remove the reference and look
	 * up block device inode again just in case new device got
	 * reestablished under the same device number.
	 */
	if (bdev)
		bd_forget(inode);

	bdev = bdget(inode->i_rdev);
	if (bdev) {
		spin_lock(&bdev_lock);
		if (!inode->i_bdev) {
			/*
			 * We take an additional reference to bd_inode,
			 * and it's released in clear_inode() of inode.
			 * So, we can access it via ->i_mapping always
			 * without igrab().
			 */
			bdgrab(bdev);
			inode->i_bdev = bdev;
			inode->i_mapping = bdev->bd_inode->i_mapping;
		}
		spin_unlock(&bdev_lock);
	}
	return bdev;
}

/* Call when you free inode */

void bd_forget(struct inode *inode)
{
	struct block_device *bdev = NULL;

	spin_lock(&bdev_lock);
	if (!sb_is_blkdev_sb(inode->i_sb))
		bdev = inode->i_bdev;
	inode->i_bdev = NULL;
	inode->i_mapping = &inode->i_data;
	spin_unlock(&bdev_lock);

	if (bdev)
		bdput(bdev);
}

/**
 * bd_may_claim - test whether a block device can be claimed
 * @bdev: block device of interest
 * @whole: whole block device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Test whether @bdev can be claimed by @holder.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).
 *
 * RETURNS:
 * %true if @bdev can be claimed, %false otherwise.
 */
static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
			 void *holder)
{
	if (bdev->bd_holder == holder)
		return true;	 /* already a holder */
	else if (bdev->bd_holder != NULL)
		return false;	 /* held by someone else */
	else if (whole == bdev)
		return true;	 /* is a whole device which isn't held */

	else if (whole->bd_holder == bd_may_claim)
		return true;	 /* is a partition of a device that is being partitioned */
	else if (whole->bd_holder != NULL)
		return false;	 /* is a partition of a held device */
	else
		return true;	 /* is a partition of an un-held device */
}

/**
 * bd_prepare_to_claim - claim a block device
 * @bdev: block device of interest
 * @whole: the whole device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
 * Claim @bdev.  This function fails if @bdev is already claimed by another
 * holder and waits if another claiming is in progress.  On successful
 * return, the caller has ownership of bd_claiming and bd_holder[s].
 *
 * RETURNS:
 * 0 if @bdev can be claimed, -EBUSY otherwise.
 */
int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole,
		void *holder)
{
retry:
	spin_lock(&bdev_lock);
	/* if someone else claimed, fail */
	if (!bd_may_claim(bdev, whole, holder)) {
		spin_unlock(&bdev_lock);
		return -EBUSY;
	}

	/* if claiming is already in progress, wait for it to finish */
	if (whole->bd_claiming) {
		wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
		DEFINE_WAIT(wait);

		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&bdev_lock);
		schedule();
		finish_wait(wq, &wait);
		goto retry;
	}

	/* yay, all mine */
	whole->bd_claiming = holder;
	spin_unlock(&bdev_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(bd_prepare_to_claim);
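
/*
 * Illustrative claim lifecycle (a sketch of what truncate_bdev_range()
 * above and __blkdev_get() below do): a temporary exclusive claim is
 * opened with bd_prepare_to_claim() and must be resolved either way:
 *
 *	err = bd_prepare_to_claim(bdev, whole, holder);
 *	if (err)
 *		return err;
 *	(do the work that must not race with other exclusive openers)
 *	bd_abort_claiming(bdev, whole, holder);
 *
 * A real exclusive open calls bd_finish_claiming() instead of
 * bd_abort_claiming() to convert the pending claim into bd_holder.
 */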

static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
{
	struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);

	if (!disk)
		return NULL;
	/*
	 * Now that we hold gendisk reference we make sure bdev we looked up is
	 * not stale. If it is, it means device got removed and created before
	 * we looked up gendisk and we fail open in such case. Such bdevs are
	 * now removed from the lookup table so we know that this bdev is not
	 * stale. This is why it's safe to check bd_openers.
	 */
	if (inode_unhashed(bdev->bd_inode)) {
		put_disk_and_module(disk);
		return NULL;
	}
	return disk;
}

static void bd_clear_claiming(struct block_device *whole, void *holder)
{
	lockdep_assert_held(&bdev_lock);
	/* tell others that we're done */
	BUG_ON(whole->bd_claiming != holder);
	whole->bd_claiming = NULL;
	wake_up_bit(&whole->bd_claiming, 0);
}

/**
 * bd_finish_claiming - finish claiming of a block device
 * @bdev: block device of interest
 * @whole: whole block device
 * @holder: holder that has claimed @bdev
 *
 * Finish exclusive open of a block device. Mark the device as exclusively
 * open by the holder and wake up all waiters for exclusive open to finish.
 */
static void bd_finish_claiming(struct block_device *bdev,
		struct block_device *whole, void *holder)
{
	spin_lock(&bdev_lock);
	BUG_ON(!bd_may_claim(bdev, whole, holder));
	/*
	 * Note that for a whole device bd_holders will be incremented twice,
	 * and bd_holder will be set to bd_may_claim before being set to holder
	 */
	whole->bd_holders++;
	whole->bd_holder = bd_may_claim;
	bdev->bd_holders++;
	bdev->bd_holder = holder;
	bd_clear_claiming(whole, holder);
	spin_unlock(&bdev_lock);
}

/**
 * bd_abort_claiming - abort claiming of a block device
 * @bdev: block device of interest
 * @whole: whole block device
 * @holder: holder that has claimed @bdev
 *
 * Abort claiming of a block device when the exclusive open failed. This can be
 * also used when exclusive open is not actually desired and we just needed
 * to block other exclusive openers for a while.
 */
void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
		       void *holder)
{
	spin_lock(&bdev_lock);
	bd_clear_claiming(whole, holder);
	spin_unlock(&bdev_lock);
}
EXPORT_SYMBOL(bd_abort_claiming);

#ifdef CONFIG_SYSFS
struct bd_holder_disk {
	struct list_head	list;
	struct gendisk		*disk;
	int			refcnt;
};

static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
						  struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
		if (holder->disk == disk)
			return holder;
	return NULL;
}

static int add_symlink(struct kobject *from, struct kobject *to)
{
	return sysfs_create_link(from, to, kobject_name(to));
}

static void del_symlink(struct kobject *from, struct kobject *to)
{
	sysfs_remove_link(from, kobject_name(to));
}

/**
 * bd_link_disk_holder - create symlinks between holding disk and slave bdev
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * This functions creates the following sysfs symlinks.
 *
 * - from "slaves" directory of the holder @disk to the claimed @bdev
 * - from "holders" directory of the @bdev to the holder @disk
 *
 * For example, if /dev/dm-0 maps to /dev/sda and disk for dm-0 is
 * passed to bd_link_disk_holder(), then:
 *
 *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
 *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 *
 * The caller must have claimed @bdev before calling this function and
 * ensure that both @bdev and @disk are valid during the creation and
 * lifetime of these symlinks.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;
	int ret = 0;

	mutex_lock(&bdev->bd_mutex);

	WARN_ON_ONCE(!bdev->bd_holder);

	/* FIXME: remove the following once add_disk() handles errors */
	if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
		goto out_unlock;

	holder = bd_find_holder_disk(bdev, disk);
	if (holder) {
		holder->refcnt++;
		goto out_unlock;
	}

	holder = kzalloc(sizeof(*holder), GFP_KERNEL);
	if (!holder) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	INIT_LIST_HEAD(&holder->list);
	holder->disk = disk;
	holder->refcnt = 1;

	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
	if (ret)
		goto out_free;

	ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
	if (ret)
		goto out_del;
	/*
	 * bdev could be deleted beneath us which would implicitly destroy
	 * the holder directory.  Hold on to it.
	 */
	kobject_get(bdev->bd_part->holder_dir);

	list_add(&holder->list, &bdev->bd_holder_disks);
	goto out_unlock;

out_del:
	del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
out_free:
	kfree(holder);
out_unlock:
	mutex_unlock(&bdev->bd_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(bd_link_disk_holder);
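
/*
 * Illustrative caller (modelled on device-mapper, the main user of
 * this interface): after claiming a slave device exclusively, the
 * holding disk links itself to it so the topology shows up in sysfs:
 *
 *	bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
 *				 dm_holder);
 *	if (IS_ERR(bdev))
 *		return PTR_ERR(bdev);
 *	err = bd_link_disk_holder(bdev, dm_disk);
 *
 * "dm_holder" and "dm_disk" are placeholders for the caller's holder
 * cookie and gendisk; bd_unlink_disk_holder() undoes the link before
 * the device is released with blkdev_put().
 */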

/**
 * bd_unlink_disk_holder - destroy symlinks created by bd_link_disk_holder()
 * @bdev: the claimed slave bdev
 * @disk: the holding disk
 *
 * DON'T USE THIS UNLESS YOU'RE ALREADY USING IT.
 *
 * CONTEXT:
 * Might sleep.
 */
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
{
	struct bd_holder_disk *holder;

	mutex_lock(&bdev->bd_mutex);

	holder = bd_find_holder_disk(bdev, disk);

	if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
		del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
		del_symlink(bdev->bd_part->holder_dir,
			    &disk_to_dev(disk)->kobj);
		kobject_put(bdev->bd_part->holder_dir);
		list_del_init(&holder->list);
		kfree(holder);
	}

	mutex_unlock(&bdev->bd_mutex);
}
EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
#endif

/**
 * check_disk_size_change - checks for a disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 * @verbose: if %true log a message about a size change if there is any
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs. When shrinking the bdev size, its all caches
 * are freed.
 */
static void check_disk_size_change(struct gendisk *disk,
		struct block_device *bdev, bool verbose)
{
	loff_t disk_size, bdev_size;

	spin_lock(&bdev->bd_size_lock);
	disk_size = (loff_t)get_capacity(disk) << 9;
	bdev_size = i_size_read(bdev->bd_inode);
	if (disk_size != bdev_size) {
		if (verbose) {
			printk(KERN_INFO
			       "%s: detected capacity change from %lld to %lld\n",
			       disk->disk_name, bdev_size, disk_size);
		}
		i_size_write(bdev->bd_inode, disk_size);
	}
	spin_unlock(&bdev->bd_size_lock);

	if (bdev_size > disk_size) {
		if (__invalidate_device(bdev, false))
			pr_warn("VFS: busy inodes on resized disk %s\n",
				disk->disk_name);
	}
}

/**
 * revalidate_disk_size - checks for disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @verbose: if %true log a message about a size change if there is any
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs.
 */
void revalidate_disk_size(struct gendisk *disk, bool verbose)
{
	struct block_device *bdev;

	/*
	 * Hidden disks don't have associated bdev so there's no point in
	 * revalidating them.
	 */
	if (disk->flags & GENHD_FL_HIDDEN)
		return;

	bdev = bdget_disk(disk, 0);
	if (bdev) {
		check_disk_size_change(disk, bdev, verbose);
		bdput(bdev);
	}
}
EXPORT_SYMBOL(revalidate_disk_size);

void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors)
{
	spin_lock(&bdev->bd_size_lock);
	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
	spin_unlock(&bdev->bd_size_lock);
}
EXPORT_SYMBOL(bd_set_nr_sectors);

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

int bdev_disk_changed(struct block_device *bdev, bool invalidate)
{
	struct gendisk *disk = bdev->bd_disk;
	int ret;

	lockdep_assert_held(&bdev->bd_mutex);

	clear_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);

rescan:
	ret = blk_drop_partitions(bdev);
	if (ret)
		return ret;

	/*
	 * Historically we only set the capacity to zero for devices that
	 * support partitions (independent of actually having partitions
	 * created).  Doing that is rather inconsistent, but changing it
	 * broke legacy udisks polling for legacy ide-cdrom devices.  Use
	 * the crude check below to get the sane behavior for most devices
	 * while not breaking userspace for this particular setup.
	 */
	if (invalidate) {
		if (disk_part_scan_enabled(disk) ||
		    !(disk->flags & GENHD_FL_REMOVABLE))
			set_capacity(disk, 0);
	} else {
		if (disk->fops->revalidate_disk)
			disk->fops->revalidate_disk(disk);
	}

	check_disk_size_change(disk, bdev, !invalidate);

	if (get_capacity(disk)) {
		ret = blk_add_partitions(disk, bdev);
		if (ret == -EAGAIN)
			goto rescan;
	} else if (invalidate) {
		/*
		 * Tell userspace that the media / partition table may have
		 * changed.
		 */
		kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
	}

	return ret;
}
/*
 * Only exported for loop and dasd for historic reasons.  Don't use in new
 * code!
 */
EXPORT_SYMBOL_GPL(bdev_disk_changed);

/*
 * bd_mutex locking:
 *
 *  mutex_lock(part->bd_mutex)
 *    mutex_lock_nested(whole->bd_mutex, 1)
 */
static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
		int for_part)
{
	struct block_device *whole = NULL, *claiming = NULL;
	struct gendisk *disk;
	int ret;
	int partno;
	bool first_open = false, unblock_events = true, need_restart;

 restart:
	need_restart = false;
	ret = -ENXIO;
	disk = bdev_get_gendisk(bdev, &partno);
	if (!disk)
		goto out;

	if (partno) {
		whole = bdget_disk(disk, 0);
		if (!whole) {
			ret = -ENOMEM;
			goto out_put_disk;
		}
	}

	if (!for_part && (mode & FMODE_EXCL)) {
		WARN_ON_ONCE(!holder);
		if (whole)
			claiming = whole;
		else
			claiming = bdev;
		ret = bd_prepare_to_claim(bdev, claiming, holder);
		if (ret)
			goto out_put_whole;
	}

	disk_block_events(disk);
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		first_open = true;
		bdev->bd_disk = disk;
		bdev->bd_contains = bdev;
		bdev->bd_partno = partno;

		if (!partno) {
			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			ret = 0;
			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				/*
				 * If we lost a race with 'disk' being deleted,
				 * try again.  See md.c
				 */
				if (ret == -ERESTARTSYS)
					need_restart = true;
			}

			if (!ret) {
				bd_set_nr_sectors(bdev, get_capacity(disk));
				set_init_blocksize(bdev);
			}

			/*
			 * If the device is invalidated, rescan partition
			 * if open succeeded or failed with -ENOMEDIUM.
			 * The latter is necessary to prevent ghost
			 * partitions on a removed medium.
			 */
			if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
			    (!ret || ret == -ENOMEDIUM))
				bdev_disk_changed(bdev, ret == -ENOMEDIUM);

			if (ret)
				goto out_clear;
		} else {
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, NULL, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = bdgrab(whole);
			bdev->bd_part = disk_get_part(disk, partno);
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_nr_sectors(bdev, bdev->bd_part->nr_sects);
			set_init_blocksize(bdev);
		}

		if (bdev->bd_bdi == &noop_backing_dev_info)
			bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
	} else {
		if (bdev->bd_contains == bdev) {
			ret = 0;
			if (bdev->bd_disk->fops->open)
				ret = bdev->bd_disk->fops->open(bdev, mode);
			/* the same as first opener case, read comment there */
			if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
			    (!ret || ret == -ENOMEDIUM))
				bdev_disk_changed(bdev, ret == -ENOMEDIUM);
			if (ret)
				goto out_unlock_bdev;
		}
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	if (claiming)
		bd_finish_claiming(bdev, claiming, holder);

	/*
	 * Block event polling for write claims if requested.  Any write holder
	 * makes the write_holder state stick until all are released.  This is
	 * good enough and tracking individual writeable reference is too
	 * fragile given the way @mode is used in blkdev_get/put().
	 */
	if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
	    (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
		bdev->bd_write_holder = true;
		unblock_events = false;
	}
	mutex_unlock(&bdev->bd_mutex);

	if (unblock_events)
		disk_unblock_events(disk);

	/* only one opener holds refs to the module and disk */
	if (!first_open)
		put_disk_and_module(disk);
	if (whole)
		bdput(whole);
	return 0;

 out_clear:
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	if (claiming)
		bd_abort_claiming(bdev, claiming, holder);
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
 out_put_whole:
	if (whole)
		bdput(whole);
 out_put_disk:
	put_disk_and_module(disk);
	if (need_restart)
		goto restart;
 out:
	return ret;
}

/**
 * blkdev_get - open a block device
 * @bdev: block_device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open @bdev with @mode.  If @mode includes %FMODE_EXCL, @bdev is
 * open with exclusive access.  Specifying %FMODE_EXCL with %NULL
 * @holder is invalid.  Exclusive opens may nest for the same @holder.
 *
 * On success, the reference count of @bdev is unchanged.  On failure,
 * @bdev is put.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{
	int ret, perm = 0;

	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;
	ret = devcgroup_inode_permission(bdev->bd_inode, perm);
	if (ret)
		goto bdput;

	ret = __blkdev_get(bdev, mode, holder, 0);
	if (ret)
		goto bdput;
	return 0;

bdput:
	bdput(bdev);
	return ret;
}

/**
 * blkdev_get_by_path - open a block device by name
 * @path: path to the block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by the device file at @path.  @mode
 * and @holder are identical to blkdev_get().
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */
struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
					void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = lookup_bdev(path);
	if (IS_ERR(bdev))
		return bdev;

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
		blkdev_put(bdev, mode);
		return ERR_PTR(-EACCES);
	}

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_path);
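
/*
 * Example (illustrative sketch): mount-like code opens its backing
 * device exclusively, using some stable cookie as the holder, and
 * releases it with a blkdev_put() carrying the same mode:
 *
 *	bdev = blkdev_get_by_path("/dev/vda1",
 *				  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
 *				  &my_fs_type);
 *	if (IS_ERR(bdev))
 *		return PTR_ERR(bdev);
 *	(use the device)
 *	blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 *
 * "/dev/vda1" and "my_fs_type" are placeholders; FMODE_EXCL is what
 * makes the holder-based claiming above kick in.
 */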

/**
 * blkdev_get_by_dev - open a block device by device number
 * @dev: device number of block device to open
 * @mode: FMODE_* mask
 * @holder: exclusive holder identifier
 *
 * Open the blockdevice described by device number @dev.  @mode and
 * @holder are identical to blkdev_get().
 *
 * Use it ONLY if you really do not have anything better - i.e. when
 * you are behind a truly sucky interface and all you are given is a
 * device number.  _Never_ to be used for internal purposes.  If you
 * ever need it - reconsider your API.
 *
 * On success, the returned block_device has reference count of one.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * Pointer to block_device on success, ERR_PTR(-errno) on failure.
 */
struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
	struct block_device *bdev;
	int err;

	bdev = bdget(dev);
	if (!bdev)
		return ERR_PTR(-ENOMEM);

	err = blkdev_get(bdev, mode, holder);
	if (err)
		return ERR_PTR(err);

	return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_dev);

static int blkdev_open(struct inode * inode, struct file * filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binary needs it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;

	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = bd_acquire(inode);
	if (bdev == NULL)
		return -ENOMEM;

	filp->f_mapping = bdev->bd_inode->i_mapping;
	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);

	return blkdev_get(bdev, filp->f_mode, filp);
}

static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk = bdev->bd_disk;
	struct block_device *victim = NULL;

	/*
	 * Sync early if it looks like we're the last one.  If someone else
	 * opens the block device between now and the decrement of bd_openers
	 * then we did a sync that we didn't need to, but that's not the end
	 * of the world and we want to avoid long (could be several minute)
	 * syncs while holding the mutex.
	 */
	if (bdev->bd_openers == 1)
		sync_blockdev(bdev);

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (for_part)
		bdev->bd_part_count--;

	if (!--bdev->bd_openers) {
		WARN_ON_ONCE(bdev->bd_holders);
		sync_blockdev(bdev);
		kill_bdev(bdev);

		bdev_write_inode(bdev);
	}
	if (bdev->bd_contains == bdev) {
		if (disk->fops->release)
			disk->fops->release(disk, mode);
	}
	if (!bdev->bd_openers) {
		disk_put_part(bdev->bd_part);
		bdev->bd_part = NULL;
		bdev->bd_disk = NULL;
		if (bdev != bdev->bd_contains)
			victim = bdev->bd_contains;
		bdev->bd_contains = NULL;

		put_disk_and_module(disk);
	}
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	if (victim)
		__blkdev_put(victim, mode, 1);
}

void blkdev_put(struct block_device *bdev, fmode_t mode)
{
	mutex_lock(&bdev->bd_mutex);

	if (mode & FMODE_EXCL) {
		bool bdev_free;

		/*
		 * Release a claim on the device.  The holder fields
		 * are protected with bdev_lock.  bd_mutex is to
		 * synchronize disk_holder unlinking.
		 */
		spin_lock(&bdev_lock);

		WARN_ON_ONCE(--bdev->bd_holders < 0);
		WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);

		/* bd_contains might point to self, check in a separate step */
		if ((bdev_free = !bdev->bd_holders))
			bdev->bd_holder = NULL;
		if (!bdev->bd_contains->bd_holders)
			bdev->bd_contains->bd_holder = NULL;

		spin_unlock(&bdev_lock);

		/*
		 * If this was the last claim, remove holder link and
		 * unblock evpoll if it was a write holder.
		 */
		if (bdev_free && bdev->bd_write_holder) {
			disk_unblock_events(bdev->bd_disk);
			bdev->bd_write_holder = false;
		}
	}

	/*
	 * Trigger event checking and tell drivers to flush MEDIA_CHANGE
	 * event.  This is to ensure detection of media removal commanded
	 * from userland - e.g. eject(1).
	 */
	disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);

	mutex_unlock(&bdev->bd_mutex);

	__blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);

static int blkdev_close(struct inode * inode, struct file * filp)
{
	struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
	blkdev_put(bdev, filp->f_mode);
	return 0;
}

static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
	fmode_t mode = file->f_mode;

	/*
	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
	 * to update it before every ioctl.
	 */
	if (file->f_flags & O_NDELAY)
		mode |= FMODE_NDELAY;
	else
		mode &= ~FMODE_NDELAY;

	return blkdev_ioctl(bdev, mode, cmd, arg);
}

/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */
ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t size = i_size_read(bd_inode);
	struct blk_plug plug;
	ssize_t ret;

	if (bdev_read_only(I_BDEV(bd_inode)))
		return -EPERM;

	if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
		return -ETXTBSY;

	if (!iov_iter_count(from))
		return 0;

	if (iocb->ki_pos >= size)
		return -ENOSPC;

	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
		return -EOPNOTSUPP;

	iov_iter_truncate(from, size - iocb->ki_pos);

	blk_start_plug(&plug);
	ret = __generic_file_write_iter(iocb, from);
	if (ret > 0)
		ret = generic_write_sync(iocb, ret);
	blk_finish_plug(&plug);
	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_write_iter);

ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t size = i_size_read(bd_inode);
	loff_t pos = iocb->ki_pos;

	if (pos >= size)
		return 0;

	size -= pos;
	iov_iter_truncate(to, size);
	return generic_file_read_iter(iocb, to);
}
EXPORT_SYMBOL_GPL(blkdev_read_iter);

/*
 * Try to release a page associated with block device when the system
 * is under memory pressure.
 */
static int blkdev_releasepage(struct page *page, gfp_t wait)
{
	struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;

	if (super && super->s_op->bdev_try_to_free_page)
		return super->s_op->bdev_try_to_free_page(super, page, wait);

	return try_to_free_buffers(page);
}

static int blkdev_writepages(struct address_space *mapping,
			     struct writeback_control *wbc)
{
	return generic_writepages(mapping, wbc);
}

static const struct address_space_operations def_blk_aops = {
	.readpage	= blkdev_readpage,
	.readahead	= blkdev_readahead,
	.writepage	= blkdev_writepage,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= blkdev_writepages,
	.releasepage	= blkdev_releasepage,
	.direct_IO	= blkdev_direct_IO,
	.migratepage	= buffer_migrate_page_norefs,
	.is_dirty_writeback = buffer_check_dirty_writeback,
};

#define	BLKDEV_FALLOC_FL_SUPPORTED					\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)

static long blkdev_fallocate(struct file *file, int mode, loff_t start,
			     loff_t len)
{
	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
	loff_t end = start + len - 1;
	loff_t isize;
	int error;

	/* Fail if we don't recognize the flags. */
	if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	/* Don't go off the end of the device. */
	isize = i_size_read(bdev->bd_inode);
	if (start >= isize)
		return -EINVAL;
	if (end >= isize) {
		if (mode & FALLOC_FL_KEEP_SIZE) {
			len = isize - start;
			end = start + len - 1;
		} else
			return -EINVAL;
	}

	/*
	 * Don't allow IO that isn't aligned to logical block size.
	 */
	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	/* Invalidate the page cache, including dirty pages. */
	error = truncate_bdev_range(bdev, file->f_mode, start, end);
	if (error)
		return error;

	switch (mode) {
	case FALLOC_FL_ZERO_RANGE:
	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
		error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
		error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
					     GFP_KERNEL, 0);
		break;
	default:
		return -EOPNOTSUPP;
	}
	if (error)
		return error;

	/*
	 * Invalidate again; if someone wandered in and dirtied a page,
	 * the caller will be given -EBUSY.  The third argument is
	 * inclusive, so the rounding here is safe.
	 */
	return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping,
					     start >> PAGE_SHIFT,
					     end >> PAGE_SHIFT);
}
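
/*
 * Summary of the mode handling in blkdev_fallocate() above, from the
 * point of view of a fallocate(2) caller on the device node (an
 * editorial recap of the switch statement, not extra behaviour):
 *
 *	FALLOC_FL_ZERO_RANGE (optionally with KEEP_SIZE)  -> zeroout, NOUNMAP
 *	FALLOC_FL_PUNCH_HOLE with KEEP_SIZE               -> zeroout, NOFALLBACK
 *	FALLOC_FL_PUNCH_HOLE with KEEP_SIZE and
 *		FALLOC_FL_NO_HIDE_STALE                   -> discard
 *
 * Any other combination fails with -EOPNOTSUPP, and ranges must be
 * aligned to the logical block size.
 */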

const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= block_llseek,
	.read_iter	= blkdev_read_iter,
	.write_iter	= blkdev_write_iter,
	.iopoll		= blkdev_iopoll,
	.mmap		= generic_file_mmap,
	.fsync		= blkdev_fsync,
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= blkdev_fallocate,
};

/**
 * lookup_bdev  - lookup a struct block_device by name
 * @pathname:	special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
 * otherwise.
 */
struct block_device *lookup_bdev(const char *pathname)
{
	struct block_device *bdev;
	struct inode *inode;
	struct path path;
	int error;

	if (!pathname || !*pathname)
		return ERR_PTR(-EINVAL);

	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (error)
		return ERR_PTR(error);

	inode = d_backing_inode(path.dentry);
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))
		goto fail;
	error = -EACCES;
	if (!may_open_dev(&path))
		goto fail;
	error = -ENOMEM;
	bdev = bd_acquire(inode);
	if (!bdev)
		goto fail;
out:
	path_put(&path);
	return bdev;
fail:
	bdev = ERR_PTR(error);
	goto out;
}
EXPORT_SYMBOL(lookup_bdev);
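
/*
 * Example (illustrative): lookup_bdev() only translates a path into a
 * referenced block_device, it does not open it.  A caller that merely
 * needs the device number therefore looks like:
 *
 *	bdev = lookup_bdev("/dev/sda");
 *	if (IS_ERR(bdev))
 *		return PTR_ERR(bdev);
 *	dev = bdev->bd_dev;
 *	bdput(bdev);
 *
 * The path is a placeholder; callers that want to do I/O should use
 * blkdev_get_by_path() instead.
 */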

int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
	struct super_block *sb = get_super(bdev);
	int res = 0;

	if (sb) {
		/*
		 * no need to lock the super, get_super holds the
		 * read mutex so the filesystem cannot go away
		 * under us (->put_super runs with the write lock
		 * hold).
		 */
		shrink_dcache_sb(sb);
		res = invalidate_inodes(sb, kill_dirty);
		drop_super(sb);
	}
	invalidate_bdev(bdev);
	return res;
}
EXPORT_SYMBOL(__invalidate_device);

void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
{
	struct inode *inode, *old_inode = NULL;

	spin_lock(&blockdev_superblock->s_inode_list_lock);
	list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;
		struct block_device *bdev;

		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
		    mapping->nrpages == 0) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&blockdev_superblock->s_inode_list_lock);
		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * s_inode_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * s_inode_list_lock.  So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
		old_inode = inode;
		bdev = I_BDEV(inode);

		mutex_lock(&bdev->bd_mutex);
		if (bdev->bd_openers)
			func(bdev, arg);
		mutex_unlock(&bdev->bd_mutex);

		spin_lock(&blockdev_superblock->s_inode_list_lock);
	}
	spin_unlock(&blockdev_superblock->s_inode_list_lock);
	iput(old_inode);
}
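
/*
 * Illustrative iterate_bdevs() callback (a sketch of how sync(2)
 * flushes every open block device via fs/sync.c; the callback name is
 * hypothetical):
 *
 *	static void sync_one_bdev(struct block_device *bdev, void *arg)
 *	{
 *		__sync_blockdev(bdev, *(int *)arg);
 *	}
 *
 *	int wait = 1;
 *	iterate_bdevs(sync_one_bdev, &wait);
 *
 * The callback runs with bd_mutex held and is only invoked for devices
 * that are actually open (bd_openers != 0).
 */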