1
2
3
4
5
6
7
8#include <linux/init.h>
9#include <linux/mm.h>
10#include <linux/fcntl.h>
11#include <linux/slab.h>
12#include <linux/kmod.h>
13#include <linux/major.h>
14#include <linux/device_cgroup.h>
15#include <linux/highmem.h>
16#include <linux/blkdev.h>
17#include <linux/backing-dev.h>
18#include <linux/module.h>
19#include <linux/blkpg.h>
20#include <linux/magic.h>
21#include <linux/buffer_head.h>
22#include <linux/swap.h>
23#include <linux/pagevec.h>
24#include <linux/writeback.h>
25#include <linux/mpage.h>
26#include <linux/mount.h>
27#include <linux/pseudo_fs.h>
28#include <linux/uio.h>
29#include <linux/namei.h>
30#include <linux/log2.h>
31#include <linux/cleancache.h>
32#include <linux/task_io_accounting_ops.h>
33#include <linux/falloc.h>
34#include <linux/part_stat.h>
35#include <linux/uaccess.h>
36#include <linux/suspend.h>
37#include "internal.h"
38
/*
 * Container pairing a block_device with the VFS inode that represents it.
 * BDEV_I() recovers the container from the embedded inode via container_of(),
 * and I_BDEV() hands out the embedded block_device.
 */
struct bdev_inode {
	struct block_device bdev;	/* device state stored next to the inode */
	struct inode vfs_inode;		/* inode exposed to the VFS */
};
43
/* Forward declaration: bdev_alloc() installs this as a_ops of new bdev inodes. */
static const struct address_space_operations def_blk_aops;
45
46static inline struct bdev_inode *BDEV_I(struct inode *inode)
47{
48 return container_of(inode, struct bdev_inode, vfs_inode);
49}
50
51struct block_device *I_BDEV(struct inode *inode)
52{
53 return &BDEV_I(inode)->bdev;
54}
55EXPORT_SYMBOL(I_BDEV);
56
57static void bdev_write_inode(struct block_device *bdev)
58{
59 struct inode *inode = bdev->bd_inode;
60 int ret;
61
62 spin_lock(&inode->i_lock);
63 while (inode->i_state & I_DIRTY) {
64 spin_unlock(&inode->i_lock);
65 ret = write_inode_now(inode, true);
66 if (ret) {
67 char name[BDEVNAME_SIZE];
68 pr_warn_ratelimited("VFS: Dirty inode writeback failed "
69 "for block device %s (err=%d).\n",
70 bdevname(bdev, name), ret);
71 }
72 spin_lock(&inode->i_lock);
73 }
74 spin_unlock(&inode->i_lock);
75}
76
77
78static void kill_bdev(struct block_device *bdev)
79{
80 struct address_space *mapping = bdev->bd_inode->i_mapping;
81
82 if (mapping_empty(mapping))
83 return;
84
85 invalidate_bh_lrus();
86 truncate_inode_pages(mapping, 0);
87}
88
89
90void invalidate_bdev(struct block_device *bdev)
91{
92 struct address_space *mapping = bdev->bd_inode->i_mapping;
93
94 if (mapping->nrpages) {
95 invalidate_bh_lrus();
96 lru_add_drain_all();
97 invalidate_mapping_pages(mapping, 0, -1);
98 }
99
100
101
102 cleancache_invalidate_inode(mapping);
103}
104EXPORT_SYMBOL(invalidate_bdev);
105
106
107
108
109
110int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
111 loff_t lstart, loff_t lend)
112{
113
114
115
116
117
118 if (!(mode & FMODE_EXCL)) {
119 int err = bd_prepare_to_claim(bdev, truncate_bdev_range);
120 if (err)
121 goto invalidate;
122 }
123
124 truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
125 if (!(mode & FMODE_EXCL))
126 bd_abort_claiming(bdev, truncate_bdev_range);
127 return 0;
128
129invalidate:
130
131
132
133
134 return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping,
135 lstart >> PAGE_SHIFT,
136 lend >> PAGE_SHIFT);
137}
138
139static void set_init_blocksize(struct block_device *bdev)
140{
141 unsigned int bsize = bdev_logical_block_size(bdev);
142 loff_t size = i_size_read(bdev->bd_inode);
143
144 while (bsize < PAGE_SIZE) {
145 if (size & bsize)
146 break;
147 bsize <<= 1;
148 }
149 bdev->bd_inode->i_blkbits = blksize_bits(bsize);
150}
151
152int set_blocksize(struct block_device *bdev, int size)
153{
154
155 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
156 return -EINVAL;
157
158
159 if (size < bdev_logical_block_size(bdev))
160 return -EINVAL;
161
162
163 if (bdev->bd_inode->i_blkbits != blksize_bits(size)) {
164 sync_blockdev(bdev);
165 bdev->bd_inode->i_blkbits = blksize_bits(size);
166 kill_bdev(bdev);
167 }
168 return 0;
169}
170
171EXPORT_SYMBOL(set_blocksize);
172
173int sb_set_blocksize(struct super_block *sb, int size)
174{
175 if (set_blocksize(sb->s_bdev, size))
176 return 0;
177
178
179 sb->s_blocksize = size;
180 sb->s_blocksize_bits = blksize_bits(size);
181 return sb->s_blocksize;
182}
183
184EXPORT_SYMBOL(sb_set_blocksize);
185
186int sb_min_blocksize(struct super_block *sb, int size)
187{
188 int minsize = bdev_logical_block_size(sb->s_bdev);
189 if (size < minsize)
190 size = minsize;
191 return sb_set_blocksize(sb, size);
192}
193
194EXPORT_SYMBOL(sb_min_blocksize);
195
196static int
197blkdev_get_block(struct inode *inode, sector_t iblock,
198 struct buffer_head *bh, int create)
199{
200 bh->b_bdev = I_BDEV(inode);
201 bh->b_blocknr = iblock;
202 set_buffer_mapped(bh);
203 return 0;
204}
205
206static struct inode *bdev_file_inode(struct file *file)
207{
208 return file->f_mapping->host;
209}
210
211static unsigned int dio_bio_write_op(struct kiocb *iocb)
212{
213 unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
214
215
216 if (iocb->ki_flags & IOCB_DSYNC)
217 op |= REQ_FUA;
218 return op;
219}
220
/*
 * Number of bio_vecs kept on the stack by __blkdev_direct_IO_simple();
 * requests needing more fall back to kmalloc_array().
 */
#define DIO_INLINE_BIO_VECS 4
222
223static void blkdev_bio_end_io_simple(struct bio *bio)
224{
225 struct task_struct *waiter = bio->bi_private;
226
227 WRITE_ONCE(bio->bi_private, NULL);
228 blk_wake_io_task(waiter);
229}
230
/*
 * Synchronous direct I/O using a single bio that lives on the stack.
 *
 * @iocb:     request; supplies position, hint, priority and flags
 * @iter:     user/kernel buffers to transfer
 * @nr_pages: number of bio_vecs to provision for the bio
 *
 * Returns the number of bytes transferred, -EINVAL for a misaligned request,
 * -ENOMEM if the bio_vec array cannot be allocated, the error from
 * bio_iov_iter_get_pages(), or the errno derived from the bio status.
 */
static ssize_t
__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
		unsigned int nr_pages)
{
	struct file *file = iocb->ki_filp;
	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
	loff_t pos = iocb->ki_pos;
	bool should_dirty = false;
	struct bio bio;
	ssize_t ret;
	blk_qc_t qc;

	/* File position and buffers must be aligned to the logical block size. */
	if ((pos | iov_iter_alignment(iter)) &
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	/* Small requests use the on-stack vector array, larger ones the heap. */
	if (nr_pages <= DIO_INLINE_BIO_VECS)
		vecs = inline_vecs;
	else {
		vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
				     GFP_KERNEL);
		if (!vecs)
			return -ENOMEM;
	}

	bio_init(&bio, vecs, nr_pages);
	bio_set_dev(&bio, bdev);
	bio.bi_iter.bi_sector = pos >> 9;	/* byte offset -> 512-byte units */
	bio.bi_write_hint = iocb->ki_hint;
	bio.bi_private = current;	/* cleared by the end_io handler */
	bio.bi_end_io = blkdev_bio_end_io_simple;
	bio.bi_ioprio = iocb->ki_ioprio;

	ret = bio_iov_iter_get_pages(&bio, iter);
	if (unlikely(ret))
		goto out;
	/* From here on ret is the byte count unless the I/O fails. */
	ret = bio.bi_iter.bi_size;

	if (iov_iter_rw(iter) == READ) {
		bio.bi_opf = REQ_OP_READ;
		/* Pages of an iovec iterator are marked dirty on release. */
		if (iter_is_iovec(iter))
			should_dirty = true;
	} else {
		bio.bi_opf = dio_bio_write_op(iocb);
		task_io_account_write(ret);
	}
	if (iocb->ki_flags & IOCB_NOWAIT)
		bio.bi_opf |= REQ_NOWAIT;
	if (iocb->ki_flags & IOCB_HIPRI)
		bio_set_polled(&bio, iocb);

	qc = submit_bio(&bio);
	/*
	 * Wait until blkdev_bio_end_io_simple() clears bi_private.  HIPRI
	 * requests poll the queue first and only sleep when blk_poll()
	 * returns 0.
	 */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(bio.bi_private))
			break;
		if (!(iocb->ki_flags & IOCB_HIPRI) ||
		    !blk_poll(bdev_get_queue(bdev), qc, true))
			blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	bio_release_pages(&bio, should_dirty);
	if (unlikely(bio.bi_status))
		ret = blk_status_to_errno(bio.bi_status);

out:
	/* Only a heap-allocated vector array needs freeing. */
	if (vecs != inline_vecs)
		kfree(vecs);

	bio_uninit(&bio);

	return ret;
}
306
/*
 * Per-request state of the multi-bio direct I/O path.  It is allocated
 * together with the first bio: blkdev_init() passes
 * offsetof(struct blkdev_dio, bio) to bioset_init() and __blkdev_direct_IO()
 * recovers the structure with container_of().
 */
struct blkdev_dio {
	union {
		struct kiocb *iocb;		/* async request: kiocb to complete */
		struct task_struct *waiter;	/* sync request: task to wake, NULL once done */
	};
	size_t size;			/* bytes queued so far, summed over all bios */
	atomic_t ref;			/* outstanding bios; set up once multi_bio is true */
	bool multi_bio : 1;		/* more than one bio was issued */
	bool should_dirty : 1;		/* read into an iovec iterator */
	bool is_sync : 1;		/* is_sync_kiocb() held for the request */
	struct bio bio;			/* first bio of the request */
};
319
/* bio_set backing struct blkdev_dio allocations; initialised by blkdev_init(). */
static struct bio_set blkdev_dio_pool;
321
322static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
323{
324 struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
325 struct request_queue *q = bdev_get_queue(bdev);
326
327 return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
328}
329
/*
 * Completion handler for every bio issued by __blkdev_direct_IO().
 *
 * The first error seen is latched in dio->bio.bi_status.  Once the last bio
 * finishes (or immediately for a single-bio request) the request is
 * completed: async requests call ->ki_complete(), sync requests wake the
 * waiting task.  Finally the pages of this particular bio are released.
 */
static void blkdev_bio_end_io(struct bio *bio)
{
	struct blkdev_dio *dio = bio->bi_private;
	/* Sampled up front and used after the completion block below. */
	bool should_dirty = dio->should_dirty;

	/* Remember only the first failure. */
	if (bio->bi_status && !dio->bio.bi_status)
		dio->bio.bi_status = bio->bi_status;

	if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
		if (!dio->is_sync) {
			struct kiocb *iocb = dio->iocb;
			ssize_t ret;

			if (likely(!dio->bio.bi_status)) {
				ret = dio->size;
				iocb->ki_pos += ret;
			} else {
				ret = blk_status_to_errno(dio->bio.bi_status);
			}

			dio->iocb->ki_complete(iocb, ret, 0);
			/* Pairs with the bio_get() taken for async multi-bio requests. */
			if (dio->multi_bio)
				bio_put(&dio->bio);
		} else {
			struct task_struct *waiter = dio->waiter;

			/* The submitter polls dio->waiter; NULL means "done". */
			WRITE_ONCE(dio->waiter, NULL);
			blk_wake_io_task(waiter);
		}
	}

	if (should_dirty) {
		bio_check_pages_dirty(bio);
	} else {
		bio_release_pages(bio, false);
		bio_put(bio);
	}
}
368
/*
 * Direct I/O path that may split a request over several bios and supports
 * asynchronous completion.
 *
 * @iocb:     request; supplies position, hint, priority and flags
 * @iter:     buffers to transfer
 * @nr_pages: number of bio_vecs to provision for the first bio
 *
 * Returns -EINVAL for a misaligned request, -EIOCBQUEUED for an async
 * request (completion is reported through ->ki_complete()), and for a sync
 * request either the number of bytes transferred or a negative errno.
 */
static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
		unsigned int nr_pages)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = bdev_file_inode(file);
	struct block_device *bdev = I_BDEV(inode);
	struct blk_plug plug;
	struct blkdev_dio *dio;
	struct bio *bio;
	bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
	loff_t pos = iocb->ki_pos;
	blk_qc_t qc = BLK_QC_T_NONE;
	int ret = 0;

	/* File position and buffers must be aligned to the logical block size. */
	if ((pos | iov_iter_alignment(iter)) &
	    (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);

	/* The dio state is embedded around the first bio. */
	dio = container_of(bio, struct blkdev_dio, bio);
	dio->is_sync = is_sync = is_sync_kiocb(iocb);
	if (dio->is_sync) {
		dio->waiter = current;
		/* Extra reference; dropped by the bio_put() at the end of this function. */
		bio_get(bio);
	} else {
		dio->iocb = iocb;
	}

	dio->size = 0;
	dio->multi_bio = false;
	dio->should_dirty = is_read && iter_is_iovec(iter);

	/*
	 * Polled (HIPRI) requests are submitted without a plug.
	 * NOTE(review): presumably so they are issued immediately - confirm.
	 */
	if (!is_poll)
		blk_start_plug(&plug);

	for (;;) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = pos >> 9;	/* byte offset -> 512-byte units */
		bio->bi_write_hint = iocb->ki_hint;
		bio->bi_private = dio;
		bio->bi_end_io = blkdev_bio_end_io;
		bio->bi_ioprio = iocb->ki_ioprio;

		ret = bio_iov_iter_get_pages(bio, iter);
		if (unlikely(ret)) {
			/* Fail this bio through the normal completion path. */
			bio->bi_status = BLK_STS_IOERR;
			bio_endio(bio);
			break;
		}

		if (is_read) {
			bio->bi_opf = REQ_OP_READ;
			if (dio->should_dirty)
				bio_set_pages_dirty(bio);
		} else {
			bio->bi_opf = dio_bio_write_op(iocb);
			task_io_account_write(bio->bi_iter.bi_size);
		}
		if (iocb->ki_flags & IOCB_NOWAIT)
			bio->bi_opf |= REQ_NOWAIT;

		dio->size += bio->bi_iter.bi_size;
		pos += bio->bi_iter.bi_size;

		/* No vectors left to map: this is the final bio of the request. */
		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
		if (!nr_pages) {
			bool polled = false;

			if (iocb->ki_flags & IOCB_HIPRI) {
				bio_set_polled(bio, iocb);
				polled = true;
			}

			qc = submit_bio(bio);

			/* Publish the cookie for blkdev_iopoll(). */
			if (polled)
				WRITE_ONCE(iocb->ki_cookie, qc);
			break;
		}

		if (!dio->multi_bio) {
			/*
			 * Switching to multi-bio mode.  An async request takes
			 * an extra reference on the first bio, which embeds
			 * the dio; blkdev_bio_end_io() drops it after calling
			 * ->ki_complete().  The count starts at 2: this bio
			 * plus the one allocated below.
			 */
			if (!is_sync)
				bio_get(bio);
			dio->multi_bio = true;
			atomic_set(&dio->ref, 2);
		} else {
			atomic_inc(&dio->ref);
		}

		submit_bio(bio);
		bio = bio_alloc(GFP_KERNEL, nr_pages);
	}

	if (!is_poll)
		blk_finish_plug(&plug);

	if (!is_sync)
		return -EIOCBQUEUED;

	/* Sync request: wait until the end_io handler clears dio->waiter. */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(dio->waiter))
			break;

		if (!(iocb->ki_flags & IOCB_HIPRI) ||
		    !blk_poll(bdev_get_queue(bdev), qc, true))
			blk_io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	/* A page-pinning error takes precedence over the bio status. */
	if (!ret)
		ret = blk_status_to_errno(dio->bio.bi_status);
	if (likely(!ret))
		ret = dio->size;

	bio_put(&dio->bio);
	return ret;
}
498
499static ssize_t
500blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
501{
502 unsigned int nr_pages;
503
504 if (!iov_iter_count(iter))
505 return 0;
506
507 nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
508 if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
509 return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
510
511 return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
512}
513
514static __init int blkdev_init(void)
515{
516 return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
517}
518module_init(blkdev_init);
519
520int __sync_blockdev(struct block_device *bdev, int wait)
521{
522 if (!bdev)
523 return 0;
524 if (!wait)
525 return filemap_flush(bdev->bd_inode->i_mapping);
526 return filemap_write_and_wait(bdev->bd_inode->i_mapping);
527}
528
529
530
531
532
/* Write out the page cache of @bdev and wait for the I/O to finish. */
int sync_blockdev(struct block_device *bdev)
{
	const int wait_for_io = 1;

	return __sync_blockdev(bdev, wait_for_io);
}
EXPORT_SYMBOL(sync_blockdev);
538
539
540
541
542
543
/*
 * Sync everything associated with @bdev: the filesystem found by
 * get_super() if there is one, otherwise just the device's own page cache.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int ret;

	if (!sb)
		return sync_blockdev(bdev);

	ret = sync_filesystem(sb);
	drop_super(sb);
	return ret;
}
EXPORT_SYMBOL(fsync_bdev);
555
556
557
558
559
560
561
562
563
564
565
566
567
568int freeze_bdev(struct block_device *bdev)
569{
570 struct super_block *sb;
571 int error = 0;
572
573 mutex_lock(&bdev->bd_fsfreeze_mutex);
574 if (++bdev->bd_fsfreeze_count > 1)
575 goto done;
576
577 sb = get_active_super(bdev);
578 if (!sb)
579 goto sync;
580 if (sb->s_op->freeze_super)
581 error = sb->s_op->freeze_super(sb);
582 else
583 error = freeze_super(sb);
584 deactivate_super(sb);
585
586 if (error) {
587 bdev->bd_fsfreeze_count--;
588 goto done;
589 }
590 bdev->bd_fsfreeze_sb = sb;
591
592sync:
593 sync_blockdev(bdev);
594done:
595 mutex_unlock(&bdev->bd_fsfreeze_mutex);
596 return error;
597}
598EXPORT_SYMBOL(freeze_bdev);
599
600
601
602
603
604
605
606int thaw_bdev(struct block_device *bdev)
607{
608 struct super_block *sb;
609 int error = -EINVAL;
610
611 mutex_lock(&bdev->bd_fsfreeze_mutex);
612 if (!bdev->bd_fsfreeze_count)
613 goto out;
614
615 error = 0;
616 if (--bdev->bd_fsfreeze_count > 0)
617 goto out;
618
619 sb = bdev->bd_fsfreeze_sb;
620 if (!sb)
621 goto out;
622
623 if (sb->s_op->thaw_super)
624 error = sb->s_op->thaw_super(sb);
625 else
626 error = thaw_super(sb);
627 if (error)
628 bdev->bd_fsfreeze_count++;
629 else
630 bdev->bd_fsfreeze_sb = NULL;
631out:
632 mutex_unlock(&bdev->bd_fsfreeze_mutex);
633 return error;
634}
635EXPORT_SYMBOL(thaw_bdev);
636
637static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
638{
639 return block_write_full_page(page, blkdev_get_block, wbc);
640}
641
642static int blkdev_readpage(struct file * file, struct page * page)
643{
644 return block_read_full_page(page, blkdev_get_block);
645}
646
647static void blkdev_readahead(struct readahead_control *rac)
648{
649 mpage_readahead(rac, blkdev_get_block);
650}
651
652static int blkdev_write_begin(struct file *file, struct address_space *mapping,
653 loff_t pos, unsigned len, unsigned flags,
654 struct page **pagep, void **fsdata)
655{
656 return block_write_begin(mapping, pos, len, flags, pagep,
657 blkdev_get_block);
658}
659
660static int blkdev_write_end(struct file *file, struct address_space *mapping,
661 loff_t pos, unsigned len, unsigned copied,
662 struct page *page, void *fsdata)
663{
664 int ret;
665 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
666
667 unlock_page(page);
668 put_page(page);
669
670 return ret;
671}
672
673
674
675
676
677
678static loff_t block_llseek(struct file *file, loff_t offset, int whence)
679{
680 struct inode *bd_inode = bdev_file_inode(file);
681 loff_t retval;
682
683 inode_lock(bd_inode);
684 retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
685 inode_unlock(bd_inode);
686 return retval;
687}
688
689int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
690{
691 struct inode *bd_inode = bdev_file_inode(filp);
692 struct block_device *bdev = I_BDEV(bd_inode);
693 int error;
694
695 error = file_write_and_wait_range(filp, start, end);
696 if (error)
697 return error;
698
699
700
701
702
703
704 error = blkdev_issue_flush(bdev);
705 if (error == -EOPNOTSUPP)
706 error = 0;
707
708 return error;
709}
710EXPORT_SYMBOL(blkdev_fsync);
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728int bdev_read_page(struct block_device *bdev, sector_t sector,
729 struct page *page)
730{
731 const struct block_device_operations *ops = bdev->bd_disk->fops;
732 int result = -EOPNOTSUPP;
733
734 if (!ops->rw_page || bdev_get_integrity(bdev))
735 return result;
736
737 result = blk_queue_enter(bdev->bd_disk->queue, 0);
738 if (result)
739 return result;
740 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
741 REQ_OP_READ);
742 blk_queue_exit(bdev->bd_disk->queue);
743 return result;
744}
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765int bdev_write_page(struct block_device *bdev, sector_t sector,
766 struct page *page, struct writeback_control *wbc)
767{
768 int result;
769 const struct block_device_operations *ops = bdev->bd_disk->fops;
770
771 if (!ops->rw_page || bdev_get_integrity(bdev))
772 return -EOPNOTSUPP;
773 result = blk_queue_enter(bdev->bd_disk->queue, 0);
774 if (result)
775 return result;
776
777 set_page_writeback(page);
778 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
779 REQ_OP_WRITE);
780 if (result) {
781 end_page_writeback(page);
782 } else {
783 clean_page_buffers(page);
784 unlock_page(page);
785 }
786 blk_queue_exit(bdev->bd_disk->queue);
787 return result;
788}
789
790
791
792
793
/* Protects the bd_holder / bd_holders / bd_claiming fields used by the claiming code. */
static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
/* Slab cache for struct bdev_inode; created in bdev_cache_init(). */
static struct kmem_cache * bdev_cachep __read_mostly;
796
797static struct inode *bdev_alloc_inode(struct super_block *sb)
798{
799 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
800
801 if (!ei)
802 return NULL;
803 memset(&ei->bdev, 0, sizeof(ei->bdev));
804 ei->bdev.bd_bdi = &noop_backing_dev_info;
805 return &ei->vfs_inode;
806}
807
808static void bdev_free_inode(struct inode *inode)
809{
810 struct block_device *bdev = I_BDEV(inode);
811
812 free_percpu(bdev->bd_stats);
813 kfree(bdev->bd_meta_info);
814
815 if (!bdev_is_partition(bdev))
816 kfree(bdev->bd_disk);
817 kmem_cache_free(bdev_cachep, BDEV_I(inode));
818}
819
820static void init_once(void *data)
821{
822 struct bdev_inode *ei = data;
823
824 inode_init_once(&ei->vfs_inode);
825}
826
827static void bdev_evict_inode(struct inode *inode)
828{
829 struct block_device *bdev = &BDEV_I(inode)->bdev;
830 truncate_inode_pages_final(&inode->i_data);
831 invalidate_inode_buffers(inode);
832 clear_inode(inode);
833
834 inode_detach_wb(inode);
835 if (bdev->bd_bdi != &noop_backing_dev_info) {
836 bdi_put(bdev->bd_bdi);
837 bdev->bd_bdi = &noop_backing_dev_info;
838 }
839}
840
/* Superblock operations of the bdev pseudo-filesystem. */
static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,	/* allocates a struct bdev_inode */
	.free_inode = bdev_free_inode,
	.drop_inode = generic_delete_inode,
	.evict_inode = bdev_evict_inode,
};
848
849static int bd_init_fs_context(struct fs_context *fc)
850{
851 struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
852 if (!ctx)
853 return -ENOMEM;
854 fc->s_iflags |= SB_I_CGROUPWB;
855 ctx->ops = &bdev_sops;
856 return 0;
857}
858
/* The "bdev" pseudo-filesystem; registered and mounted by bdev_cache_init(). */
static struct file_system_type bd_type = {
	.name = "bdev",
	.init_fs_context = bd_init_fs_context,
	.kill_sb = kill_anon_super,
};
864
/* Superblock of the internal bdev mount; set once in bdev_cache_init(). */
struct super_block *blockdev_superblock __read_mostly;
EXPORT_SYMBOL_GPL(blockdev_superblock);
867
868void __init bdev_cache_init(void)
869{
870 int err;
871 static struct vfsmount *bd_mnt;
872
873 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
874 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
875 SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
876 init_once);
877 err = register_filesystem(&bd_type);
878 if (err)
879 panic("Cannot register bdev pseudo-fs");
880 bd_mnt = kern_mount(&bd_type);
881 if (IS_ERR(bd_mnt))
882 panic("Cannot create bdev pseudo-fs");
883 blockdev_superblock = bd_mnt->mnt_sb;
884}
885
886struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
887{
888 struct block_device *bdev;
889 struct inode *inode;
890
891 inode = new_inode(blockdev_superblock);
892 if (!inode)
893 return NULL;
894 inode->i_mode = S_IFBLK;
895 inode->i_rdev = 0;
896 inode->i_data.a_ops = &def_blk_aops;
897 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
898
899 bdev = I_BDEV(inode);
900 mutex_init(&bdev->bd_fsfreeze_mutex);
901 spin_lock_init(&bdev->bd_size_lock);
902 bdev->bd_disk = disk;
903 bdev->bd_partno = partno;
904 bdev->bd_inode = inode;
905#ifdef CONFIG_SYSFS
906 INIT_LIST_HEAD(&bdev->bd_holder_disks);
907#endif
908 bdev->bd_stats = alloc_percpu(struct disk_stats);
909 if (!bdev->bd_stats) {
910 iput(inode);
911 return NULL;
912 }
913 return bdev;
914}
915
916void bdev_add(struct block_device *bdev, dev_t dev)
917{
918 bdev->bd_dev = dev;
919 bdev->bd_inode->i_rdev = dev;
920 bdev->bd_inode->i_ino = dev;
921 insert_inode_hash(bdev->bd_inode);
922}
923
924static struct block_device *bdget(dev_t dev)
925{
926 struct inode *inode;
927
928 inode = ilookup(blockdev_superblock, dev);
929 if (!inode)
930 return NULL;
931 return &BDEV_I(inode)->bdev;
932}
933
934
935
936
937
938
939
940
941struct block_device *bdgrab(struct block_device *bdev)
942{
943 if (!igrab(bdev->bd_inode))
944 return NULL;
945 return bdev;
946}
947EXPORT_SYMBOL(bdgrab);
948
949long nr_blockdev_pages(void)
950{
951 struct inode *inode;
952 long ret = 0;
953
954 spin_lock(&blockdev_superblock->s_inode_list_lock);
955 list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
956 ret += inode->i_mapping->nrpages;
957 spin_unlock(&blockdev_superblock->s_inode_list_lock);
958
959 return ret;
960}
961
962void bdput(struct block_device *bdev)
963{
964 iput(bdev->bd_inode);
965}
966EXPORT_SYMBOL(bdput);
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
983 void *holder)
984{
985 if (bdev->bd_holder == holder)
986 return true;
987 else if (bdev->bd_holder != NULL)
988 return false;
989 else if (whole == bdev)
990 return true;
991
992 else if (whole->bd_holder == bd_may_claim)
993 return true;
994 else if (whole->bd_holder != NULL)
995 return false;
996 else
997 return true;
998}
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012int bd_prepare_to_claim(struct block_device *bdev, void *holder)
1013{
1014 struct block_device *whole = bdev_whole(bdev);
1015
1016 if (WARN_ON_ONCE(!holder))
1017 return -EINVAL;
1018retry:
1019 spin_lock(&bdev_lock);
1020
1021 if (!bd_may_claim(bdev, whole, holder)) {
1022 spin_unlock(&bdev_lock);
1023 return -EBUSY;
1024 }
1025
1026
1027 if (whole->bd_claiming) {
1028 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
1029 DEFINE_WAIT(wait);
1030
1031 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
1032 spin_unlock(&bdev_lock);
1033 schedule();
1034 finish_wait(wq, &wait);
1035 goto retry;
1036 }
1037
1038
1039 whole->bd_claiming = holder;
1040 spin_unlock(&bdev_lock);
1041 return 0;
1042}
1043EXPORT_SYMBOL_GPL(bd_prepare_to_claim);
1044
1045static void bd_clear_claiming(struct block_device *whole, void *holder)
1046{
1047 lockdep_assert_held(&bdev_lock);
1048
1049 BUG_ON(whole->bd_claiming != holder);
1050 whole->bd_claiming = NULL;
1051 wake_up_bit(&whole->bd_claiming, 0);
1052}
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062static void bd_finish_claiming(struct block_device *bdev, void *holder)
1063{
1064 struct block_device *whole = bdev_whole(bdev);
1065
1066 spin_lock(&bdev_lock);
1067 BUG_ON(!bd_may_claim(bdev, whole, holder));
1068
1069
1070
1071
1072 whole->bd_holders++;
1073 whole->bd_holder = bd_may_claim;
1074 bdev->bd_holders++;
1075 bdev->bd_holder = holder;
1076 bd_clear_claiming(whole, holder);
1077 spin_unlock(&bdev_lock);
1078}
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089void bd_abort_claiming(struct block_device *bdev, void *holder)
1090{
1091 spin_lock(&bdev_lock);
1092 bd_clear_claiming(bdev_whole(bdev), holder);
1093 spin_unlock(&bdev_lock);
1094}
1095EXPORT_SYMBOL(bd_abort_claiming);
1096
1097#ifdef CONFIG_SYSFS
/*
 * One entry per gendisk linked as a holder of a block device; entries live
 * on the device's bd_holder_disks list.
 */
struct bd_holder_disk {
	struct list_head	list;	/* link in bdev->bd_holder_disks */
	struct gendisk		*disk;	/* the holding disk */
	int			refcnt;	/* number of bd_link_disk_holder() calls for this disk */
};
1103
1104static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
1105 struct gendisk *disk)
1106{
1107 struct bd_holder_disk *holder;
1108
1109 list_for_each_entry(holder, &bdev->bd_holder_disks, list)
1110 if (holder->disk == disk)
1111 return holder;
1112 return NULL;
1113}
1114
/* Create a sysfs link in @from that points at @to and is named after @to. */
static int add_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	return sysfs_create_link(from, to, link_name);
}
1119
/* Remove the sysfs link in @from that is named after @to. */
static void del_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	sysfs_remove_link(from, link_name);
}
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
1154{
1155 struct bd_holder_disk *holder;
1156 int ret = 0;
1157
1158 mutex_lock(&bdev->bd_disk->open_mutex);
1159
1160 WARN_ON_ONCE(!bdev->bd_holder);
1161
1162
1163 if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir))
1164 goto out_unlock;
1165
1166 holder = bd_find_holder_disk(bdev, disk);
1167 if (holder) {
1168 holder->refcnt++;
1169 goto out_unlock;
1170 }
1171
1172 holder = kzalloc(sizeof(*holder), GFP_KERNEL);
1173 if (!holder) {
1174 ret = -ENOMEM;
1175 goto out_unlock;
1176 }
1177
1178 INIT_LIST_HEAD(&holder->list);
1179 holder->disk = disk;
1180 holder->refcnt = 1;
1181
1182 ret = add_symlink(disk->slave_dir, bdev_kobj(bdev));
1183 if (ret)
1184 goto out_free;
1185
1186 ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
1187 if (ret)
1188 goto out_del;
1189
1190
1191
1192
1193 kobject_get(bdev->bd_holder_dir);
1194
1195 list_add(&holder->list, &bdev->bd_holder_disks);
1196 goto out_unlock;
1197
1198out_del:
1199 del_symlink(disk->slave_dir, bdev_kobj(bdev));
1200out_free:
1201 kfree(holder);
1202out_unlock:
1203 mutex_unlock(&bdev->bd_disk->open_mutex);
1204 return ret;
1205}
1206EXPORT_SYMBOL_GPL(bd_link_disk_holder);
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
1219{
1220 struct bd_holder_disk *holder;
1221
1222 mutex_lock(&bdev->bd_disk->open_mutex);
1223
1224 holder = bd_find_holder_disk(bdev, disk);
1225
1226 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
1227 del_symlink(disk->slave_dir, bdev_kobj(bdev));
1228 del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj);
1229 kobject_put(bdev->bd_holder_dir);
1230 list_del_init(&holder->list);
1231 kfree(holder);
1232 }
1233
1234 mutex_unlock(&bdev->bd_disk->open_mutex);
1235}
1236EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
1237#endif
1238
1239static void blkdev_flush_mapping(struct block_device *bdev)
1240{
1241 WARN_ON_ONCE(bdev->bd_holders);
1242 sync_blockdev(bdev);
1243 kill_bdev(bdev);
1244 bdev_write_inode(bdev);
1245}
1246
1247static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
1248{
1249 struct gendisk *disk = bdev->bd_disk;
1250 int ret = 0;
1251
1252 if (disk->fops->open) {
1253 ret = disk->fops->open(bdev, mode);
1254 if (ret) {
1255
1256 if (ret == -ENOMEDIUM &&
1257 test_bit(GD_NEED_PART_SCAN, &disk->state))
1258 bdev_disk_changed(disk, true);
1259 return ret;
1260 }
1261 }
1262
1263 if (!bdev->bd_openers) {
1264 set_init_blocksize(bdev);
1265 if (bdev->bd_bdi == &noop_backing_dev_info)
1266 bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
1267 }
1268 if (test_bit(GD_NEED_PART_SCAN, &disk->state))
1269 bdev_disk_changed(disk, false);
1270 bdev->bd_openers++;
1271 return 0;;
1272}
1273
1274static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
1275{
1276 if (!--bdev->bd_openers)
1277 blkdev_flush_mapping(bdev);
1278 if (bdev->bd_disk->fops->release)
1279 bdev->bd_disk->fops->release(bdev->bd_disk, mode);
1280}
1281
1282static int blkdev_get_part(struct block_device *part, fmode_t mode)
1283{
1284 struct gendisk *disk = part->bd_disk;
1285 struct block_device *whole;
1286 int ret;
1287
1288 if (part->bd_openers)
1289 goto done;
1290
1291 whole = bdgrab(disk->part0);
1292 ret = blkdev_get_whole(whole, mode);
1293 if (ret)
1294 goto out_put_whole;
1295
1296 ret = -ENXIO;
1297 if (!bdev_nr_sectors(part))
1298 goto out_blkdev_put;
1299
1300 disk->open_partitions++;
1301 set_init_blocksize(part);
1302 if (part->bd_bdi == &noop_backing_dev_info)
1303 part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
1304done:
1305 part->bd_openers++;
1306 return 0;
1307
1308out_blkdev_put:
1309 blkdev_put_whole(whole, mode);
1310out_put_whole:
1311 bdput(whole);
1312 return ret;
1313}
1314
1315static void blkdev_put_part(struct block_device *part, fmode_t mode)
1316{
1317 struct block_device *whole = bdev_whole(part);
1318
1319 if (--part->bd_openers)
1320 return;
1321 blkdev_flush_mapping(part);
1322 whole->bd_disk->open_partitions--;
1323 blkdev_put_whole(whole, mode);
1324 bdput(whole);
1325}
1326
1327struct block_device *blkdev_get_no_open(dev_t dev)
1328{
1329 struct block_device *bdev;
1330 struct gendisk *disk;
1331
1332 bdev = bdget(dev);
1333 if (!bdev) {
1334 blk_request_module(dev);
1335 bdev = bdget(dev);
1336 if (!bdev)
1337 return NULL;
1338 }
1339
1340 disk = bdev->bd_disk;
1341 if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
1342 goto bdput;
1343 if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
1344 goto put_disk;
1345 if (!try_module_get(bdev->bd_disk->fops->owner))
1346 goto put_disk;
1347 return bdev;
1348put_disk:
1349 put_disk(disk);
1350bdput:
1351 bdput(bdev);
1352 return NULL;
1353}
1354
1355void blkdev_put_no_open(struct block_device *bdev)
1356{
1357 module_put(bdev->bd_disk->fops->owner);
1358 put_disk(bdev->bd_disk);
1359 bdput(bdev);
1360}
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
/*
 * Open the block device with device number @dev.
 *
 * @dev:    device number to open
 * @mode:   FMODE_* flags; FMODE_EXCL requests an exclusive claim
 * @holder: identity of the claimer, used only with FMODE_EXCL
 *
 * The device cgroup is consulted first.  With FMODE_EXCL the claim is
 * prepared before and finished after the actual open, which runs under the
 * disk's open_mutex with disk events blocked.
 *
 * Returns the opened block_device, to be released with blkdev_put(), or an
 * ERR_PTR(): the cgroup error, -ENXIO when the device cannot be found or its
 * disk is not up, the claim error, or the error from opening the device.
 */
struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
{
	bool unblock_events = true;
	struct block_device *bdev;
	struct gendisk *disk;
	int ret;

	ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
			MAJOR(dev), MINOR(dev),
			((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
			((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
	if (ret)
		return ERR_PTR(ret);

	bdev = blkdev_get_no_open(dev);
	if (!bdev)
		return ERR_PTR(-ENXIO);
	disk = bdev->bd_disk;

	if (mode & FMODE_EXCL) {
		ret = bd_prepare_to_claim(bdev, holder);
		if (ret)
			goto put_blkdev;
	}

	disk_block_events(disk);

	mutex_lock(&disk->open_mutex);
	ret = -ENXIO;
	/* Re-check under open_mutex that the disk is still up. */
	if (!(disk->flags & GENHD_FL_UP))
		goto abort_claiming;
	if (bdev_is_partition(bdev))
		ret = blkdev_get_part(bdev, mode);
	else
		ret = blkdev_get_whole(bdev, mode);
	if (ret)
		goto abort_claiming;
	if (mode & FMODE_EXCL) {
		bd_finish_claiming(bdev, holder);

		/*
		 * For disks that ask for it, the first exclusive writer
		 * leaves events blocked and marks the device with
		 * bd_write_holder.  blkdev_put() unblocks them once the last
		 * holder of the device is gone.
		 */
		if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
		    (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
			bdev->bd_write_holder = true;
			unblock_events = false;
		}
	}
	mutex_unlock(&disk->open_mutex);

	if (unblock_events)
		disk_unblock_events(disk);
	return bdev;

abort_claiming:
	/* Undo bd_prepare_to_claim() before dropping open_mutex. */
	if (mode & FMODE_EXCL)
		bd_abort_claiming(bdev, holder);
	mutex_unlock(&disk->open_mutex);
	disk_unblock_events(disk);
put_blkdev:
	blkdev_put_no_open(bdev);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(blkdev_get_by_dev);
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1471 void *holder)
1472{
1473 struct block_device *bdev;
1474 dev_t dev;
1475 int error;
1476
1477 error = lookup_bdev(path, &dev);
1478 if (error)
1479 return ERR_PTR(error);
1480
1481 bdev = blkdev_get_by_dev(dev, mode, holder);
1482 if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1483 blkdev_put(bdev, mode);
1484 return ERR_PTR(-EACCES);
1485 }
1486
1487 return bdev;
1488}
1489EXPORT_SYMBOL(blkdev_get_by_path);
1490
1491static int blkdev_open(struct inode * inode, struct file * filp)
1492{
1493 struct block_device *bdev;
1494
1495
1496
1497
1498
1499
1500
1501 filp->f_flags |= O_LARGEFILE;
1502
1503 filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
1504
1505 if (filp->f_flags & O_NDELAY)
1506 filp->f_mode |= FMODE_NDELAY;
1507 if (filp->f_flags & O_EXCL)
1508 filp->f_mode |= FMODE_EXCL;
1509 if ((filp->f_flags & O_ACCMODE) == 3)
1510 filp->f_mode |= FMODE_WRITE_IOCTL;
1511
1512 bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
1513 if (IS_ERR(bdev))
1514 return PTR_ERR(bdev);
1515 filp->f_mapping = bdev->bd_inode->i_mapping;
1516 filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
1517 return 0;
1518}
1519
1520void blkdev_put(struct block_device *bdev, fmode_t mode)
1521{
1522 struct gendisk *disk = bdev->bd_disk;
1523
1524
1525
1526
1527
1528
1529
1530
1531 if (bdev->bd_openers == 1)
1532 sync_blockdev(bdev);
1533
1534 mutex_lock(&disk->open_mutex);
1535 if (mode & FMODE_EXCL) {
1536 struct block_device *whole = bdev_whole(bdev);
1537 bool bdev_free;
1538
1539
1540
1541
1542
1543
1544 spin_lock(&bdev_lock);
1545
1546 WARN_ON_ONCE(--bdev->bd_holders < 0);
1547 WARN_ON_ONCE(--whole->bd_holders < 0);
1548
1549 if ((bdev_free = !bdev->bd_holders))
1550 bdev->bd_holder = NULL;
1551 if (!whole->bd_holders)
1552 whole->bd_holder = NULL;
1553
1554 spin_unlock(&bdev_lock);
1555
1556
1557
1558
1559
1560 if (bdev_free && bdev->bd_write_holder) {
1561 disk_unblock_events(disk);
1562 bdev->bd_write_holder = false;
1563 }
1564 }
1565
1566
1567
1568
1569
1570
1571 disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
1572
1573 if (bdev_is_partition(bdev))
1574 blkdev_put_part(bdev, mode);
1575 else
1576 blkdev_put_whole(bdev, mode);
1577 mutex_unlock(&disk->open_mutex);
1578
1579 blkdev_put_no_open(bdev);
1580}
1581EXPORT_SYMBOL(blkdev_put);
1582
1583static int blkdev_close(struct inode * inode, struct file * filp)
1584{
1585 struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
1586 blkdev_put(bdev, filp->f_mode);
1587 return 0;
1588}
1589
1590static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1591{
1592 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
1593 fmode_t mode = file->f_mode;
1594
1595
1596
1597
1598
1599 if (file->f_flags & O_NDELAY)
1600 mode |= FMODE_NDELAY;
1601 else
1602 mode &= ~FMODE_NDELAY;
1603
1604 return blkdev_ioctl(bdev, mode, cmd, arg);
1605}
1606
1607
1608
1609
1610
1611
1612
1613
1614static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
1615{
1616 struct file *file = iocb->ki_filp;
1617 struct inode *bd_inode = bdev_file_inode(file);
1618 loff_t size = i_size_read(bd_inode);
1619 struct blk_plug plug;
1620 size_t shorted = 0;
1621 ssize_t ret;
1622
1623 if (bdev_read_only(I_BDEV(bd_inode)))
1624 return -EPERM;
1625
1626 if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
1627 return -ETXTBSY;
1628
1629 if (!iov_iter_count(from))
1630 return 0;
1631
1632 if (iocb->ki_pos >= size)
1633 return -ENOSPC;
1634
1635 if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
1636 return -EOPNOTSUPP;
1637
1638 size -= iocb->ki_pos;
1639 if (iov_iter_count(from) > size) {
1640 shorted = iov_iter_count(from) - size;
1641 iov_iter_truncate(from, size);
1642 }
1643
1644 blk_start_plug(&plug);
1645 ret = __generic_file_write_iter(iocb, from);
1646 if (ret > 0)
1647 ret = generic_write_sync(iocb, ret);
1648 iov_iter_reexpand(from, iov_iter_count(from) + shorted);
1649 blk_finish_plug(&plug);
1650 return ret;
1651}
1652
1653static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
1654{
1655 struct file *file = iocb->ki_filp;
1656 struct inode *bd_inode = bdev_file_inode(file);
1657 loff_t size = i_size_read(bd_inode);
1658 loff_t pos = iocb->ki_pos;
1659 size_t shorted = 0;
1660 ssize_t ret;
1661
1662 if (pos >= size)
1663 return 0;
1664
1665 size -= pos;
1666 if (iov_iter_count(to) > size) {
1667 shorted = iov_iter_count(to) - size;
1668 iov_iter_truncate(to, size);
1669 }
1670
1671 ret = generic_file_read_iter(iocb, to);
1672 iov_iter_reexpand(to, iov_iter_count(to) + shorted);
1673 return ret;
1674}
1675
/* ->writepages handler: hand the request straight to generic_writepages(). */
static int blkdev_writepages(struct address_space *mapping,
			     struct writeback_control *wbc)
{
	int ret = generic_writepages(mapping, wbc);

	return ret;
}
1681
1682static const struct address_space_operations def_blk_aops = {
1683 .set_page_dirty = __set_page_dirty_buffers,
1684 .readpage = blkdev_readpage,
1685 .readahead = blkdev_readahead,
1686 .writepage = blkdev_writepage,
1687 .write_begin = blkdev_write_begin,
1688 .write_end = blkdev_write_end,
1689 .writepages = blkdev_writepages,
1690 .direct_IO = blkdev_direct_IO,
1691 .migratepage = buffer_migrate_page_norefs,
1692 .is_dirty_writeback = buffer_check_dirty_writeback,
1693};
1694
/* fallocate() mode bits that blkdev_fallocate() accepts at all. */
#define	BLKDEV_FALLOC_FL_SUPPORTED					\
		(FALLOC_FL_ZERO_RANGE |					\
		 FALLOC_FL_PUNCH_HOLE |					\
		 FALLOC_FL_KEEP_SIZE |					\
		 FALLOC_FL_NO_HIDE_STALE)
1698
1699static long blkdev_fallocate(struct file *file, int mode, loff_t start,
1700 loff_t len)
1701{
1702 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
1703 loff_t end = start + len - 1;
1704 loff_t isize;
1705 int error;
1706
1707
1708 if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
1709 return -EOPNOTSUPP;
1710
1711
1712 isize = i_size_read(bdev->bd_inode);
1713 if (start >= isize)
1714 return -EINVAL;
1715 if (end >= isize) {
1716 if (mode & FALLOC_FL_KEEP_SIZE) {
1717 len = isize - start;
1718 end = start + len - 1;
1719 } else
1720 return -EINVAL;
1721 }
1722
1723
1724
1725
1726 if ((start | len) & (bdev_logical_block_size(bdev) - 1))
1727 return -EINVAL;
1728
1729
1730 error = truncate_bdev_range(bdev, file->f_mode, start, end);
1731 if (error)
1732 return error;
1733
1734 switch (mode) {
1735 case FALLOC_FL_ZERO_RANGE:
1736 case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
1737 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
1738 GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
1739 break;
1740 case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
1741 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
1742 GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
1743 break;
1744 case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
1745 error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
1746 GFP_KERNEL, 0);
1747 break;
1748 default:
1749 return -EOPNOTSUPP;
1750 }
1751 if (error)
1752 return error;
1753
1754
1755
1756
1757
1758
1759 return truncate_bdev_range(bdev, file->f_mode, start, end);
1760}
1761
1762const struct file_operations def_blk_fops = {
1763 .open = blkdev_open,
1764 .release = blkdev_close,
1765 .llseek = block_llseek,
1766 .read_iter = blkdev_read_iter,
1767 .write_iter = blkdev_write_iter,
1768 .iopoll = blkdev_iopoll,
1769 .mmap = generic_file_mmap,
1770 .fsync = blkdev_fsync,
1771 .unlocked_ioctl = block_ioctl,
1772#ifdef CONFIG_COMPAT
1773 .compat_ioctl = compat_blkdev_ioctl,
1774#endif
1775 .splice_read = generic_file_splice_read,
1776 .splice_write = iter_file_splice_write,
1777 .fallocate = blkdev_fallocate,
1778};
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789int lookup_bdev(const char *pathname, dev_t *dev)
1790{
1791 struct inode *inode;
1792 struct path path;
1793 int error;
1794
1795 if (!pathname || !*pathname)
1796 return -EINVAL;
1797
1798 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
1799 if (error)
1800 return error;
1801
1802 inode = d_backing_inode(path.dentry);
1803 error = -ENOTBLK;
1804 if (!S_ISBLK(inode->i_mode))
1805 goto out_path_put;
1806 error = -EACCES;
1807 if (!may_open_dev(&path))
1808 goto out_path_put;
1809
1810 *dev = inode->i_rdev;
1811 error = 0;
1812out_path_put:
1813 path_put(&path);
1814 return error;
1815}
1816EXPORT_SYMBOL(lookup_bdev);
1817
1818int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1819{
1820 struct super_block *sb = get_super(bdev);
1821 int res = 0;
1822
1823 if (sb) {
1824
1825
1826
1827
1828
1829
1830 shrink_dcache_sb(sb);
1831 res = invalidate_inodes(sb, kill_dirty);
1832 drop_super(sb);
1833 }
1834 invalidate_bdev(bdev);
1835 return res;
1836}
1837EXPORT_SYMBOL(__invalidate_device);
1838
1839void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
1840{
1841 struct inode *inode, *old_inode = NULL;
1842
1843 spin_lock(&blockdev_superblock->s_inode_list_lock);
1844 list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
1845 struct address_space *mapping = inode->i_mapping;
1846 struct block_device *bdev;
1847
1848 spin_lock(&inode->i_lock);
1849 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
1850 mapping->nrpages == 0) {
1851 spin_unlock(&inode->i_lock);
1852 continue;
1853 }
1854 __iget(inode);
1855 spin_unlock(&inode->i_lock);
1856 spin_unlock(&blockdev_superblock->s_inode_list_lock);
1857
1858
1859
1860
1861
1862
1863
1864
1865 iput(old_inode);
1866 old_inode = inode;
1867 bdev = I_BDEV(inode);
1868
1869 mutex_lock(&bdev->bd_disk->open_mutex);
1870 if (bdev->bd_openers)
1871 func(bdev, arg);
1872 mutex_unlock(&bdev->bd_disk->open_mutex);
1873
1874 spin_lock(&blockdev_superblock->s_inode_list_lock);
1875 }
1876 spin_unlock(&blockdev_superblock->s_inode_list_lock);
1877 iput(old_inode);
1878}
1879