1
2
3
4
5
6
7
8
9#include <linux/init.h>
10#include <linux/mm.h>
11#include <linux/fcntl.h>
12#include <linux/slab.h>
13#include <linux/kmod.h>
14#include <linux/major.h>
15#include <linux/device_cgroup.h>
16#include <linux/highmem.h>
17#include <linux/blkdev.h>
18#include <linux/backing-dev.h>
19#include <linux/module.h>
20#include <linux/blkpg.h>
21#include <linux/magic.h>
22#include <linux/dax.h>
23#include <linux/buffer_head.h>
24#include <linux/swap.h>
25#include <linux/pagevec.h>
26#include <linux/writeback.h>
27#include <linux/mpage.h>
28#include <linux/mount.h>
29#include <linux/pseudo_fs.h>
30#include <linux/uio.h>
31#include <linux/namei.h>
32#include <linux/log2.h>
33#include <linux/cleancache.h>
34#include <linux/task_io_accounting_ops.h>
35#include <linux/falloc.h>
36#include <linux/uaccess.h>
37#include "internal.h"
38
39struct bdev_inode {
40 struct block_device bdev;
41 struct inode vfs_inode;
42};
43
44static const struct address_space_operations def_blk_aops;
45
46static inline struct bdev_inode *BDEV_I(struct inode *inode)
47{
48 return container_of(inode, struct bdev_inode, vfs_inode);
49}
50
51struct block_device *I_BDEV(struct inode *inode)
52{
53 return &BDEV_I(inode)->bdev;
54}
55EXPORT_SYMBOL(I_BDEV);
56
57static void bdev_write_inode(struct block_device *bdev)
58{
59 struct inode *inode = bdev->bd_inode;
60 int ret;
61
62 spin_lock(&inode->i_lock);
63 while (inode->i_state & I_DIRTY) {
64 spin_unlock(&inode->i_lock);
65 ret = write_inode_now(inode, true);
66 if (ret) {
67 char name[BDEVNAME_SIZE];
68 pr_warn_ratelimited("VFS: Dirty inode writeback failed "
69 "for block device %s (err=%d).\n",
70 bdevname(bdev, name), ret);
71 }
72 spin_lock(&inode->i_lock);
73 }
74 spin_unlock(&inode->i_lock);
75}
76
77
78void kill_bdev(struct block_device *bdev)
79{
80 struct address_space *mapping = bdev->bd_inode->i_mapping;
81
82 if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
83 return;
84
85 invalidate_bh_lrus();
86 truncate_inode_pages(mapping, 0);
87}
88EXPORT_SYMBOL(kill_bdev);
89
90
91void invalidate_bdev(struct block_device *bdev)
92{
93 struct address_space *mapping = bdev->bd_inode->i_mapping;
94
95 if (mapping->nrpages) {
96 invalidate_bh_lrus();
97 lru_add_drain_all();
98 invalidate_mapping_pages(mapping, 0, -1);
99 }
100
101
102
103 cleancache_invalidate_inode(mapping);
104}
105EXPORT_SYMBOL(invalidate_bdev);
106
107static void set_init_blocksize(struct block_device *bdev)
108{
109 unsigned bsize = bdev_logical_block_size(bdev);
110 loff_t size = i_size_read(bdev->bd_inode);
111
112 while (bsize < PAGE_SIZE) {
113 if (size & bsize)
114 break;
115 bsize <<= 1;
116 }
117 bdev->bd_block_size = bsize;
118 bdev->bd_inode->i_blkbits = blksize_bits(bsize);
119}
120
121int set_blocksize(struct block_device *bdev, int size)
122{
123
124 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
125 return -EINVAL;
126
127
128 if (size < bdev_logical_block_size(bdev))
129 return -EINVAL;
130
131
132 if (bdev->bd_block_size != size) {
133 sync_blockdev(bdev);
134 bdev->bd_block_size = size;
135 bdev->bd_inode->i_blkbits = blksize_bits(size);
136 kill_bdev(bdev);
137 }
138 return 0;
139}
140
141EXPORT_SYMBOL(set_blocksize);
142
143int sb_set_blocksize(struct super_block *sb, int size)
144{
145 if (set_blocksize(sb->s_bdev, size))
146 return 0;
147
148
149 sb->s_blocksize = size;
150 sb->s_blocksize_bits = blksize_bits(size);
151 return sb->s_blocksize;
152}
153
154EXPORT_SYMBOL(sb_set_blocksize);
155
156int sb_min_blocksize(struct super_block *sb, int size)
157{
158 int minsize = bdev_logical_block_size(sb->s_bdev);
159 if (size < minsize)
160 size = minsize;
161 return sb_set_blocksize(sb, size);
162}
163
164EXPORT_SYMBOL(sb_min_blocksize);
165
166static int
167blkdev_get_block(struct inode *inode, sector_t iblock,
168 struct buffer_head *bh, int create)
169{
170 bh->b_bdev = I_BDEV(inode);
171 bh->b_blocknr = iblock;
172 set_buffer_mapped(bh);
173 return 0;
174}
175
176static struct inode *bdev_file_inode(struct file *file)
177{
178 return file->f_mapping->host;
179}
180
181static unsigned int dio_bio_write_op(struct kiocb *iocb)
182{
183 unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
184
185
186 if (iocb->ki_flags & IOCB_DSYNC)
187 op |= REQ_FUA;
188 return op;
189}
190
/* Number of bio_vecs the simple direct-I/O path keeps on its stack. */
#define DIO_INLINE_BIO_VECS 4
192
193static void blkdev_bio_end_io_simple(struct bio *bio)
194{
195 struct task_struct *waiter = bio->bi_private;
196
197 WRITE_ONCE(bio->bi_private, NULL);
198 blk_wake_io_task(waiter);
199}
200
201static ssize_t
202__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
203 int nr_pages)
204{
205 struct file *file = iocb->ki_filp;
206 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
207 struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
208 loff_t pos = iocb->ki_pos;
209 bool should_dirty = false;
210 struct bio bio;
211 ssize_t ret;
212 blk_qc_t qc;
213
214 if ((pos | iov_iter_alignment(iter)) &
215 (bdev_logical_block_size(bdev) - 1))
216 return -EINVAL;
217
218 if (nr_pages <= DIO_INLINE_BIO_VECS)
219 vecs = inline_vecs;
220 else {
221 vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
222 GFP_KERNEL);
223 if (!vecs)
224 return -ENOMEM;
225 }
226
227 bio_init(&bio, vecs, nr_pages);
228 bio_set_dev(&bio, bdev);
229 bio.bi_iter.bi_sector = pos >> 9;
230 bio.bi_write_hint = iocb->ki_hint;
231 bio.bi_private = current;
232 bio.bi_end_io = blkdev_bio_end_io_simple;
233 bio.bi_ioprio = iocb->ki_ioprio;
234
235 ret = bio_iov_iter_get_pages(&bio, iter);
236 if (unlikely(ret))
237 goto out;
238 ret = bio.bi_iter.bi_size;
239
240 if (iov_iter_rw(iter) == READ) {
241 bio.bi_opf = REQ_OP_READ;
242 if (iter_is_iovec(iter))
243 should_dirty = true;
244 } else {
245 bio.bi_opf = dio_bio_write_op(iocb);
246 task_io_account_write(ret);
247 }
248 if (iocb->ki_flags & IOCB_HIPRI)
249 bio_set_polled(&bio, iocb);
250
251 qc = submit_bio(&bio);
252 for (;;) {
253 set_current_state(TASK_UNINTERRUPTIBLE);
254 if (!READ_ONCE(bio.bi_private))
255 break;
256 if (!(iocb->ki_flags & IOCB_HIPRI) ||
257 !blk_poll(bdev_get_queue(bdev), qc, true))
258 io_schedule();
259 }
260 __set_current_state(TASK_RUNNING);
261
262 bio_release_pages(&bio, should_dirty);
263 if (unlikely(bio.bi_status))
264 ret = blk_status_to_errno(bio.bi_status);
265
266out:
267 if (vecs != inline_vecs)
268 kfree(vecs);
269
270 bio_uninit(&bio);
271
272 return ret;
273}
274
275struct blkdev_dio {
276 union {
277 struct kiocb *iocb;
278 struct task_struct *waiter;
279 };
280 size_t size;
281 atomic_t ref;
282 bool multi_bio : 1;
283 bool should_dirty : 1;
284 bool is_sync : 1;
285 struct bio bio;
286};
287
288static struct bio_set blkdev_dio_pool;
289
290static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
291{
292 struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
293 struct request_queue *q = bdev_get_queue(bdev);
294
295 return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
296}
297
298static void blkdev_bio_end_io(struct bio *bio)
299{
300 struct blkdev_dio *dio = bio->bi_private;
301 bool should_dirty = dio->should_dirty;
302
303 if (bio->bi_status && !dio->bio.bi_status)
304 dio->bio.bi_status = bio->bi_status;
305
306 if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
307 if (!dio->is_sync) {
308 struct kiocb *iocb = dio->iocb;
309 ssize_t ret;
310
311 if (likely(!dio->bio.bi_status)) {
312 ret = dio->size;
313 iocb->ki_pos += ret;
314 } else {
315 ret = blk_status_to_errno(dio->bio.bi_status);
316 }
317
318 dio->iocb->ki_complete(iocb, ret, 0);
319 if (dio->multi_bio)
320 bio_put(&dio->bio);
321 } else {
322 struct task_struct *waiter = dio->waiter;
323
324 WRITE_ONCE(dio->waiter, NULL);
325 blk_wake_io_task(waiter);
326 }
327 }
328
329 if (should_dirty) {
330 bio_check_pages_dirty(bio);
331 } else {
332 bio_release_pages(bio, false);
333 bio_put(bio);
334 }
335}
336
337static ssize_t
338__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
339{
340 struct file *file = iocb->ki_filp;
341 struct inode *inode = bdev_file_inode(file);
342 struct block_device *bdev = I_BDEV(inode);
343 struct blk_plug plug;
344 struct blkdev_dio *dio;
345 struct bio *bio;
346 bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
347 bool is_read = (iov_iter_rw(iter) == READ), is_sync;
348 loff_t pos = iocb->ki_pos;
349 blk_qc_t qc = BLK_QC_T_NONE;
350 int ret = 0;
351
352 if ((pos | iov_iter_alignment(iter)) &
353 (bdev_logical_block_size(bdev) - 1))
354 return -EINVAL;
355
356 bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
357
358 dio = container_of(bio, struct blkdev_dio, bio);
359 dio->is_sync = is_sync = is_sync_kiocb(iocb);
360 if (dio->is_sync) {
361 dio->waiter = current;
362 bio_get(bio);
363 } else {
364 dio->iocb = iocb;
365 }
366
367 dio->size = 0;
368 dio->multi_bio = false;
369 dio->should_dirty = is_read && iter_is_iovec(iter);
370
371
372
373
374
375 if (!is_poll)
376 blk_start_plug(&plug);
377
378 for (;;) {
379 bio_set_dev(bio, bdev);
380 bio->bi_iter.bi_sector = pos >> 9;
381 bio->bi_write_hint = iocb->ki_hint;
382 bio->bi_private = dio;
383 bio->bi_end_io = blkdev_bio_end_io;
384 bio->bi_ioprio = iocb->ki_ioprio;
385
386 ret = bio_iov_iter_get_pages(bio, iter);
387 if (unlikely(ret)) {
388 bio->bi_status = BLK_STS_IOERR;
389 bio_endio(bio);
390 break;
391 }
392
393 if (is_read) {
394 bio->bi_opf = REQ_OP_READ;
395 if (dio->should_dirty)
396 bio_set_pages_dirty(bio);
397 } else {
398 bio->bi_opf = dio_bio_write_op(iocb);
399 task_io_account_write(bio->bi_iter.bi_size);
400 }
401
402 dio->size += bio->bi_iter.bi_size;
403 pos += bio->bi_iter.bi_size;
404
405 nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
406 if (!nr_pages) {
407 bool polled = false;
408
409 if (iocb->ki_flags & IOCB_HIPRI) {
410 bio_set_polled(bio, iocb);
411 polled = true;
412 }
413
414 qc = submit_bio(bio);
415
416 if (polled)
417 WRITE_ONCE(iocb->ki_cookie, qc);
418 break;
419 }
420
421 if (!dio->multi_bio) {
422
423
424
425
426
427 if (!is_sync)
428 bio_get(bio);
429 dio->multi_bio = true;
430 atomic_set(&dio->ref, 2);
431 } else {
432 atomic_inc(&dio->ref);
433 }
434
435 submit_bio(bio);
436 bio = bio_alloc(GFP_KERNEL, nr_pages);
437 }
438
439 if (!is_poll)
440 blk_finish_plug(&plug);
441
442 if (!is_sync)
443 return -EIOCBQUEUED;
444
445 for (;;) {
446 set_current_state(TASK_UNINTERRUPTIBLE);
447 if (!READ_ONCE(dio->waiter))
448 break;
449
450 if (!(iocb->ki_flags & IOCB_HIPRI) ||
451 !blk_poll(bdev_get_queue(bdev), qc, true))
452 io_schedule();
453 }
454 __set_current_state(TASK_RUNNING);
455
456 if (!ret)
457 ret = blk_status_to_errno(dio->bio.bi_status);
458 if (likely(!ret))
459 ret = dio->size;
460
461 bio_put(&dio->bio);
462 return ret;
463}
464
465static ssize_t
466blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
467{
468 int nr_pages;
469
470 nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1);
471 if (!nr_pages)
472 return 0;
473 if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
474 return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
475
476 return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES));
477}
478
479static __init int blkdev_init(void)
480{
481 return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
482}
483module_init(blkdev_init);
484
485int __sync_blockdev(struct block_device *bdev, int wait)
486{
487 if (!bdev)
488 return 0;
489 if (!wait)
490 return filemap_flush(bdev->bd_inode->i_mapping);
491 return filemap_write_and_wait(bdev->bd_inode->i_mapping);
492}
493
494
495
496
497
/* Write out the page cache of @bdev and wait for it (wait = 1). */
int sync_blockdev(struct block_device *bdev)
{
	const int wait = 1;

	return __sync_blockdev(bdev, wait);
}
EXPORT_SYMBOL(sync_blockdev);
503
504
505
506
507
508
/*
 * Sync the filesystem found on @bdev by get_super(); when there is none,
 * sync the block device's own page cache instead.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int res;

	if (!sb)
		return sync_blockdev(bdev);

	res = sync_filesystem(sb);
	drop_super(sb);
	return res;
}
EXPORT_SYMBOL(fsync_bdev);
520
521
522
523
524
525
526
527
528
529
530
531
532
533struct super_block *freeze_bdev(struct block_device *bdev)
534{
535 struct super_block *sb;
536 int error = 0;
537
538 mutex_lock(&bdev->bd_fsfreeze_mutex);
539 if (++bdev->bd_fsfreeze_count > 1) {
540
541
542
543
544
545 sb = get_super(bdev);
546 if (sb)
547 drop_super(sb);
548 mutex_unlock(&bdev->bd_fsfreeze_mutex);
549 return sb;
550 }
551
552 sb = get_active_super(bdev);
553 if (!sb)
554 goto out;
555 if (sb->s_op->freeze_super)
556 error = sb->s_op->freeze_super(sb);
557 else
558 error = freeze_super(sb);
559 if (error) {
560 deactivate_super(sb);
561 bdev->bd_fsfreeze_count--;
562 mutex_unlock(&bdev->bd_fsfreeze_mutex);
563 return ERR_PTR(error);
564 }
565 deactivate_super(sb);
566 out:
567 sync_blockdev(bdev);
568 mutex_unlock(&bdev->bd_fsfreeze_mutex);
569 return sb;
570}
571EXPORT_SYMBOL(freeze_bdev);
572
573
574
575
576
577
578
579
580int thaw_bdev(struct block_device *bdev, struct super_block *sb)
581{
582 int error = -EINVAL;
583
584 mutex_lock(&bdev->bd_fsfreeze_mutex);
585 if (!bdev->bd_fsfreeze_count)
586 goto out;
587
588 error = 0;
589 if (--bdev->bd_fsfreeze_count > 0)
590 goto out;
591
592 if (!sb)
593 goto out;
594
595 if (sb->s_op->thaw_super)
596 error = sb->s_op->thaw_super(sb);
597 else
598 error = thaw_super(sb);
599 if (error)
600 bdev->bd_fsfreeze_count++;
601out:
602 mutex_unlock(&bdev->bd_fsfreeze_mutex);
603 return error;
604}
605EXPORT_SYMBOL(thaw_bdev);
606
607static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
608{
609 return block_write_full_page(page, blkdev_get_block, wbc);
610}
611
612static int blkdev_readpage(struct file * file, struct page * page)
613{
614 return block_read_full_page(page, blkdev_get_block);
615}
616
617static int blkdev_readpages(struct file *file, struct address_space *mapping,
618 struct list_head *pages, unsigned nr_pages)
619{
620 return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
621}
622
623static int blkdev_write_begin(struct file *file, struct address_space *mapping,
624 loff_t pos, unsigned len, unsigned flags,
625 struct page **pagep, void **fsdata)
626{
627 return block_write_begin(mapping, pos, len, flags, pagep,
628 blkdev_get_block);
629}
630
631static int blkdev_write_end(struct file *file, struct address_space *mapping,
632 loff_t pos, unsigned len, unsigned copied,
633 struct page *page, void *fsdata)
634{
635 int ret;
636 ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
637
638 unlock_page(page);
639 put_page(page);
640
641 return ret;
642}
643
644
645
646
647
648
649static loff_t block_llseek(struct file *file, loff_t offset, int whence)
650{
651 struct inode *bd_inode = bdev_file_inode(file);
652 loff_t retval;
653
654 inode_lock(bd_inode);
655 retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
656 inode_unlock(bd_inode);
657 return retval;
658}
659
660int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
661{
662 struct inode *bd_inode = bdev_file_inode(filp);
663 struct block_device *bdev = I_BDEV(bd_inode);
664 int error;
665
666 error = file_write_and_wait_range(filp, start, end);
667 if (error)
668 return error;
669
670
671
672
673
674
675 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
676 if (error == -EOPNOTSUPP)
677 error = 0;
678
679 return error;
680}
681EXPORT_SYMBOL(blkdev_fsync);
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699int bdev_read_page(struct block_device *bdev, sector_t sector,
700 struct page *page)
701{
702 const struct block_device_operations *ops = bdev->bd_disk->fops;
703 int result = -EOPNOTSUPP;
704
705 if (!ops->rw_page || bdev_get_integrity(bdev))
706 return result;
707
708 result = blk_queue_enter(bdev->bd_queue, 0);
709 if (result)
710 return result;
711 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
712 REQ_OP_READ);
713 blk_queue_exit(bdev->bd_queue);
714 return result;
715}
716EXPORT_SYMBOL_GPL(bdev_read_page);
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737int bdev_write_page(struct block_device *bdev, sector_t sector,
738 struct page *page, struct writeback_control *wbc)
739{
740 int result;
741 const struct block_device_operations *ops = bdev->bd_disk->fops;
742
743 if (!ops->rw_page || bdev_get_integrity(bdev))
744 return -EOPNOTSUPP;
745 result = blk_queue_enter(bdev->bd_queue, 0);
746 if (result)
747 return result;
748
749 set_page_writeback(page);
750 result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
751 REQ_OP_WRITE);
752 if (result) {
753 end_page_writeback(page);
754 } else {
755 clean_page_buffers(page);
756 unlock_page(page);
757 }
758 blk_queue_exit(bdev->bd_queue);
759 return result;
760}
761EXPORT_SYMBOL_GPL(bdev_write_page);
762
763
764
765
766
767static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
768static struct kmem_cache * bdev_cachep __read_mostly;
769
770static struct inode *bdev_alloc_inode(struct super_block *sb)
771{
772 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
773 if (!ei)
774 return NULL;
775 return &ei->vfs_inode;
776}
777
778static void bdev_free_inode(struct inode *inode)
779{
780 kmem_cache_free(bdev_cachep, BDEV_I(inode));
781}
782
783static void init_once(void *foo)
784{
785 struct bdev_inode *ei = (struct bdev_inode *) foo;
786 struct block_device *bdev = &ei->bdev;
787
788 memset(bdev, 0, sizeof(*bdev));
789 mutex_init(&bdev->bd_mutex);
790 INIT_LIST_HEAD(&bdev->bd_list);
791#ifdef CONFIG_SYSFS
792 INIT_LIST_HEAD(&bdev->bd_holder_disks);
793#endif
794 bdev->bd_bdi = &noop_backing_dev_info;
795 inode_init_once(&ei->vfs_inode);
796
797 mutex_init(&bdev->bd_fsfreeze_mutex);
798}
799
800static void bdev_evict_inode(struct inode *inode)
801{
802 struct block_device *bdev = &BDEV_I(inode)->bdev;
803 truncate_inode_pages_final(&inode->i_data);
804 invalidate_inode_buffers(inode);
805 clear_inode(inode);
806 spin_lock(&bdev_lock);
807 list_del_init(&bdev->bd_list);
808 spin_unlock(&bdev_lock);
809
810 inode_detach_wb(inode);
811 if (bdev->bd_bdi != &noop_backing_dev_info) {
812 bdi_put(bdev->bd_bdi);
813 bdev->bd_bdi = &noop_backing_dev_info;
814 }
815}
816
817static const struct super_operations bdev_sops = {
818 .statfs = simple_statfs,
819 .alloc_inode = bdev_alloc_inode,
820 .free_inode = bdev_free_inode,
821 .drop_inode = generic_delete_inode,
822 .evict_inode = bdev_evict_inode,
823};
824
825static int bd_init_fs_context(struct fs_context *fc)
826{
827 struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
828 if (!ctx)
829 return -ENOMEM;
830 fc->s_iflags |= SB_I_CGROUPWB;
831 ctx->ops = &bdev_sops;
832 return 0;
833}
834
835static struct file_system_type bd_type = {
836 .name = "bdev",
837 .init_fs_context = bd_init_fs_context,
838 .kill_sb = kill_anon_super,
839};
840
841struct super_block *blockdev_superblock __read_mostly;
842EXPORT_SYMBOL_GPL(blockdev_superblock);
843
844void __init bdev_cache_init(void)
845{
846 int err;
847 static struct vfsmount *bd_mnt;
848
849 bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
850 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
851 SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
852 init_once);
853 err = register_filesystem(&bd_type);
854 if (err)
855 panic("Cannot register bdev pseudo-fs");
856 bd_mnt = kern_mount(&bd_type);
857 if (IS_ERR(bd_mnt))
858 panic("Cannot create bdev pseudo-fs");
859 blockdev_superblock = bd_mnt->mnt_sb;
860}
861
862
863
864
865
866
867static inline unsigned long hash(dev_t dev)
868{
869 return MAJOR(dev)+MINOR(dev);
870}
871
872static int bdev_test(struct inode *inode, void *data)
873{
874 return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
875}
876
877static int bdev_set(struct inode *inode, void *data)
878{
879 BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
880 return 0;
881}
882
/* Every block_device created by bdget(), linked through bd_list under bdev_lock. */
static LIST_HEAD(all_bdevs);
884
885
886
887
888
889void bdev_unhash_inode(dev_t dev)
890{
891 struct inode *inode;
892
893 inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
894 if (inode) {
895 remove_inode_hash(inode);
896 iput(inode);
897 }
898}
899
900struct block_device *bdget(dev_t dev)
901{
902 struct block_device *bdev;
903 struct inode *inode;
904
905 inode = iget5_locked(blockdev_superblock, hash(dev),
906 bdev_test, bdev_set, &dev);
907
908 if (!inode)
909 return NULL;
910
911 bdev = &BDEV_I(inode)->bdev;
912
913 if (inode->i_state & I_NEW) {
914 bdev->bd_contains = NULL;
915 bdev->bd_super = NULL;
916 bdev->bd_inode = inode;
917 bdev->bd_block_size = i_blocksize(inode);
918 bdev->bd_part_count = 0;
919 bdev->bd_invalidated = 0;
920 inode->i_mode = S_IFBLK;
921 inode->i_rdev = dev;
922 inode->i_bdev = bdev;
923 inode->i_data.a_ops = &def_blk_aops;
924 mapping_set_gfp_mask(&inode->i_data, GFP_USER);
925 spin_lock(&bdev_lock);
926 list_add(&bdev->bd_list, &all_bdevs);
927 spin_unlock(&bdev_lock);
928 unlock_new_inode(inode);
929 }
930 return bdev;
931}
932
933EXPORT_SYMBOL(bdget);
934
935
936
937
938
939struct block_device *bdgrab(struct block_device *bdev)
940{
941 ihold(bdev->bd_inode);
942 return bdev;
943}
944EXPORT_SYMBOL(bdgrab);
945
946long nr_blockdev_pages(void)
947{
948 struct block_device *bdev;
949 long ret = 0;
950 spin_lock(&bdev_lock);
951 list_for_each_entry(bdev, &all_bdevs, bd_list) {
952 ret += bdev->bd_inode->i_mapping->nrpages;
953 }
954 spin_unlock(&bdev_lock);
955 return ret;
956}
957
958void bdput(struct block_device *bdev)
959{
960 iput(bdev->bd_inode);
961}
962
963EXPORT_SYMBOL(bdput);
964
965static struct block_device *bd_acquire(struct inode *inode)
966{
967 struct block_device *bdev;
968
969 spin_lock(&bdev_lock);
970 bdev = inode->i_bdev;
971 if (bdev && !inode_unhashed(bdev->bd_inode)) {
972 bdgrab(bdev);
973 spin_unlock(&bdev_lock);
974 return bdev;
975 }
976 spin_unlock(&bdev_lock);
977
978
979
980
981
982
983
984 if (bdev)
985 bd_forget(inode);
986
987 bdev = bdget(inode->i_rdev);
988 if (bdev) {
989 spin_lock(&bdev_lock);
990 if (!inode->i_bdev) {
991
992
993
994
995
996
997 bdgrab(bdev);
998 inode->i_bdev = bdev;
999 inode->i_mapping = bdev->bd_inode->i_mapping;
1000 }
1001 spin_unlock(&bdev_lock);
1002 }
1003 return bdev;
1004}
1005
1006
1007
1008void bd_forget(struct inode *inode)
1009{
1010 struct block_device *bdev = NULL;
1011
1012 spin_lock(&bdev_lock);
1013 if (!sb_is_blkdev_sb(inode->i_sb))
1014 bdev = inode->i_bdev;
1015 inode->i_bdev = NULL;
1016 inode->i_mapping = &inode->i_data;
1017 spin_unlock(&bdev_lock);
1018
1019 if (bdev)
1020 bdput(bdev);
1021}
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
1038 void *holder)
1039{
1040 if (bdev->bd_holder == holder)
1041 return true;
1042 else if (bdev->bd_holder != NULL)
1043 return false;
1044 else if (whole == bdev)
1045 return true;
1046
1047 else if (whole->bd_holder == bd_may_claim)
1048 return true;
1049 else if (whole->bd_holder != NULL)
1050 return false;
1051 else
1052 return true;
1053}
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073static int bd_prepare_to_claim(struct block_device *bdev,
1074 struct block_device *whole, void *holder)
1075{
1076retry:
1077
1078 if (!bd_may_claim(bdev, whole, holder))
1079 return -EBUSY;
1080
1081
1082 if (whole->bd_claiming) {
1083 wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
1084 DEFINE_WAIT(wait);
1085
1086 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
1087 spin_unlock(&bdev_lock);
1088 schedule();
1089 finish_wait(wq, &wait);
1090 spin_lock(&bdev_lock);
1091 goto retry;
1092 }
1093
1094
1095 return 0;
1096}
1097
1098static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
1099{
1100 struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
1101
1102 if (!disk)
1103 return NULL;
1104
1105
1106
1107
1108
1109
1110
1111
1112 if (inode_unhashed(bdev->bd_inode)) {
1113 put_disk_and_module(disk);
1114 return NULL;
1115 }
1116 return disk;
1117}
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142struct block_device *bd_start_claiming(struct block_device *bdev, void *holder)
1143{
1144 struct gendisk *disk;
1145 struct block_device *whole;
1146 int partno, err;
1147
1148 might_sleep();
1149
1150
1151
1152
1153
1154 disk = bdev_get_gendisk(bdev, &partno);
1155 if (!disk)
1156 return ERR_PTR(-ENXIO);
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166 if (partno)
1167 whole = bdget_disk(disk, 0);
1168 else
1169 whole = bdgrab(bdev);
1170
1171 put_disk_and_module(disk);
1172 if (!whole)
1173 return ERR_PTR(-ENOMEM);
1174
1175
1176 spin_lock(&bdev_lock);
1177
1178 err = bd_prepare_to_claim(bdev, whole, holder);
1179 if (err == 0) {
1180 whole->bd_claiming = holder;
1181 spin_unlock(&bdev_lock);
1182 return whole;
1183 } else {
1184 spin_unlock(&bdev_lock);
1185 bdput(whole);
1186 return ERR_PTR(err);
1187 }
1188}
1189EXPORT_SYMBOL(bd_start_claiming);
1190
1191static void bd_clear_claiming(struct block_device *whole, void *holder)
1192{
1193 lockdep_assert_held(&bdev_lock);
1194
1195 BUG_ON(whole->bd_claiming != holder);
1196 whole->bd_claiming = NULL;
1197 wake_up_bit(&whole->bd_claiming, 0);
1198}
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
1210 void *holder)
1211{
1212 spin_lock(&bdev_lock);
1213 BUG_ON(!bd_may_claim(bdev, whole, holder));
1214
1215
1216
1217
1218 whole->bd_holders++;
1219 whole->bd_holder = bd_may_claim;
1220 bdev->bd_holders++;
1221 bdev->bd_holder = holder;
1222 bd_clear_claiming(whole, holder);
1223 spin_unlock(&bdev_lock);
1224}
1225EXPORT_SYMBOL(bd_finish_claiming);
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
1238 void *holder)
1239{
1240 spin_lock(&bdev_lock);
1241 bd_clear_claiming(whole, holder);
1242 spin_unlock(&bdev_lock);
1243}
1244EXPORT_SYMBOL(bd_abort_claiming);
1245
1246#ifdef CONFIG_SYSFS
1247struct bd_holder_disk {
1248 struct list_head list;
1249 struct gendisk *disk;
1250 int refcnt;
1251};
1252
1253static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
1254 struct gendisk *disk)
1255{
1256 struct bd_holder_disk *holder;
1257
1258 list_for_each_entry(holder, &bdev->bd_holder_disks, list)
1259 if (holder->disk == disk)
1260 return holder;
1261 return NULL;
1262}
1263
/* Create a sysfs link in @from pointing at @to, named after @to. */
static int add_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	return sysfs_create_link(from, to, link_name);
}
1268
/* Remove the sysfs link in @from that is named after @to. */
static void del_symlink(struct kobject *from, struct kobject *to)
{
	const char *link_name = kobject_name(to);

	sysfs_remove_link(from, link_name);
}
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
1303{
1304 struct bd_holder_disk *holder;
1305 int ret = 0;
1306
1307 mutex_lock(&bdev->bd_mutex);
1308
1309 WARN_ON_ONCE(!bdev->bd_holder);
1310
1311
1312 if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir))
1313 goto out_unlock;
1314
1315 holder = bd_find_holder_disk(bdev, disk);
1316 if (holder) {
1317 holder->refcnt++;
1318 goto out_unlock;
1319 }
1320
1321 holder = kzalloc(sizeof(*holder), GFP_KERNEL);
1322 if (!holder) {
1323 ret = -ENOMEM;
1324 goto out_unlock;
1325 }
1326
1327 INIT_LIST_HEAD(&holder->list);
1328 holder->disk = disk;
1329 holder->refcnt = 1;
1330
1331 ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1332 if (ret)
1333 goto out_free;
1334
1335 ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
1336 if (ret)
1337 goto out_del;
1338
1339
1340
1341
1342 kobject_get(bdev->bd_part->holder_dir);
1343
1344 list_add(&holder->list, &bdev->bd_holder_disks);
1345 goto out_unlock;
1346
1347out_del:
1348 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1349out_free:
1350 kfree(holder);
1351out_unlock:
1352 mutex_unlock(&bdev->bd_mutex);
1353 return ret;
1354}
1355EXPORT_SYMBOL_GPL(bd_link_disk_holder);
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
1368{
1369 struct bd_holder_disk *holder;
1370
1371 mutex_lock(&bdev->bd_mutex);
1372
1373 holder = bd_find_holder_disk(bdev, disk);
1374
1375 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) {
1376 del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
1377 del_symlink(bdev->bd_part->holder_dir,
1378 &disk_to_dev(disk)->kobj);
1379 kobject_put(bdev->bd_part->holder_dir);
1380 list_del_init(&holder->list);
1381 kfree(holder);
1382 }
1383
1384 mutex_unlock(&bdev->bd_mutex);
1385}
1386EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
1387#endif
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399static void flush_disk(struct block_device *bdev, bool kill_dirty)
1400{
1401 if (__invalidate_device(bdev, kill_dirty)) {
1402 printk(KERN_WARNING "VFS: busy inodes on changed media or "
1403 "resized disk %s\n",
1404 bdev->bd_disk ? bdev->bd_disk->disk_name : "");
1405 }
1406
1407 if (!bdev->bd_disk)
1408 return;
1409 if (disk_part_scan_enabled(bdev->bd_disk))
1410 bdev->bd_invalidated = 1;
1411}
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
1424 bool verbose)
1425{
1426 loff_t disk_size, bdev_size;
1427
1428 disk_size = (loff_t)get_capacity(disk) << 9;
1429 bdev_size = i_size_read(bdev->bd_inode);
1430 if (disk_size != bdev_size) {
1431 if (verbose) {
1432 printk(KERN_INFO
1433 "%s: detected capacity change from %lld to %lld\n",
1434 disk->disk_name, bdev_size, disk_size);
1435 }
1436 i_size_write(bdev->bd_inode, disk_size);
1437 if (bdev_size > disk_size)
1438 flush_disk(bdev, false);
1439 }
1440}
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450int revalidate_disk(struct gendisk *disk)
1451{
1452 int ret = 0;
1453
1454 if (disk->fops->revalidate_disk)
1455 ret = disk->fops->revalidate_disk(disk);
1456
1457
1458
1459
1460
1461 if (!(disk->flags & GENHD_FL_HIDDEN)) {
1462 struct block_device *bdev = bdget_disk(disk, 0);
1463
1464 if (!bdev)
1465 return ret;
1466
1467 mutex_lock(&bdev->bd_mutex);
1468 check_disk_size_change(disk, bdev, ret == 0);
1469 bdev->bd_invalidated = 0;
1470 mutex_unlock(&bdev->bd_mutex);
1471 bdput(bdev);
1472 }
1473 return ret;
1474}
1475EXPORT_SYMBOL(revalidate_disk);
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486int check_disk_change(struct block_device *bdev)
1487{
1488 struct gendisk *disk = bdev->bd_disk;
1489 const struct block_device_operations *bdops = disk->fops;
1490 unsigned int events;
1491
1492 events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
1493 DISK_EVENT_EJECT_REQUEST);
1494 if (!(events & DISK_EVENT_MEDIA_CHANGE))
1495 return 0;
1496
1497 flush_disk(bdev, true);
1498 if (bdops->revalidate_disk)
1499 bdops->revalidate_disk(bdev->bd_disk);
1500 return 1;
1501}
1502
1503EXPORT_SYMBOL(check_disk_change);
1504
1505void bd_set_size(struct block_device *bdev, loff_t size)
1506{
1507 inode_lock(bdev->bd_inode);
1508 i_size_write(bdev->bd_inode, size);
1509 inode_unlock(bdev->bd_inode);
1510}
1511EXPORT_SYMBOL(bd_set_size);
1512
1513static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
1514
1515
1516
1517
1518
1519
1520
1521
/*
 * __blkdev_get - open @bdev, performing first-open setup when needed
 * @bdev: block device to open
 * @mode: FMODE_* flags of the open
 * @for_part: non-zero when this is the internal open of the whole device
 *	      done on behalf of one of its partitions; it is also used as
 *	      the lockdep subclass for bd_mutex
 *
 * Returns 0 on success or a negative errno.  On every failure path the
 * function calls bdput(@bdev), so the caller's reference is gone when an
 * error is returned.
 */
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	int ret;
	int partno;
	int perm = 0;
	bool first_open = false;

	/* Translate the open mode into MAY_* bits for the cgroup check. */
	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;
	/*
	 * The device cgroup permission check is only done for direct opens,
	 * not for the nested whole-device open (for_part != 0).
	 */
	if (!for_part) {
		ret = devcgroup_inode_permission(bdev->bd_inode, perm);
		if (ret != 0) {
			bdput(bdev);
			return ret;
		}
	}

 restart:

	/* Look up the gendisk and partition number; -ENXIO if there is none. */
	ret = -ENXIO;
	disk = bdev_get_gendisk(bdev, &partno);
	if (!disk)
		goto out;

	disk_block_events(disk);
	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		/* First opener: bind the bdev to its disk, queue and partition. */
		first_open = true;
		bdev->bd_disk = disk;
		bdev->bd_queue = disk->queue;
		bdev->bd_contains = bdev;
		bdev->bd_partno = partno;

		if (!partno) {
			/* Whole device (partition number 0). */
			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			ret = 0;
			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				if (ret == -ERESTARTSYS) {
					/*
					 * The driver asked for a restart: undo
					 * everything set up so far, drop the
					 * locks and references, and retry the
					 * lookup from scratch.
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					bdev->bd_disk = NULL;
					bdev->bd_queue = NULL;
					mutex_unlock(&bdev->bd_mutex);
					disk_unblock_events(disk);
					put_disk_and_module(disk);
					goto restart;
				}
			}

			/* Driver open succeeded: publish size and block size. */
			if (!ret) {
				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
				set_init_blocksize(bdev);
			}

			/*
			 * If the device was marked invalidated, rescan the
			 * partitions when the open succeeded, or invalidate
			 * them when the driver reported -ENOMEDIUM.  This is
			 * done before the error check below so that the
			 * -ENOMEDIUM case is handled even though the open
			 * itself fails.
			 */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(disk, bdev);
			}

			if (ret)
				goto out_clear;
		} else {
			/*
			 * Partition: open the whole device first via a nested
			 * call with for_part == 1, then attach to the
			 * partition.
			 */
			struct block_device *whole;
			whole = bdget_disk(disk, 0);
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;
			bdev->bd_part = disk_get_part(disk, partno);
			/* Refuse a disk that is not up or an absent/empty partition. */
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
			set_init_blocksize(bdev);
		}

		/* Replace the placeholder bdi with a reference to the queue's bdi. */
		if (bdev->bd_bdi == &noop_backing_dev_info)
			bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
	} else {
		/*
		 * Already open.  Only a whole device calls the driver's
		 * ->open() again; the same invalidated handling as on first
		 * open applies.
		 */
		if (bdev->bd_contains == bdev) {
			ret = 0;
			if (bdev->bd_disk->fops->open)
				ret = bdev->bd_disk->fops->open(bdev, mode);
			/* the same as first opener case, read comment there */
			if (bdev->bd_invalidated) {
				if (!ret)
					rescan_partitions(bdev->bd_disk, bdev);
				else if (ret == -ENOMEDIUM)
					invalidate_partitions(bdev->bd_disk, bdev);
			}
			if (ret)
				goto out_unlock_bdev;
		}
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	/*
	 * Only the first opener keeps the disk reference obtained above
	 * (presumably taken by bdev_get_gendisk()); later openers drop theirs.
	 */
	if (!first_open)
		put_disk_and_module(disk);
	return 0;

	/* Undo the first-open setup, including the nested whole-device open. */
 out_clear:
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	bdev->bd_queue = NULL;
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
	disk_unblock_events(disk);
	put_disk_and_module(disk);
 out:
	/* Every failure drops the caller's reference on @bdev. */
	bdput(bdev);

	return ret;
}
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1693{
1694 struct block_device *whole = NULL;
1695 int res;
1696
1697 WARN_ON_ONCE((mode & FMODE_EXCL) && !holder);
1698
1699 if ((mode & FMODE_EXCL) && holder) {
1700 whole = bd_start_claiming(bdev, holder);
1701 if (IS_ERR(whole)) {
1702 bdput(bdev);
1703 return PTR_ERR(whole);
1704 }
1705 }
1706
1707 res = __blkdev_get(bdev, mode, 0);
1708
1709 if (whole) {
1710 struct gendisk *disk = whole->bd_disk;
1711
1712
1713 mutex_lock(&bdev->bd_mutex);
1714 if (!res)
1715 bd_finish_claiming(bdev, whole, holder);
1716 else
1717 bd_abort_claiming(bdev, whole, holder);
1718
1719
1720
1721
1722
1723
1724
1725 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
1726 (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
1727 bdev->bd_write_holder = true;
1728 disk_block_events(disk);
1729 }
1730
1731 mutex_unlock(&bdev->bd_mutex);
1732 bdput(whole);
1733 }
1734
1735 return res;
1736}
1737EXPORT_SYMBOL(blkdev_get);
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
1757 void *holder)
1758{
1759 struct block_device *bdev;
1760 int err;
1761
1762 bdev = lookup_bdev(path);
1763 if (IS_ERR(bdev))
1764 return bdev;
1765
1766 err = blkdev_get(bdev, mode, holder);
1767 if (err)
1768 return ERR_PTR(err);
1769
1770 if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
1771 blkdev_put(bdev, mode);
1772 return ERR_PTR(-EACCES);
1773 }
1774
1775 return bdev;
1776}
1777EXPORT_SYMBOL(blkdev_get_by_path);
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
1802{
1803 struct block_device *bdev;
1804 int err;
1805
1806 bdev = bdget(dev);
1807 if (!bdev)
1808 return ERR_PTR(-ENOMEM);
1809
1810 err = blkdev_get(bdev, mode, holder);
1811 if (err)
1812 return ERR_PTR(err);
1813
1814 return bdev;
1815}
1816EXPORT_SYMBOL(blkdev_get_by_dev);
1817
1818static int blkdev_open(struct inode * inode, struct file * filp)
1819{
1820 struct block_device *bdev;
1821
1822
1823
1824
1825
1826
1827
1828 filp->f_flags |= O_LARGEFILE;
1829
1830 filp->f_mode |= FMODE_NOWAIT;
1831
1832 if (filp->f_flags & O_NDELAY)
1833 filp->f_mode |= FMODE_NDELAY;
1834 if (filp->f_flags & O_EXCL)
1835 filp->f_mode |= FMODE_EXCL;
1836 if ((filp->f_flags & O_ACCMODE) == 3)
1837 filp->f_mode |= FMODE_WRITE_IOCTL;
1838
1839 bdev = bd_acquire(inode);
1840 if (bdev == NULL)
1841 return -ENOMEM;
1842
1843 filp->f_mapping = bdev->bd_inode->i_mapping;
1844 filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
1845
1846 return blkdev_get(bdev, filp->f_mode, filp);
1847}
1848
1849static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1850{
1851 struct gendisk *disk = bdev->bd_disk;
1852 struct block_device *victim = NULL;
1853
1854 mutex_lock_nested(&bdev->bd_mutex, for_part);
1855 if (for_part)
1856 bdev->bd_part_count--;
1857
1858 if (!--bdev->bd_openers) {
1859 WARN_ON_ONCE(bdev->bd_holders);
1860 sync_blockdev(bdev);
1861 kill_bdev(bdev);
1862
1863 bdev_write_inode(bdev);
1864 }
1865 if (bdev->bd_contains == bdev) {
1866 if (disk->fops->release)
1867 disk->fops->release(disk, mode);
1868 }
1869 if (!bdev->bd_openers) {
1870 disk_put_part(bdev->bd_part);
1871 bdev->bd_part = NULL;
1872 bdev->bd_disk = NULL;
1873 if (bdev != bdev->bd_contains)
1874 victim = bdev->bd_contains;
1875 bdev->bd_contains = NULL;
1876
1877 put_disk_and_module(disk);
1878 }
1879 mutex_unlock(&bdev->bd_mutex);
1880 bdput(bdev);
1881 if (victim)
1882 __blkdev_put(victim, mode, 1);
1883}
1884
1885void blkdev_put(struct block_device *bdev, fmode_t mode)
1886{
1887 mutex_lock(&bdev->bd_mutex);
1888
1889 if (mode & FMODE_EXCL) {
1890 bool bdev_free;
1891
1892
1893
1894
1895
1896
1897 spin_lock(&bdev_lock);
1898
1899 WARN_ON_ONCE(--bdev->bd_holders < 0);
1900 WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0);
1901
1902
1903 if ((bdev_free = !bdev->bd_holders))
1904 bdev->bd_holder = NULL;
1905 if (!bdev->bd_contains->bd_holders)
1906 bdev->bd_contains->bd_holder = NULL;
1907
1908 spin_unlock(&bdev_lock);
1909
1910
1911
1912
1913
1914 if (bdev_free && bdev->bd_write_holder) {
1915 disk_unblock_events(bdev->bd_disk);
1916 bdev->bd_write_holder = false;
1917 }
1918 }
1919
1920
1921
1922
1923
1924
1925 disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE);
1926
1927 mutex_unlock(&bdev->bd_mutex);
1928
1929 __blkdev_put(bdev, mode, 0);
1930}
1931EXPORT_SYMBOL(blkdev_put);
1932
1933static int blkdev_close(struct inode * inode, struct file * filp)
1934{
1935 struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
1936 blkdev_put(bdev, filp->f_mode);
1937 return 0;
1938}
1939
1940static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1941{
1942 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
1943 fmode_t mode = file->f_mode;
1944
1945
1946
1947
1948
1949 if (file->f_flags & O_NDELAY)
1950 mode |= FMODE_NDELAY;
1951 else
1952 mode &= ~FMODE_NDELAY;
1953
1954 return blkdev_ioctl(bdev, mode, cmd, arg);
1955}
1956
1957
1958
1959
1960
1961
1962
1963
1964ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
1965{
1966 struct file *file = iocb->ki_filp;
1967 struct inode *bd_inode = bdev_file_inode(file);
1968 loff_t size = i_size_read(bd_inode);
1969 struct blk_plug plug;
1970 ssize_t ret;
1971
1972 if (bdev_read_only(I_BDEV(bd_inode)))
1973 return -EPERM;
1974
1975 if (!iov_iter_count(from))
1976 return 0;
1977
1978 if (iocb->ki_pos >= size)
1979 return -ENOSPC;
1980
1981 if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
1982 return -EOPNOTSUPP;
1983
1984 iov_iter_truncate(from, size - iocb->ki_pos);
1985
1986 blk_start_plug(&plug);
1987 ret = __generic_file_write_iter(iocb, from);
1988 if (ret > 0)
1989 ret = generic_write_sync(iocb, ret);
1990 blk_finish_plug(&plug);
1991 return ret;
1992}
1993EXPORT_SYMBOL_GPL(blkdev_write_iter);
1994
1995ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
1996{
1997 struct file *file = iocb->ki_filp;
1998 struct inode *bd_inode = bdev_file_inode(file);
1999 loff_t size = i_size_read(bd_inode);
2000 loff_t pos = iocb->ki_pos;
2001
2002 if (pos >= size)
2003 return 0;
2004
2005 size -= pos;
2006 iov_iter_truncate(to, size);
2007 return generic_file_read_iter(iocb, to);
2008}
2009EXPORT_SYMBOL_GPL(blkdev_read_iter);
2010
2011
2012
2013
2014
2015static int blkdev_releasepage(struct page *page, gfp_t wait)
2016{
2017 struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
2018
2019 if (super && super->s_op->bdev_try_to_free_page)
2020 return super->s_op->bdev_try_to_free_page(super, page, wait);
2021
2022 return try_to_free_buffers(page);
2023}
2024
/*
 * ->writepages() for the block device mapping: a plain pass-through to
 * generic_writepages(), whose return value is handed back unchanged.
 */
static int blkdev_writepages(struct address_space *mapping,
			     struct writeback_control *wbc)
{
	return generic_writepages(mapping, wbc);
}
2030
2031static const struct address_space_operations def_blk_aops = {
2032 .readpage = blkdev_readpage,
2033 .readpages = blkdev_readpages,
2034 .writepage = blkdev_writepage,
2035 .write_begin = blkdev_write_begin,
2036 .write_end = blkdev_write_end,
2037 .writepages = blkdev_writepages,
2038 .releasepage = blkdev_releasepage,
2039 .direct_IO = blkdev_direct_IO,
2040 .migratepage = buffer_migrate_page_norefs,
2041 .is_dirty_writeback = buffer_check_dirty_writeback,
2042};
2043
2044#define BLKDEV_FALLOC_FL_SUPPORTED \
2045 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
2046 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
2047
2048static long blkdev_fallocate(struct file *file, int mode, loff_t start,
2049 loff_t len)
2050{
2051 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
2052 struct address_space *mapping;
2053 loff_t end = start + len - 1;
2054 loff_t isize;
2055 int error;
2056
2057
2058 if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
2059 return -EOPNOTSUPP;
2060
2061
2062 isize = i_size_read(bdev->bd_inode);
2063 if (start >= isize)
2064 return -EINVAL;
2065 if (end >= isize) {
2066 if (mode & FALLOC_FL_KEEP_SIZE) {
2067 len = isize - start;
2068 end = start + len - 1;
2069 } else
2070 return -EINVAL;
2071 }
2072
2073
2074
2075
2076 if ((start | len) & (bdev_logical_block_size(bdev) - 1))
2077 return -EINVAL;
2078
2079
2080 mapping = bdev->bd_inode->i_mapping;
2081 truncate_inode_pages_range(mapping, start, end);
2082
2083 switch (mode) {
2084 case FALLOC_FL_ZERO_RANGE:
2085 case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
2086 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
2087 GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
2088 break;
2089 case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
2090 error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
2091 GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
2092 break;
2093 case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
2094 error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
2095 GFP_KERNEL, 0);
2096 break;
2097 default:
2098 return -EOPNOTSUPP;
2099 }
2100 if (error)
2101 return error;
2102
2103
2104
2105
2106
2107
2108 return invalidate_inode_pages2_range(mapping,
2109 start >> PAGE_SHIFT,
2110 end >> PAGE_SHIFT);
2111}
2112
2113const struct file_operations def_blk_fops = {
2114 .open = blkdev_open,
2115 .release = blkdev_close,
2116 .llseek = block_llseek,
2117 .read_iter = blkdev_read_iter,
2118 .write_iter = blkdev_write_iter,
2119 .iopoll = blkdev_iopoll,
2120 .mmap = generic_file_mmap,
2121 .fsync = blkdev_fsync,
2122 .unlocked_ioctl = block_ioctl,
2123#ifdef CONFIG_COMPAT
2124 .compat_ioctl = compat_blkdev_ioctl,
2125#endif
2126 .splice_read = generic_file_splice_read,
2127 .splice_write = iter_file_splice_write,
2128 .fallocate = blkdev_fallocate,
2129};
2130
2131int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
2132{
2133 int res;
2134 mm_segment_t old_fs = get_fs();
2135 set_fs(KERNEL_DS);
2136 res = blkdev_ioctl(bdev, 0, cmd, arg);
2137 set_fs(old_fs);
2138 return res;
2139}
2140
2141EXPORT_SYMBOL(ioctl_by_bdev);
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151struct block_device *lookup_bdev(const char *pathname)
2152{
2153 struct block_device *bdev;
2154 struct inode *inode;
2155 struct path path;
2156 int error;
2157
2158 if (!pathname || !*pathname)
2159 return ERR_PTR(-EINVAL);
2160
2161 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
2162 if (error)
2163 return ERR_PTR(error);
2164
2165 inode = d_backing_inode(path.dentry);
2166 error = -ENOTBLK;
2167 if (!S_ISBLK(inode->i_mode))
2168 goto fail;
2169 error = -EACCES;
2170 if (!may_open_dev(&path))
2171 goto fail;
2172 error = -ENOMEM;
2173 bdev = bd_acquire(inode);
2174 if (!bdev)
2175 goto fail;
2176out:
2177 path_put(&path);
2178 return bdev;
2179fail:
2180 bdev = ERR_PTR(error);
2181 goto out;
2182}
2183EXPORT_SYMBOL(lookup_bdev);
2184
2185int __invalidate_device(struct block_device *bdev, bool kill_dirty)
2186{
2187 struct super_block *sb = get_super(bdev);
2188 int res = 0;
2189
2190 if (sb) {
2191
2192
2193
2194
2195
2196
2197 shrink_dcache_sb(sb);
2198 res = invalidate_inodes(sb, kill_dirty);
2199 drop_super(sb);
2200 }
2201 invalidate_bdev(bdev);
2202 return res;
2203}
2204EXPORT_SYMBOL(__invalidate_device);
2205
2206void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
2207{
2208 struct inode *inode, *old_inode = NULL;
2209
2210 spin_lock(&blockdev_superblock->s_inode_list_lock);
2211 list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
2212 struct address_space *mapping = inode->i_mapping;
2213 struct block_device *bdev;
2214
2215 spin_lock(&inode->i_lock);
2216 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
2217 mapping->nrpages == 0) {
2218 spin_unlock(&inode->i_lock);
2219 continue;
2220 }
2221 __iget(inode);
2222 spin_unlock(&inode->i_lock);
2223 spin_unlock(&blockdev_superblock->s_inode_list_lock);
2224
2225
2226
2227
2228
2229
2230
2231
2232 iput(old_inode);
2233 old_inode = inode;
2234 bdev = I_BDEV(inode);
2235
2236 mutex_lock(&bdev->bd_mutex);
2237 if (bdev->bd_openers)
2238 func(bdev, arg);
2239 mutex_unlock(&bdev->bd_mutex);
2240
2241 spin_lock(&blockdev_superblock->s_inode_list_lock);
2242 }
2243 spin_unlock(&blockdev_superblock->s_inode_list_lock);
2244 iput(old_inode);
2245}
2246