/*
 * blk-core.c - block layer core
 *
 * Handles all read/write requests to block devices: request queue
 * allocation and teardown, request allocation and accounting, bio
 * submission via generic_make_request()/submit_bio(), and request
 * completion on behalf of drivers.
 */

14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/backing-dev.h>
17#include <linux/bio.h>
18#include <linux/blkdev.h>
19#include <linux/highmem.h>
20#include <linux/mm.h>
21#include <linux/kernel_stat.h>
22#include <linux/string.h>
23#include <linux/init.h>
24#include <linux/completion.h>
25#include <linux/slab.h>
26#include <linux/swap.h>
27#include <linux/writeback.h>
28#include <linux/task_io_accounting_ops.h>
29#include <linux/fault-inject.h>
30
31#define CREATE_TRACE_POINTS
32#include <trace/events/block.h>
33
34#include "blk.h"
35
36EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
37EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
38EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
39
40static int __make_request(struct request_queue *q, struct bio *bio);

/*
 * For the allocated request tables
 */
45static struct kmem_cache *request_cachep;

/*
 * For queue allocation
 */
50struct kmem_cache *blk_requestq_cachep;

/*
 * Controlling structure to kblockd
 */
55static struct workqueue_struct *kblockd_workqueue;
56
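/*
 * Per-partition I/O accounting: charge a merge or a new I/O (and the
 * in-flight counter) to the partition that @rq maps to.
 */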
57static void drive_stat_acct(struct request *rq, int new_io)
58{
59 struct hd_struct *part;
60 int rw = rq_data_dir(rq);
61 int cpu;
62
63 if (!blk_do_io_stat(rq))
64 return;
65
66 cpu = part_stat_lock();
67
68 if (!new_io) {
69 part = rq->part;
70 part_stat_inc(cpu, part, merges[rw]);
71 } else {
72 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
73 if (!hd_struct_try_get(part)) {
			/*
			 * The partition is already being removed, so
			 * the request will be accounted on the disk only.
			 *
			 * We take a reference on disk->part0 although that
			 * partition will never be deleted, so we can treat
			 * it as any other partition.
			 */
82 part = &rq->rq_disk->part0;
83 hd_struct_get(part);
84 }
85 part_round_stats(cpu, part);
86 part_inc_in_flight(part, rw);
87 rq->part = part;
88 }
89
90 part_stat_unlock();
91}
92
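/*
 * Derive the congestion on/off thresholds from the current number of
 * allocatable requests (q->nr_requests), with a small hysteresis gap.
 */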
93void blk_queue_congestion_threshold(struct request_queue *q)
94{
95 int nr;
96
97 nr = q->nr_requests - (q->nr_requests / 8) + 1;
98 if (nr > q->nr_requests)
99 nr = q->nr_requests;
100 q->nr_congestion_on = nr;
101
102 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
103 if (nr < 1)
104 nr = 1;
105 q->nr_congestion_off = nr;
106}
107
/**
 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
 * @bdev:	device
 *
 * Locates the passed device's request queue and returns the address of its
 * backing_dev_info.
 *
 * Will return NULL if the request queue cannot be located.
 */
117struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
118{
119 struct backing_dev_info *ret = NULL;
120 struct request_queue *q = bdev_get_queue(bdev);
121
122 if (q)
123 ret = &q->backing_dev_info;
124 return ret;
125}
126EXPORT_SYMBOL(blk_get_backing_dev_info);
127
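/*
 * Initialize a (possibly recycled) request to sane default values.
 */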
128void blk_rq_init(struct request_queue *q, struct request *rq)
129{
130 memset(rq, 0, sizeof(*rq));
131
132 INIT_LIST_HEAD(&rq->queuelist);
133 INIT_LIST_HEAD(&rq->timeout_list);
134 rq->cpu = -1;
135 rq->q = q;
136 rq->__sector = (sector_t) -1;
137 INIT_HLIST_NODE(&rq->hash);
138 RB_CLEAR_NODE(&rq->rb_node);
139 rq->cmd = rq->__cmd;
140 rq->cmd_len = BLK_MAX_CDB;
141 rq->tag = -1;
142 rq->ref_count = 1;
143 rq->start_time = jiffies;
144 set_start_time_ns(rq);
145 rq->part = NULL;
146}
147EXPORT_SYMBOL(blk_rq_init);
148
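/*
 * Complete @nbytes of @bio on behalf of @rq.  The queue's flush request
 * is special-cased: only the error is recorded, completion is driven by
 * the flush state machine.
 */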
149static void req_bio_endio(struct request *rq, struct bio *bio,
150 unsigned int nbytes, int error)
151{
152 struct request_queue *q = rq->q;
153
154 if (&q->flush_rq != rq) {
155 if (error)
156 clear_bit(BIO_UPTODATE, &bio->bi_flags);
157 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
158 error = -EIO;
159
160 if (unlikely(nbytes > bio->bi_size)) {
161 printk(KERN_ERR "%s: want %u bytes done, %u left\n",
162 __func__, nbytes, bio->bi_size);
163 nbytes = bio->bi_size;
164 }
165
166 if (unlikely(rq->cmd_flags & REQ_QUIET))
167 set_bit(BIO_QUIET, &bio->bi_flags);
168
169 bio->bi_size -= nbytes;
170 bio->bi_sector += (nbytes >> 9);
171
172 if (bio_integrity(bio))
173 bio_integrity_advance(bio, nbytes);
174
175 if (bio->bi_size == 0)
176 bio_endio(bio, error);
177 } else {
		/*
		 * Okay, this is the sequenced flush request in progress;
		 * just record the error.
		 */
182 if (error && !q->flush_err)
183 q->flush_err = error;
184 }
185}
186
187void blk_dump_rq_flags(struct request *rq, char *msg)
188{
189 int bit;
190
191 printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
192 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
193 rq->cmd_flags);
194
195 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
196 (unsigned long long)blk_rq_pos(rq),
197 blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
198 printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",
199 rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
200
201 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
202 printk(KERN_INFO " cdb: ");
203 for (bit = 0; bit < BLK_MAX_CDB; bit++)
204 printk("%02x ", rq->cmd[bit]);
205 printk("\n");
206 }
207}
208EXPORT_SYMBOL(blk_dump_rq_flags);
209
/**
 * blk_plug_device - plug the device queue
 * @q:	The &struct request_queue to plug
 *
 * Description:
 *   Plug the queue: defer running ->request_fn until the unplug timer
 *   fires or the queue is unplugged explicitly, in the hope that more
 *   requests arrive that can be merged.  Queue lock must be held.
 */
218void blk_plug_device(struct request_queue *q)
219{
220 WARN_ON(!irqs_disabled());
221
	/*
	 * don't plug a stopped queue, it must be paired with blk_start_queue()
	 * which will restart the queueing
	 */
226 if (blk_queue_stopped(q))
227 return;
228
229 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
230 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
231 trace_block_plug(q);
232 }
233}
234EXPORT_SYMBOL(blk_plug_device);
235
/**
 * blk_plug_device_unlocked - plug a device without the queue lock held
 * @q:	The &struct request_queue to plug
 *
 * Description:
 *   Like blk_plug_device(), but grabs the queue lock and disables
 *   interrupts itself.
 */
244void blk_plug_device_unlocked(struct request_queue *q)
245{
246 unsigned long flags;
247
248 spin_lock_irqsave(q->queue_lock, flags);
249 blk_plug_device(q);
250 spin_unlock_irqrestore(q->queue_lock, flags);
251}
252EXPORT_SYMBOL(blk_plug_device_unlocked);
253
/*
 * remove the queue from the plugged list, if present. called with
 * queue lock held and interrupts disabled.
 */
258int blk_remove_plug(struct request_queue *q)
259{
260 WARN_ON(!irqs_disabled());
261
262 if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
263 return 0;
264
265 del_timer(&q->unplug_timer);
266 return 1;
267}
268EXPORT_SYMBOL(blk_remove_plug);
269
/*
 * remove the plug and let it rip..
 */
273void __generic_unplug_device(struct request_queue *q)
274{
275 if (unlikely(blk_queue_stopped(q)))
276 return;
277 if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
278 return;
279
280 q->request_fn(q);
281}
282
/**
 * generic_unplug_device - fire a request queue
 * @q:	The &struct request_queue in question
 *
 * Description:
 *   Linux uses plugging to build bigger requests queues before letting
 *   the device have at them. If a queue is plugged, the I/O scheduler
 *   is still adding and merging requests on the queue. Once the queue
 *   gets unplugged, the request_fn defined for the queue is invoked and
 *   transfers started.
 */
294void generic_unplug_device(struct request_queue *q)
295{
296 if (blk_queue_plugged(q)) {
297 spin_lock_irq(q->queue_lock);
298 __generic_unplug_device(q);
299 spin_unlock_irq(q->queue_lock);
300 }
301}
302EXPORT_SYMBOL(generic_unplug_device);
303
304static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
305 struct page *page)
306{
307 struct request_queue *q = bdi->unplug_io_data;
308
309 blk_unplug(q);
310}
311
312void blk_unplug_work(struct work_struct *work)
313{
314 struct request_queue *q =
315 container_of(work, struct request_queue, unplug_work);
316
317 trace_block_unplug_io(q);
318 q->unplug_fn(q);
319}
320
321void blk_unplug_timeout(unsigned long data)
322{
323 struct request_queue *q = (struct request_queue *)data;
324
325 trace_block_unplug_timer(q);
326 kblockd_schedule_work(q, &q->unplug_work);
327}
328
329void blk_unplug(struct request_queue *q)
330{
	/*
	 * devices don't necessarily have an ->unplug_fn defined
	 */
334 if (q->unplug_fn) {
335 trace_block_unplug_io(q);
336 q->unplug_fn(q);
337 }
338}
339EXPORT_SYMBOL(blk_unplug);
340
/**
 * blk_start_queue - restart a previously stopped queue
 * @q:	The &struct request_queue in question
 *
 * Description:
 *   blk_start_queue() will clear the stop flag on the queue, and call
 *   the request_fn for the queue if it was in a stopped state when
 *   entered. Also see blk_stop_queue(). Queue lock must be held.
 */
350void blk_start_queue(struct request_queue *q)
351{
352 WARN_ON(!irqs_disabled());
353
354 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
355 __blk_run_queue(q, false);
356}
357EXPORT_SYMBOL(blk_start_queue);
358
/**
 * blk_stop_queue - stop a queue
 * @q:	The &struct request_queue in question
 *
 * Description:
 *   The Linux block layer assumes that a block driver will consume all
 *   entries on the request queue when the request_fn strategy is called.
 *   Often this will not happen, because of hardware limitations (queue
 *   depth settings). If a device driver gets a 'queue full' response,
 *   or if it simply chooses not to queue more I/O at one point, it can
 *   call this function to prevent the request_fn from being called until
 *   the driver has signalled it's ready to go again. This happens by calling
 *   blk_start_queue() to restart queue operations. Queue lock must be held.
 */
373void blk_stop_queue(struct request_queue *q)
374{
375 blk_remove_plug(q);
376 queue_flag_set(QUEUE_FLAG_STOPPED, q);
377}
378EXPORT_SYMBOL(blk_stop_queue);
379
/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * Description:
 *     The block layer may perform asynchronous callback activity
 *     on a queue, such as calling the unplug function after a timeout.
 *     A block device may call blk_sync_queue to ensure that any
 *     such activity is cancelled, thus allowing it to release resources
 *     that the callbacks might use. The caller must already have made sure
 *     that its ->make_request_fn will not re-add plugging prior to calling
 *     this function.
 */
394void blk_sync_queue(struct request_queue *q)
395{
396 del_timer_sync(&q->unplug_timer);
397 del_timer_sync(&q->timeout);
398 cancel_work_sync(&q->unplug_work);
399 throtl_shutdown_timer_wq(q);
400}
401EXPORT_SYMBOL(blk_sync_queue);
402
/**
 * __blk_run_queue - run a single device queue
 * @q:	The queue to run
 * @force_kblockd: Don't run @q->request_fn directly.  Use kblockd.
 *
 * Description:
 *    See @blk_run_queue. This variant must be called with the queue lock
 *    held and interrupts disabled.
 */
413void __blk_run_queue(struct request_queue *q, bool force_kblockd)
414{
415 blk_remove_plug(q);
416
417 if (unlikely(blk_queue_stopped(q)))
418 return;
419
420 if (elv_queue_empty(q))
421 return;
422
	/*
	 * Only recurse once to avoid overrunning the stack, let the unplug
	 * handling reinvoke the handler shortly if we already got there.
	 */
427 if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
428 q->request_fn(q);
429 queue_flag_clear(QUEUE_FLAG_REENTER, q);
430 } else {
431 queue_flag_set(QUEUE_FLAG_PLUGGED, q);
432 kblockd_schedule_work(q, &q->unplug_work);
433 }
434}
435EXPORT_SYMBOL(__blk_run_queue);
436
/**
 * blk_run_queue - run a single device queue
 * @q: The queue to run
 *
 * Description:
 *    Invoke request handling on this queue, if it has pending work to do.
 *    May be used to restart queueing when a request has completed.
 */
445void blk_run_queue(struct request_queue *q)
446{
447 unsigned long flags;
448
449 spin_lock_irqsave(q->queue_lock, flags);
450 __blk_run_queue(q, false);
451 spin_unlock_irqrestore(q->queue_lock, flags);
452}
453EXPORT_SYMBOL(blk_run_queue);
454
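/*
 * Drop a reference on the queue's kobject; the queue is freed from the
 * kobject release path once the last reference is gone.
 */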
455void blk_put_queue(struct request_queue *q)
456{
457 kobject_put(&q->kobj);
458}
459
460void blk_cleanup_queue(struct request_queue *q)
461{
	/*
	 * We know we have process context here, so we can be a little
	 * cautious and ensure that pending block actions on this device
	 * are done before moving on. Going into this function, we should
	 * not have processes doing IO to this device.
	 */
468 blk_sync_queue(q);
469
470 del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
471 mutex_lock(&q->sysfs_lock);
472 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
473 mutex_unlock(&q->sysfs_lock);
474
475 if (q->elevator)
476 elevator_exit(q->elevator);
477
478 blk_put_queue(q);
479}
480EXPORT_SYMBOL(blk_cleanup_queue);
481
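/*
 * Set up the request free list: per-direction counters, wait queues and
 * the mempool that requests are allocated from.
 */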
482static int blk_init_free_list(struct request_queue *q)
483{
484 struct request_list *rl = &q->rq;
485
486 if (unlikely(rl->rq_pool))
487 return 0;
488
489 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
490 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
491 rl->elvpriv = 0;
492 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
493 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
494
495 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
496 mempool_free_slab, request_cachep, q->node);
497
498 if (!rl->rq_pool)
499 return -ENOMEM;
500
501 return 0;
502}
503
504struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
505{
506 return blk_alloc_queue_node(gfp_mask, -1);
507}
508EXPORT_SYMBOL(blk_alloc_queue);
509
510struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
511{
512 struct request_queue *q;
513 int err;
514
515 q = kmem_cache_alloc_node(blk_requestq_cachep,
516 gfp_mask | __GFP_ZERO, node_id);
517 if (!q)
518 return NULL;
519
520 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
521 q->backing_dev_info.unplug_io_data = q;
522 q->backing_dev_info.ra_pages =
523 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
524 q->backing_dev_info.state = 0;
525 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
526 q->backing_dev_info.name = "block";
527
528 err = bdi_init(&q->backing_dev_info);
529 if (err) {
530 kmem_cache_free(blk_requestq_cachep, q);
531 return NULL;
532 }
533
534 if (blk_throtl_init(q)) {
535 kmem_cache_free(blk_requestq_cachep, q);
536 return NULL;
537 }
538
539 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
540 laptop_mode_timer_fn, (unsigned long) q);
541 init_timer(&q->unplug_timer);
542 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
543 INIT_LIST_HEAD(&q->timeout_list);
544 INIT_LIST_HEAD(&q->pending_flushes);
545 INIT_WORK(&q->unplug_work, blk_unplug_work);
546
547 kobject_init(&q->kobj, &blk_queue_ktype);
548
549 mutex_init(&q->sysfs_lock);
550 spin_lock_init(&q->__queue_lock);
551
552 return q;
553}
554EXPORT_SYMBOL(blk_alloc_queue_node);
555
/**
 * blk_init_queue  - prepare a request queue for use with a block device
 * @rfn:  The function to be called to process requests that have been
 *        placed on the queue.
 * @lock: Request queue spin lock
 *
 * Description:
 *    If a block device wishes to use the standard request handling procedures,
 *    which sorts requests and coalesces adjacent requests, then it must
 *    call blk_init_queue().  The function @rfn will be called when there
 *    are requests on the queue that need to be processed.  If the device
 *    supports plugging, then @rfn may not be called immediately when requests
 *    are available on the queue, but may be called at some time later instead.
 *    Plugged queues are generally unplugged when a buffer belonging to one
 *    of the requests on the queue is needed, or due to memory pressure.
 *
 *    @rfn is not required, or even expected, to remove all requests off the
 *    queue, but only as many as it can handle at a time.  If it does leave
 *    requests on the queue, it is responsible for arranging that the requests
 *    get dealt with eventually.
 *
 *    The queue spin lock must be held while manipulating the requests on the
 *    request queue; this lock will be taken also from interrupt context, so
 *    irq disabling is needed for it.
 *
 *    Function returns a pointer to the initialized request queue, or %NULL if
 *    it didn't succeed.
 *
 * Note:
 *    blk_init_queue() must be paired with a blk_cleanup_queue() call
 *    when the block device is deactivated (such as at module unload).
 */
589struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
590{
591 return blk_init_queue_node(rfn, lock, -1);
592}
593EXPORT_SYMBOL(blk_init_queue);
594
595struct request_queue *
596blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
597{
598 struct request_queue *uninit_q, *q;
599
600 uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
601 if (!uninit_q)
602 return NULL;
603
604 q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
605 if (!q)
606 blk_cleanup_queue(uninit_q);
607
608 return q;
609}
610EXPORT_SYMBOL(blk_init_queue_node);
611
612struct request_queue *
613blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
614 spinlock_t *lock)
615{
616 return blk_init_allocated_queue_node(q, rfn, lock, -1);
617}
618EXPORT_SYMBOL(blk_init_allocated_queue);
619
620struct request_queue *
621blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
622 spinlock_t *lock, int node_id)
623{
624 if (!q)
625 return NULL;
626
627 q->node = node_id;
628 if (blk_init_free_list(q))
629 return NULL;
630
631 q->request_fn = rfn;
632 q->prep_rq_fn = NULL;
633 q->unprep_rq_fn = NULL;
634 q->unplug_fn = generic_unplug_device;
635 q->queue_flags = QUEUE_FLAG_DEFAULT;
636 q->queue_lock = lock;
637
	/*
	 * This also sets hw/phys segments, boundary and size
	 */
641 blk_queue_make_request(q, __make_request);
642
643 q->sg_reserved_size = INT_MAX;
644
645
646
647
648 if (!elevator_init(q, NULL)) {
649 blk_queue_congestion_threshold(q);
650 return q;
651 }
652
653 return NULL;
654}
655EXPORT_SYMBOL(blk_init_allocated_queue_node);
656
657int blk_get_queue(struct request_queue *q)
658{
659 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
660 kobject_get(&q->kobj);
661 return 0;
662 }
663
664 return 1;
665}
666
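/*
 * Return a request to the mempool, releasing any elevator private data
 * first.  Counterpart of blk_alloc_request() below.
 */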
667static inline void blk_free_request(struct request_queue *q, struct request *rq)
668{
669 if (rq->cmd_flags & REQ_ELVPRIV)
670 elv_put_request(q, rq);
671 mempool_free(rq, q->rq.rq_pool);
672}
673
674static struct request *
675blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
676{
677 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
678
679 if (!rq)
680 return NULL;
681
682 blk_rq_init(q, rq);
683
684 rq->cmd_flags = flags | REQ_ALLOCED;
685
686 if (priv) {
687 if (unlikely(elv_set_request(q, rq, gfp_mask))) {
688 mempool_free(rq, q->rq.rq_pool);
689 return NULL;
690 }
691 rq->cmd_flags |= REQ_ELVPRIV;
692 }
693
694 return rq;
695}
696
/*
 * ioc_batching returns true if the ioc is a valid batching request and
 * should be given priority access to a request.
 */
701static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
702{
703 if (!ioc)
704 return 0;
705
	/*
	 * Make sure the process is able to allocate at least 1 request
	 * even if the batch times out, otherwise we could theoretically
	 * lose wakeups.
	 */
711 return ioc->nr_batch_requests == q->nr_batching ||
712 (ioc->nr_batch_requests > 0
713 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
714}
715
/*
 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
 * will cause the process to be a "batcher" on all queues in the system. This
 * is the behaviour we want though - once it gets a wakeup it should be given
 * a nice run.
 */
722static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
723{
724 if (!ioc || ioc_batching(q, ioc))
725 return;
726
727 ioc->nr_batch_requests = q->nr_batching;
728 ioc->last_waited = jiffies;
729}
730
731static void __freed_request(struct request_queue *q, int sync)
732{
733 struct request_list *rl = &q->rq;
734
735 if (rl->count[sync] < queue_congestion_off_threshold(q))
736 blk_clear_queue_congested(q, sync);
737
738 if (rl->count[sync] + 1 <= q->nr_requests) {
739 if (waitqueue_active(&rl->wait[sync]))
740 wake_up(&rl->wait[sync]);
741
742 blk_clear_queue_full(q, sync);
743 }
744}
745
/*
 * A request has just been released.  Account for it, update the full and
 * congestion status, wake up any waiters.  Called under q->queue_lock.
 */
750static void freed_request(struct request_queue *q, int sync, int priv)
751{
752 struct request_list *rl = &q->rq;
753
754 rl->count[sync]--;
755 if (priv)
756 rl->elvpriv--;
757
758 __freed_request(q, sync);
759
760 if (unlikely(rl->starved[sync ^ 1]))
761 __freed_request(q, sync ^ 1);
762}
763
/*
 * Get a free request, queue_lock must be held.
 * Returns NULL on failure, with queue_lock held.
 * Returns !NULL on success, with queue_lock *not held*.
 */
769static struct request *get_request(struct request_queue *q, int rw_flags,
770 struct bio *bio, gfp_t gfp_mask)
771{
772 struct request *rq = NULL;
773 struct request_list *rl = &q->rq;
774 struct io_context *ioc = NULL;
775 const bool is_sync = rw_is_sync(rw_flags) != 0;
776 int may_queue, priv;
777
778 may_queue = elv_may_queue(q, rw_flags);
779 if (may_queue == ELV_MQUEUE_NO)
780 goto rq_starved;
781
782 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
783 if (rl->count[is_sync]+1 >= q->nr_requests) {
784 ioc = current_io_context(GFP_ATOMIC, q->node);
			/*
			 * The queue will fill after this allocation, so set
			 * it as full, and mark this process as "batching".
			 * This process will be allowed to complete a batch of
			 * requests, others will be blocked.
			 */
791 if (!blk_queue_full(q, is_sync)) {
792 ioc_set_batching(q, ioc);
793 blk_set_queue_full(q, is_sync);
794 } else {
795 if (may_queue != ELV_MQUEUE_MUST
796 && !ioc_batching(q, ioc)) {
797
798
799
800
801
802 goto out;
803 }
804 }
805 }
806 blk_set_queue_congested(q, is_sync);
807 }
808
	/*
	 * Only allow batching queuers to allocate up to 50% over the defined
	 * limit of requests, otherwise we could have thousands of requests
	 * allocated with any setting of ->nr_requests
	 */
814 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
815 goto out;
816
817 rl->count[is_sync]++;
818 rl->starved[is_sync] = 0;
819
820 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
821 if (priv)
822 rl->elvpriv++;
823
824 if (blk_queue_io_stat(q))
825 rw_flags |= REQ_IO_STAT;
826 spin_unlock_irq(q->queue_lock);
827
828 rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
829 if (unlikely(!rq)) {
		/*
		 * Allocation failed presumably due to memory. Undo anything
		 * we might have messed up.
		 *
		 * Allocating task should really be put onto the front of the
		 * wait queue, but this is pretty rare.
		 */
837 spin_lock_irq(q->queue_lock);
838 freed_request(q, is_sync, priv);
839
		/*
		 * in the very unlikely event that allocation failed and no
		 * requests for this direction were pending, mark us starved
		 * so that freeing of a request in the other direction will
		 * notice us. another possible fix would be to split the
		 * rq mempool into READ and WRITE
		 */
847rq_starved:
848 if (unlikely(rl->count[is_sync] == 0))
849 rl->starved[is_sync] = 1;
850
851 goto out;
852 }
853
	/*
	 * ioc may well be NULL here: it is only looked up above when the
	 * queue is close to its limits.  ioc_batching() returns false for
	 * a NULL ioc, so the batch count is simply left untouched.
	 */
860 if (ioc_batching(q, ioc))
861 ioc->nr_batch_requests--;
862
863 trace_block_getrq(q, bio, rw_flags & 1);
864out:
865 return rq;
866}
867
/*
 * No available requests for this queue, unplug the device and wait for some
 * requests to become available.
 *
 * Called with q->queue_lock held, and returns with it unlocked.
 */
874static struct request *get_request_wait(struct request_queue *q, int rw_flags,
875 struct bio *bio)
876{
877 const bool is_sync = rw_is_sync(rw_flags) != 0;
878 struct request *rq;
879
880 rq = get_request(q, rw_flags, bio, GFP_NOIO);
881 while (!rq) {
882 DEFINE_WAIT(wait);
883 struct io_context *ioc;
884 struct request_list *rl = &q->rq;
885
886 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
887 TASK_UNINTERRUPTIBLE);
888
889 trace_block_sleeprq(q, bio, rw_flags & 1);
890
891 __generic_unplug_device(q);
892 spin_unlock_irq(q->queue_lock);
893 io_schedule();
894
		/*
		 * After sleeping, we become a "batching" process and
		 * will be able to allocate at least one request, and
		 * up to a big batch of them for a small period of time.
		 * See ioc_batching, ioc_set_batching
		 */
901 ioc = current_io_context(GFP_NOIO, q->node);
902 ioc_set_batching(q, ioc);
903
904 spin_lock_irq(q->queue_lock);
905 finish_wait(&rl->wait[is_sync], &wait);
906
907 rq = get_request(q, rw_flags, bio, GFP_NOIO);
	}
909
910 return rq;
911}
912
913struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
914{
915 struct request *rq;
916
917 BUG_ON(rw != READ && rw != WRITE);
918
919 spin_lock_irq(q->queue_lock);
920 if (gfp_mask & __GFP_WAIT) {
921 rq = get_request_wait(q, rw, NULL);
922 } else {
923 rq = get_request(q, rw, NULL, gfp_mask);
924 if (!rq)
925 spin_unlock_irq(q->queue_lock);
926 }
	/* q->queue_lock is unlocked at this point */

929 return rq;
930}
931EXPORT_SYMBOL(blk_get_request);
932
/**
 * blk_make_request - given a bio, allocate a corresponding struct request.
 * @q: target request queue
 * @bio:  The bio describing the memory mappings that will be submitted for IO.
 *        It may be a chained-bio properly constructed by the block/bio layer.
 * @gfp_mask: gfp flags to be used for memory allocation
 *
 * blk_make_request is the parallel of generic_make_request for BLOCK_PC
 * type commands, where the struct request needs to be further initialized
 * by the caller.  It is passed a &struct bio, which describes the memory
 * info of the I/O transfer.
 *
 * The caller must make sure that bi_io_vec of each bio in the chain is set
 * up to describe the memory buffers, and that bio_data_dir() returns the
 * needed direction of the request.
 *
 * If called in a context that cannot sleep, the mapped bio buffers must
 * not need bouncing, otherwise the call to blk_queue_bounce() will BUG.
 */
964struct request *blk_make_request(struct request_queue *q, struct bio *bio,
965 gfp_t gfp_mask)
966{
967 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
968
969 if (unlikely(!rq))
970 return ERR_PTR(-ENOMEM);
971
972 for_each_bio(bio) {
973 struct bio *bounce_bio = bio;
974 int ret;
975
976 blk_queue_bounce(q, &bounce_bio);
977 ret = blk_rq_append_bio(q, rq, bounce_bio);
978 if (unlikely(ret)) {
979 blk_put_request(rq);
980 return ERR_PTR(ret);
981 }
982 }
983
984 return rq;
985}
986EXPORT_SYMBOL(blk_make_request);
987
/**
 * blk_requeue_request - put a request back on queue
 * @q:		request queue where request should be inserted
 * @rq:		request to be inserted
 *
 * Description:
 *    Drivers often keep queueing requests until the hardware cannot accept
 *    more, when that condition happens we need to put the request back
 *    on the queue. Must be called with queue lock held.
 */
998void blk_requeue_request(struct request_queue *q, struct request *rq)
999{
1000 blk_delete_timer(rq);
1001 blk_clear_rq_complete(rq);
1002 trace_block_rq_requeue(q, rq);
1003
1004 if (blk_rq_tagged(rq))
1005 blk_queue_end_tag(q, rq);
1006
1007 BUG_ON(blk_queued_rq(rq));
1008
1009 elv_requeue_request(q, rq);
1010}
1011EXPORT_SYMBOL(blk_requeue_request);
1012
/**
 * blk_insert_request - insert a special request into a request queue
 * @q:		request queue where request should be inserted
 * @rq:		request to be inserted
 * @at_head:	insert request at head or tail of queue
 * @data:	private data
 *
 * Description:
 *    Many block devices need to execute commands asynchronously, so they don't
 *    block the whole kernel from preemption during request execution.  This is
 *    accomplished normally by inserting artificial requests tagged as
 *    REQ_TYPE_SPECIAL into the corresponding request queue, and letting them
 *    be scheduled for actual execution by the request queue.
 *
 *    We have the option of inserting the head or the tail of the queue.
 *    Typically we use the tail for new ioctls and so forth.  We use the head
 *    of the queue for things like a QUEUE_FULL message from a device, or a
 *    host that is unable to accept a particular command.
 */
1032void blk_insert_request(struct request_queue *q, struct request *rq,
1033 int at_head, void *data)
1034{
1035 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
1036 unsigned long flags;
1037
	/*
	 * tell I/O scheduler that this isn't a regular read/write (ie it
	 * must not attempt merges on this) and that it acts as a soft
	 * barrier
	 */
1043 rq->cmd_type = REQ_TYPE_SPECIAL;
1044
1045 rq->special = data;
1046
1047 spin_lock_irqsave(q->queue_lock, flags);
1048
1049
1050
1051
1052 if (blk_rq_tagged(rq))
1053 blk_queue_end_tag(q, rq);
1054
1055 drive_stat_acct(rq, 1);
1056 __elv_add_request(q, rq, where, 0);
1057 __blk_run_queue(q, false);
1058 spin_unlock_irqrestore(q->queue_lock, flags);
1059}
1060EXPORT_SYMBOL(blk_insert_request);
1061
1062static void part_round_stats_single(int cpu, struct hd_struct *part,
1063 unsigned long now)
1064{
1065 if (now == part->stamp)
1066 return;
1067
1068 if (part_in_flight(part)) {
1069 __part_stat_add(cpu, part, time_in_queue,
1070 part_in_flight(part) * (now - part->stamp));
1071 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1072 }
1073 part->stamp = now;
1074}
1075
/**
 * part_round_stats() - Round off the performance stats on a struct disk_stats.
 * @cpu: cpu number for stats access
 * @part: target partition
 *
 * The average IO queue length and utilisation statistics are maintained
 * by observing the current state of the queue length and the amount of
 * time it has been in this state for.
 *
 * Normally, that accounting is done on IO completion, but that can result
 * in more than a second's worth of IO being accounted for within any one
 * second, leading to >100% utilisation.  To deal with that, we call this
 * function to do a round-off before returning the results when reading
 * /proc/diskstats.  This accounts immediately for all queue usage up to
 * the current jiffies and restarts the counters again.
 */
1092void part_round_stats(int cpu, struct hd_struct *part)
1093{
1094 unsigned long now = jiffies;
1095
1096 if (part->partno)
1097 part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
1098 part_round_stats_single(cpu, part, now);
1099}
1100EXPORT_SYMBOL_GPL(part_round_stats);
1101
/*
 * queue lock must be held
 */
1105void __blk_put_request(struct request_queue *q, struct request *req)
1106{
1107 if (unlikely(!q))
1108 return;
1109 if (unlikely(--req->ref_count))
1110 return;
1111
1112 elv_completed_request(q, req);

	/* this is a bio leak */
1115 WARN_ON(req->bio != NULL);

	/*
	 * Request may not have originated from ll_rw_blk. if not,
	 * it didn't come out of our reserved rq pools
	 */
1121 if (req->cmd_flags & REQ_ALLOCED) {
1122 int is_sync = rq_is_sync(req) != 0;
1123 int priv = req->cmd_flags & REQ_ELVPRIV;
1124
1125 BUG_ON(!list_empty(&req->queuelist));
1126 BUG_ON(!hlist_unhashed(&req->hash));
1127
1128 blk_free_request(q, req);
1129 freed_request(q, is_sync, priv);
1130 }
1131}
1132EXPORT_SYMBOL_GPL(__blk_put_request);
1133
1134void blk_put_request(struct request *req)
1135{
1136 unsigned long flags;
1137 struct request_queue *q = req->q;
1138
1139 spin_lock_irqsave(q->queue_lock, flags);
1140 __blk_put_request(q, req);
1141 spin_unlock_irqrestore(q->queue_lock, flags);
1142}
1143EXPORT_SYMBOL(blk_put_request);
1144
/**
 * blk_add_request_payload - add a payload to a request
 * @rq: request to update
 * @page: page backing the payload
 * @len: length of the payload.
 *
 * This allows a block driver to add a payload to an already submitted
 * request.  The driver needs to take care of freeing the payload itself.
 *
 * Note that this is a quite horrible hack and nothing but handling of
 * discard requests should ever use it.
 */
1158void blk_add_request_payload(struct request *rq, struct page *page,
1159 unsigned int len)
1160{
1161 struct bio *bio = rq->bio;
1162
1163 bio->bi_io_vec->bv_page = page;
1164 bio->bi_io_vec->bv_offset = 0;
1165 bio->bi_io_vec->bv_len = len;
1166
1167 bio->bi_size = len;
1168 bio->bi_vcnt = 1;
1169 bio->bi_phys_segments = 1;
1170
1171 rq->__data_len = rq->resid_len = len;
1172 rq->nr_phys_segments = 1;
1173 rq->buffer = bio_data(bio);
1174}
1175EXPORT_SYMBOL_GPL(blk_add_request_payload);
1176
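/*
 * Fill in a freshly allocated request from the fields of @bio: direction
 * and common flags, starting sector, priority and the bio itself.
 */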
1177void init_request_from_bio(struct request *req, struct bio *bio)
1178{
1179 req->cpu = bio->bi_comp_cpu;
1180 req->cmd_type = REQ_TYPE_FS;
1181
1182 req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
1183 if (bio->bi_rw & REQ_RAHEAD)
1184 req->cmd_flags |= REQ_FAILFAST_MASK;
1185
1186 req->errors = 0;
1187 req->__sector = bio->bi_sector;
1188 req->ioprio = bio_prio(bio);
1189 blk_rq_bio_prep(req->q, req, bio);
1190}
1191
/*
 * Only disable plugging for non-rotational devices if they do tagging
 * as well, otherwise we do need the proper merging
 */
1196static inline bool queue_should_plug(struct request_queue *q)
1197{
1198 return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
1199}
1200
1201static int __make_request(struct request_queue *q, struct bio *bio)
1202{
1203 struct request *req;
1204 int el_ret;
1205 unsigned int bytes = bio->bi_size;
1206 const unsigned short prio = bio_prio(bio);
1207 const bool sync = !!(bio->bi_rw & REQ_SYNC);
1208 const bool unplug = !!(bio->bi_rw & REQ_UNPLUG);
1209 const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK;
1210 int where = ELEVATOR_INSERT_SORT;
1211 int rw_flags;
1212
	/*
	 * low level driver can indicate that it wants pages above a
	 * certain limit bounced to low memory (ie for highmem, or even
	 * ISA dma in theory)
	 */
1218 blk_queue_bounce(q, &bio);
1219
1220 spin_lock_irq(q->queue_lock);
1221
1222 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
1223 where = ELEVATOR_INSERT_FRONT;
1224 goto get_rq;
1225 }
1226
1227 if (elv_queue_empty(q))
1228 goto get_rq;
1229
1230 el_ret = elv_merge(q, &req, bio);
1231 switch (el_ret) {
1232 case ELEVATOR_BACK_MERGE:
1233 BUG_ON(!rq_mergeable(req));
1234
1235 if (!ll_back_merge_fn(q, req, bio))
1236 break;
1237
1238 trace_block_bio_backmerge(q, bio);
1239
1240 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1241 blk_rq_set_mixed_merge(req);
1242
1243 req->biotail->bi_next = bio;
1244 req->biotail = bio;
1245 req->__data_len += bytes;
1246 req->ioprio = ioprio_best(req->ioprio, prio);
1247 if (!blk_rq_cpu_valid(req))
1248 req->cpu = bio->bi_comp_cpu;
1249 drive_stat_acct(req, 0);
1250 elv_bio_merged(q, req, bio);
1251 if (!attempt_back_merge(q, req))
1252 elv_merged_request(q, req, el_ret);
1253 goto out;
1254
1255 case ELEVATOR_FRONT_MERGE:
1256 BUG_ON(!rq_mergeable(req));
1257
1258 if (!ll_front_merge_fn(q, req, bio))
1259 break;
1260
1261 trace_block_bio_frontmerge(q, bio);
1262
1263 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
1264 blk_rq_set_mixed_merge(req);
1265 req->cmd_flags &= ~REQ_FAILFAST_MASK;
1266 req->cmd_flags |= ff;
1267 }
1268
1269 bio->bi_next = req->bio;
1270 req->bio = bio;
1271
1272
1273
1274
1275
1276
1277 req->buffer = bio_data(bio);
1278 req->__sector = bio->bi_sector;
1279 req->__data_len += bytes;
1280 req->ioprio = ioprio_best(req->ioprio, prio);
1281 if (!blk_rq_cpu_valid(req))
1282 req->cpu = bio->bi_comp_cpu;
1283 drive_stat_acct(req, 0);
1284 elv_bio_merged(q, req, bio);
1285 if (!attempt_front_merge(q, req))
1286 elv_merged_request(q, req, el_ret);
1287 goto out;
1288
1289
1290 default:
1291 ;
1292 }
1293
1294get_rq:
	/*
	 * This sync check and mask will be re-done in init_request_from_bio(),
	 * but we need to set it earlier to expose the sync flag to the
	 * rq allocator and io schedulers.
	 */
1300 rw_flags = bio_data_dir(bio);
1301 if (sync)
1302 rw_flags |= REQ_SYNC;
1303
	/*
	 * Grab a free request. This might sleep but cannot fail.
	 * Returns with the queue unlocked.
	 */
1308 req = get_request_wait(q, rw_flags, bio);
1309
	/*
	 * After dropping the lock and possibly sleeping here, our request
	 * may now be mergeable after it had proven unmergeable (above).
	 * We don't worry about that case for efficiency. It won't happen
	 * often, and the elevators are able to handle it.
	 */
1316 init_request_from_bio(req, bio);
1317
1318 spin_lock_irq(q->queue_lock);
1319 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
1320 bio_flagged(bio, BIO_CPU_AFFINE))
1321 req->cpu = blk_cpu_to_group(smp_processor_id());
1322 if (queue_should_plug(q) && elv_queue_empty(q))
1323 blk_plug_device(q);
1324
1325
1326 drive_stat_acct(req, 1);
1327 __elv_add_request(q, req, where, 0);
1328out:
1329 if (unplug || !queue_should_plug(q))
1330 __generic_unplug_device(q);
1331 spin_unlock_irq(q->queue_lock);
1332 return 0;
1333}
1334
/*
 * If bio->bi_bdev is a partition, remap the location
 */
1338static inline void blk_partition_remap(struct bio *bio)
1339{
1340 struct block_device *bdev = bio->bi_bdev;
1341
1342 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
1343 struct hd_struct *p = bdev->bd_part;
1344
1345 bio->bi_sector += p->start_sect;
1346 bio->bi_bdev = bdev->bd_contains;
1347
1348 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
1349 bdev->bd_dev,
1350 bio->bi_sector - p->start_sect);
1351 }
1352}
1353
1354static void handle_bad_sector(struct bio *bio)
1355{
1356 char b[BDEVNAME_SIZE];
1357
1358 printk(KERN_INFO "attempt to access beyond end of device\n");
1359 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
1360 bdevname(bio->bi_bdev, b),
1361 bio->bi_rw,
1362 (unsigned long long)bio->bi_sector + bio_sectors(bio),
1363 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
1364
1365 set_bit(BIO_EOF, &bio->bi_flags);
1366}
1367
1368#ifdef CONFIG_FAIL_MAKE_REQUEST
1369
1370static DECLARE_FAULT_ATTR(fail_make_request);
1371
1372static int __init setup_fail_make_request(char *str)
1373{
1374 return setup_fault_attr(&fail_make_request, str);
1375}
1376__setup("fail_make_request=", setup_fail_make_request);
1377
1378static int should_fail_request(struct bio *bio)
1379{
1380 struct hd_struct *part = bio->bi_bdev->bd_part;
1381
1382 if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
1383 return should_fail(&fail_make_request, bio->bi_size);
1384
1385 return 0;
1386}
1387
1388static int __init fail_make_request_debugfs(void)
1389{
1390 return init_fault_attr_dentries(&fail_make_request,
1391 "fail_make_request");
1392}
1393
1394late_initcall(fail_make_request_debugfs);
1395
1396#else
1397
1398static inline int should_fail_request(struct bio *bio)
1399{
1400 return 0;
1401}
1402
1403#endif
1404
/*
 * Check whether this bio extends beyond the end of the device.
 */
1408static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1409{
1410 sector_t maxsector;
1411
1412 if (!nr_sectors)
1413 return 0;
1414
1415
1416 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
1417 if (maxsector) {
1418 sector_t sector = bio->bi_sector;
1419
1420 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
1421
1422
1423
1424
1425
1426 handle_bad_sector(bio);
1427 return 1;
1428 }
1429 }
1430
1431 return 0;
1432}
1433
/**
 * generic_make_request - hand a buffer to its device driver for I/O
 * @bio:  The bio describing the location in memory and on the device.
 *
 * generic_make_request() is used to make I/O requests of block
 * devices. It is passed a &struct bio, which describes the I/O that needs
 * to be done.
 *
 * generic_make_request() does not return any status.  The
 * success/failure status of the request, along with notification of
 * completion, is delivered asynchronously through the bio->bi_end_io
 * function.
 *
 * The caller of generic_make_request must make sure that bi_io_vec
 * is set to describe the memory buffer, that bi_bdev and bi_sector are
 * set to describe the device address, and that bi_end_io and optionally
 * bi_private are set to describe how completion notification should be
 * signaled.
 *
 * generic_make_request and the drivers it calls may use bi_next if this
 * bio happens to be merged with someone else, and may change bi_bdev and
 * bi_sector for remaps as it sees fit.  So the values of these fields
 * should NOT be depended on after the call to generic_make_request.
 */
1458static inline void __generic_make_request(struct bio *bio)
1459{
1460 struct request_queue *q;
1461 sector_t old_sector;
1462 int ret, nr_sectors = bio_sectors(bio);
1463 dev_t old_dev;
1464 int err = -EIO;
1465
1466 might_sleep();
1467
1468 if (bio_check_eod(bio, nr_sectors))
1469 goto end_io;
1470
	/*
	 * Resolve the mapping until finished. (drivers are
	 * still free to implement/resolve their own stacking
	 * by explicitly returning 0)
	 *
	 * NOTE: we don't repeat the blk_size check for each new device.
	 * Stacking drivers are expected to know what they are doing.
	 */
1479 old_sector = -1;
1480 old_dev = 0;
1481 do {
1482 char b[BDEVNAME_SIZE];
1483
1484 q = bdev_get_queue(bio->bi_bdev);
1485 if (unlikely(!q)) {
1486 printk(KERN_ERR
1487 "generic_make_request: Trying to access "
1488 "nonexistent block-device %s (%Lu)\n",
1489 bdevname(bio->bi_bdev, b),
1490 (long long) bio->bi_sector);
1491 goto end_io;
1492 }
1493
1494 if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
1495 nr_sectors > queue_max_hw_sectors(q))) {
1496 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
1497 bdevname(bio->bi_bdev, b),
1498 bio_sectors(bio),
1499 queue_max_hw_sectors(q));
1500 goto end_io;
1501 }
1502
1503 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
1504 goto end_io;
1505
1506 if (should_fail_request(bio))
1507 goto end_io;
1508
1509
1510
1511
1512
1513 blk_partition_remap(bio);
1514
1515 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1516 goto end_io;
1517
1518 if (old_sector != -1)
1519 trace_block_bio_remap(q, bio, old_dev, old_sector);
1520
1521 old_sector = bio->bi_sector;
1522 old_dev = bio->bi_bdev->bd_dev;
1523
1524 if (bio_check_eod(bio, nr_sectors))
1525 goto end_io;
1526
1527
1528
1529
1530
1531
1532 if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
1533 bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
1534 if (!nr_sectors) {
1535 err = 0;
1536 goto end_io;
1537 }
1538 }
1539
1540 if ((bio->bi_rw & REQ_DISCARD) &&
1541 (!blk_queue_discard(q) ||
1542 ((bio->bi_rw & REQ_SECURE) &&
1543 !blk_queue_secdiscard(q)))) {
1544 err = -EOPNOTSUPP;
1545 goto end_io;
1546 }
1547
1548 blk_throtl_bio(q, &bio);
1549
1550
1551
1552
1553
1554 if (!bio)
1555 break;
1556
1557 trace_block_bio_queue(q, bio);
1558
1559 ret = q->make_request_fn(q, bio);
1560 } while (ret);
1561
1562 return;
1563
1564end_io:
1565 bio_endio(bio, err);
1566}
1567
/*
 * We only want one ->make_request_fn to be active at a time,
 * else stack usage with stacked devices could be a problem.
 * So use current->bio_list to keep a list of requests
 * submitted by a make_request_fn function.
 * current->bio_list is also used as a flag to say if
 * generic_make_request is currently active in this task or not.
 * If it is NULL, then no make_request is active.  If it is non-NULL,
 * then a make_request is active, and new requests should be added
 * at the tail.
 */
1579void generic_make_request(struct bio *bio)
1580{
1581 struct bio_list bio_list_on_stack;
1582
1583 if (current->bio_list) {
		/* make_request is active */
1585 bio_list_add(current->bio_list, bio);
1586 return;
1587 }
1588
	/* following loop may be a bit non-obvious, and so deserves some
	 * explanation.
	 * Before entering the loop, bio->bi_next is NULL (as all callers
	 * ensure that) so we have a list with a single bio.
	 * We pretend that we have just taken it off a longer list, so
	 * we assign bio_list to a pointer to the bio_list_on_stack,
	 * thus initialising the bio_list of new bios to be
	 * added.  __generic_make_request may indeed add some more bios
	 * through a recursive call to generic_make_request.  If it
	 * did, we find a non-NULL value in bio_list and re-enter the loop
	 * from the top.  In this case we really did just take the bio
	 * off the top of the list (no pretending) and so remove it from
	 * bio_list, and call into __generic_make_request again.
	 */
1606 BUG_ON(bio->bi_next);
1607 bio_list_init(&bio_list_on_stack);
1608 current->bio_list = &bio_list_on_stack;
1609 do {
1610 __generic_make_request(bio);
1611 bio = bio_list_pop(current->bio_list);
1612 } while (bio);
1613 current->bio_list = NULL;
1614}
1615EXPORT_SYMBOL(generic_make_request);
1616
/**
 * submit_bio - submit a bio to the block device layer for I/O
 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
 * @bio: The &struct bio which describes the I/O
 *
 * submit_bio() is very similar in purpose to generic_make_request(), and
 * uses that function to do most of the work. Both are fairly rough
 * interfaces; @bio must be presetup and ready for I/O.
 */
1627void submit_bio(int rw, struct bio *bio)
1628{
1629 int count = bio_sectors(bio);
1630
1631 bio->bi_rw |= rw;
1632
	/*
	 * If it's a regular read/write or a barrier with data attached,
	 * go through the normal accounting stuff before submission.
	 */
1637 if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
1638 if (rw & WRITE) {
1639 count_vm_events(PGPGOUT, count);
1640 } else {
1641 task_io_account_read(bio->bi_size);
1642 count_vm_events(PGPGIN, count);
1643 }
1644
1645 if (unlikely(block_dump)) {
1646 char b[BDEVNAME_SIZE];
1647 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
1648 current->comm, task_pid_nr(current),
1649 (rw & WRITE) ? "WRITE" : "READ",
1650 (unsigned long long)bio->bi_sector,
1651 bdevname(bio->bi_bdev, b),
1652 count);
1653 }
1654 }
1655
1656 generic_make_request(bio);
1657}
1658EXPORT_SYMBOL(submit_bio);
1659
/**
 * blk_rq_check_limits - Helper function to check a request for the queue limit
 * @q:  the queue
 * @rq: the request being checked
 *
 * Description:
 *    @rq may have been made based on weaker limitations of upper-level queues
 *    in request stacking drivers, and it may violate the limitation of @q.
 *    Since the block layer and the underlying device driver trust @rq
 *    after it is inserted to @q, it should be checked against @q before
 *    the insertion using this generic function.
 *
 *    Request stacking drivers like request-based dm may change the queue
 *    limits while requests are in the queue (e.g. dm's table swapping).
 *    Such request stacking drivers should check those requests against
 *    the new queue limits again when they dispatch those requests.
 */
1681int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1682{
1683 if (rq->cmd_flags & REQ_DISCARD)
1684 return 0;
1685
1686 if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
1687 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
1688 printk(KERN_ERR "%s: over max size limit.\n", __func__);
1689 return -EIO;
1690 }
1691
	/*
	 * queue's settings related to segment counting like q->bounce_pfn
	 * may differ from that of other stacking queues.
	 * Recalculate it to check the request correctly on this queue's
	 * limitation.
	 */
1698 blk_recalc_rq_segments(rq);
1699 if (rq->nr_phys_segments > queue_max_segments(q)) {
1700 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
1701 return -EIO;
1702 }
1703
1704 return 0;
1705}
1706EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1707
/**
 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
 * @q:  the queue to submit the request
 * @rq: the request being queued
 */
1713int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1714{
1715 unsigned long flags;
1716
1717 if (blk_rq_check_limits(q, rq))
1718 return -EIO;
1719
1720#ifdef CONFIG_FAIL_MAKE_REQUEST
1721 if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
1722 should_fail(&fail_make_request, blk_rq_bytes(rq)))
1723 return -EIO;
1724#endif
1725
1726 spin_lock_irqsave(q->queue_lock, flags);

	/*
	 * Submitting request must be dequeued before calling this function
	 * because it will be linked to another request_queue
	 */
1732 BUG_ON(blk_queued_rq(rq));
1733
1734 drive_stat_acct(rq, 1);
1735 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1736
1737 spin_unlock_irqrestore(q->queue_lock, flags);
1738
1739 return 0;
1740}
1741EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
1742
/**
 * blk_rq_err_bytes - determine number of bytes till the next failure boundary
 * @rq: request to examine
 *
 * Description:
 *     A request could be a merge of IOs which require different failure
 *     handling.  This function determines the number of bytes which
 *     can be failed from the beginning of the request without
 *     crossing into an area which needs to be retried further.
 *
 * Return:
 *     The number of bytes to fail.
 *
 * Context:
 *     queue_lock must be held.
 */
1759unsigned int blk_rq_err_bytes(const struct request *rq)
1760{
1761 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
1762 unsigned int bytes = 0;
1763 struct bio *bio;
1764
1765 if (!(rq->cmd_flags & REQ_MIXED_MERGE))
1766 return blk_rq_bytes(rq);
1767
1768
1769
1770
1771
1772
1773
1774
1775 for (bio = rq->bio; bio; bio = bio->bi_next) {
1776 if ((bio->bi_rw & ff) != ff)
1777 break;
1778 bytes += bio->bi_size;
1779 }
1780
1781
1782 BUG_ON(blk_rq_bytes(rq) && !bytes);
1783 return bytes;
1784}
1785EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
1786
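/*
 * Account partially completed bytes against the partition the request
 * was charged to; called on every (partial) completion.
 */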
1787static void blk_account_io_completion(struct request *req, unsigned int bytes)
1788{
1789 if (blk_do_io_stat(req)) {
1790 const int rw = rq_data_dir(req);
1791 struct hd_struct *part;
1792 int cpu;
1793
1794 cpu = part_stat_lock();
1795 part = req->part;
1796 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
1797 part_stat_unlock();
1798 }
1799}
1800
1801static void blk_account_io_done(struct request *req)
1802{
	/*
	 * Account IO completion.  flush_rq isn't accounted as a
	 * normal IO on queueing nor completion.  Accounting the
	 * containing request is enough.
	 */
1808 if (blk_do_io_stat(req) && req != &req->q->flush_rq) {
1809 unsigned long duration = jiffies - req->start_time;
1810 const int rw = rq_data_dir(req);
1811 struct hd_struct *part;
1812 int cpu;
1813
1814 cpu = part_stat_lock();
1815 part = req->part;
1816
1817 part_stat_inc(cpu, part, ios[rw]);
1818 part_stat_add(cpu, part, ticks[rw], duration);
1819 part_round_stats(cpu, part);
1820 part_dec_in_flight(part, rw);
1821
1822 hd_struct_put(part);
1823 part_stat_unlock();
1824 }
1825}
1826
/**
 * blk_peek_request - peek at the top of a request queue
 * @q: request queue to peek at
 *
 * Description:
 *     Return the request at the top of @q.  The returned request
 *     should be started using blk_start_request() before LLD starts
 *     processing it.
 *
 * Return:
 *     Pointer to the request at the top of @q if available.  Null
 *     otherwise.
 *
 * Context:
 *     queue_lock must be held.
 */
1843struct request *blk_peek_request(struct request_queue *q)
1844{
1845 struct request *rq;
1846 int ret;
1847
1848 while ((rq = __elv_next_request(q)) != NULL) {
1849 if (!(rq->cmd_flags & REQ_STARTED)) {
1850
1851
1852
1853
1854
1855 if (rq->cmd_flags & REQ_SORTED)
1856 elv_activate_rq(q, rq);
1857
1858
1859
1860
1861
1862
1863 rq->cmd_flags |= REQ_STARTED;
1864 trace_block_rq_issue(q, rq);
1865 }
1866
1867 if (!q->boundary_rq || q->boundary_rq == rq) {
1868 q->end_sector = rq_end_sector(rq);
1869 q->boundary_rq = NULL;
1870 }
1871
1872 if (rq->cmd_flags & REQ_DONTPREP)
1873 break;
1874
1875 if (q->dma_drain_size && blk_rq_bytes(rq)) {
1876
1877
1878
1879
1880
1881
1882 rq->nr_phys_segments++;
1883 }
1884
1885 if (!q->prep_rq_fn)
1886 break;
1887
1888 ret = q->prep_rq_fn(q, rq);
1889 if (ret == BLKPREP_OK) {
1890 break;
1891 } else if (ret == BLKPREP_DEFER) {
1892
1893
1894
1895
1896
1897
1898 if (q->dma_drain_size && blk_rq_bytes(rq) &&
1899 !(rq->cmd_flags & REQ_DONTPREP)) {
1900
1901
1902
1903
1904 --rq->nr_phys_segments;
1905 }
1906
1907 rq = NULL;
1908 break;
1909 } else if (ret == BLKPREP_KILL) {
1910 rq->cmd_flags |= REQ_QUIET;
1911
1912
1913
1914
1915 blk_start_request(rq);
1916 __blk_end_request_all(rq, -EIO);
1917 } else {
1918 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
1919 break;
1920 }
1921 }
1922
1923 return rq;
1924}
1925EXPORT_SYMBOL(blk_peek_request);
1926
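/*
 * Remove a started request from the queue's list and begin in-flight
 * accounting for it.  Internal helper; drivers use blk_start_request().
 */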
1927void blk_dequeue_request(struct request *rq)
1928{
1929 struct request_queue *q = rq->q;
1930
1931 BUG_ON(list_empty(&rq->queuelist));
1932 BUG_ON(ELV_ON_HASH(rq));
1933
1934 list_del_init(&rq->queuelist);
1935
1936
1937
1938
1939
1940
1941 if (blk_account_rq(rq)) {
1942 q->in_flight[rq_is_sync(rq)]++;
1943 set_io_start_time_ns(rq);
1944 }
1945}
1946
/**
 * blk_start_request - start request processing on the driver
 * @req: request to dequeue
 *
 * Description:
 *     Dequeue @req and start timeout timer on it.  This hands off the
 *     request to the driver.
 *
 *     Block internal functions which don't want to start a timer should
 *     call blk_dequeue_request().
 *
 * Context:
 *     queue_lock must be held.
 */
1961void blk_start_request(struct request *req)
1962{
1963 blk_dequeue_request(req);
1964
1965
1966
1967
1968
1969 req->resid_len = blk_rq_bytes(req);
1970 if (unlikely(blk_bidi_rq(req)))
1971 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
1972
1973 blk_add_timer(req);
1974}
1975EXPORT_SYMBOL(blk_start_request);
1976
/**
 * blk_fetch_request - fetch a request from a request queue
 * @q: request queue to fetch a request from
 *
 * Description:
 *     Return the request at the top of @q.  The request is started on
 *     return and LLD can start processing it immediately.
 *
 * Return:
 *     Pointer to the request at the top of @q if available.  Null
 *     otherwise.
 *
 * Context:
 *     queue_lock must be held.
 */
1992struct request *blk_fetch_request(struct request_queue *q)
1993{
1994 struct request *rq;
1995
1996 rq = blk_peek_request(q);
1997 if (rq)
1998 blk_start_request(rq);
1999 return rq;
2000}
2001EXPORT_SYMBOL(blk_fetch_request);
2002
/**
 * blk_update_request - Special helper function for request stacking drivers
 * @req:      the request being processed
 * @error:    %0 for success, < %0 for error
 * @nr_bytes: number of bytes to complete @req
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @req, but doesn't complete
 *     the request structure even if @req doesn't have leftover.
 *     If @req has leftover, sets it up for the next range of segments.
 *
 *     This special helper function is only for request stacking drivers
 *     (e.g. request-based dm) so that they can handle partial completion.
 *     Actual device drivers should use blk_end_request instead.
 *
 *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
 *     %false return from this function.
 *
 * Return:
 *     %false - this request doesn't have any more data
 *     %true  - this request has more data
 **/
2025bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2026{
2027 int total_bytes, bio_nbytes, next_idx = 0;
2028 struct bio *bio;
2029
2030 if (!req->bio)
2031 return false;
2032
2033 trace_block_rq_complete(req->q, req);
2034
	/*
	 * For fs requests, rq is just a carrier of independent bio's
	 * and each partial completion should be handled separately.
	 * Reset per-request error on each partial completion.
	 */
2043 if (req->cmd_type == REQ_TYPE_FS)
2044 req->errors = 0;
2045
2046 if (error && req->cmd_type == REQ_TYPE_FS &&
2047 !(req->cmd_flags & REQ_QUIET)) {
2048 printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
2049 req->rq_disk ? req->rq_disk->disk_name : "?",
2050 (unsigned long long)blk_rq_pos(req));
2051 }
2052
2053 blk_account_io_completion(req, nr_bytes);
2054
2055 total_bytes = bio_nbytes = 0;
2056 while ((bio = req->bio) != NULL) {
2057 int nbytes;
2058
2059 if (nr_bytes >= bio->bi_size) {
2060 req->bio = bio->bi_next;
2061 nbytes = bio->bi_size;
2062 req_bio_endio(req, bio, nbytes, error);
2063 next_idx = 0;
2064 bio_nbytes = 0;
2065 } else {
2066 int idx = bio->bi_idx + next_idx;
2067
2068 if (unlikely(idx >= bio->bi_vcnt)) {
2069 blk_dump_rq_flags(req, "__end_that");
2070 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
2071 __func__, idx, bio->bi_vcnt);
2072 break;
2073 }
2074
2075 nbytes = bio_iovec_idx(bio, idx)->bv_len;
2076 BIO_BUG_ON(nbytes > bio->bi_size);
2077
2078
2079
2080
2081 if (unlikely(nbytes > nr_bytes)) {
2082 bio_nbytes += nr_bytes;
2083 total_bytes += nr_bytes;
2084 break;
2085 }
2086
2087
2088
2089
2090 next_idx++;
2091 bio_nbytes += nbytes;
2092 }
2093
2094 total_bytes += nbytes;
2095 nr_bytes -= nbytes;
2096
2097 bio = req->bio;
2098 if (bio) {
2099
2100
2101
2102 if (unlikely(nr_bytes <= 0))
2103 break;
2104 }
2105 }
2106
2107
2108
2109
2110 if (!req->bio) {
2111
2112
2113
2114
2115
2116 req->__data_len = 0;
2117 return false;
2118 }
2119
2120
2121
2122
2123 if (bio_nbytes) {
2124 req_bio_endio(req, bio, bio_nbytes, error);
2125 bio->bi_idx += next_idx;
2126 bio_iovec(bio)->bv_offset += nr_bytes;
2127 bio_iovec(bio)->bv_len -= nr_bytes;
2128 }
2129
2130 req->__data_len -= total_bytes;
2131 req->buffer = bio_data(req->bio);
2132
2133
2134 if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
2135 req->__sector += total_bytes >> 9;
2136
2137
2138 if (req->cmd_flags & REQ_MIXED_MERGE) {
2139 req->cmd_flags &= ~REQ_FAILFAST_MASK;
2140 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
2141 }
2142
2143
2144
2145
2146
2147 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
2148 printk(KERN_ERR "blk: request botched\n");
2149 req->__data_len = blk_rq_cur_bytes(req);
2150 }
2151
2152
2153 blk_recalc_rq_segments(req);
2154
2155 return true;
2156}
2157EXPORT_SYMBOL_GPL(blk_update_request);
2158
2159static bool blk_update_bidi_request(struct request *rq, int error,
2160 unsigned int nr_bytes,
2161 unsigned int bidi_bytes)
2162{
2163 if (blk_update_request(rq, error, nr_bytes))
2164 return true;
2165
2166
2167 if (unlikely(blk_bidi_rq(rq)) &&
2168 blk_update_request(rq->next_rq, error, bidi_bytes))
2169 return true;
2170
2171 if (blk_queue_add_random(rq->q))
2172 add_disk_randomness(rq->rq_disk);
2173
2174 return false;
2175}
2176
/**
 * blk_unprep_request - unprepare a request
 * @req:	the request
 *
 * This function makes a request ready for complete resubmission (or
 * completion).  It happens only after all error handling is complete,
 * so represents the appropriate moment to deallocate any resources
 * that were allocated to the request in the prep_rq_fn.
 */
2187void blk_unprep_request(struct request *req)
2188{
2189 struct request_queue *q = req->q;
2190
2191 req->cmd_flags &= ~REQ_DONTPREP;
2192 if (q->unprep_rq_fn)
2193 q->unprep_rq_fn(q, req);
2194}
2195EXPORT_SYMBOL_GPL(blk_unprep_request);
2196
/*
 * queue lock must be held
 */
2200static void blk_finish_request(struct request *req, int error)
2201{
2202 if (blk_rq_tagged(req))
2203 blk_queue_end_tag(req->q, req);
2204
2205 BUG_ON(blk_queued_rq(req));
2206
2207 if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
2208 laptop_io_completion(&req->q->backing_dev_info);
2209
2210 blk_delete_timer(req);
2211
2212 if (req->cmd_flags & REQ_DONTPREP)
2213 blk_unprep_request(req);
2214
2215
2216 blk_account_io_done(req);
2217
2218 if (req->end_io)
2219 req->end_io(req, error);
2220 else {
2221 if (blk_bidi_rq(req))
2222 __blk_put_request(req->next_rq->q, req->next_rq);
2223
2224 __blk_put_request(req->q, req);
2225 }
2226}
2227
/**
 * blk_end_bidi_request - Complete a bidi request
 * @rq:         the request to complete
 * @error:      %0 for success, < %0 for error
 * @nr_bytes:   number of bytes to complete @rq
 * @bidi_bytes: number of bytes to complete @rq->next_rq
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
 *     Drivers that support bidi can safely call this member for any
 *     type of request, bidi or uni.  In the latter case @bidi_bytes is
 *     just ignored.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
2245static bool blk_end_bidi_request(struct request *rq, int error,
2246 unsigned int nr_bytes, unsigned int bidi_bytes)
2247{
2248 struct request_queue *q = rq->q;
2249 unsigned long flags;
2250
2251 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2252 return true;
2253
2254 spin_lock_irqsave(q->queue_lock, flags);
2255 blk_finish_request(rq, error);
2256 spin_unlock_irqrestore(q->queue_lock, flags);
2257
2258 return false;
2259}
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276static bool __blk_end_bidi_request(struct request *rq, int error,
2277 unsigned int nr_bytes, unsigned int bidi_bytes)
2278{
2279 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2280 return true;
2281
2282 blk_finish_request(rq, error);
2283
2284 return false;
2285}
2286
/**
 * blk_end_request - Helper function for drivers to complete the request.
 * @rq:       the request being processed
 * @error:    %0 for success, < %0 for error
 * @nr_bytes: number of bytes to complete
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @rq.
 *     If @rq has leftover, sets it up for the next range of segments.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
2301bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2302{
2303 return blk_end_bidi_request(rq, error, nr_bytes, 0);
2304}
2305EXPORT_SYMBOL(blk_end_request);
2306
/**
 * blk_end_request_all - Helper function for drivers to finish the request.
 * @rq: the request to finish
 * @error: %0 for success, < %0 for error
 *
 * Description:
 *     Completely finish @rq.
 */
2315void blk_end_request_all(struct request *rq, int error)
2316{
2317 bool pending;
2318 unsigned int bidi_bytes = 0;
2319
2320 if (unlikely(blk_bidi_rq(rq)))
2321 bidi_bytes = blk_rq_bytes(rq->next_rq);
2322
2323 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2324 BUG_ON(pending);
2325}
2326EXPORT_SYMBOL(blk_end_request_all);
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340bool blk_end_request_cur(struct request *rq, int error)
2341{
2342 return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2343}
2344EXPORT_SYMBOL(blk_end_request_cur);
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358bool blk_end_request_err(struct request *rq, int error)
2359{
2360 WARN_ON(error >= 0);
2361 return blk_end_request(rq, error, blk_rq_err_bytes(rq));
2362}
2363EXPORT_SYMBOL_GPL(blk_end_request_err);
2364
/**
 * __blk_end_request - Helper function for drivers to complete the request.
 * @rq:       the request being processed
 * @error:    %0 for success, < %0 for error
 * @nr_bytes: number of bytes to complete
 *
 * Description:
 *     Must be called with queue lock held unlike blk_end_request().
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
2378bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2379{
2380 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2381}
2382EXPORT_SYMBOL(__blk_end_request);
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392void __blk_end_request_all(struct request *rq, int error)
2393{
2394 bool pending;
2395 unsigned int bidi_bytes = 0;
2396
2397 if (unlikely(blk_bidi_rq(rq)))
2398 bidi_bytes = blk_rq_bytes(rq->next_rq);
2399
2400 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2401 BUG_ON(pending);
2402}
2403EXPORT_SYMBOL(__blk_end_request_all);
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418bool __blk_end_request_cur(struct request *rq, int error)
2419{
2420 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2421}
2422EXPORT_SYMBOL(__blk_end_request_cur);
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437bool __blk_end_request_err(struct request *rq, int error)
2438{
2439 WARN_ON(error >= 0);
2440 return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
2441}
2442EXPORT_SYMBOL_GPL(__blk_end_request_err);
2443
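/*
 * Prepare @rq so that it carries exactly @bio: copy the data direction,
 * segment count, length and disk from the bio.
 */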
2444void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2445 struct bio *bio)
2446{
	/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
2448 rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
2449
2450 if (bio_has_data(bio)) {
2451 rq->nr_phys_segments = bio_phys_segments(q, bio);
2452 rq->buffer = bio_data(bio);
2453 }
2454 rq->__data_len = bio->bi_size;
2455 rq->bio = rq->biotail = bio;
2456
2457 if (bio->bi_bdev)
2458 rq->rq_disk = bio->bi_bdev->bd_disk;
2459}
2460
2461#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/**
 * rq_flush_dcache_pages - Helper function to flush all pages in a request
 * @rq: the request to be flushed
 *
 * Description:
 *     Flush all pages in @rq.
 */
2469void rq_flush_dcache_pages(struct request *rq)
2470{
2471 struct req_iterator iter;
2472 struct bio_vec *bvec;
2473
2474 rq_for_each_segment(bvec, rq, iter)
2475 flush_dcache_page(bvec->bv_page);
2476}
2477EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
2478#endif
2479
/**
 * blk_lld_busy - Check if underlying low-level drivers of a device are busy
 * @q : the queue of the device being checked
 *
 * Description:
 *    Check if underlying low-level drivers of a device are busy.
 *    If the drivers want to export their busy state, they must set their
 *    own exporting function using blk_queue_lld_busy() first.
 *
 *    Basically, this function is used only by request stacking drivers
 *    to stop dispatching requests to underlying devices when underlying
 *    devices are busy.  This behavior helps more I/O merging on the queue
 *    of the request stacking driver and prevents I/O throughput regression
 *    on burst I/O load.
 *
 * Return:
 *    0 - Not busy (The request stacking driver should dispatch request)
 *    1 - Busy (The request stacking driver should stop dispatching request)
 */
2499int blk_lld_busy(struct request_queue *q)
2500{
2501 if (q->lld_busy_fn)
2502 return q->lld_busy_fn(q);
2503
2504 return 0;
2505}
2506EXPORT_SYMBOL_GPL(blk_lld_busy);
2507
/**
 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
 * @rq: the clone request to be cleaned up
 *
 * Description:
 *     Free all bios in @rq for a cloned request.
 */
2515void blk_rq_unprep_clone(struct request *rq)
2516{
2517 struct bio *bio;
2518
2519 while ((bio = rq->bio) != NULL) {
2520 rq->bio = bio->bi_next;
2521
2522 bio_put(bio);
2523 }
2524}
2525EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
2526
/*
 * Copy attributes of the original request to the clone request.
 * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
 */
2531static void __blk_rq_prep_clone(struct request *dst, struct request *src)
2532{
2533 dst->cpu = src->cpu;
2534 dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
2535 dst->cmd_type = src->cmd_type;
2536 dst->__sector = blk_rq_pos(src);
2537 dst->__data_len = blk_rq_bytes(src);
2538 dst->nr_phys_segments = src->nr_phys_segments;
2539 dst->ioprio = src->ioprio;
2540 dst->extra_len = src->extra_len;
2541}
2542
/**
 * blk_rq_prep_clone - Helper function to setup clone request
 * @rq: the request to be setup
 * @rq_src: original request to be cloned
 * @bs: bio_set that bios for clone are allocated from
 * @gfp_mask: memory allocation mask for bio
 * @bio_ctr: setup function to be called for each clone bio.
 *           Returns %0 for success, non %0 for failure.
 * @data: private data to be passed to @bio_ctr
 *
 * Description:
 *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
 *     The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
 *     are not copied, and copying such parts is the caller's responsibility.
 *     Also, pages which the original bios are pointing to are not copied
 *     and the cloned bios just point to the same pages.
 *     So cloned bios must be completed before original bios, which means
 *     the caller must complete @rq before @rq_src.
 */
2562int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2563 struct bio_set *bs, gfp_t gfp_mask,
2564 int (*bio_ctr)(struct bio *, struct bio *, void *),
2565 void *data)
2566{
2567 struct bio *bio, *bio_src;
2568
2569 if (!bs)
2570 bs = fs_bio_set;
2571
2572 blk_rq_init(NULL, rq);
2573
2574 __rq_for_each_bio(bio_src, rq_src) {
2575 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
2576 if (!bio)
2577 goto free_and_out;
2578
2579 __bio_clone(bio, bio_src);
2580
2581 if (bio_integrity(bio_src) &&
2582 bio_integrity_clone(bio, bio_src, gfp_mask, bs))
2583 goto free_and_out;
2584
2585 if (bio_ctr && bio_ctr(bio, bio_src, data))
2586 goto free_and_out;
2587
2588 if (rq->bio) {
2589 rq->biotail->bi_next = bio;
2590 rq->biotail = bio;
2591 } else
2592 rq->bio = rq->biotail = bio;
2593 }
2594
2595 __blk_rq_prep_clone(rq, rq_src);
2596
2597 return 0;
2598
2599free_and_out:
2600 if (bio)
2601 bio_free(bio, bs);
2602 blk_rq_unprep_clone(rq);
2603
2604 return -ENOMEM;
2605}
2606EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
2607
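/*
 * Schedule work on the block layer's private workqueue (kblockd), used
 * e.g. for deferred queue unplugging.
 */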
2608int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2609{
2610 return queue_work(kblockd_workqueue, work);
2611}
2612EXPORT_SYMBOL(kblockd_schedule_work);
2613
2614int __init blk_dev_init(void)
2615{
2616 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
2617 sizeof(((struct request *)0)->cmd_flags));
2618
	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
2620 kblockd_workqueue = alloc_workqueue("kblockd",
2621 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
2622 if (!kblockd_workqueue)
2623 panic("Failed to create kblockd\n");
2624
2625 request_cachep = kmem_cache_create("blkdev_requests",
2626 sizeof(struct request), 0, SLAB_PANIC, NULL);
2627
2628 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
2629 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
2630
2631 return 0;
2632}
2633