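/*
 * Core block layer code: request queue setup and teardown, request
 * allocation, bio submission (generic_make_request()/submit_bio()),
 * plugging and request completion handling.
 */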
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>
#include <linux/list_sort.h>
#include <linux/delay.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

#include "blk.h"

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);

DEFINE_IDA(blk_queue_ida);

/* Slab cache backing the per-queue request mempools. */
static struct kmem_cache *request_cachep;

/* Slab cache for struct request_queue allocations. */
struct kmem_cache *blk_requestq_cachep;

/* Workqueue used by kblockd for deferred/async running of request queues. */
static struct workqueue_struct *kblockd_workqueue;

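/*
 * Account a request in the per-partition disk statistics: new I/Os bump
 * the in-flight count and round the partition stats, while merges only
 * increment the merge counter of the partition the request already
 * references.
 */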
static void drive_stat_acct(struct request *rq, int new_io)
{
	struct hd_struct *part;
	int rw = rq_data_dir(rq);
	int cpu;

	if (!blk_do_io_stat(rq))
		return;

	cpu = part_stat_lock();

	if (!new_io) {
		part = rq->part;
		part_stat_inc(cpu, part, merges[rw]);
	} else {
		part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
		if (!hd_struct_try_get(part)) {
			/*
			 * The partition is already being removed, so a
			 * reference cannot be taken on it.  Charge the I/O
			 * to the whole-disk statistics (part0) instead.
			 */
			part = &rq->rq_disk->part0;
			hd_struct_get(part);
		}
		part_round_stats(cpu, part);
		part_inc_in_flight(part, rw);
		rq->part = part;
	}

	part_stat_unlock();
}

/*
 * Set the congestion on/off watermarks as a function of the queue depth
 * (nr_requests), with a small hysteresis gap between the two.
 */
void blk_queue_congestion_threshold(struct request_queue *q)
{
	int nr;

	nr = q->nr_requests - (q->nr_requests / 8) + 1;
	if (nr > q->nr_requests)
		nr = q->nr_requests;
	q->nr_congestion_on = nr;

	nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
	if (nr < 1)
		nr = 1;
	q->nr_congestion_off = nr;
}

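/**
 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
 * @bdev:	device
 *
 * Locates the passed device's request queue and returns the address of
 * its backing_dev_info, or %NULL if the device has no queue.
 */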
struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
{
	struct backing_dev_info *ret = NULL;
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		ret = &q->backing_dev_info;
	return ret;
}
EXPORT_SYMBOL(blk_get_backing_dev_info);
129
130void blk_rq_init(struct request_queue *q, struct request *rq)
131{
132 memset(rq, 0, sizeof(*rq));
133
134 INIT_LIST_HEAD(&rq->queuelist);
135 INIT_LIST_HEAD(&rq->timeout_list);
136 rq->cpu = -1;
137 rq->q = q;
138 rq->__sector = (sector_t) -1;
139 INIT_HLIST_NODE(&rq->hash);
140 RB_CLEAR_NODE(&rq->rb_node);
141 rq->cmd = rq->__cmd;
142 rq->cmd_len = BLK_MAX_CDB;
143 rq->tag = -1;
144 rq->ref_count = 1;
145 rq->start_time = jiffies;
146 set_start_time_ns(rq);
147 rq->part = NULL;
148}
149EXPORT_SYMBOL(blk_rq_init);
150
151static void req_bio_endio(struct request *rq, struct bio *bio,
152 unsigned int nbytes, int error)
153{
154 if (error)
155 clear_bit(BIO_UPTODATE, &bio->bi_flags);
156 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
157 error = -EIO;
158
159 if (unlikely(nbytes > bio->bi_size)) {
160 printk(KERN_ERR "%s: want %u bytes done, %u left\n",
161 __func__, nbytes, bio->bi_size);
162 nbytes = bio->bi_size;
163 }
164
165 if (unlikely(rq->cmd_flags & REQ_QUIET))
166 set_bit(BIO_QUIET, &bio->bi_flags);
167
168 bio->bi_size -= nbytes;
169 bio->bi_sector += (nbytes >> 9);
170
171 if (bio_integrity(bio))
172 bio_integrity_advance(bio, nbytes);
173
174
175 if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
176 bio_endio(bio, error);
177}
178
179void blk_dump_rq_flags(struct request *rq, char *msg)
180{
181 int bit;
182
183 printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
184 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
185 rq->cmd_flags);
186
187 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
188 (unsigned long long)blk_rq_pos(rq),
189 blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
190 printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",
191 rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
192
193 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
194 printk(KERN_INFO " cdb: ");
195 for (bit = 0; bit < BLK_MAX_CDB; bit++)
196 printk("%02x ", rq->cmd[bit]);
197 printk("\n");
198 }
199}
200EXPORT_SYMBOL(blk_dump_rq_flags);
201
202static void blk_delay_work(struct work_struct *work)
203{
204 struct request_queue *q;
205
206 q = container_of(work, struct request_queue, delay_work.work);
207 spin_lock_irq(q->queue_lock);
208 __blk_run_queue(q);
209 spin_unlock_irq(q->queue_lock);
210}
211
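/**
 * blk_delay_queue - restart queueing after a defined time period
 * @q:		the &struct request_queue in question
 * @msecs:	delay in msecs
 *
 * Schedules kblockd to run the queue's delayed work after @msecs, which
 * in turn calls __blk_run_queue() with the queue lock held.
 */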
222void blk_delay_queue(struct request_queue *q, unsigned long msecs)
223{
224 queue_delayed_work(kblockd_workqueue, &q->delay_work,
225 msecs_to_jiffies(msecs));
226}
227EXPORT_SYMBOL(blk_delay_queue);
228
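/**
 * blk_start_queue - restart a previously stopped queue
 * @q:	the &struct request_queue in question
 *
 * Clears the stopped flag and runs the queue again.  Expects to be
 * called with interrupts disabled and, like the other queue-flag
 * helpers, with the queue lock held.
 */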
238void blk_start_queue(struct request_queue *q)
239{
240 WARN_ON(!irqs_disabled());
241
242 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
243 __blk_run_queue(q);
244}
245EXPORT_SYMBOL(blk_start_queue);
246
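/**
 * blk_stop_queue - stop a queue
 * @q:	the &struct request_queue in question
 *
 * Cancels any pending delayed queue run and marks the queue stopped, so
 * __blk_run_queue() becomes a no-op until blk_start_queue() is called.
 */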
261void blk_stop_queue(struct request_queue *q)
262{
263 __cancel_delayed_work(&q->delay_work);
264 queue_flag_set(QUEUE_FLAG_STOPPED, q);
265}
266EXPORT_SYMBOL(blk_stop_queue);
267
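/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q:	the queue
 *
 * Kills the request timeout timer and flushes any pending delayed-run
 * work, so no asynchronous block-layer activity remains for @q.
 */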
286void blk_sync_queue(struct request_queue *q)
287{
288 del_timer_sync(&q->timeout);
289 cancel_delayed_work_sync(&q->delay_work);
290}
291EXPORT_SYMBOL(blk_sync_queue);
292
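/**
 * __blk_run_queue - run a single device queue
 * @q:	the queue to run
 *
 * Invokes the queue's request_fn unless the queue has been stopped.
 * The caller is expected to hold the queue lock.
 */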
301void __blk_run_queue(struct request_queue *q)
302{
303 if (unlikely(blk_queue_stopped(q)))
304 return;
305
306 q->request_fn(q);
307}
308EXPORT_SYMBOL(__blk_run_queue);
309
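/**
 * blk_run_queue_async - run a single device queue in workqueue context
 * @q:	the queue to run
 *
 * Asks kblockd to run the queue (the equivalent of blk_run_queue())
 * instead of invoking the request_fn directly in the caller's context;
 * does nothing if the queue is stopped.
 */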
318void blk_run_queue_async(struct request_queue *q)
319{
320 if (likely(!blk_queue_stopped(q))) {
321 __cancel_delayed_work(&q->delay_work);
322 queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
323 }
324}
325EXPORT_SYMBOL(blk_run_queue_async);
326
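/**
 * blk_run_queue - run a single device queue
 * @q:	the queue to run
 *
 * Takes the queue lock, disabling interrupts, and calls __blk_run_queue().
 */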
335void blk_run_queue(struct request_queue *q)
336{
337 unsigned long flags;
338
339 spin_lock_irqsave(q->queue_lock, flags);
340 __blk_run_queue(q);
341 spin_unlock_irqrestore(q->queue_lock, flags);
342}
343EXPORT_SYMBOL(blk_run_queue);
344
345void blk_put_queue(struct request_queue *q)
346{
347 kobject_put(&q->kobj);
348}
349EXPORT_SYMBOL(blk_put_queue);
350
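/**
 * blk_drain_queue - drain requests from request_queue
 * @q:		queue to drain
 * @drain_all:	whether to drain all requests or only elevator-private ones
 *
 * Repeatedly runs the queue and sleeps until nothing is left: the
 * elevator is always drained, and with @drain_all set the throttling
 * layer, the flush machinery and any remaining queued or in-flight
 * requests are waited for as well.
 */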
360void blk_drain_queue(struct request_queue *q, bool drain_all)
361{
362 while (true) {
363 bool drain = false;
364 int i;
365
366 spin_lock_irq(q->queue_lock);
367
368 elv_drain_elevator(q);
369 if (drain_all)
370 blk_throtl_drain(q);
371
372
373
374
375
376
377
378 if (!list_empty(&q->queue_head))
379 __blk_run_queue(q);
380
381 drain |= q->rq.elvpriv;
382
383
384
385
386
387
388 if (drain_all) {
389 drain |= !list_empty(&q->queue_head);
390 for (i = 0; i < 2; i++) {
391 drain |= q->rq.count[i];
392 drain |= q->in_flight[i];
393 drain |= !list_empty(&q->flush_queue[i]);
394 }
395 }
396
397 spin_unlock_irq(q->queue_lock);
398
399 if (!drain)
400 break;
401 msleep(10);
402 }
403}
404
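/**
 * blk_cleanup_queue - shutdown a request queue
 * @q:	request queue to shut down
 *
 * Marks @q dead, disables merging, drains any remaining requests and
 * drops the reference obtained at allocation time.  No new requests may
 * be started on @q afterwards.
 */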
412void blk_cleanup_queue(struct request_queue *q)
413{
414 spinlock_t *lock = q->queue_lock;
415
416
417 mutex_lock(&q->sysfs_lock);
418 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
419
420 spin_lock_irq(lock);
421 queue_flag_set(QUEUE_FLAG_NOMERGES, q);
422 queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
423 queue_flag_set(QUEUE_FLAG_DEAD, q);
424
425 if (q->queue_lock != &q->__queue_lock)
426 q->queue_lock = &q->__queue_lock;
427
428 spin_unlock_irq(lock);
429 mutex_unlock(&q->sysfs_lock);
430
431
432
433
434
435
436 if (q->elevator)
437 blk_drain_queue(q, true);
438
439
440 del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
441 blk_sync_queue(q);
442
443
444 blk_put_queue(q);
445}
446EXPORT_SYMBOL(blk_cleanup_queue);
447
448static int blk_init_free_list(struct request_queue *q)
449{
450 struct request_list *rl = &q->rq;
451
452 if (unlikely(rl->rq_pool))
453 return 0;
454
455 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
456 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
457 rl->elvpriv = 0;
458 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
459 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
460
461 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
462 mempool_free_slab, request_cachep, q->node);
463
464 if (!rl->rq_pool)
465 return -ENOMEM;
466
467 return 0;
468}
469
470struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
471{
472 return blk_alloc_queue_node(gfp_mask, -1);
473}
474EXPORT_SYMBOL(blk_alloc_queue);
475
476struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
477{
478 struct request_queue *q;
479 int err;
480
481 q = kmem_cache_alloc_node(blk_requestq_cachep,
482 gfp_mask | __GFP_ZERO, node_id);
483 if (!q)
484 return NULL;
485
486 q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
487 if (q->id < 0)
488 goto fail_q;
489
490 q->backing_dev_info.ra_pages =
491 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
492 q->backing_dev_info.state = 0;
493 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
494 q->backing_dev_info.name = "block";
495 q->node = node_id;
496
497 err = bdi_init(&q->backing_dev_info);
498 if (err)
499 goto fail_id;
500
501 if (blk_throtl_init(q))
502 goto fail_id;
503
504 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
505 laptop_mode_timer_fn, (unsigned long) q);
506 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
507 INIT_LIST_HEAD(&q->timeout_list);
508 INIT_LIST_HEAD(&q->icq_list);
509 INIT_LIST_HEAD(&q->flush_queue[0]);
510 INIT_LIST_HEAD(&q->flush_queue[1]);
511 INIT_LIST_HEAD(&q->flush_data_in_flight);
512 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
513
514 kobject_init(&q->kobj, &blk_queue_ktype);
515
516 mutex_init(&q->sysfs_lock);
517 spin_lock_init(&q->__queue_lock);
518
519
520
521
522
523 q->queue_lock = &q->__queue_lock;
524
525 return q;
526
527fail_id:
528 ida_simple_remove(&blk_queue_ida, q->id);
529fail_q:
530 kmem_cache_free(blk_requestq_cachep, q);
531 return NULL;
532}
533EXPORT_SYMBOL(blk_alloc_queue_node);
534
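/**
 * blk_init_queue - prepare a request queue for use with a block device
 * @rfn:  the strategy routine (request_fn) that will be called to
 *        process requests placed on the queue
 * @lock: request queue spin lock
 *
 * Allocates and initializes a request queue for a request-based driver:
 * @rfn is called, with the queue lock held, whenever there is work to do
 * on the queue.  Returns the initialized queue, or %NULL on failure; the
 * queue must be released with blk_cleanup_queue() when it is no longer
 * needed.
 */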
568struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
569{
570 return blk_init_queue_node(rfn, lock, -1);
571}
572EXPORT_SYMBOL(blk_init_queue);
573
574struct request_queue *
575blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
576{
577 struct request_queue *uninit_q, *q;
578
579 uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
580 if (!uninit_q)
581 return NULL;
582
583 q = blk_init_allocated_queue(uninit_q, rfn, lock);
584 if (!q)
585 blk_cleanup_queue(uninit_q);
586
587 return q;
588}
589EXPORT_SYMBOL(blk_init_queue_node);
590
591struct request_queue *
592blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
593 spinlock_t *lock)
594{
595 if (!q)
596 return NULL;
597
598 if (blk_init_free_list(q))
599 return NULL;
600
601 q->request_fn = rfn;
602 q->prep_rq_fn = NULL;
603 q->unprep_rq_fn = NULL;
604 q->queue_flags = QUEUE_FLAG_DEFAULT;
605
606
607 if (lock)
608 q->queue_lock = lock;
609
610
611
612
613 blk_queue_make_request(q, blk_queue_bio);
614
615 q->sg_reserved_size = INT_MAX;
616
617
618
619
620 if (!elevator_init(q, NULL)) {
621 blk_queue_congestion_threshold(q);
622 return q;
623 }
624
625 return NULL;
626}
627EXPORT_SYMBOL(blk_init_allocated_queue);
628
629bool blk_get_queue(struct request_queue *q)
630{
631 if (likely(!blk_queue_dead(q))) {
632 __blk_get_queue(q);
633 return true;
634 }
635
636 return false;
637}
638EXPORT_SYMBOL(blk_get_queue);
639
640static inline void blk_free_request(struct request_queue *q, struct request *rq)
641{
642 if (rq->cmd_flags & REQ_ELVPRIV) {
643 elv_put_request(q, rq);
644 if (rq->elv.icq)
645 put_io_context(rq->elv.icq->ioc);
646 }
647
648 mempool_free(rq, q->rq.rq_pool);
649}
650
651static struct request *
652blk_alloc_request(struct request_queue *q, struct io_cq *icq,
653 unsigned int flags, gfp_t gfp_mask)
654{
655 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
656
657 if (!rq)
658 return NULL;
659
660 blk_rq_init(q, rq);
661
662 rq->cmd_flags = flags | REQ_ALLOCED;
663
664 if (flags & REQ_ELVPRIV) {
665 rq->elv.icq = icq;
666 if (unlikely(elv_set_request(q, rq, gfp_mask))) {
667 mempool_free(rq, q->rq.rq_pool);
668 return NULL;
669 }
670
671 if (icq)
672 get_io_context(icq->ioc);
673 }
674
675 return rq;
676}
677
678
679
680
681
682static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
683{
684 if (!ioc)
685 return 0;
686
687
688
689
690
691
692 return ioc->nr_batch_requests == q->nr_batching ||
693 (ioc->nr_batch_requests > 0
694 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
695}
696
697
698
699
700
701
702
703static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
704{
705 if (!ioc || ioc_batching(q, ioc))
706 return;
707
708 ioc->nr_batch_requests = q->nr_batching;
709 ioc->last_waited = jiffies;
710}
711
712static void __freed_request(struct request_queue *q, int sync)
713{
714 struct request_list *rl = &q->rq;
715
716 if (rl->count[sync] < queue_congestion_off_threshold(q))
717 blk_clear_queue_congested(q, sync);
718
719 if (rl->count[sync] + 1 <= q->nr_requests) {
720 if (waitqueue_active(&rl->wait[sync]))
721 wake_up(&rl->wait[sync]);
722
723 blk_clear_queue_full(q, sync);
724 }
725}
726
727
728
729
730
731static void freed_request(struct request_queue *q, unsigned int flags)
732{
733 struct request_list *rl = &q->rq;
734 int sync = rw_is_sync(flags);
735
736 rl->count[sync]--;
737 if (flags & REQ_ELVPRIV)
738 rl->elvpriv--;
739
740 __freed_request(q, sync);
741
742 if (unlikely(rl->starved[sync ^ 1]))
743 __freed_request(q, sync ^ 1);
744}
745
746
747
748
749
750static bool blk_rq_should_init_elevator(struct bio *bio)
751{
752 if (!bio)
753 return true;
754
755
756
757
758
759 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))
760 return false;
761
762 return true;
763}
764
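/**
 * get_request - get a free request
 * @q:		request_queue to allocate request from
 * @rw_flags:	RW and SYNC flags
 * @bio:	bio to allocate request for (can be %NULL)
 * @gfp_mask:	allocation mask
 *
 * Allocates a request subject to the congestion and batching limits.
 * Must be called with @q->queue_lock held.  Returns %NULL on failure
 * with the lock still held; on success the request is returned with the
 * queue lock released.
 */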
779static struct request *get_request(struct request_queue *q, int rw_flags,
780 struct bio *bio, gfp_t gfp_mask)
781{
782 struct request *rq = NULL;
783 struct request_list *rl = &q->rq;
784 struct elevator_type *et;
785 struct io_context *ioc;
786 struct io_cq *icq = NULL;
787 const bool is_sync = rw_is_sync(rw_flags) != 0;
788 bool retried = false;
789 int may_queue;
790retry:
791 et = q->elevator->type;
792 ioc = current->io_context;
793
794 if (unlikely(blk_queue_dead(q)))
795 return NULL;
796
797 may_queue = elv_may_queue(q, rw_flags);
798 if (may_queue == ELV_MQUEUE_NO)
799 goto rq_starved;
800
801 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
802 if (rl->count[is_sync]+1 >= q->nr_requests) {
803
804
805
806
807
808
809 if (!ioc && !retried) {
810 spin_unlock_irq(q->queue_lock);
811 create_io_context(current, gfp_mask, q->node);
812 spin_lock_irq(q->queue_lock);
813 retried = true;
814 goto retry;
815 }
816
817
818
819
820
821
822
823 if (!blk_queue_full(q, is_sync)) {
824 ioc_set_batching(q, ioc);
825 blk_set_queue_full(q, is_sync);
826 } else {
827 if (may_queue != ELV_MQUEUE_MUST
828 && !ioc_batching(q, ioc)) {
829
830
831
832
833
834 goto out;
835 }
836 }
837 }
838 blk_set_queue_congested(q, is_sync);
839 }
840
841
842
843
844
845
846 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
847 goto out;
848
849 rl->count[is_sync]++;
850 rl->starved[is_sync] = 0;
851
852
853
854
855
856
857
858
859
860
861
862 if (blk_rq_should_init_elevator(bio) &&
863 !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
864 rw_flags |= REQ_ELVPRIV;
865 rl->elvpriv++;
866 if (et->icq_cache && ioc)
867 icq = ioc_lookup_icq(ioc, q);
868 }
869
870 if (blk_queue_io_stat(q))
871 rw_flags |= REQ_IO_STAT;
872 spin_unlock_irq(q->queue_lock);
873
874
875 if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
876 icq = ioc_create_icq(q, gfp_mask);
877 if (!icq)
878 goto fail_icq;
879 }
880
881 rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
882
883fail_icq:
884 if (unlikely(!rq)) {
885
886
887
888
889
890
891
892 spin_lock_irq(q->queue_lock);
893 freed_request(q, rw_flags);
894
895
896
897
898
899
900
901
902rq_starved:
903 if (unlikely(rl->count[is_sync] == 0))
904 rl->starved[is_sync] = 1;
905
906 goto out;
907 }
908
909
910
911
912
913
914
915 if (ioc_batching(q, ioc))
916 ioc->nr_batch_requests--;
917
918 trace_block_getrq(q, bio, rw_flags & 1);
919out:
920 return rq;
921}
922
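/**
 * get_request_wait - get a free request, sleeping if necessary
 * @q:		request_queue to allocate request from
 * @rw_flags:	RW and SYNC flags
 * @bio:	bio to allocate request for (can be %NULL)
 *
 * Like get_request(), but retries with %GFP_NOIO and sleeps on the
 * request list waitqueue until an allocation succeeds.  Returns %NULL
 * only if the queue is dead.  Must be called with @q->queue_lock held;
 * as with get_request(), the lock is released when a request is
 * returned.
 */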
static struct request *get_request_wait(struct request_queue *q, int rw_flags,
					struct bio *bio)
{
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	struct request *rq;

	rq = get_request(q, rw_flags, bio, GFP_NOIO);
	while (!rq) {
		DEFINE_WAIT(wait);
		struct request_list *rl = &q->rq;

		if (unlikely(blk_queue_dead(q)))
			return NULL;

		prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
				TASK_UNINTERRUPTIBLE);

		trace_block_sleeprq(q, bio, rw_flags & 1);

		spin_unlock_irq(q->queue_lock);
		io_schedule();

		/*
		 * After sleeping we become a "batching" process and will
		 * be allowed to allocate at least one request.  Make sure
		 * an io_context exists so the batching state can be
		 * recorded before retrying the allocation.
		 */
		create_io_context(current, GFP_NOIO, q->node);
		ioc_set_batching(q, current->io_context);

		spin_lock_irq(q->queue_lock);
		finish_wait(&rl->wait[is_sync], &wait);

		rq = get_request(q, rw_flags, bio, GFP_NOIO);
	}

	return rq;
}
975
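/**
 * blk_get_request - allocate a request for a queue
 * @q:		request queue
 * @rw:		%READ or %WRITE
 * @gfp_mask:	allocation mask; with __GFP_WAIT the call may sleep until
 *		a request becomes available
 *
 * Returns the allocated request, or %NULL if the queue is dead or no
 * request could be allocated under the given mask.
 */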
976struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
977{
978 struct request *rq;
979
980 BUG_ON(rw != READ && rw != WRITE);
981
982 spin_lock_irq(q->queue_lock);
983 if (gfp_mask & __GFP_WAIT)
984 rq = get_request_wait(q, rw, NULL);
985 else
986 rq = get_request(q, rw, NULL, gfp_mask);
987 if (!rq)
988 spin_unlock_irq(q->queue_lock);
989
990
991 return rq;
992}
993EXPORT_SYMBOL(blk_get_request);
994
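/**
 * blk_make_request - given a bio, allocate a corresponding struct request
 * @q:		target request queue
 * @bio:	bio (or chain of bios) describing the memory and I/O direction
 * @gfp_mask:	gfp flags to be used for request allocation
 *
 * Allocates a request, bounces each bio in the chain as needed and
 * appends the bios to the request.  Returns the request, or an
 * ERR_PTR() value on allocation or append failure.
 */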
1026struct request *blk_make_request(struct request_queue *q, struct bio *bio,
1027 gfp_t gfp_mask)
1028{
1029 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
1030
1031 if (unlikely(!rq))
1032 return ERR_PTR(-ENOMEM);
1033
1034 for_each_bio(bio) {
1035 struct bio *bounce_bio = bio;
1036 int ret;
1037
1038 blk_queue_bounce(q, &bounce_bio);
1039 ret = blk_rq_append_bio(q, rq, bounce_bio);
1040 if (unlikely(ret)) {
1041 blk_put_request(rq);
1042 return ERR_PTR(ret);
1043 }
1044 }
1045
1046 return rq;
1047}
1048EXPORT_SYMBOL(blk_make_request);
1049
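/**
 * blk_requeue_request - put a request back on the queue
 * @q:	request queue
 * @rq:	request to requeue
 *
 * Stops any pending timeout, clears the completion state, drops the tag
 * if the request was tagged and hands the request back to the elevator.
 */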
1060void blk_requeue_request(struct request_queue *q, struct request *rq)
1061{
1062 blk_delete_timer(rq);
1063 blk_clear_rq_complete(rq);
1064 trace_block_rq_requeue(q, rq);
1065
1066 if (blk_rq_tagged(rq))
1067 blk_queue_end_tag(q, rq);
1068
1069 BUG_ON(blk_queued_rq(rq));
1070
1071 elv_requeue_request(q, rq);
1072}
1073EXPORT_SYMBOL(blk_requeue_request);
1074
1075static void add_acct_request(struct request_queue *q, struct request *rq,
1076 int where)
1077{
1078 drive_stat_acct(rq, 1);
1079 __elv_add_request(q, rq, where);
1080}
1081
1082static void part_round_stats_single(int cpu, struct hd_struct *part,
1083 unsigned long now)
1084{
1085 if (now == part->stamp)
1086 return;
1087
1088 if (part_in_flight(part)) {
1089 __part_stat_add(cpu, part, time_in_queue,
1090 part_in_flight(part) * (now - part->stamp));
1091 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1092 }
1093 part->stamp = now;
1094}
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112void part_round_stats(int cpu, struct hd_struct *part)
1113{
1114 unsigned long now = jiffies;
1115
1116 if (part->partno)
1117 part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
1118 part_round_stats_single(cpu, part, now);
1119}
1120EXPORT_SYMBOL_GPL(part_round_stats);
1121
1122
1123
1124
1125void __blk_put_request(struct request_queue *q, struct request *req)
1126{
1127 if (unlikely(!q))
1128 return;
1129 if (unlikely(--req->ref_count))
1130 return;
1131
1132 elv_completed_request(q, req);
1133
1134
1135 WARN_ON(req->bio != NULL);
1136
1137
1138
1139
1140
1141 if (req->cmd_flags & REQ_ALLOCED) {
1142 unsigned int flags = req->cmd_flags;
1143
1144 BUG_ON(!list_empty(&req->queuelist));
1145 BUG_ON(!hlist_unhashed(&req->hash));
1146
1147 blk_free_request(q, req);
1148 freed_request(q, flags);
1149 }
1150}
1151EXPORT_SYMBOL_GPL(__blk_put_request);
1152
1153void blk_put_request(struct request *req)
1154{
1155 unsigned long flags;
1156 struct request_queue *q = req->q;
1157
1158 spin_lock_irqsave(q->queue_lock, flags);
1159 __blk_put_request(q, req);
1160 spin_unlock_irqrestore(q->queue_lock, flags);
1161}
1162EXPORT_SYMBOL(blk_put_request);
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177void blk_add_request_payload(struct request *rq, struct page *page,
1178 unsigned int len)
1179{
1180 struct bio *bio = rq->bio;
1181
1182 bio->bi_io_vec->bv_page = page;
1183 bio->bi_io_vec->bv_offset = 0;
1184 bio->bi_io_vec->bv_len = len;
1185
1186 bio->bi_size = len;
1187 bio->bi_vcnt = 1;
1188 bio->bi_phys_segments = 1;
1189
1190 rq->__data_len = rq->resid_len = len;
1191 rq->nr_phys_segments = 1;
1192 rq->buffer = bio_data(bio);
1193}
1194EXPORT_SYMBOL_GPL(blk_add_request_payload);
1195
1196static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
1197 struct bio *bio)
1198{
1199 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1200
1201 if (!ll_back_merge_fn(q, req, bio))
1202 return false;
1203
1204 trace_block_bio_backmerge(q, bio);
1205
1206 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1207 blk_rq_set_mixed_merge(req);
1208
1209 req->biotail->bi_next = bio;
1210 req->biotail = bio;
1211 req->__data_len += bio->bi_size;
1212 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1213
1214 drive_stat_acct(req, 0);
1215 return true;
1216}
1217
1218static bool bio_attempt_front_merge(struct request_queue *q,
1219 struct request *req, struct bio *bio)
1220{
1221 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1222
1223 if (!ll_front_merge_fn(q, req, bio))
1224 return false;
1225
1226 trace_block_bio_frontmerge(q, bio);
1227
1228 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1229 blk_rq_set_mixed_merge(req);
1230
1231 bio->bi_next = req->bio;
1232 req->bio = bio;
1233
1234
1235
1236
1237
1238
1239 req->buffer = bio_data(bio);
1240 req->__sector = bio->bi_sector;
1241 req->__data_len += bio->bi_size;
1242 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1243
1244 drive_stat_acct(req, 0);
1245 return true;
1246}
1247
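/*
 * Attempt to merge @bio into a request already sitting on the current
 * task's plug list, without taking the queue lock.  @request_count is
 * set to the number of plugged requests destined for @q.  Returns true
 * if the bio was merged.
 */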
1265static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
1266 unsigned int *request_count)
1267{
1268 struct blk_plug *plug;
1269 struct request *rq;
1270 bool ret = false;
1271
1272 plug = current->plug;
1273 if (!plug)
1274 goto out;
1275 *request_count = 0;
1276
1277 list_for_each_entry_reverse(rq, &plug->list, queuelist) {
1278 int el_ret;
1279
1280 if (rq->q == q)
1281 (*request_count)++;
1282
1283 if (rq->q != q || !blk_rq_merge_ok(rq, bio))
1284 continue;
1285
1286 el_ret = blk_try_merge(rq, bio);
1287 if (el_ret == ELEVATOR_BACK_MERGE) {
1288 ret = bio_attempt_back_merge(q, rq, bio);
1289 if (ret)
1290 break;
1291 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1292 ret = bio_attempt_front_merge(q, rq, bio);
1293 if (ret)
1294 break;
1295 }
1296 }
1297out:
1298 return ret;
1299}
1300
1301void init_request_from_bio(struct request *req, struct bio *bio)
1302{
1303 req->cmd_type = REQ_TYPE_FS;
1304
1305 req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
1306 if (bio->bi_rw & REQ_RAHEAD)
1307 req->cmd_flags |= REQ_FAILFAST_MASK;
1308
1309 req->errors = 0;
1310 req->__sector = bio->bi_sector;
1311 req->ioprio = bio_prio(bio);
1312 blk_rq_bio_prep(req->q, req, bio);
1313}
1314
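/*
 * blk_queue_bio - default make_request function for request-based
 * drivers: try plug-list and elevator merges first, otherwise allocate
 * a new request and either add it to the task's plug list or insert it
 * into the elevator and run the queue.
 */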
1315void blk_queue_bio(struct request_queue *q, struct bio *bio)
1316{
1317 const bool sync = !!(bio->bi_rw & REQ_SYNC);
1318 struct blk_plug *plug;
1319 int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
1320 struct request *req;
1321 unsigned int request_count = 0;
1322
1323
1324
1325
1326
1327
1328 blk_queue_bounce(q, &bio);
1329
1330 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
1331 spin_lock_irq(q->queue_lock);
1332 where = ELEVATOR_INSERT_FLUSH;
1333 goto get_rq;
1334 }
1335
1336
1337
1338
1339
1340 if (attempt_plug_merge(q, bio, &request_count))
1341 return;
1342
1343 spin_lock_irq(q->queue_lock);
1344
1345 el_ret = elv_merge(q, &req, bio);
1346 if (el_ret == ELEVATOR_BACK_MERGE) {
1347 if (bio_attempt_back_merge(q, req, bio)) {
1348 elv_bio_merged(q, req, bio);
1349 if (!attempt_back_merge(q, req))
1350 elv_merged_request(q, req, el_ret);
1351 goto out_unlock;
1352 }
1353 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1354 if (bio_attempt_front_merge(q, req, bio)) {
1355 elv_bio_merged(q, req, bio);
1356 if (!attempt_front_merge(q, req))
1357 elv_merged_request(q, req, el_ret);
1358 goto out_unlock;
1359 }
1360 }
1361
1362get_rq:
1363
1364
1365
1366
1367
1368 rw_flags = bio_data_dir(bio);
1369 if (sync)
1370 rw_flags |= REQ_SYNC;
1371
1372
1373
1374
1375
1376 req = get_request_wait(q, rw_flags, bio);
1377 if (unlikely(!req)) {
1378 bio_endio(bio, -ENODEV);
1379 goto out_unlock;
1380 }
1381
1382
1383
1384
1385
1386
1387
1388 init_request_from_bio(req, bio);
1389
1390 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
1391 req->cpu = raw_smp_processor_id();
1392
1393 plug = current->plug;
1394 if (plug) {
1395
1396
1397
1398
1399
1400
1401 if (list_empty(&plug->list))
1402 trace_block_plug(q);
1403 else {
1404 if (!plug->should_sort) {
1405 struct request *__rq;
1406
1407 __rq = list_entry_rq(plug->list.prev);
1408 if (__rq->q != q)
1409 plug->should_sort = 1;
1410 }
1411 if (request_count >= BLK_MAX_REQUEST_COUNT) {
1412 blk_flush_plug_list(plug, false);
1413 trace_block_plug(q);
1414 }
1415 }
1416 list_add_tail(&req->queuelist, &plug->list);
1417 drive_stat_acct(req, 1);
1418 } else {
1419 spin_lock_irq(q->queue_lock);
1420 add_acct_request(q, req, where);
1421 __blk_run_queue(q);
1422out_unlock:
1423 spin_unlock_irq(q->queue_lock);
1424 }
1425}
1426EXPORT_SYMBOL_GPL(blk_queue_bio);
1427
1428
1429
1430
1431static inline void blk_partition_remap(struct bio *bio)
1432{
1433 struct block_device *bdev = bio->bi_bdev;
1434
1435 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
1436 struct hd_struct *p = bdev->bd_part;
1437
1438 bio->bi_sector += p->start_sect;
1439 bio->bi_bdev = bdev->bd_contains;
1440
1441 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
1442 bdev->bd_dev,
1443 bio->bi_sector - p->start_sect);
1444 }
1445}
1446
1447static void handle_bad_sector(struct bio *bio)
1448{
1449 char b[BDEVNAME_SIZE];
1450
1451 printk(KERN_INFO "attempt to access beyond end of device\n");
1452 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
1453 bdevname(bio->bi_bdev, b),
1454 bio->bi_rw,
1455 (unsigned long long)bio->bi_sector + bio_sectors(bio),
1456 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
1457
1458 set_bit(BIO_EOF, &bio->bi_flags);
1459}
1460
1461#ifdef CONFIG_FAIL_MAKE_REQUEST
1462
1463static DECLARE_FAULT_ATTR(fail_make_request);
1464
1465static int __init setup_fail_make_request(char *str)
1466{
1467 return setup_fault_attr(&fail_make_request, str);
1468}
1469__setup("fail_make_request=", setup_fail_make_request);
1470
1471static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
1472{
1473 return part->make_it_fail && should_fail(&fail_make_request, bytes);
1474}
1475
1476static int __init fail_make_request_debugfs(void)
1477{
1478 struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
1479 NULL, &fail_make_request);
1480
1481 return IS_ERR(dir) ? PTR_ERR(dir) : 0;
1482}
1483
1484late_initcall(fail_make_request_debugfs);
1485
1486#else
1487
1488static inline bool should_fail_request(struct hd_struct *part,
1489 unsigned int bytes)
1490{
1491 return false;
1492}
1493
1494#endif
1495
1496
1497
1498
1499static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1500{
1501 sector_t maxsector;
1502
1503 if (!nr_sectors)
1504 return 0;
1505
1506
1507 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
1508 if (maxsector) {
1509 sector_t sector = bio->bi_sector;
1510
1511 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
1512
1513
1514
1515
1516
1517 handle_bad_sector(bio);
1518 return 1;
1519 }
1520 }
1521
1522 return 0;
1523}
1524
1525static noinline_for_stack bool
1526generic_make_request_checks(struct bio *bio)
1527{
1528 struct request_queue *q;
1529 int nr_sectors = bio_sectors(bio);
1530 int err = -EIO;
1531 char b[BDEVNAME_SIZE];
1532 struct hd_struct *part;
1533
1534 might_sleep();
1535
1536 if (bio_check_eod(bio, nr_sectors))
1537 goto end_io;
1538
1539 q = bdev_get_queue(bio->bi_bdev);
1540 if (unlikely(!q)) {
1541 printk(KERN_ERR
1542 "generic_make_request: Trying to access "
1543 "nonexistent block-device %s (%Lu)\n",
1544 bdevname(bio->bi_bdev, b),
1545 (long long) bio->bi_sector);
1546 goto end_io;
1547 }
1548
1549 if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
1550 nr_sectors > queue_max_hw_sectors(q))) {
1551 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
1552 bdevname(bio->bi_bdev, b),
1553 bio_sectors(bio),
1554 queue_max_hw_sectors(q));
1555 goto end_io;
1556 }
1557
1558 part = bio->bi_bdev->bd_part;
1559 if (should_fail_request(part, bio->bi_size) ||
1560 should_fail_request(&part_to_disk(part)->part0,
1561 bio->bi_size))
1562 goto end_io;
1563
1564
1565
1566
1567
1568 blk_partition_remap(bio);
1569
1570 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1571 goto end_io;
1572
1573 if (bio_check_eod(bio, nr_sectors))
1574 goto end_io;
1575
1576
1577
1578
1579
1580
1581 if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
1582 bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
1583 if (!nr_sectors) {
1584 err = 0;
1585 goto end_io;
1586 }
1587 }
1588
1589 if ((bio->bi_rw & REQ_DISCARD) &&
1590 (!blk_queue_discard(q) ||
1591 ((bio->bi_rw & REQ_SECURE) &&
1592 !blk_queue_secdiscard(q)))) {
1593 err = -EOPNOTSUPP;
1594 goto end_io;
1595 }
1596
1597 if (blk_throtl_bio(q, bio))
1598 return false;
1599
1600 trace_block_bio_queue(q, bio);
1601 return true;
1602
1603end_io:
1604 bio_endio(bio, err);
1605 return false;
1606}
1607
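/**
 * generic_make_request - hand a buffer to its device driver for I/O
 * @bio:  the bio describing the location in memory and on the device
 *
 * Hands the bio to the make_request function of the target device's
 * queue.  To avoid unbounded recursion when stacked devices submit new
 * bios from within their own make_request functions, bios generated
 * while one is already being processed are collected on a per-task list
 * and submitted iteratively from the top-level call.
 */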
1632void generic_make_request(struct bio *bio)
1633{
1634 struct bio_list bio_list_on_stack;
1635
1636 if (!generic_make_request_checks(bio))
1637 return;
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649 if (current->bio_list) {
1650 bio_list_add(current->bio_list, bio);
1651 return;
1652 }
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668 BUG_ON(bio->bi_next);
1669 bio_list_init(&bio_list_on_stack);
1670 current->bio_list = &bio_list_on_stack;
1671 do {
1672 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
1673
1674 q->make_request_fn(q, bio);
1675
1676 bio = bio_list_pop(current->bio_list);
1677 } while (bio);
1678 current->bio_list = NULL;
1679}
1680EXPORT_SYMBOL(generic_make_request);
1681
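/**
 * submit_bio - submit a bio to the block device layer for I/O
 * @rw:		%READ or %WRITE, plus any REQ_* modifier flags
 * @bio:	the &struct bio which describes the I/O
 *
 * Folds @rw into the bio's flags, accounts the I/O in the VM and task
 * I/O statistics and passes the bio on to generic_make_request().
 */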
1692void submit_bio(int rw, struct bio *bio)
1693{
1694 int count = bio_sectors(bio);
1695
1696 bio->bi_rw |= rw;
1697
1698
1699
1700
1701
1702 if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
1703 if (rw & WRITE) {
1704 count_vm_events(PGPGOUT, count);
1705 } else {
1706 task_io_account_read(bio->bi_size);
1707 count_vm_events(PGPGIN, count);
1708 }
1709
1710 if (unlikely(block_dump)) {
1711 char b[BDEVNAME_SIZE];
1712 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
1713 current->comm, task_pid_nr(current),
1714 (rw & WRITE) ? "WRITE" : "READ",
1715 (unsigned long long)bio->bi_sector,
1716 bdevname(bio->bi_bdev, b),
1717 count);
1718 }
1719 }
1720
1721 generic_make_request(bio);
1722}
1723EXPORT_SYMBOL(submit_bio);
1724
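/**
 * blk_rq_check_limits - check a request against the queue limits
 * @q:	the queue
 * @rq:	the request being checked
 *
 * Checks a (possibly cloned) request against @q's size and segment
 * limits and recounts its physical segments.  Returns 0 if the request
 * is acceptable, -EIO otherwise.  Discard requests bypass the checks.
 */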
1746int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1747{
1748 if (rq->cmd_flags & REQ_DISCARD)
1749 return 0;
1750
1751 if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
1752 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
1753 printk(KERN_ERR "%s: over max size limit.\n", __func__);
1754 return -EIO;
1755 }
1756
1757
1758
1759
1760
1761
1762
1763 blk_recalc_rq_segments(rq);
1764 if (rq->nr_phys_segments > queue_max_segments(q)) {
1765 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
1766 return -EIO;
1767 }
1768
1769 return 0;
1770}
1771EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1772
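/**
 * blk_insert_cloned_request - helper for stacking drivers to submit a request
 * @q:	the queue to submit the request to
 * @rq:	the request being queued
 *
 * Checks @rq against @q's limits and inserts it directly at the back of
 * the queue, bypassing the merge paths; flush requests are inserted via
 * the flush machinery and the queue is run immediately.  Returns 0 on
 * success, -EIO if the limits are exceeded, or -ENODEV if @q is dead.
 */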
1778int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1779{
1780 unsigned long flags;
1781 int where = ELEVATOR_INSERT_BACK;
1782
1783 if (blk_rq_check_limits(q, rq))
1784 return -EIO;
1785
1786 if (rq->rq_disk &&
1787 should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
1788 return -EIO;
1789
1790 spin_lock_irqsave(q->queue_lock, flags);
1791 if (unlikely(blk_queue_dead(q))) {
1792 spin_unlock_irqrestore(q->queue_lock, flags);
1793 return -ENODEV;
1794 }
1795
1796
1797
1798
1799
1800 BUG_ON(blk_queued_rq(rq));
1801
1802 if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
1803 where = ELEVATOR_INSERT_FLUSH;
1804
1805 add_acct_request(q, rq, where);
1806 if (where == ELEVATOR_INSERT_FLUSH)
1807 __blk_run_queue(q);
1808 spin_unlock_irqrestore(q->queue_lock, flags);
1809
1810 return 0;
1811}
1812EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830unsigned int blk_rq_err_bytes(const struct request *rq)
1831{
1832 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
1833 unsigned int bytes = 0;
1834 struct bio *bio;
1835
1836 if (!(rq->cmd_flags & REQ_MIXED_MERGE))
1837 return blk_rq_bytes(rq);
1838
1839
1840
1841
1842
1843
1844
1845
1846 for (bio = rq->bio; bio; bio = bio->bi_next) {
1847 if ((bio->bi_rw & ff) != ff)
1848 break;
1849 bytes += bio->bi_size;
1850 }
1851
1852
1853 BUG_ON(blk_rq_bytes(rq) && !bytes);
1854 return bytes;
1855}
1856EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
1857
1858static void blk_account_io_completion(struct request *req, unsigned int bytes)
1859{
1860 if (blk_do_io_stat(req)) {
1861 const int rw = rq_data_dir(req);
1862 struct hd_struct *part;
1863 int cpu;
1864
1865 cpu = part_stat_lock();
1866 part = req->part;
1867 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
1868 part_stat_unlock();
1869 }
1870}
1871
1872static void blk_account_io_done(struct request *req)
1873{
1874
1875
1876
1877
1878
1879 if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
1880 unsigned long duration = jiffies - req->start_time;
1881 const int rw = rq_data_dir(req);
1882 struct hd_struct *part;
1883 int cpu;
1884
1885 cpu = part_stat_lock();
1886 part = req->part;
1887
1888 part_stat_inc(cpu, part, ios[rw]);
1889 part_stat_add(cpu, part, ticks[rw], duration);
1890 part_round_stats(cpu, part);
1891 part_dec_in_flight(part, rw);
1892
1893 hd_struct_put(part);
1894 part_stat_unlock();
1895 }
1896}
1897
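/**
 * blk_peek_request - peek at the top of a request queue
 * @q:	request queue to peek at
 *
 * Returns the request at the top of @q without dequeueing it, running
 * the driver's prep function on it first if one is set.  Requests
 * deferred by the prep function stay queued and %NULL is returned;
 * killed requests are completed with -EIO.  The queue lock must be held.
 */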
1914struct request *blk_peek_request(struct request_queue *q)
1915{
1916 struct request *rq;
1917 int ret;
1918
1919 while ((rq = __elv_next_request(q)) != NULL) {
1920 if (!(rq->cmd_flags & REQ_STARTED)) {
1921
1922
1923
1924
1925
1926 if (rq->cmd_flags & REQ_SORTED)
1927 elv_activate_rq(q, rq);
1928
1929
1930
1931
1932
1933
1934 rq->cmd_flags |= REQ_STARTED;
1935 trace_block_rq_issue(q, rq);
1936 }
1937
1938 if (!q->boundary_rq || q->boundary_rq == rq) {
1939 q->end_sector = rq_end_sector(rq);
1940 q->boundary_rq = NULL;
1941 }
1942
1943 if (rq->cmd_flags & REQ_DONTPREP)
1944 break;
1945
1946 if (q->dma_drain_size && blk_rq_bytes(rq)) {
1947
1948
1949
1950
1951
1952
1953 rq->nr_phys_segments++;
1954 }
1955
1956 if (!q->prep_rq_fn)
1957 break;
1958
1959 ret = q->prep_rq_fn(q, rq);
1960 if (ret == BLKPREP_OK) {
1961 break;
1962 } else if (ret == BLKPREP_DEFER) {
1963
1964
1965
1966
1967
1968
1969 if (q->dma_drain_size && blk_rq_bytes(rq) &&
1970 !(rq->cmd_flags & REQ_DONTPREP)) {
1971
1972
1973
1974
1975 --rq->nr_phys_segments;
1976 }
1977
1978 rq = NULL;
1979 break;
1980 } else if (ret == BLKPREP_KILL) {
1981 rq->cmd_flags |= REQ_QUIET;
1982
1983
1984
1985
1986 blk_start_request(rq);
1987 __blk_end_request_all(rq, -EIO);
1988 } else {
1989 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
1990 break;
1991 }
1992 }
1993
1994 return rq;
1995}
1996EXPORT_SYMBOL(blk_peek_request);
1997
1998void blk_dequeue_request(struct request *rq)
1999{
2000 struct request_queue *q = rq->q;
2001
2002 BUG_ON(list_empty(&rq->queuelist));
2003 BUG_ON(ELV_ON_HASH(rq));
2004
2005 list_del_init(&rq->queuelist);
2006
2007
2008
2009
2010
2011
2012 if (blk_account_rq(rq)) {
2013 q->in_flight[rq_is_sync(rq)]++;
2014 set_io_start_time_ns(rq);
2015 }
2016}
2017
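/**
 * blk_start_request - start request processing on the driver
 * @req: request to dequeue
 *
 * Dequeues @req, initializes its residual byte counts and starts the
 * request timeout timer.  The queue lock must be held.
 */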
2032void blk_start_request(struct request *req)
2033{
2034 blk_dequeue_request(req);
2035
2036
2037
2038
2039
2040 req->resid_len = blk_rq_bytes(req);
2041 if (unlikely(blk_bidi_rq(req)))
2042 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
2043
2044 blk_add_timer(req);
2045}
2046EXPORT_SYMBOL(blk_start_request);
2047
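/**
 * blk_fetch_request - fetch a request from a request queue
 * @q: request queue to fetch a request from
 *
 * blk_peek_request() followed by blk_start_request() on the returned
 * request, if any.
 */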
2063struct request *blk_fetch_request(struct request_queue *q)
2064{
2065 struct request *rq;
2066
2067 rq = blk_peek_request(q);
2068 if (rq)
2069 blk_start_request(rq);
2070 return rq;
2071}
2072EXPORT_SYMBOL(blk_fetch_request);
2073
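/**
 * blk_update_request - complete @nr_bytes of a request without finishing it
 * @req:      the request being processed
 * @error:    %0 for success, < %0 for error
 * @nr_bytes: number of bytes to complete for @req
 *
 * Ends @nr_bytes worth of bios attached to @req and updates the
 * request's sector, length and segment accounting.  Returns %false when
 * the request has no more data to complete (the caller should finish
 * it), %true while bytes remain outstanding.
 */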
2096bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2097{
2098 int total_bytes, bio_nbytes, next_idx = 0;
2099 struct bio *bio;
2100
2101 if (!req->bio)
2102 return false;
2103
2104 trace_block_rq_complete(req->q, req);
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114 if (req->cmd_type == REQ_TYPE_FS)
2115 req->errors = 0;
2116
2117 if (error && req->cmd_type == REQ_TYPE_FS &&
2118 !(req->cmd_flags & REQ_QUIET)) {
2119 char *error_type;
2120
2121 switch (error) {
2122 case -ENOLINK:
2123 error_type = "recoverable transport";
2124 break;
2125 case -EREMOTEIO:
2126 error_type = "critical target";
2127 break;
2128 case -EBADE:
2129 error_type = "critical nexus";
2130 break;
2131 case -EIO:
2132 default:
2133 error_type = "I/O";
2134 break;
2135 }
2136 printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n",
2137 error_type, req->rq_disk ? req->rq_disk->disk_name : "?",
2138 (unsigned long long)blk_rq_pos(req));
2139 }
2140
2141 blk_account_io_completion(req, nr_bytes);
2142
2143 total_bytes = bio_nbytes = 0;
2144 while ((bio = req->bio) != NULL) {
2145 int nbytes;
2146
2147 if (nr_bytes >= bio->bi_size) {
2148 req->bio = bio->bi_next;
2149 nbytes = bio->bi_size;
2150 req_bio_endio(req, bio, nbytes, error);
2151 next_idx = 0;
2152 bio_nbytes = 0;
2153 } else {
2154 int idx = bio->bi_idx + next_idx;
2155
2156 if (unlikely(idx >= bio->bi_vcnt)) {
2157 blk_dump_rq_flags(req, "__end_that");
2158 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
2159 __func__, idx, bio->bi_vcnt);
2160 break;
2161 }
2162
2163 nbytes = bio_iovec_idx(bio, idx)->bv_len;
2164 BIO_BUG_ON(nbytes > bio->bi_size);
2165
2166
2167
2168
2169 if (unlikely(nbytes > nr_bytes)) {
2170 bio_nbytes += nr_bytes;
2171 total_bytes += nr_bytes;
2172 break;
2173 }
2174
2175
2176
2177
2178 next_idx++;
2179 bio_nbytes += nbytes;
2180 }
2181
2182 total_bytes += nbytes;
2183 nr_bytes -= nbytes;
2184
2185 bio = req->bio;
2186 if (bio) {
2187
2188
2189
2190 if (unlikely(nr_bytes <= 0))
2191 break;
2192 }
2193 }
2194
2195
2196
2197
2198 if (!req->bio) {
2199
2200
2201
2202
2203
2204 req->__data_len = 0;
2205 return false;
2206 }
2207
2208
2209
2210
2211 if (bio_nbytes) {
2212 req_bio_endio(req, bio, bio_nbytes, error);
2213 bio->bi_idx += next_idx;
2214 bio_iovec(bio)->bv_offset += nr_bytes;
2215 bio_iovec(bio)->bv_len -= nr_bytes;
2216 }
2217
2218 req->__data_len -= total_bytes;
2219 req->buffer = bio_data(req->bio);
2220
2221
2222 if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
2223 req->__sector += total_bytes >> 9;
2224
2225
2226 if (req->cmd_flags & REQ_MIXED_MERGE) {
2227 req->cmd_flags &= ~REQ_FAILFAST_MASK;
2228 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
2229 }
2230
2231
2232
2233
2234
2235 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
2236 blk_dump_rq_flags(req, "request botched");
2237 req->__data_len = blk_rq_cur_bytes(req);
2238 }
2239
2240
2241 blk_recalc_rq_segments(req);
2242
2243 return true;
2244}
2245EXPORT_SYMBOL_GPL(blk_update_request);
2246
2247static bool blk_update_bidi_request(struct request *rq, int error,
2248 unsigned int nr_bytes,
2249 unsigned int bidi_bytes)
2250{
2251 if (blk_update_request(rq, error, nr_bytes))
2252 return true;
2253
2254
2255 if (unlikely(blk_bidi_rq(rq)) &&
2256 blk_update_request(rq->next_rq, error, bidi_bytes))
2257 return true;
2258
2259 if (blk_queue_add_random(rq->q))
2260 add_disk_randomness(rq->rq_disk);
2261
2262 return false;
2263}
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275void blk_unprep_request(struct request *req)
2276{
2277 struct request_queue *q = req->q;
2278
2279 req->cmd_flags &= ~REQ_DONTPREP;
2280 if (q->unprep_rq_fn)
2281 q->unprep_rq_fn(q, req);
2282}
2283EXPORT_SYMBOL_GPL(blk_unprep_request);
2284
2285
2286
2287
2288static void blk_finish_request(struct request *req, int error)
2289{
2290 if (blk_rq_tagged(req))
2291 blk_queue_end_tag(req->q, req);
2292
2293 BUG_ON(blk_queued_rq(req));
2294
2295 if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
2296 laptop_io_completion(&req->q->backing_dev_info);
2297
2298 blk_delete_timer(req);
2299
2300 if (req->cmd_flags & REQ_DONTPREP)
2301 blk_unprep_request(req);
2302
2303
2304 blk_account_io_done(req);
2305
2306 if (req->end_io)
2307 req->end_io(req, error);
2308 else {
2309 if (blk_bidi_rq(req))
2310 __blk_put_request(req->next_rq->q, req->next_rq);
2311
2312 __blk_put_request(req->q, req);
2313 }
2314}
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333static bool blk_end_bidi_request(struct request *rq, int error,
2334 unsigned int nr_bytes, unsigned int bidi_bytes)
2335{
2336 struct request_queue *q = rq->q;
2337 unsigned long flags;
2338
2339 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2340 return true;
2341
2342 spin_lock_irqsave(q->queue_lock, flags);
2343 blk_finish_request(rq, error);
2344 spin_unlock_irqrestore(q->queue_lock, flags);
2345
2346 return false;
2347}
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364bool __blk_end_bidi_request(struct request *rq, int error,
2365 unsigned int nr_bytes, unsigned int bidi_bytes)
2366{
2367 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2368 return true;
2369
2370 blk_finish_request(rq, error);
2371
2372 return false;
2373}
2374
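/**
 * blk_end_request - helper to complete up to @nr_bytes of a request
 * @rq:       the request being processed
 * @error:    %0 for success, < %0 for error
 * @nr_bytes: number of bytes to complete
 *
 * Completes @nr_bytes of @rq and, if nothing remains, finishes the
 * request, taking the queue lock internally.  Returns %true if the
 * request still has outstanding data, %false once it has been finished.
 * Callers that already hold the queue lock should use
 * __blk_end_request() instead.
 */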
2389bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2390{
2391 return blk_end_bidi_request(rq, error, nr_bytes, 0);
2392}
2393EXPORT_SYMBOL(blk_end_request);
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403void blk_end_request_all(struct request *rq, int error)
2404{
2405 bool pending;
2406 unsigned int bidi_bytes = 0;
2407
2408 if (unlikely(blk_bidi_rq(rq)))
2409 bidi_bytes = blk_rq_bytes(rq->next_rq);
2410
2411 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2412 BUG_ON(pending);
2413}
2414EXPORT_SYMBOL(blk_end_request_all);
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428bool blk_end_request_cur(struct request *rq, int error)
2429{
2430 return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2431}
2432EXPORT_SYMBOL(blk_end_request_cur);
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446bool blk_end_request_err(struct request *rq, int error)
2447{
2448 WARN_ON(error >= 0);
2449 return blk_end_request(rq, error, blk_rq_err_bytes(rq));
2450}
2451EXPORT_SYMBOL_GPL(blk_end_request_err);
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2467{
2468 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2469}
2470EXPORT_SYMBOL(__blk_end_request);
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480void __blk_end_request_all(struct request *rq, int error)
2481{
2482 bool pending;
2483 unsigned int bidi_bytes = 0;
2484
2485 if (unlikely(blk_bidi_rq(rq)))
2486 bidi_bytes = blk_rq_bytes(rq->next_rq);
2487
2488 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2489 BUG_ON(pending);
2490}
2491EXPORT_SYMBOL(__blk_end_request_all);
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506bool __blk_end_request_cur(struct request *rq, int error)
2507{
2508 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2509}
2510EXPORT_SYMBOL(__blk_end_request_cur);
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525bool __blk_end_request_err(struct request *rq, int error)
2526{
2527 WARN_ON(error >= 0);
2528 return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
2529}
2530EXPORT_SYMBOL_GPL(__blk_end_request_err);
2531
2532void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2533 struct bio *bio)
2534{
2535
2536 rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
2537
2538 if (bio_has_data(bio)) {
2539 rq->nr_phys_segments = bio_phys_segments(q, bio);
2540 rq->buffer = bio_data(bio);
2541 }
2542 rq->__data_len = bio->bi_size;
2543 rq->bio = rq->biotail = bio;
2544
2545 if (bio->bi_bdev)
2546 rq->rq_disk = bio->bi_bdev->bd_disk;
2547}
2548
2549#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
2550
2551
2552
2553
2554
2555
2556
2557void rq_flush_dcache_pages(struct request *rq)
2558{
2559 struct req_iterator iter;
2560 struct bio_vec *bvec;
2561
2562 rq_for_each_segment(bvec, rq, iter)
2563 flush_dcache_page(bvec->bv_page);
2564}
2565EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
2566#endif
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587int blk_lld_busy(struct request_queue *q)
2588{
2589 if (q->lld_busy_fn)
2590 return q->lld_busy_fn(q);
2591
2592 return 0;
2593}
2594EXPORT_SYMBOL_GPL(blk_lld_busy);
2595
2596
2597
2598
2599
2600
2601
2602
2603void blk_rq_unprep_clone(struct request *rq)
2604{
2605 struct bio *bio;
2606
2607 while ((bio = rq->bio) != NULL) {
2608 rq->bio = bio->bi_next;
2609
2610 bio_put(bio);
2611 }
2612}
2613EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
2614
2615
2616
2617
2618
2619static void __blk_rq_prep_clone(struct request *dst, struct request *src)
2620{
2621 dst->cpu = src->cpu;
2622 dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
2623 dst->cmd_type = src->cmd_type;
2624 dst->__sector = blk_rq_pos(src);
2625 dst->__data_len = blk_rq_bytes(src);
2626 dst->nr_phys_segments = src->nr_phys_segments;
2627 dst->ioprio = src->ioprio;
2628 dst->extra_len = src->extra_len;
2629}
2630
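/**
 * blk_rq_prep_clone - prepare a clone of an existing request
 * @rq:       the to-be-cloned request
 * @rq_src:   original request to be cloned
 * @bs:       bio_set from which bios are allocated (%NULL means fs_bio_set)
 * @gfp_mask: allocation mask
 * @bio_ctr:  optional per-bio setup callback
 * @data:     opaque data passed to @bio_ctr
 *
 * Clones every bio of @rq_src onto @rq and copies the relevant request
 * attributes.  Returns %0 on success, or -ENOMEM on allocation failure,
 * in which case the partially built clone is torn down again.
 */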
2650int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2651 struct bio_set *bs, gfp_t gfp_mask,
2652 int (*bio_ctr)(struct bio *, struct bio *, void *),
2653 void *data)
2654{
2655 struct bio *bio, *bio_src;
2656
2657 if (!bs)
2658 bs = fs_bio_set;
2659
2660 blk_rq_init(NULL, rq);
2661
2662 __rq_for_each_bio(bio_src, rq_src) {
2663 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
2664 if (!bio)
2665 goto free_and_out;
2666
2667 __bio_clone(bio, bio_src);
2668
2669 if (bio_integrity(bio_src) &&
2670 bio_integrity_clone(bio, bio_src, gfp_mask, bs))
2671 goto free_and_out;
2672
2673 if (bio_ctr && bio_ctr(bio, bio_src, data))
2674 goto free_and_out;
2675
2676 if (rq->bio) {
2677 rq->biotail->bi_next = bio;
2678 rq->biotail = bio;
2679 } else
2680 rq->bio = rq->biotail = bio;
2681 }
2682
2683 __blk_rq_prep_clone(rq, rq_src);
2684
2685 return 0;
2686
2687free_and_out:
2688 if (bio)
2689 bio_free(bio, bs);
2690 blk_rq_unprep_clone(rq);
2691
2692 return -ENOMEM;
2693}
2694EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
2695
2696int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2697{
2698 return queue_work(kblockd_workqueue, work);
2699}
2700EXPORT_SYMBOL(kblockd_schedule_work);
2701
2702int kblockd_schedule_delayed_work(struct request_queue *q,
2703 struct delayed_work *dwork, unsigned long delay)
2704{
2705 return queue_delayed_work(kblockd_workqueue, dwork, delay);
2706}
2707EXPORT_SYMBOL(kblockd_schedule_delayed_work);
2708
2709#define PLUG_MAGIC 0x91827364
2710
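/**
 * blk_start_plug - begin plugging (batching) block I/O submissions
 * @plug:	on-stack plug structure to initialize
 *
 * Initializes @plug and installs it as the current task's plug, so that
 * subsequently submitted requests are held on a per-task list and only
 * pushed to the device queues when the plug is flushed or finished.  If
 * the task already has a plug, the existing (outer) plug stays in place.
 */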
2725void blk_start_plug(struct blk_plug *plug)
2726{
2727 struct task_struct *tsk = current;
2728
2729 plug->magic = PLUG_MAGIC;
2730 INIT_LIST_HEAD(&plug->list);
2731 INIT_LIST_HEAD(&plug->cb_list);
2732 plug->should_sort = 0;
2733
2734
2735
2736
2737
2738 if (!tsk->plug) {
2739
2740
2741
2742
2743 tsk->plug = plug;
2744 }
2745}
2746EXPORT_SYMBOL(blk_start_plug);
2747
2748static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
2749{
2750 struct request *rqa = container_of(a, struct request, queuelist);
2751 struct request *rqb = container_of(b, struct request, queuelist);
2752
2753 return !(rqa->q <= rqb->q);
2754}
2755
2756
2757
2758
2759
2760
2761
2762static void queue_unplugged(struct request_queue *q, unsigned int depth,
2763 bool from_schedule)
2764 __releases(q->queue_lock)
2765{
2766 trace_block_unplug(q, depth, !from_schedule);
2767
2768
2769
2770
2771 if (unlikely(blk_queue_dead(q))) {
2772 spin_unlock(q->queue_lock);
2773 return;
2774 }
2775
2776
2777
2778
2779
2780
2781 if (from_schedule) {
2782 spin_unlock(q->queue_lock);
2783 blk_run_queue_async(q);
2784 } else {
2785 __blk_run_queue(q);
2786 spin_unlock(q->queue_lock);
2787 }
2788
2789}
2790
2791static void flush_plug_callbacks(struct blk_plug *plug)
2792{
2793 LIST_HEAD(callbacks);
2794
2795 if (list_empty(&plug->cb_list))
2796 return;
2797
2798 list_splice_init(&plug->cb_list, &callbacks);
2799
2800 while (!list_empty(&callbacks)) {
2801 struct blk_plug_cb *cb = list_first_entry(&callbacks,
2802 struct blk_plug_cb,
2803 list);
2804 list_del(&cb->list);
2805 cb->callback(cb);
2806 }
2807}
2808
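/*
 * blk_flush_plug_list - push the plugged requests out to their queues.
 * The list is sorted by queue when requests for more than one queue are
 * present, each request is inserted with the appropriate elevator
 * insertion type, and every touched queue is then run (directly, or via
 * kblockd when flushing from the scheduler).
 */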
2809void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
2810{
2811 struct request_queue *q;
2812 unsigned long flags;
2813 struct request *rq;
2814 LIST_HEAD(list);
2815 unsigned int depth;
2816
2817 BUG_ON(plug->magic != PLUG_MAGIC);
2818
2819 flush_plug_callbacks(plug);
2820 if (list_empty(&plug->list))
2821 return;
2822
2823 list_splice_init(&plug->list, &list);
2824
2825 if (plug->should_sort) {
2826 list_sort(NULL, &list, plug_rq_cmp);
2827 plug->should_sort = 0;
2828 }
2829
2830 q = NULL;
2831 depth = 0;
2832
2833
2834
2835
2836
2837 local_irq_save(flags);
2838 while (!list_empty(&list)) {
2839 rq = list_entry_rq(list.next);
2840 list_del_init(&rq->queuelist);
2841 BUG_ON(!rq->q);
2842 if (rq->q != q) {
2843
2844
2845
2846 if (q)
2847 queue_unplugged(q, depth, from_schedule);
2848 q = rq->q;
2849 depth = 0;
2850 spin_lock(q->queue_lock);
2851 }
2852
2853
2854
2855
2856 if (unlikely(blk_queue_dead(q))) {
2857 __blk_end_request_all(rq, -ENODEV);
2858 continue;
2859 }
2860
2861
2862
2863
2864 if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
2865 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
2866 else
2867 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
2868
2869 depth++;
2870 }
2871
2872
2873
2874
2875 if (q)
2876 queue_unplugged(q, depth, from_schedule);
2877
2878 local_irq_restore(flags);
2879}
2880
2881void blk_finish_plug(struct blk_plug *plug)
2882{
2883 blk_flush_plug_list(plug, false);
2884
2885 if (plug == current->plug)
2886 current->plug = NULL;
2887}
2888EXPORT_SYMBOL(blk_finish_plug);
2889
2890int __init blk_dev_init(void)
2891{
2892 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
2893 sizeof(((struct request *)0)->cmd_flags));
2894
2895
2896 kblockd_workqueue = alloc_workqueue("kblockd",
2897 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
2898 if (!kblockd_workqueue)
2899 panic("Failed to create kblockd\n");
2900
2901 request_cachep = kmem_cache_create("blkdev_requests",
2902 sizeof(struct request), 0, SLAB_PANIC, NULL);
2903
2904 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
2905 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
2906
2907 return 0;
2908}
2909