1
2
3
4
5
6
7
8
9
10
11
12
13
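/*
 * blk-core.c - handles all read/write requests to block devices: request
 * queue allocation and teardown, the legacy request_fn submission path,
 * plugging, and request completion.  blk-mq specific code lives in blk-mq.c.
 */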
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/backing-dev.h>
17#include <linux/bio.h>
18#include <linux/blkdev.h>
19#include <linux/blk-mq.h>
20#include <linux/highmem.h>
21#include <linux/mm.h>
22#include <linux/kernel_stat.h>
23#include <linux/string.h>
24#include <linux/init.h>
25#include <linux/completion.h>
26#include <linux/slab.h>
27#include <linux/swap.h>
28#include <linux/writeback.h>
29#include <linux/task_io_accounting_ops.h>
30#include <linux/fault-inject.h>
31#include <linux/list_sort.h>
32#include <linux/delay.h>
33#include <linux/ratelimit.h>
34#include <linux/pm_runtime.h>
35#include <linux/blk-cgroup.h>
36
37#define CREATE_TRACE_POINTS
38#include <trace/events/block.h>
39
40#include "blk.h"
41#include "blk-mq.h"
42
43EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
44EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
45EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
46EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
47EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
48
49DEFINE_IDA(blk_queue_ida);
50
51
52
53
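/* slab cache backing the legacy struct request mempools */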
54struct kmem_cache *request_cachep;
55
56
57
58
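/* slab cache for struct request_queue allocations */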
59struct kmem_cache *blk_requestq_cachep;
60
61
62
63
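/* workqueue used by kblockd_schedule_work() and friends for deferred queue running */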
64static struct workqueue_struct *kblockd_workqueue;
65
66static void blk_clear_congested(struct request_list *rl, int sync)
67{
68#ifdef CONFIG_CGROUP_WRITEBACK
69 clear_wb_congested(rl->blkg->wb_congested, sync);
70#else
71
72
73
74
75 if (rl == &rl->q->root_rl)
76 clear_wb_congested(rl->q->backing_dev_info.wb.congested, sync);
77#endif
78}
79
80static void blk_set_congested(struct request_list *rl, int sync)
81{
82#ifdef CONFIG_CGROUP_WRITEBACK
83 set_wb_congested(rl->blkg->wb_congested, sync);
84#else
85
86 if (rl == &rl->q->root_rl)
87 set_wb_congested(rl->q->backing_dev_info.wb.congested, sync);
88#endif
89}
90
91void blk_queue_congestion_threshold(struct request_queue *q)
92{
93 int nr;
94
95 nr = q->nr_requests - (q->nr_requests / 8) + 1;
96 if (nr > q->nr_requests)
97 nr = q->nr_requests;
98 q->nr_congestion_on = nr;
99
100 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
101 if (nr < 1)
102 nr = 1;
103 q->nr_congestion_off = nr;
104}
105
106
107
108
109
110
111
112
113
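/**
 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
 * @bdev:	device
 *
 * Returns the backing_dev_info embedded in the device's request queue.  The
 * caller must ensure the device stays referenced while the pointer is used.
 */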
114struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
115{
116 struct request_queue *q = bdev_get_queue(bdev);
117
118 return &q->backing_dev_info;
119}
120EXPORT_SYMBOL(blk_get_backing_dev_info);
121
122void blk_rq_init(struct request_queue *q, struct request *rq)
123{
124 memset(rq, 0, sizeof(*rq));
125
126 INIT_LIST_HEAD(&rq->queuelist);
127 INIT_LIST_HEAD(&rq->timeout_list);
128 rq->cpu = -1;
129 rq->q = q;
130 rq->__sector = (sector_t) -1;
131 INIT_HLIST_NODE(&rq->hash);
132 RB_CLEAR_NODE(&rq->rb_node);
133 rq->cmd = rq->__cmd;
134 rq->cmd_len = BLK_MAX_CDB;
135 rq->tag = -1;
136 rq->start_time = jiffies;
137 set_start_time_ns(rq);
138 rq->part = NULL;
139}
140EXPORT_SYMBOL(blk_rq_init);
141
142static void req_bio_endio(struct request *rq, struct bio *bio,
143 unsigned int nbytes, int error)
144{
145 if (error)
146 bio->bi_error = error;
147
148 if (unlikely(rq->cmd_flags & REQ_QUIET))
149 bio_set_flag(bio, BIO_QUIET);
150
151 bio_advance(bio, nbytes);
152
153
154 if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
155 bio_endio(bio);
156}
157
158void blk_dump_rq_flags(struct request *rq, char *msg)
159{
160 int bit;
161
162 printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg,
163 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
164 (unsigned long long) rq->cmd_flags);
165
166 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
167 (unsigned long long)blk_rq_pos(rq),
168 blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
169 printk(KERN_INFO " bio %p, biotail %p, len %u\n",
170 rq->bio, rq->biotail, blk_rq_bytes(rq));
171
172 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
173 printk(KERN_INFO " cdb: ");
174 for (bit = 0; bit < BLK_MAX_CDB; bit++)
175 printk("%02x ", rq->cmd[bit]);
176 printk("\n");
177 }
178}
179EXPORT_SYMBOL(blk_dump_rq_flags);
180
181static void blk_delay_work(struct work_struct *work)
182{
183 struct request_queue *q;
184
185 q = container_of(work, struct request_queue, delay_work.work);
186 spin_lock_irq(q->queue_lock);
187 __blk_run_queue(q);
188 spin_unlock_irq(q->queue_lock);
189}
190
191
192
193
194
195
196
197
198
199
200
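/**
 * blk_delay_queue - restart queueing after a defined time period
 * @q:		the &struct request_queue in question
 * @msecs:	delay in msecs
 *
 * Sometimes queueing needs to be postponed for a little while, e.g. when the
 * device cannot handle more work right now.  Schedules blk_delay_work() to
 * run the queue again after @msecs, unless the queue has been marked dead.
 */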
201void blk_delay_queue(struct request_queue *q, unsigned long msecs)
202{
203 if (likely(!blk_queue_dead(q)))
204 queue_delayed_work(kblockd_workqueue, &q->delay_work,
205 msecs_to_jiffies(msecs));
206}
207EXPORT_SYMBOL(blk_delay_queue);
208
209
210
211
212
213
214
215
216
217
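/**
 * blk_start_queue_async - asynchronously restart a previously stopped queue
 * @q:	the &struct request_queue in question
 *
 * Clears the stopped flag and kicks the queue from kblockd context rather
 * than running it synchronously in the caller.
 */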
218void blk_start_queue_async(struct request_queue *q)
219{
220 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
221 blk_run_queue_async(q);
222}
223EXPORT_SYMBOL(blk_start_queue_async);
224
225
226
227
228
229
230
231
232
233
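/**
 * blk_start_queue - restart a previously stopped queue
 * @q:	the &struct request_queue in question
 *
 * Clears the stopped flag and runs the queue synchronously.  Must be called
 * with the queue lock held and interrupts disabled.
 */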
234void blk_start_queue(struct request_queue *q)
235{
236 WARN_ON(!irqs_disabled());
237
238 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
239 __blk_run_queue(q);
240}
241EXPORT_SYMBOL(blk_start_queue);
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
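/**
 * blk_stop_queue - stop a queue
 * @q:	the &struct request_queue in question
 *
 * Sets the stopped flag so that __blk_run_queue() becomes a no-op and cancels
 * any pending delayed run.  Used by drivers that temporarily cannot accept
 * more work; restart with blk_start_queue().  Called with the queue lock held.
 */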
257void blk_stop_queue(struct request_queue *q)
258{
259 cancel_delayed_work(&q->delay_work);
260 queue_flag_set(QUEUE_FLAG_STOPPED, q);
261}
262EXPORT_SYMBOL(blk_stop_queue);
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
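/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q:	the queue
 *
 * Kills the timeout timer and flushes outstanding deferred work: the per-hctx
 * run/delay work for blk-mq queues, or delay_work for legacy queues.  The
 * caller must already have prevented new deferred work from being scheduled.
 */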
282void blk_sync_queue(struct request_queue *q)
283{
284 del_timer_sync(&q->timeout);
285
286 if (q->mq_ops) {
287 struct blk_mq_hw_ctx *hctx;
288 int i;
289
290 queue_for_each_hw_ctx(q, hctx, i) {
291 cancel_delayed_work_sync(&hctx->run_work);
292 cancel_delayed_work_sync(&hctx->delay_work);
293 }
294 } else {
295 cancel_delayed_work_sync(&q->delay_work);
296 }
297}
298EXPORT_SYMBOL(blk_sync_queue);
299
300
301
302
303
304
305
306
307
308
309
310
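/**
 * __blk_run_queue_uncond - run a queue whether or not it has been stopped
 * @q:	the queue to run
 *
 * Invokes q->request_fn() even if the queue is stopped, but not if it is
 * dead.  request_fn_active is incremented around the call so that queue
 * draining can detect a still-running request_fn.  Queue lock must be held.
 */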
311inline void __blk_run_queue_uncond(struct request_queue *q)
312{
313 if (unlikely(blk_queue_dead(q)))
314 return;
315
316
317
318
319
320
321
322
323 q->request_fn_active++;
324 q->request_fn(q);
325 q->request_fn_active--;
326}
327EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
328
329
330
331
332
333
334
335
336
337void __blk_run_queue(struct request_queue *q)
338{
339 if (unlikely(blk_queue_stopped(q)))
340 return;
341
342 __blk_run_queue_uncond(q);
343}
344EXPORT_SYMBOL(__blk_run_queue);
345
346
347
348
349
350
351
352
353
354void blk_run_queue_async(struct request_queue *q)
355{
356 if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
357 mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
358}
359EXPORT_SYMBOL(blk_run_queue_async);
360
361
362
363
364
365
366
367
368
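/**
 * blk_run_queue - run a single device queue
 * @q:	the queue to run
 *
 * Takes the queue lock (disabling interrupts) and invokes the queue's
 * request_fn via __blk_run_queue() unless the queue is stopped.
 */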
369void blk_run_queue(struct request_queue *q)
370{
371 unsigned long flags;
372
373 spin_lock_irqsave(q->queue_lock, flags);
374 __blk_run_queue(q);
375 spin_unlock_irqrestore(q->queue_lock, flags);
376}
377EXPORT_SYMBOL(blk_run_queue);
378
379void blk_put_queue(struct request_queue *q)
380{
381 kobject_put(&q->kobj);
382}
383EXPORT_SYMBOL(blk_put_queue);
384
385
386
387
388
389
390
391
392
393
394static void __blk_drain_queue(struct request_queue *q, bool drain_all)
395 __releases(q->queue_lock)
396 __acquires(q->queue_lock)
397{
398 int i;
399
400 lockdep_assert_held(q->queue_lock);
401
402 while (true) {
403 bool drain = false;
404
405
406
407
408
409 if (q->elevator)
410 elv_drain_elevator(q);
411
412 blkcg_drain_queue(q);
413
414
415
416
417
418
419
420
421 if (!list_empty(&q->queue_head) && q->request_fn)
422 __blk_run_queue(q);
423
424 drain |= q->nr_rqs_elvpriv;
425 drain |= q->request_fn_active;
426
427
428
429
430
431
432 if (drain_all) {
433 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
434 drain |= !list_empty(&q->queue_head);
435 for (i = 0; i < 2; i++) {
436 drain |= q->nr_rqs[i];
437 drain |= q->in_flight[i];
438 if (fq)
439 drain |= !list_empty(&fq->flush_queue[i]);
440 }
441 }
442
443 if (!drain)
444 break;
445
446 spin_unlock_irq(q->queue_lock);
447
448 msleep(10);
449
450 spin_lock_irq(q->queue_lock);
451 }
452
453
454
455
456
457
458 if (q->request_fn) {
459 struct request_list *rl;
460
461 blk_queue_for_each_rl(rl, q)
462 for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
463 wake_up_all(&rl->wait[i]);
464 }
465}
466
467
468
469
470
471
472
473
474
475
476
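/**
 * blk_queue_bypass_start - enter queue bypass mode
 * @q:	queue of interest
 *
 * Bumps the bypass depth and sets QUEUE_FLAG_BYPASS, then drains already
 * queued requests (draining is skipped for queues that have not completed
 * initialisation).  While bypassing, new requests skip elevator private data
 * allocation.  Paired with blk_queue_bypass_end().
 */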
477void blk_queue_bypass_start(struct request_queue *q)
478{
479 spin_lock_irq(q->queue_lock);
480 q->bypass_depth++;
481 queue_flag_set(QUEUE_FLAG_BYPASS, q);
482 spin_unlock_irq(q->queue_lock);
483
484
485
486
487
488
489 if (blk_queue_init_done(q)) {
490 spin_lock_irq(q->queue_lock);
491 __blk_drain_queue(q, false);
492 spin_unlock_irq(q->queue_lock);
493
494
495 synchronize_rcu();
496 }
497}
498EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
499
500
501
502
503
504
505
506void blk_queue_bypass_end(struct request_queue *q)
507{
508 spin_lock_irq(q->queue_lock);
509 if (!--q->bypass_depth)
510 queue_flag_clear(QUEUE_FLAG_BYPASS, q);
511 WARN_ON_ONCE(q->bypass_depth < 0);
512 spin_unlock_irq(q->queue_lock);
513}
514EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
515
516void blk_set_queue_dying(struct request_queue *q)
517{
518 queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
519
520 if (q->mq_ops)
521 blk_mq_wake_waiters(q);
522 else {
523 struct request_list *rl;
524
525 blk_queue_for_each_rl(rl, q) {
526 if (rl->rq_pool) {
527 wake_up(&rl->wait[BLK_RW_SYNC]);
528 wake_up(&rl->wait[BLK_RW_ASYNC]);
529 }
530 }
531 }
532}
533EXPORT_SYMBOL_GPL(blk_set_queue_dying);
534
535
536
537
538
539
540
541
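/**
 * blk_cleanup_queue - shutdown a request queue
 * @q:	request queue to shutdown
 *
 * Marks @q as dying and then dead, drains all pending requests, releases the
 * blk-mq resources if present and drops the reference taken at allocation
 * time.  The queue must not be used after this returns.
 */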
542void blk_cleanup_queue(struct request_queue *q)
543{
544 spinlock_t *lock = q->queue_lock;
545
546
547 mutex_lock(&q->sysfs_lock);
548 blk_set_queue_dying(q);
549 spin_lock_irq(lock);
550
551
552
553
554
555
556
557
558
559
560 q->bypass_depth++;
561 queue_flag_set(QUEUE_FLAG_BYPASS, q);
562
563 queue_flag_set(QUEUE_FLAG_NOMERGES, q);
564 queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
565 queue_flag_set(QUEUE_FLAG_DYING, q);
566 spin_unlock_irq(lock);
567 mutex_unlock(&q->sysfs_lock);
568
569
570
571
572
573 blk_freeze_queue(q);
574 spin_lock_irq(lock);
575 if (!q->mq_ops)
576 __blk_drain_queue(q, true);
577 queue_flag_set(QUEUE_FLAG_DEAD, q);
578 spin_unlock_irq(lock);
579
580
581 blk_flush_integrity();
582
583
584 del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
585 blk_sync_queue(q);
586
587 if (q->mq_ops)
588 blk_mq_free_queue(q);
589 percpu_ref_exit(&q->q_usage_counter);
590
591 spin_lock_irq(lock);
592 if (q->queue_lock != &q->__queue_lock)
593 q->queue_lock = &q->__queue_lock;
594 spin_unlock_irq(lock);
595
596 bdi_unregister(&q->backing_dev_info);
597
598
599 blk_put_queue(q);
600}
601EXPORT_SYMBOL(blk_cleanup_queue);
602
603
604static void *alloc_request_struct(gfp_t gfp_mask, void *data)
605{
606 int nid = (int)(long)data;
607 return kmem_cache_alloc_node(request_cachep, gfp_mask, nid);
608}
609
610static void free_request_struct(void *element, void *unused)
611{
612 kmem_cache_free(request_cachep, element);
613}
614
615int blk_init_rl(struct request_list *rl, struct request_queue *q,
616 gfp_t gfp_mask)
617{
618 if (unlikely(rl->rq_pool))
619 return 0;
620
621 rl->q = q;
622 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
623 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
624 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
625 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
626
627 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, alloc_request_struct,
628 free_request_struct,
629 (void *)(long)q->node, gfp_mask,
630 q->node);
631 if (!rl->rq_pool)
632 return -ENOMEM;
633
634 return 0;
635}
636
637void blk_exit_rl(struct request_list *rl)
638{
639 if (rl->rq_pool)
640 mempool_destroy(rl->rq_pool);
641}
642
643struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
644{
645 return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
646}
647EXPORT_SYMBOL(blk_alloc_queue);
648
649int blk_queue_enter(struct request_queue *q, bool nowait)
650{
651 while (true) {
652 int ret;
653
654 if (percpu_ref_tryget_live(&q->q_usage_counter))
655 return 0;
656
657 if (nowait)
658 return -EBUSY;
659
660 ret = wait_event_interruptible(q->mq_freeze_wq,
661 !atomic_read(&q->mq_freeze_depth) ||
662 blk_queue_dying(q));
663 if (blk_queue_dying(q))
664 return -ENODEV;
665 if (ret)
666 return ret;
667 }
668}
669
670void blk_queue_exit(struct request_queue *q)
671{
672 percpu_ref_put(&q->q_usage_counter);
673}
674
675static void blk_queue_usage_counter_release(struct percpu_ref *ref)
676{
677 struct request_queue *q =
678 container_of(ref, struct request_queue, q_usage_counter);
679
680 wake_up_all(&q->mq_freeze_wq);
681}
682
683static void blk_rq_timed_out_timer(unsigned long data)
684{
685 struct request_queue *q = (struct request_queue *)data;
686
687 kblockd_schedule_work(&q->timeout_work);
688}
689
690struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
691{
692 struct request_queue *q;
693 int err;
694
695 q = kmem_cache_alloc_node(blk_requestq_cachep,
696 gfp_mask | __GFP_ZERO, node_id);
697 if (!q)
698 return NULL;
699
700 q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
701 if (q->id < 0)
702 goto fail_q;
703
704 q->bio_split = bioset_create(BIO_POOL_SIZE, 0);
705 if (!q->bio_split)
706 goto fail_id;
707
708 q->backing_dev_info.ra_pages =
709 (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
710 q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK;
711 q->backing_dev_info.name = "block";
712 q->node = node_id;
713
714 err = bdi_init(&q->backing_dev_info);
715 if (err)
716 goto fail_split;
717
718 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
719 laptop_mode_timer_fn, (unsigned long) q);
720 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
721 INIT_LIST_HEAD(&q->queue_head);
722 INIT_LIST_HEAD(&q->timeout_list);
723 INIT_LIST_HEAD(&q->icq_list);
724#ifdef CONFIG_BLK_CGROUP
725 INIT_LIST_HEAD(&q->blkg_list);
726#endif
727 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
728
729 kobject_init(&q->kobj, &blk_queue_ktype);
730
731 mutex_init(&q->sysfs_lock);
732 spin_lock_init(&q->__queue_lock);
733
734
735
736
737
738 q->queue_lock = &q->__queue_lock;
739
740
741
742
743
744
745
746 q->bypass_depth = 1;
747 __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
748
749 init_waitqueue_head(&q->mq_freeze_wq);
750
751
752
753
754
755 if (percpu_ref_init(&q->q_usage_counter,
756 blk_queue_usage_counter_release,
757 PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
758 goto fail_bdi;
759
760 if (blkcg_init_queue(q))
761 goto fail_ref;
762
763 return q;
764
765fail_ref:
766 percpu_ref_exit(&q->q_usage_counter);
767fail_bdi:
768 bdi_destroy(&q->backing_dev_info);
769fail_split:
770 bioset_free(q->bio_split);
771fail_id:
772 ida_simple_remove(&blk_queue_ida, q->id);
773fail_q:
774 kmem_cache_free(blk_requestq_cachep, q);
775 return NULL;
776}
777EXPORT_SYMBOL(blk_alloc_queue_node);
778
811
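/**
 * blk_init_queue - prepare a request queue for use with a block device
 * @rfn:	the strategy routine (request_fn) called to process requests
 *		that have been placed on the queue
 * @lock:	request queue spin lock; %NULL makes the queue use its
 *		internal lock
 *
 * Allocates and initialises a request queue for the legacy (non-blk-mq)
 * request_fn model: the default elevator is attached, the flush machinery
 * and root request_list are set up and blk_queue_bio() is installed as the
 * make_request function.  Returns the queue, or %NULL on failure; release it
 * with blk_cleanup_queue() when done.
 *
 * Illustrative driver-side usage (sketch only, the "mydev" names are made up):
 *
 *	q = blk_init_queue(mydev_request_fn, &mydev->lock);
 *	if (!q)
 *		return -ENOMEM;
 *	blk_queue_logical_block_size(q, 512);
 *	mydev->disk->queue = q;
 *
 * and later, on teardown:
 *
 *	blk_cleanup_queue(q);
 */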
812struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
813{
814 return blk_init_queue_node(rfn, lock, NUMA_NO_NODE);
815}
816EXPORT_SYMBOL(blk_init_queue);
817
818struct request_queue *
819blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
820{
821 struct request_queue *uninit_q, *q;
822
823 uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
824 if (!uninit_q)
825 return NULL;
826
827 q = blk_init_allocated_queue(uninit_q, rfn, lock);
828 if (!q)
829 blk_cleanup_queue(uninit_q);
830
831 return q;
832}
833EXPORT_SYMBOL(blk_init_queue_node);
834
835static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
836
837struct request_queue *
838blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
839 spinlock_t *lock)
840{
841 if (!q)
842 return NULL;
843
844 q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
845 if (!q->fq)
846 return NULL;
847
848 if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
849 goto fail;
850
851 INIT_WORK(&q->timeout_work, blk_timeout_work);
852 q->request_fn = rfn;
853 q->prep_rq_fn = NULL;
854 q->unprep_rq_fn = NULL;
855 q->queue_flags |= QUEUE_FLAG_DEFAULT;
856
857
858 if (lock)
859 q->queue_lock = lock;
860
861
862
863
864 blk_queue_make_request(q, blk_queue_bio);
865
866 q->sg_reserved_size = INT_MAX;
867
868
869 mutex_lock(&q->sysfs_lock);
870
871
872 if (elevator_init(q, NULL)) {
873 mutex_unlock(&q->sysfs_lock);
874 goto fail;
875 }
876
877 mutex_unlock(&q->sysfs_lock);
878
879 return q;
880
881fail:
882 blk_free_flush_queue(q->fq);
883 return NULL;
884}
885EXPORT_SYMBOL(blk_init_allocated_queue);
886
887bool blk_get_queue(struct request_queue *q)
888{
889 if (likely(!blk_queue_dying(q))) {
890 __blk_get_queue(q);
891 return true;
892 }
893
894 return false;
895}
896EXPORT_SYMBOL(blk_get_queue);
897
898static inline void blk_free_request(struct request_list *rl, struct request *rq)
899{
900 if (rq->cmd_flags & REQ_ELVPRIV) {
901 elv_put_request(rl->q, rq);
902 if (rq->elv.icq)
903 put_io_context(rq->elv.icq->ioc);
904 }
905
906 mempool_free(rq, rl->rq_pool);
907}
908
909
910
911
912
913static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
914{
915 if (!ioc)
916 return 0;
917
918
919
920
921
922
923 return ioc->nr_batch_requests == q->nr_batching ||
924 (ioc->nr_batch_requests > 0
925 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
926}
927
928
929
930
931
932
933
934static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
935{
936 if (!ioc || ioc_batching(q, ioc))
937 return;
938
939 ioc->nr_batch_requests = q->nr_batching;
940 ioc->last_waited = jiffies;
941}
942
943static void __freed_request(struct request_list *rl, int sync)
944{
945 struct request_queue *q = rl->q;
946
947 if (rl->count[sync] < queue_congestion_off_threshold(q))
948 blk_clear_congested(rl, sync);
949
950 if (rl->count[sync] + 1 <= q->nr_requests) {
951 if (waitqueue_active(&rl->wait[sync]))
952 wake_up(&rl->wait[sync]);
953
954 blk_clear_rl_full(rl, sync);
955 }
956}
957
958
959
960
961
962static void freed_request(struct request_list *rl, unsigned int flags)
963{
964 struct request_queue *q = rl->q;
965 int sync = rw_is_sync(flags);
966
967 q->nr_rqs[sync]--;
968 rl->count[sync]--;
969 if (flags & REQ_ELVPRIV)
970 q->nr_rqs_elvpriv--;
971
972 __freed_request(rl, sync);
973
974 if (unlikely(rl->starved[sync ^ 1]))
975 __freed_request(rl, sync ^ 1);
976}
977
978int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
979{
980 struct request_list *rl;
981 int on_thresh, off_thresh;
982
983 spin_lock_irq(q->queue_lock);
984 q->nr_requests = nr;
985 blk_queue_congestion_threshold(q);
986 on_thresh = queue_congestion_on_threshold(q);
987 off_thresh = queue_congestion_off_threshold(q);
988
989 blk_queue_for_each_rl(rl, q) {
990 if (rl->count[BLK_RW_SYNC] >= on_thresh)
991 blk_set_congested(rl, BLK_RW_SYNC);
992 else if (rl->count[BLK_RW_SYNC] < off_thresh)
993 blk_clear_congested(rl, BLK_RW_SYNC);
994
995 if (rl->count[BLK_RW_ASYNC] >= on_thresh)
996 blk_set_congested(rl, BLK_RW_ASYNC);
997 else if (rl->count[BLK_RW_ASYNC] < off_thresh)
998 blk_clear_congested(rl, BLK_RW_ASYNC);
999
1000 if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
1001 blk_set_rl_full(rl, BLK_RW_SYNC);
1002 } else {
1003 blk_clear_rl_full(rl, BLK_RW_SYNC);
1004 wake_up(&rl->wait[BLK_RW_SYNC]);
1005 }
1006
1007 if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
1008 blk_set_rl_full(rl, BLK_RW_ASYNC);
1009 } else {
1010 blk_clear_rl_full(rl, BLK_RW_ASYNC);
1011 wake_up(&rl->wait[BLK_RW_ASYNC]);
1012 }
1013 }
1014
1015 spin_unlock_irq(q->queue_lock);
1016 return 0;
1017}
1018
1019
1020
1021
1022
1023static bool blk_rq_should_init_elevator(struct bio *bio)
1024{
1025 if (!bio)
1026 return true;
1027
1028
1029
1030
1031
1032 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))
1033 return false;
1034
1035 return true;
1036}
1037
1038
1039
1040
1041
1042
1043
1044
1045static struct io_context *rq_ioc(struct bio *bio)
1046{
1047#ifdef CONFIG_BLK_CGROUP
1048 if (bio && bio->bi_ioc)
1049 return bio->bi_ioc;
1050#endif
1051 return current->io_context;
1052}
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
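/**
 * __get_request - get a free request
 * @rl:		request list to allocate from
 * @rw_flags:	RW and SYNC flags
 * @bio:	bio to allocate request for (can be %NULL)
 * @gfp_mask:	allocation mask
 *
 * Get a free request from @rl.  May fail under memory pressure or if the
 * queue is dying.  Must be called with the queue lock held; on success the
 * request is returned with the lock released, on failure an ERR_PTR is
 * returned with the lock still held.
 */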
1068static struct request *__get_request(struct request_list *rl, int rw_flags,
1069 struct bio *bio, gfp_t gfp_mask)
1070{
1071 struct request_queue *q = rl->q;
1072 struct request *rq;
1073 struct elevator_type *et = q->elevator->type;
1074 struct io_context *ioc = rq_ioc(bio);
1075 struct io_cq *icq = NULL;
1076 const bool is_sync = rw_is_sync(rw_flags) != 0;
1077 int may_queue;
1078
1079 if (unlikely(blk_queue_dying(q)))
1080 return ERR_PTR(-ENODEV);
1081
1082 may_queue = elv_may_queue(q, rw_flags);
1083 if (may_queue == ELV_MQUEUE_NO)
1084 goto rq_starved;
1085
1086 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
1087 if (rl->count[is_sync]+1 >= q->nr_requests) {
1088
1089
1090
1091
1092
1093
1094 if (!blk_rl_full(rl, is_sync)) {
1095 ioc_set_batching(q, ioc);
1096 blk_set_rl_full(rl, is_sync);
1097 } else {
1098 if (may_queue != ELV_MQUEUE_MUST
1099 && !ioc_batching(q, ioc)) {
1100
1101
1102
1103
1104
1105 return ERR_PTR(-ENOMEM);
1106 }
1107 }
1108 }
1109 blk_set_congested(rl, is_sync);
1110 }
1111
1112
1113
1114
1115
1116
1117 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
1118 return ERR_PTR(-ENOMEM);
1119
1120 q->nr_rqs[is_sync]++;
1121 rl->count[is_sync]++;
1122 rl->starved[is_sync] = 0;
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134 if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
1135 rw_flags |= REQ_ELVPRIV;
1136 q->nr_rqs_elvpriv++;
1137 if (et->icq_cache && ioc)
1138 icq = ioc_lookup_icq(ioc, q);
1139 }
1140
1141 if (blk_queue_io_stat(q))
1142 rw_flags |= REQ_IO_STAT;
1143 spin_unlock_irq(q->queue_lock);
1144
1145
1146 rq = mempool_alloc(rl->rq_pool, gfp_mask);
1147 if (!rq)
1148 goto fail_alloc;
1149
1150 blk_rq_init(q, rq);
1151 blk_rq_set_rl(rq, rl);
1152 rq->cmd_flags = rw_flags | REQ_ALLOCED;
1153
1154
1155 if (rw_flags & REQ_ELVPRIV) {
1156 if (unlikely(et->icq_cache && !icq)) {
1157 if (ioc)
1158 icq = ioc_create_icq(ioc, q, gfp_mask);
1159 if (!icq)
1160 goto fail_elvpriv;
1161 }
1162
1163 rq->elv.icq = icq;
1164 if (unlikely(elv_set_request(q, rq, bio, gfp_mask)))
1165 goto fail_elvpriv;
1166
1167
1168 if (icq)
1169 get_io_context(icq->ioc);
1170 }
1171out:
1172
1173
1174
1175
1176
1177
1178 if (ioc_batching(q, ioc))
1179 ioc->nr_batch_requests--;
1180
1181 trace_block_getrq(q, bio, rw_flags & 1);
1182 return rq;
1183
1184fail_elvpriv:
1185
1186
1187
1188
1189
1190
1191 printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n",
1192 __func__, dev_name(q->backing_dev_info.dev));
1193
1194 rq->cmd_flags &= ~REQ_ELVPRIV;
1195 rq->elv.icq = NULL;
1196
1197 spin_lock_irq(q->queue_lock);
1198 q->nr_rqs_elvpriv--;
1199 spin_unlock_irq(q->queue_lock);
1200 goto out;
1201
1202fail_alloc:
1203
1204
1205
1206
1207
1208
1209
1210 spin_lock_irq(q->queue_lock);
1211 freed_request(rl, rw_flags);
1212
1213
1214
1215
1216
1217
1218
1219
1220rq_starved:
1221 if (unlikely(rl->count[is_sync] == 0))
1222 rl->starved[is_sync] = 1;
1223 return ERR_PTR(-ENOMEM);
1224}
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240static struct request *get_request(struct request_queue *q, int rw_flags,
1241 struct bio *bio, gfp_t gfp_mask)
1242{
1243 const bool is_sync = rw_is_sync(rw_flags) != 0;
1244 DEFINE_WAIT(wait);
1245 struct request_list *rl;
1246 struct request *rq;
1247
1248 rl = blk_get_rl(q, bio);
1249retry:
1250 rq = __get_request(rl, rw_flags, bio, gfp_mask);
1251 if (!IS_ERR(rq))
1252 return rq;
1253
1254 if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
1255 blk_put_rl(rl);
1256 return rq;
1257 }
1258
1259
1260 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
1261 TASK_UNINTERRUPTIBLE);
1262
1263 trace_block_sleeprq(q, bio, rw_flags & 1);
1264
1265 spin_unlock_irq(q->queue_lock);
1266 io_schedule();
1267
1268
1269
1270
1271
1272
1273 ioc_set_batching(q, current->io_context);
1274
1275 spin_lock_irq(q->queue_lock);
1276 finish_wait(&rl->wait[is_sync], &wait);
1277
1278 goto retry;
1279}
1280
1281static struct request *blk_old_get_request(struct request_queue *q, int rw,
1282 gfp_t gfp_mask)
1283{
1284 struct request *rq;
1285
1286 BUG_ON(rw != READ && rw != WRITE);
1287
1288
1289 create_io_context(gfp_mask, q->node);
1290
1291 spin_lock_irq(q->queue_lock);
1292 rq = get_request(q, rw, NULL, gfp_mask);
1293 if (IS_ERR(rq))
1294 spin_unlock_irq(q->queue_lock);
1295
1296
1297 return rq;
1298}
1299
1300struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
1301{
1302 if (q->mq_ops)
1303 return blk_mq_alloc_request(q, rw,
1304 (gfp_mask & __GFP_DIRECT_RECLAIM) ?
1305 0 : BLK_MQ_REQ_NOWAIT);
1306 else
1307 return blk_old_get_request(q, rw, gfp_mask);
1308}
1309EXPORT_SYMBOL(blk_get_request);
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
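/**
 * blk_make_request - allocate a request backed by the pages of a bio chain
 * @q:		target request queue
 * @bio:	bio (possibly a chain) describing the memory to transfer
 * @gfp_mask:	allocation flags
 *
 * Allocates a BLOCK_PC request and appends every bio in the chain to it,
 * bouncing pages as required by the queue limits.  Returns the request or an
 * ERR_PTR on failure.
 */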
1342struct request *blk_make_request(struct request_queue *q, struct bio *bio,
1343 gfp_t gfp_mask)
1344{
1345 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
1346
1347 if (IS_ERR(rq))
1348 return rq;
1349
1350 blk_rq_set_block_pc(rq);
1351
1352 for_each_bio(bio) {
1353 struct bio *bounce_bio = bio;
1354 int ret;
1355
1356 blk_queue_bounce(q, &bounce_bio);
1357 ret = blk_rq_append_bio(q, rq, bounce_bio);
1358 if (unlikely(ret)) {
1359 blk_put_request(rq);
1360 return ERR_PTR(ret);
1361 }
1362 }
1363
1364 return rq;
1365}
1366EXPORT_SYMBOL(blk_make_request);
1367
1368
1369
1370
1371
1372
1373void blk_rq_set_block_pc(struct request *rq)
1374{
1375 rq->cmd_type = REQ_TYPE_BLOCK_PC;
1376 rq->__data_len = 0;
1377 rq->__sector = (sector_t) -1;
1378 rq->bio = rq->biotail = NULL;
1379 memset(rq->__cmd, 0, sizeof(rq->__cmd));
1380}
1381EXPORT_SYMBOL(blk_rq_set_block_pc);
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393void blk_requeue_request(struct request_queue *q, struct request *rq)
1394{
1395 blk_delete_timer(rq);
1396 blk_clear_rq_complete(rq);
1397 trace_block_rq_requeue(q, rq);
1398
1399 if (rq->cmd_flags & REQ_QUEUED)
1400 blk_queue_end_tag(q, rq);
1401
1402 BUG_ON(blk_queued_rq(rq));
1403
1404 elv_requeue_request(q, rq);
1405}
1406EXPORT_SYMBOL(blk_requeue_request);
1407
1408static void add_acct_request(struct request_queue *q, struct request *rq,
1409 int where)
1410{
1411 blk_account_io_start(rq, true);
1412 __elv_add_request(q, rq, where);
1413}
1414
1415static void part_round_stats_single(int cpu, struct hd_struct *part,
1416 unsigned long now)
1417{
1418 int inflight;
1419
1420 if (now == part->stamp)
1421 return;
1422
1423 inflight = part_in_flight(part);
1424 if (inflight) {
1425 __part_stat_add(cpu, part, time_in_queue,
1426 inflight * (now - part->stamp));
1427 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1428 }
1429 part->stamp = now;
1430}
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448void part_round_stats(int cpu, struct hd_struct *part)
1449{
1450 unsigned long now = jiffies;
1451
1452 if (part->partno)
1453 part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
1454 part_round_stats_single(cpu, part, now);
1455}
1456EXPORT_SYMBOL_GPL(part_round_stats);
1457
1458#ifdef CONFIG_PM
1459static void blk_pm_put_request(struct request *rq)
1460{
1461 if (rq->q->dev && !(rq->cmd_flags & REQ_PM) && !--rq->q->nr_pending)
1462 pm_runtime_mark_last_busy(rq->q->dev);
1463}
1464#else
1465static inline void blk_pm_put_request(struct request *rq) {}
1466#endif
1467
1468
1469
1470
1471void __blk_put_request(struct request_queue *q, struct request *req)
1472{
1473 if (unlikely(!q))
1474 return;
1475
1476 if (q->mq_ops) {
1477 blk_mq_free_request(req);
1478 return;
1479 }
1480
1481 blk_pm_put_request(req);
1482
1483 elv_completed_request(q, req);
1484
1485
1486 WARN_ON(req->bio != NULL);
1487
1488
1489
1490
1491
1492 if (req->cmd_flags & REQ_ALLOCED) {
1493 unsigned int flags = req->cmd_flags;
1494 struct request_list *rl = blk_rq_rl(req);
1495
1496 BUG_ON(!list_empty(&req->queuelist));
1497 BUG_ON(ELV_ON_HASH(req));
1498
1499 blk_free_request(rl, req);
1500 freed_request(rl, flags);
1501 blk_put_rl(rl);
1502 }
1503}
1504EXPORT_SYMBOL_GPL(__blk_put_request);
1505
1506void blk_put_request(struct request *req)
1507{
1508 struct request_queue *q = req->q;
1509
1510 if (q->mq_ops)
1511 blk_mq_free_request(req);
1512 else {
1513 unsigned long flags;
1514
1515 spin_lock_irqsave(q->queue_lock, flags);
1516 __blk_put_request(q, req);
1517 spin_unlock_irqrestore(q->queue_lock, flags);
1518 }
1519}
1520EXPORT_SYMBOL(blk_put_request);
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535void blk_add_request_payload(struct request *rq, struct page *page,
1536 unsigned int len)
1537{
1538 struct bio *bio = rq->bio;
1539
1540 bio->bi_io_vec->bv_page = page;
1541 bio->bi_io_vec->bv_offset = 0;
1542 bio->bi_io_vec->bv_len = len;
1543
1544 bio->bi_iter.bi_size = len;
1545 bio->bi_vcnt = 1;
1546 bio->bi_phys_segments = 1;
1547
1548 rq->__data_len = rq->resid_len = len;
1549 rq->nr_phys_segments = 1;
1550}
1551EXPORT_SYMBOL_GPL(blk_add_request_payload);
1552
1553bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
1554 struct bio *bio)
1555{
1556 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1557
1558 if (!ll_back_merge_fn(q, req, bio))
1559 return false;
1560
1561 trace_block_bio_backmerge(q, req, bio);
1562
1563 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1564 blk_rq_set_mixed_merge(req);
1565
1566 req->biotail->bi_next = bio;
1567 req->biotail = bio;
1568 req->__data_len += bio->bi_iter.bi_size;
1569 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1570
1571 blk_account_io_start(req, false);
1572 return true;
1573}
1574
1575bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
1576 struct bio *bio)
1577{
1578 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1579
1580 if (!ll_front_merge_fn(q, req, bio))
1581 return false;
1582
1583 trace_block_bio_frontmerge(q, req, bio);
1584
1585 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1586 blk_rq_set_mixed_merge(req);
1587
1588 bio->bi_next = req->bio;
1589 req->bio = bio;
1590
1591 req->__sector = bio->bi_iter.bi_sector;
1592 req->__data_len += bio->bi_iter.bi_size;
1593 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1594
1595 blk_account_io_start(req, false);
1596 return true;
1597}
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1622 unsigned int *request_count,
1623 struct request **same_queue_rq)
1624{
1625 struct blk_plug *plug;
1626 struct request *rq;
1627 bool ret = false;
1628 struct list_head *plug_list;
1629
1630 plug = current->plug;
1631 if (!plug)
1632 goto out;
1633 *request_count = 0;
1634
1635 if (q->mq_ops)
1636 plug_list = &plug->mq_list;
1637 else
1638 plug_list = &plug->list;
1639
1640 list_for_each_entry_reverse(rq, plug_list, queuelist) {
1641 int el_ret;
1642
1643 if (rq->q == q) {
1644 (*request_count)++;
1645
1646
1647
1648
1649
1650 if (same_queue_rq)
1651 *same_queue_rq = rq;
1652 }
1653
1654 if (rq->q != q || !blk_rq_merge_ok(rq, bio))
1655 continue;
1656
1657 el_ret = blk_try_merge(rq, bio);
1658 if (el_ret == ELEVATOR_BACK_MERGE) {
1659 ret = bio_attempt_back_merge(q, rq, bio);
1660 if (ret)
1661 break;
1662 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1663 ret = bio_attempt_front_merge(q, rq, bio);
1664 if (ret)
1665 break;
1666 }
1667 }
1668out:
1669 return ret;
1670}
1671
1672unsigned int blk_plug_queued_count(struct request_queue *q)
1673{
1674 struct blk_plug *plug;
1675 struct request *rq;
1676 struct list_head *plug_list;
1677 unsigned int ret = 0;
1678
1679 plug = current->plug;
1680 if (!plug)
1681 goto out;
1682
1683 if (q->mq_ops)
1684 plug_list = &plug->mq_list;
1685 else
1686 plug_list = &plug->list;
1687
1688 list_for_each_entry(rq, plug_list, queuelist) {
1689 if (rq->q == q)
1690 ret++;
1691 }
1692out:
1693 return ret;
1694}
1695
1696void init_request_from_bio(struct request *req, struct bio *bio)
1697{
1698 req->cmd_type = REQ_TYPE_FS;
1699
1700 req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
1701 if (bio->bi_rw & REQ_RAHEAD)
1702 req->cmd_flags |= REQ_FAILFAST_MASK;
1703
1704 req->errors = 0;
1705 req->__sector = bio->bi_iter.bi_sector;
1706 req->ioprio = bio_prio(bio);
1707 blk_rq_bio_prep(req->q, req, bio);
1708}
1709
1710static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1711{
1712 const bool sync = !!(bio->bi_rw & REQ_SYNC);
1713 struct blk_plug *plug;
1714 int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
1715 struct request *req;
1716 unsigned int request_count = 0;
1717
1718
1719
1720
1721
1722
1723 blk_queue_bounce(q, &bio);
1724
1725 blk_queue_split(q, &bio, q->bio_split);
1726
1727 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
1728 bio->bi_error = -EIO;
1729 bio_endio(bio);
1730 return BLK_QC_T_NONE;
1731 }
1732
1733 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
1734 spin_lock_irq(q->queue_lock);
1735 where = ELEVATOR_INSERT_FLUSH;
1736 goto get_rq;
1737 }
1738
1739
1740
1741
1742
1743 if (!blk_queue_nomerges(q)) {
1744 if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
1745 return BLK_QC_T_NONE;
1746 } else
1747 request_count = blk_plug_queued_count(q);
1748
1749 spin_lock_irq(q->queue_lock);
1750
1751 el_ret = elv_merge(q, &req, bio);
1752 if (el_ret == ELEVATOR_BACK_MERGE) {
1753 if (bio_attempt_back_merge(q, req, bio)) {
1754 elv_bio_merged(q, req, bio);
1755 if (!attempt_back_merge(q, req))
1756 elv_merged_request(q, req, el_ret);
1757 goto out_unlock;
1758 }
1759 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1760 if (bio_attempt_front_merge(q, req, bio)) {
1761 elv_bio_merged(q, req, bio);
1762 if (!attempt_front_merge(q, req))
1763 elv_merged_request(q, req, el_ret);
1764 goto out_unlock;
1765 }
1766 }
1767
1768get_rq:
1769
1770
1771
1772
1773
1774 rw_flags = bio_data_dir(bio);
1775 if (sync)
1776 rw_flags |= REQ_SYNC;
1777
1778
1779
1780
1781
1782 req = get_request(q, rw_flags, bio, GFP_NOIO);
1783 if (IS_ERR(req)) {
1784 bio->bi_error = PTR_ERR(req);
1785 bio_endio(bio);
1786 goto out_unlock;
1787 }
1788
1789
1790
1791
1792
1793
1794
1795 init_request_from_bio(req, bio);
1796
1797 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
1798 req->cpu = raw_smp_processor_id();
1799
1800 plug = current->plug;
1801 if (plug) {
1802
1803
1804
1805
1806 if (!request_count)
1807 trace_block_plug(q);
1808 else {
1809 if (request_count >= BLK_MAX_REQUEST_COUNT) {
1810 blk_flush_plug_list(plug, false);
1811 trace_block_plug(q);
1812 }
1813 }
1814 list_add_tail(&req->queuelist, &plug->list);
1815 blk_account_io_start(req, true);
1816 } else {
1817 spin_lock_irq(q->queue_lock);
1818 add_acct_request(q, req, where);
1819 __blk_run_queue(q);
1820out_unlock:
1821 spin_unlock_irq(q->queue_lock);
1822 }
1823
1824 return BLK_QC_T_NONE;
1825}
1826
1827
1828
1829
1830static inline void blk_partition_remap(struct bio *bio)
1831{
1832 struct block_device *bdev = bio->bi_bdev;
1833
1834 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
1835 struct hd_struct *p = bdev->bd_part;
1836
1837 bio->bi_iter.bi_sector += p->start_sect;
1838 bio->bi_bdev = bdev->bd_contains;
1839
1840 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
1841 bdev->bd_dev,
1842 bio->bi_iter.bi_sector - p->start_sect);
1843 }
1844}
1845
1846static void handle_bad_sector(struct bio *bio)
1847{
1848 char b[BDEVNAME_SIZE];
1849
1850 printk(KERN_INFO "attempt to access beyond end of device\n");
1851 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
1852 bdevname(bio->bi_bdev, b),
1853 bio->bi_rw,
1854 (unsigned long long)bio_end_sector(bio),
1855 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
1856}
1857
1858#ifdef CONFIG_FAIL_MAKE_REQUEST
1859
1860static DECLARE_FAULT_ATTR(fail_make_request);
1861
1862static int __init setup_fail_make_request(char *str)
1863{
1864 return setup_fault_attr(&fail_make_request, str);
1865}
1866__setup("fail_make_request=", setup_fail_make_request);
1867
1868static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
1869{
1870 return part->make_it_fail && should_fail(&fail_make_request, bytes);
1871}
1872
1873static int __init fail_make_request_debugfs(void)
1874{
1875 struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
1876 NULL, &fail_make_request);
1877
1878 return PTR_ERR_OR_ZERO(dir);
1879}
1880
1881late_initcall(fail_make_request_debugfs);
1882
1883#else
1884
1885static inline bool should_fail_request(struct hd_struct *part,
1886 unsigned int bytes)
1887{
1888 return false;
1889}
1890
1891#endif
1892
1893
1894
1895
1896static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1897{
1898 sector_t maxsector;
1899
1900 if (!nr_sectors)
1901 return 0;
1902
1903
1904 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
1905 if (maxsector) {
1906 sector_t sector = bio->bi_iter.bi_sector;
1907
1908 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
1909
1910
1911
1912
1913
1914 handle_bad_sector(bio);
1915 return 1;
1916 }
1917 }
1918
1919 return 0;
1920}
1921
1922static noinline_for_stack bool
1923generic_make_request_checks(struct bio *bio)
1924{
1925 struct request_queue *q;
1926 int nr_sectors = bio_sectors(bio);
1927 int err = -EIO;
1928 char b[BDEVNAME_SIZE];
1929 struct hd_struct *part;
1930
1931 might_sleep();
1932
1933 if (bio_check_eod(bio, nr_sectors))
1934 goto end_io;
1935
1936 q = bdev_get_queue(bio->bi_bdev);
1937 if (unlikely(!q)) {
1938 printk(KERN_ERR
1939 "generic_make_request: Trying to access "
1940 "nonexistent block-device %s (%Lu)\n",
1941 bdevname(bio->bi_bdev, b),
1942 (long long) bio->bi_iter.bi_sector);
1943 goto end_io;
1944 }
1945
1946 part = bio->bi_bdev->bd_part;
1947 if (should_fail_request(part, bio->bi_iter.bi_size) ||
1948 should_fail_request(&part_to_disk(part)->part0,
1949 bio->bi_iter.bi_size))
1950 goto end_io;
1951
1952
1953
1954
1955
1956 blk_partition_remap(bio);
1957
1958 if (bio_check_eod(bio, nr_sectors))
1959 goto end_io;
1960
1961
1962
1963
1964
1965
1966 if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
1967 bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
1968 if (!nr_sectors) {
1969 err = 0;
1970 goto end_io;
1971 }
1972 }
1973
1974 if ((bio->bi_rw & REQ_DISCARD) &&
1975 (!blk_queue_discard(q) ||
1976 ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
1977 err = -EOPNOTSUPP;
1978 goto end_io;
1979 }
1980
1981 if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
1982 err = -EOPNOTSUPP;
1983 goto end_io;
1984 }
1985
1986
1987
1988
1989
1990
1991
1992 create_io_context(GFP_ATOMIC, q->node);
1993
1994 if (!blkcg_bio_issue_check(q, bio))
1995 return false;
1996
1997 trace_block_bio_queue(q, bio);
1998 return true;
1999
2000end_io:
2001 bio->bi_error = err;
2002 bio_endio(bio);
2003 return false;
2004}
2005
2029
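/**
 * generic_make_request - hand a buffer to its device driver for I/O
 * @bio:	the bio describing the location in memory and on the device
 *
 * Central entry point for bio submission: after the sanity checks it calls
 * the queue's make_request_fn.  Recursive submissions from stacked drivers
 * are flattened onto current->bio_list and processed iteratively, keeping
 * stack usage bounded.
 */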
2030blk_qc_t generic_make_request(struct bio *bio)
2031{
2032 struct bio_list bio_list_on_stack;
2033 blk_qc_t ret = BLK_QC_T_NONE;
2034
2035 if (!generic_make_request_checks(bio))
2036 goto out;
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048 if (current->bio_list) {
2049 bio_list_add(current->bio_list, bio);
2050 goto out;
2051 }
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067 BUG_ON(bio->bi_next);
2068 bio_list_init(&bio_list_on_stack);
2069 current->bio_list = &bio_list_on_stack;
2070 do {
2071 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
2072
2073 if (likely(blk_queue_enter(q, false) == 0)) {
2074 ret = q->make_request_fn(q, bio);
2075
2076 blk_queue_exit(q);
2077
2078 bio = bio_list_pop(current->bio_list);
2079 } else {
2080 struct bio *bio_next = bio_list_pop(current->bio_list);
2081
2082 bio_io_error(bio);
2083 bio = bio_next;
2084 }
2085 } while (bio);
2086 current->bio_list = NULL;
2087
2088out:
2089 return ret;
2090}
2091EXPORT_SYMBOL(generic_make_request);
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
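/**
 * submit_bio - submit a bio to the block device layer for I/O
 * @rw:		whether to %READ or %WRITE, plus any other REQ_ flags to OR in
 * @bio:	the &struct bio which describes the I/O
 *
 * Performs per-task and VM event accounting for bios that carry data, then
 * hands the bio to generic_make_request().
 */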
2103blk_qc_t submit_bio(int rw, struct bio *bio)
2104{
2105 bio->bi_rw |= rw;
2106
2107
2108
2109
2110
2111 if (bio_has_data(bio)) {
2112 unsigned int count;
2113
2114 if (unlikely(rw & REQ_WRITE_SAME))
2115 count = bdev_logical_block_size(bio->bi_bdev) >> 9;
2116 else
2117 count = bio_sectors(bio);
2118
2119 if (rw & WRITE) {
2120 count_vm_events(PGPGOUT, count);
2121 } else {
2122 task_io_account_read(bio->bi_iter.bi_size);
2123 count_vm_events(PGPGIN, count);
2124 }
2125
2126 if (unlikely(block_dump)) {
2127 char b[BDEVNAME_SIZE];
2128 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
2129 current->comm, task_pid_nr(current),
2130 (rw & WRITE) ? "WRITE" : "READ",
2131 (unsigned long long)bio->bi_iter.bi_sector,
2132 bdevname(bio->bi_bdev, b),
2133 count);
2134 }
2135 }
2136
2137 return generic_make_request(bio);
2138}
2139EXPORT_SYMBOL(submit_bio);
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158static int blk_cloned_rq_check_limits(struct request_queue *q,
2159 struct request *rq)
2160{
2161 if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
2162 printk(KERN_ERR "%s: over max size limit.\n", __func__);
2163 return -EIO;
2164 }
2165
2166
2167
2168
2169
2170
2171
2172 blk_recalc_rq_segments(rq);
2173 if (rq->nr_phys_segments > queue_max_segments(q)) {
2174 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
2175 return -EIO;
2176 }
2177
2178 return 0;
2179}
2180
2181
2182
2183
2184
2185
2186int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2187{
2188 unsigned long flags;
2189 int where = ELEVATOR_INSERT_BACK;
2190
2191 if (blk_cloned_rq_check_limits(q, rq))
2192 return -EIO;
2193
2194 if (rq->rq_disk &&
2195 should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
2196 return -EIO;
2197
2198 if (q->mq_ops) {
2199 if (blk_queue_io_stat(q))
2200 blk_account_io_start(rq, true);
2201 blk_mq_insert_request(rq, false, true, false);
2202 return 0;
2203 }
2204
2205 spin_lock_irqsave(q->queue_lock, flags);
2206 if (unlikely(blk_queue_dying(q))) {
2207 spin_unlock_irqrestore(q->queue_lock, flags);
2208 return -ENODEV;
2209 }
2210
2211
2212
2213
2214
2215 BUG_ON(blk_queued_rq(rq));
2216
2217 if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
2218 where = ELEVATOR_INSERT_FLUSH;
2219
2220 add_acct_request(q, rq, where);
2221 if (where == ELEVATOR_INSERT_FLUSH)
2222 __blk_run_queue(q);
2223 spin_unlock_irqrestore(q->queue_lock, flags);
2224
2225 return 0;
2226}
2227EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245unsigned int blk_rq_err_bytes(const struct request *rq)
2246{
2247 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
2248 unsigned int bytes = 0;
2249 struct bio *bio;
2250
2251 if (!(rq->cmd_flags & REQ_MIXED_MERGE))
2252 return blk_rq_bytes(rq);
2253
2254
2255
2256
2257
2258
2259
2260
2261 for (bio = rq->bio; bio; bio = bio->bi_next) {
2262 if ((bio->bi_rw & ff) != ff)
2263 break;
2264 bytes += bio->bi_iter.bi_size;
2265 }
2266
2267
2268 BUG_ON(blk_rq_bytes(rq) && !bytes);
2269 return bytes;
2270}
2271EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
2272
2273void blk_account_io_completion(struct request *req, unsigned int bytes)
2274{
2275 if (blk_do_io_stat(req)) {
2276 const int rw = rq_data_dir(req);
2277 struct hd_struct *part;
2278 int cpu;
2279
2280 cpu = part_stat_lock();
2281 part = req->part;
2282 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
2283 part_stat_unlock();
2284 }
2285}
2286
2287void blk_account_io_done(struct request *req)
2288{
2289
2290
2291
2292
2293
2294 if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
2295 unsigned long duration = jiffies - req->start_time;
2296 const int rw = rq_data_dir(req);
2297 struct hd_struct *part;
2298 int cpu;
2299
2300 cpu = part_stat_lock();
2301 part = req->part;
2302
2303 part_stat_inc(cpu, part, ios[rw]);
2304 part_stat_add(cpu, part, ticks[rw], duration);
2305 part_round_stats(cpu, part);
2306 part_dec_in_flight(part, rw);
2307
2308 hd_struct_put(part);
2309 part_stat_unlock();
2310 }
2311}
2312
2313#ifdef CONFIG_PM
2314
2315
2316
2317
2318static struct request *blk_pm_peek_request(struct request_queue *q,
2319 struct request *rq)
2320{
2321 if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
2322 (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM))))
2323 return NULL;
2324 else
2325 return rq;
2326}
2327#else
2328static inline struct request *blk_pm_peek_request(struct request_queue *q,
2329 struct request *rq)
2330{
2331 return rq;
2332}
2333#endif
2334
2335void blk_account_io_start(struct request *rq, bool new_io)
2336{
2337 struct hd_struct *part;
2338 int rw = rq_data_dir(rq);
2339 int cpu;
2340
2341 if (!blk_do_io_stat(rq))
2342 return;
2343
2344 cpu = part_stat_lock();
2345
2346 if (!new_io) {
2347 part = rq->part;
2348 part_stat_inc(cpu, part, merges[rw]);
2349 } else {
2350 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
2351 if (!hd_struct_try_get(part)) {
2352
2353
2354
2355
2356
2357
2358
2359
2360 part = &rq->rq_disk->part0;
2361 hd_struct_get(part);
2362 }
2363 part_round_stats(cpu, part);
2364 part_inc_in_flight(part, rw);
2365 rq->part = part;
2366 }
2367
2368 part_stat_unlock();
2369}
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387struct request *blk_peek_request(struct request_queue *q)
2388{
2389 struct request *rq;
2390 int ret;
2391
2392 while ((rq = __elv_next_request(q)) != NULL) {
2393
2394 rq = blk_pm_peek_request(q, rq);
2395 if (!rq)
2396 break;
2397
2398 if (!(rq->cmd_flags & REQ_STARTED)) {
2399
2400
2401
2402
2403
2404 if (rq->cmd_flags & REQ_SORTED)
2405 elv_activate_rq(q, rq);
2406
2407
2408
2409
2410
2411
2412 rq->cmd_flags |= REQ_STARTED;
2413 trace_block_rq_issue(q, rq);
2414 }
2415
2416 if (!q->boundary_rq || q->boundary_rq == rq) {
2417 q->end_sector = rq_end_sector(rq);
2418 q->boundary_rq = NULL;
2419 }
2420
2421 if (rq->cmd_flags & REQ_DONTPREP)
2422 break;
2423
2424 if (q->dma_drain_size && blk_rq_bytes(rq)) {
2425
2426
2427
2428
2429
2430
2431 rq->nr_phys_segments++;
2432 }
2433
2434 if (!q->prep_rq_fn)
2435 break;
2436
2437 ret = q->prep_rq_fn(q, rq);
2438 if (ret == BLKPREP_OK) {
2439 break;
2440 } else if (ret == BLKPREP_DEFER) {
2441
2442
2443
2444
2445
2446
2447 if (q->dma_drain_size && blk_rq_bytes(rq) &&
2448 !(rq->cmd_flags & REQ_DONTPREP)) {
2449
2450
2451
2452
2453 --rq->nr_phys_segments;
2454 }
2455
2456 rq = NULL;
2457 break;
2458 } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
2459 int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
2460
2461 rq->cmd_flags |= REQ_QUIET;
2462
2463
2464
2465
2466 blk_start_request(rq);
2467 __blk_end_request_all(rq, err);
2468 } else {
2469 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
2470 break;
2471 }
2472 }
2473
2474 return rq;
2475}
2476EXPORT_SYMBOL(blk_peek_request);
2477
2478void blk_dequeue_request(struct request *rq)
2479{
2480 struct request_queue *q = rq->q;
2481
2482 BUG_ON(list_empty(&rq->queuelist));
2483 BUG_ON(ELV_ON_HASH(rq));
2484
2485 list_del_init(&rq->queuelist);
2486
2487
2488
2489
2490
2491
2492 if (blk_account_rq(rq)) {
2493 q->in_flight[rq_is_sync(rq)]++;
2494 set_io_start_time_ns(rq);
2495 }
2496}
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512void blk_start_request(struct request *req)
2513{
2514 blk_dequeue_request(req);
2515
2516
2517
2518
2519
2520 req->resid_len = blk_rq_bytes(req);
2521 if (unlikely(blk_bidi_rq(req)))
2522 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
2523
2524 BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
2525 blk_add_timer(req);
2526}
2527EXPORT_SYMBOL(blk_start_request);
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544struct request *blk_fetch_request(struct request_queue *q)
2545{
2546 struct request *rq;
2547
2548 rq = blk_peek_request(q);
2549 if (rq)
2550 blk_start_request(rq);
2551 return rq;
2552}
2553EXPORT_SYMBOL(blk_fetch_request);
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
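/**
 * blk_update_request - complete @nr_bytes of a request without freeing it
 * @req:	the request being processed
 * @error:	%0 for success, < %0 for error
 * @nr_bytes:	number of bytes to complete
 *
 * Ends I/O on the leading @nr_bytes of @req, finishing the covered bios and
 * updating the accounting, sector and length fields for whatever remains.
 *
 * Return:
 *	%false - the request has no more data to transfer
 *	%true  - the request still has outstanding data
 */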
2577bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2578{
2579 int total_bytes;
2580
2581 trace_block_rq_complete(req->q, req, nr_bytes);
2582
2583 if (!req->bio)
2584 return false;
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594 if (req->cmd_type == REQ_TYPE_FS)
2595 req->errors = 0;
2596
2597 if (error && req->cmd_type == REQ_TYPE_FS &&
2598 !(req->cmd_flags & REQ_QUIET)) {
2599 char *error_type;
2600
2601 switch (error) {
2602 case -ENOLINK:
2603 error_type = "recoverable transport";
2604 break;
2605 case -EREMOTEIO:
2606 error_type = "critical target";
2607 break;
2608 case -EBADE:
2609 error_type = "critical nexus";
2610 break;
2611 case -ETIMEDOUT:
2612 error_type = "timeout";
2613 break;
2614 case -ENOSPC:
2615 error_type = "critical space allocation";
2616 break;
2617 case -ENODATA:
2618 error_type = "critical medium";
2619 break;
2620 case -EIO:
2621 default:
2622 error_type = "I/O";
2623 break;
2624 }
2625 printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
2626 __func__, error_type, req->rq_disk ?
2627 req->rq_disk->disk_name : "?",
2628 (unsigned long long)blk_rq_pos(req));
2629
2630 }
2631
2632 blk_account_io_completion(req, nr_bytes);
2633
2634 total_bytes = 0;
2635 while (req->bio) {
2636 struct bio *bio = req->bio;
2637 unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
2638
2639 if (bio_bytes == bio->bi_iter.bi_size)
2640 req->bio = bio->bi_next;
2641
2642 req_bio_endio(req, bio, bio_bytes, error);
2643
2644 total_bytes += bio_bytes;
2645 nr_bytes -= bio_bytes;
2646
2647 if (!nr_bytes)
2648 break;
2649 }
2650
2651
2652
2653
2654 if (!req->bio) {
2655
2656
2657
2658
2659
2660 req->__data_len = 0;
2661 return false;
2662 }
2663
2664 req->__data_len -= total_bytes;
2665
2666
2667 if (req->cmd_type == REQ_TYPE_FS)
2668 req->__sector += total_bytes >> 9;
2669
2670
2671 if (req->cmd_flags & REQ_MIXED_MERGE) {
2672 req->cmd_flags &= ~REQ_FAILFAST_MASK;
2673 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
2674 }
2675
2676
2677
2678
2679
2680 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
2681 blk_dump_rq_flags(req, "request botched");
2682 req->__data_len = blk_rq_cur_bytes(req);
2683 }
2684
2685
2686 blk_recalc_rq_segments(req);
2687
2688 return true;
2689}
2690EXPORT_SYMBOL_GPL(blk_update_request);
2691
2692static bool blk_update_bidi_request(struct request *rq, int error,
2693 unsigned int nr_bytes,
2694 unsigned int bidi_bytes)
2695{
2696 if (blk_update_request(rq, error, nr_bytes))
2697 return true;
2698
2699
2700 if (unlikely(blk_bidi_rq(rq)) &&
2701 blk_update_request(rq->next_rq, error, bidi_bytes))
2702 return true;
2703
2704 if (blk_queue_add_random(rq->q))
2705 add_disk_randomness(rq->rq_disk);
2706
2707 return false;
2708}
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720void blk_unprep_request(struct request *req)
2721{
2722 struct request_queue *q = req->q;
2723
2724 req->cmd_flags &= ~REQ_DONTPREP;
2725 if (q->unprep_rq_fn)
2726 q->unprep_rq_fn(q, req);
2727}
2728EXPORT_SYMBOL_GPL(blk_unprep_request);
2729
2730
2731
2732
2733void blk_finish_request(struct request *req, int error)
2734{
2735 if (req->cmd_flags & REQ_QUEUED)
2736 blk_queue_end_tag(req->q, req);
2737
2738 BUG_ON(blk_queued_rq(req));
2739
2740 if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
2741 laptop_io_completion(&req->q->backing_dev_info);
2742
2743 blk_delete_timer(req);
2744
2745 if (req->cmd_flags & REQ_DONTPREP)
2746 blk_unprep_request(req);
2747
2748 blk_account_io_done(req);
2749
2750 if (req->end_io)
2751 req->end_io(req, error);
2752 else {
2753 if (blk_bidi_rq(req))
2754 __blk_put_request(req->next_rq->q, req->next_rq);
2755
2756 __blk_put_request(req->q, req);
2757 }
2758}
2759EXPORT_SYMBOL(blk_finish_request);
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778static bool blk_end_bidi_request(struct request *rq, int error,
2779 unsigned int nr_bytes, unsigned int bidi_bytes)
2780{
2781 struct request_queue *q = rq->q;
2782 unsigned long flags;
2783
2784 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2785 return true;
2786
2787 spin_lock_irqsave(q->queue_lock, flags);
2788 blk_finish_request(rq, error);
2789 spin_unlock_irqrestore(q->queue_lock, flags);
2790
2791 return false;
2792}
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809bool __blk_end_bidi_request(struct request *rq, int error,
2810 unsigned int nr_bytes, unsigned int bidi_bytes)
2811{
2812 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2813 return true;
2814
2815 blk_finish_request(rq, error);
2816
2817 return false;
2818}
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2835{
2836 return blk_end_bidi_request(rq, error, nr_bytes, 0);
2837}
2838EXPORT_SYMBOL(blk_end_request);
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848void blk_end_request_all(struct request *rq, int error)
2849{
2850 bool pending;
2851 unsigned int bidi_bytes = 0;
2852
2853 if (unlikely(blk_bidi_rq(rq)))
2854 bidi_bytes = blk_rq_bytes(rq->next_rq);
2855
2856 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2857 BUG_ON(pending);
2858}
2859EXPORT_SYMBOL(blk_end_request_all);
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873bool blk_end_request_cur(struct request *rq, int error)
2874{
2875 return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2876}
2877EXPORT_SYMBOL(blk_end_request_cur);
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891bool blk_end_request_err(struct request *rq, int error)
2892{
2893 WARN_ON(error >= 0);
2894 return blk_end_request(rq, error, blk_rq_err_bytes(rq));
2895}
2896EXPORT_SYMBOL_GPL(blk_end_request_err);
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2912{
2913 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2914}
2915EXPORT_SYMBOL(__blk_end_request);
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925void __blk_end_request_all(struct request *rq, int error)
2926{
2927 bool pending;
2928 unsigned int bidi_bytes = 0;
2929
2930 if (unlikely(blk_bidi_rq(rq)))
2931 bidi_bytes = blk_rq_bytes(rq->next_rq);
2932
2933 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2934 BUG_ON(pending);
2935}
2936EXPORT_SYMBOL(__blk_end_request_all);
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951bool __blk_end_request_cur(struct request *rq, int error)
2952{
2953 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2954}
2955EXPORT_SYMBOL(__blk_end_request_cur);
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970bool __blk_end_request_err(struct request *rq, int error)
2971{
2972 WARN_ON(error >= 0);
2973 return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
2974}
2975EXPORT_SYMBOL_GPL(__blk_end_request_err);
2976
2977void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2978 struct bio *bio)
2979{
2980
2981 rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
2982
2983 if (bio_has_data(bio))
2984 rq->nr_phys_segments = bio_phys_segments(q, bio);
2985
2986 rq->__data_len = bio->bi_iter.bi_size;
2987 rq->bio = rq->biotail = bio;
2988
2989 if (bio->bi_bdev)
2990 rq->rq_disk = bio->bi_bdev->bd_disk;
2991}
2992
2993#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
2994
2995
2996
2997
2998
2999
3000
3001void rq_flush_dcache_pages(struct request *rq)
3002{
3003 struct req_iterator iter;
3004 struct bio_vec bvec;
3005
3006 rq_for_each_segment(bvec, rq, iter)
3007 flush_dcache_page(bvec.bv_page);
3008}
3009EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
3010#endif
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031int blk_lld_busy(struct request_queue *q)
3032{
3033 if (q->lld_busy_fn)
3034 return q->lld_busy_fn(q);
3035
3036 return 0;
3037}
3038EXPORT_SYMBOL_GPL(blk_lld_busy);
3039
3040
3041
3042
3043
3044
3045
3046
3047void blk_rq_unprep_clone(struct request *rq)
3048{
3049 struct bio *bio;
3050
3051 while ((bio = rq->bio) != NULL) {
3052 rq->bio = bio->bi_next;
3053
3054 bio_put(bio);
3055 }
3056}
3057EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
3058
3059
3060
3061
3062
static void __blk_rq_prep_clone(struct request *dst, struct request *src)
{
        dst->cpu = src->cpu;
        dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
        dst->cmd_type = src->cmd_type;
        dst->__sector = blk_rq_pos(src);
        dst->__data_len = blk_rq_bytes(src);
        dst->nr_phys_segments = src->nr_phys_segments;
        dst->ioprio = src->ioprio;
        dst->extra_len = src->extra_len;
}
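
/**
 * blk_rq_prep_clone - Helper function to setup clone request
 * @rq: the request to be setup
 * @rq_src: original request to be cloned
 * @bs: bio_set that bios for clone are allocated from
 * @gfp_mask: memory allocation mask for bio
 * @bio_ctr: setup function to be called for each clone bio.
 *           Returns %0 for success, non %0 for failure.
 * @data: private data to be passed to @bio_ctr
 *
 * Description:
 *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
 *     Pages are not copied; the cloned bios point at the same pages as the
 *     originals, so the caller must complete @rq before @rq_src.
 */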
int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
                      struct bio_set *bs, gfp_t gfp_mask,
                      int (*bio_ctr)(struct bio *, struct bio *, void *),
                      void *data)
{
        struct bio *bio, *bio_src;

        if (!bs)
                bs = fs_bio_set;

        __rq_for_each_bio(bio_src, rq_src) {
                bio = bio_clone_fast(bio_src, gfp_mask, bs);
                if (!bio)
                        goto free_and_out;

                if (bio_ctr && bio_ctr(bio, bio_src, data))
                        goto free_and_out;

                if (rq->bio) {
                        rq->biotail->bi_next = bio;
                        rq->biotail = bio;
                } else
                        rq->bio = rq->biotail = bio;
        }

        __blk_rq_prep_clone(rq, rq_src);

        return 0;

free_and_out:
        if (bio)
                bio_put(bio);
        blk_rq_unprep_clone(rq);

        return -ENOMEM;
}
EXPORT_SYMBOL_GPL(blk_rq_prep_clone);

int kblockd_schedule_work(struct work_struct *work)
{
        return queue_work(kblockd_workqueue, work);
}
EXPORT_SYMBOL(kblockd_schedule_work);

int kblockd_schedule_delayed_work(struct delayed_work *dwork,
                                  unsigned long delay)
{
        return queue_delayed_work(kblockd_workqueue, dwork, delay);
}
EXPORT_SYMBOL(kblockd_schedule_delayed_work);

int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
                                     unsigned long delay)
{
        return queue_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
}
EXPORT_SYMBOL(kblockd_schedule_delayed_work_on);
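
/**
 * blk_start_plug - initialize blk_plug and track it inside the task_struct
 * @plug: The &struct blk_plug that needs to be initialized
 *
 * Description:
 *   Tracking blk_plug inside the task_struct will help with auto-flushing the
 *   pending I/O should the task end up blocking between blk_start_plug() and
 *   blk_finish_plug().  This is important from a performance perspective, and
 *   it also avoids deadlocks: if the task blocks waiting for memory, reclaim
 *   may need I/O that is still sitting in this private plug list.
 */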
void blk_start_plug(struct blk_plug *plug)
{
        struct task_struct *tsk = current;

        /*
         * If this is a nested plug, don't actually assign it.
         */
        if (tsk->plug)
                return;

        INIT_LIST_HEAD(&plug->list);
        INIT_LIST_HEAD(&plug->mq_list);
        INIT_LIST_HEAD(&plug->cb_list);

        /*
         * Store ordering should not be needed here, since a potential
         * preempt will imply a full memory barrier.
         */
        tsk->plug = plug;
}
EXPORT_SYMBOL(blk_start_plug);

static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
{
        struct request *rqa = container_of(a, struct request, queuelist);
        struct request *rqb = container_of(b, struct request, queuelist);

        return !(rqa->q < rqb->q ||
                (rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb)));
}
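
/*
 * If 'from_schedule' is true, punt the actual queue run to kblockd instead
 * of running it directly here, so that we don't add to the stack depth of
 * the context that originally plugged the I/O.
 */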
static void queue_unplugged(struct request_queue *q, unsigned int depth,
                            bool from_schedule)
        __releases(q->queue_lock)
{
        trace_block_unplug(q, depth, !from_schedule);

        if (from_schedule)
                blk_run_queue_async(q);
        else
                __blk_run_queue(q);
        spin_unlock(q->queue_lock);
}

static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
{
        LIST_HEAD(callbacks);

        while (!list_empty(&plug->cb_list)) {
                list_splice_init(&plug->cb_list, &callbacks);

                while (!list_empty(&callbacks)) {
                        struct blk_plug_cb *cb = list_first_entry(&callbacks,
                                                          struct blk_plug_cb,
                                                          list);
                        list_del(&cb->list);
                        cb->callback(cb, from_schedule);
                }
        }
}
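
/*
 * Look up an existing plug callback with the given @unplug function and
 * @data on the current task's plug, or allocate and register a new one.
 * Returns NULL if the task is not currently plugged.
 */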
struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
                                      int size)
{
        struct blk_plug *plug = current->plug;
        struct blk_plug_cb *cb;

        if (!plug)
                return NULL;

        list_for_each_entry(cb, &plug->cb_list, list)
                if (cb->callback == unplug && cb->data == data)
                        return cb;

        /* Not currently on the callback list */
        BUG_ON(size < sizeof(*cb));
        cb = kzalloc(size, GFP_ATOMIC);
        if (cb) {
                cb->data = data;
                cb->callback = unplug;
                list_add(&cb->list, &plug->cb_list);
        }
        return cb;
}
EXPORT_SYMBOL(blk_check_plugged);

void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
        struct request_queue *q;
        unsigned long flags;
        struct request *rq;
        LIST_HEAD(list);
        unsigned int depth;

        flush_plug_callbacks(plug, from_schedule);

        if (!list_empty(&plug->mq_list))
                blk_mq_flush_plug_list(plug, from_schedule);

        if (list_empty(&plug->list))
                return;

        list_splice_init(&plug->list, &list);

        list_sort(NULL, &list, plug_rq_cmp);

        q = NULL;
        depth = 0;

        /*
         * Save and disable interrupts here, to avoid doing it for every
         * queue lock we have to take.
         */
        local_irq_save(flags);
        while (!list_empty(&list)) {
                rq = list_entry_rq(list.next);
                list_del_init(&rq->queuelist);
                BUG_ON(!rq->q);
                if (rq->q != q) {
                        /*
                         * This drops the queue lock
                         */
                        if (q)
                                queue_unplugged(q, depth, from_schedule);
                        q = rq->q;
                        depth = 0;
                        spin_lock(q->queue_lock);
                }

                /*
                 * Short-circuit if @q is dead
                 */
                if (unlikely(blk_queue_dying(q))) {
                        __blk_end_request_all(rq, -ENODEV);
                        continue;
                }

                /*
                 * rq is already accounted, so use raw insert
                 */
                if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
                        __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
                else
                        __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);

                depth++;
        }

        /*
         * This drops the queue lock
         */
        if (q)
                queue_unplugged(q, depth, from_schedule);

        local_irq_restore(flags);
}

void blk_finish_plug(struct blk_plug *plug)
{
        if (plug != current->plug)
                return;
        blk_flush_plug_list(plug, false);

        current->plug = NULL;
}
EXPORT_SYMBOL(blk_finish_plug);
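
/*
 * Poll the hardware queue that @cookie maps to for a completion, spinning
 * until one is found, the task needs to reschedule, or a signal makes the
 * task runnable.  Returns true if a completion was found or the task was
 * set running; false otherwise.
 */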
bool blk_poll(struct request_queue *q, blk_qc_t cookie)
{
        struct blk_plug *plug;
        long state;

        if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) ||
            !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
                return false;

        plug = current->plug;
        if (plug)
                blk_flush_plug_list(plug, false);

        state = current->state;
        while (!need_resched()) {
                unsigned int queue_num = blk_qc_t_to_queue_num(cookie);
                struct blk_mq_hw_ctx *hctx = q->queue_hw_ctx[queue_num];
                int ret;

                hctx->poll_invoked++;

                ret = q->mq_ops->poll(hctx, blk_qc_t_to_tag(cookie));
                if (ret > 0) {
                        hctx->poll_success++;
                        set_current_state(TASK_RUNNING);
                        return true;
                }

                if (signal_pending_state(state, current))
                        set_current_state(TASK_RUNNING);

                if (current->state == TASK_RUNNING)
                        return true;
                if (ret < 0)
                        break;
                cpu_relax();
        }

        return false;
}

#ifdef CONFIG_PM
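/**
 * blk_pm_runtime_init - Block layer runtime PM initialization routine
 * @q: the queue of the device
 * @dev: the device the queue belongs to
 *
 * Description:
 *    Initialize runtime-PM-related fields for @q and start autosuspend for
 *    @dev.  The autosuspend delay is initialized to -1, which makes runtime
 *    suspend impossible until a real delay is set by the user or the driver.
 */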
void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
{
        q->dev = dev;
        q->rpm_status = RPM_ACTIVE;
        pm_runtime_set_autosuspend_delay(q->dev, -1);
        pm_runtime_use_autosuspend(q->dev);
}
EXPORT_SYMBOL(blk_pm_runtime_init);
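
/**
 * blk_pre_runtime_suspend - Pre runtime suspend check
 * @q: the queue of the device
 *
 * Description:
 *    Check whether the device can be runtime suspended: if requests are
 *    still pending on the queue, mark the device busy and refuse the
 *    suspend; otherwise move the queue to the SUSPENDING state.
 *
 * Return:
 *    0      - OK to runtime suspend the device
 *    -EBUSY - Device should not be runtime suspended
 */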
int blk_pre_runtime_suspend(struct request_queue *q)
{
        int ret = 0;

        if (!q->dev)
                return ret;

        spin_lock_irq(q->queue_lock);
        if (q->nr_pending) {
                ret = -EBUSY;
                pm_runtime_mark_last_busy(q->dev);
        } else {
                q->rpm_status = RPM_SUSPENDING;
        }
        spin_unlock_irq(q->queue_lock);
        return ret;
}
EXPORT_SYMBOL(blk_pre_runtime_suspend);
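
/**
 * blk_post_runtime_suspend - Post runtime suspend processing
 * @q: the queue of the device
 * @err: return value of the device's runtime_suspend function
 *
 * Description:
 *    Update the queue's runtime status according to the return value of the
 *    device's runtime_suspend callback.  On failure the queue goes back to
 *    the ACTIVE state and the device is marked busy again so that the PM
 *    core retries autosuspend later.
 */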
void blk_post_runtime_suspend(struct request_queue *q, int err)
{
        if (!q->dev)
                return;

        spin_lock_irq(q->queue_lock);
        if (!err) {
                q->rpm_status = RPM_SUSPENDED;
        } else {
                q->rpm_status = RPM_ACTIVE;
                pm_runtime_mark_last_busy(q->dev);
        }
        spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_post_runtime_suspend);
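
/**
 * blk_pre_runtime_resume - Pre runtime resume processing
 * @q: the queue of the device
 *
 * Description:
 *    Update the queue's runtime status to RESUMING in preparation for the
 *    runtime resume of the device.
 */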
void blk_pre_runtime_resume(struct request_queue *q)
{
        if (!q->dev)
                return;

        spin_lock_irq(q->queue_lock);
        q->rpm_status = RPM_RESUMING;
        spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_pre_runtime_resume);
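
/**
 * blk_post_runtime_resume - Post runtime resume processing
 * @q: the queue of the device
 * @err: return value of the device's runtime_resume function
 *
 * Description:
 *    On successful resume, mark the queue ACTIVE, run any requests that were
 *    queued while the device was suspended, and re-arm autosuspend.  On
 *    failure the queue is marked SUSPENDED again.
 */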
void blk_post_runtime_resume(struct request_queue *q, int err)
{
        if (!q->dev)
                return;

        spin_lock_irq(q->queue_lock);
        if (!err) {
                q->rpm_status = RPM_ACTIVE;
                __blk_run_queue(q);
                pm_runtime_mark_last_busy(q->dev);
                pm_request_autosuspend(q->dev);
        } else {
                q->rpm_status = RPM_SUSPENDED;
        }
        spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_post_runtime_resume);
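
/**
 * blk_set_runtime_active - Force runtime status of the queue to be active
 * @q: the queue of the device
 *
 * Description:
 *    Force the queue's runtime PM status back to ACTIVE and re-arm
 *    autosuspend.  Useful when the device has been resumed outside of the
 *    block layer runtime PM helpers and the queue status needs to be
 *    brought back in sync.
 */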
void blk_set_runtime_active(struct request_queue *q)
{
        spin_lock_irq(q->queue_lock);
        q->rpm_status = RPM_ACTIVE;
        pm_runtime_mark_last_busy(q->dev);
        pm_request_autosuspend(q->dev);
        spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_set_runtime_active);
#endif

int __init blk_dev_init(void)
{
        BUILD_BUG_ON(__REQ_NR_BITS > 8 *
                        FIELD_SIZEOF(struct request, cmd_flags));

        /* used for unplugging and affects IO latency/throughput - HIGHPRI */
        kblockd_workqueue = alloc_workqueue("kblockd",
                                            WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
        if (!kblockd_workqueue)
                panic("Failed to create kblockd\n");

        request_cachep = kmem_cache_create("blkdev_requests",
                        sizeof(struct request), 0, SLAB_PANIC, NULL);

        blk_requestq_cachep = kmem_cache_create("request_queue",
                        sizeof(struct request_queue), 0, SLAB_PANIC, NULL);

        return 0;
}