/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
 * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
 * Queue ordering code, (C) 2000 Jens Axboe <axboe@kernel.dk>
 * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au>
 *	-  July2000
 * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
 */

/*
 * This handles all read/write requests to block devices
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>
#include <linux/list_sort.h>
#include <linux/delay.h>
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);

DEFINE_IDA(blk_queue_ida);

/*
 * For the allocated request tables
 */
struct kmem_cache *request_cachep;

/*
 * For queue allocation
 */
struct kmem_cache *blk_requestq_cachep;

/*
 * Controlling structure to kblockd
 */
static struct workqueue_struct *kblockd_workqueue;

static void blk_clear_congested(struct request_list *rl, int sync)
{
#ifdef CONFIG_CGROUP_WRITEBACK
	clear_wb_congested(rl->blkg->wb_congested, sync);
#else
	/*
	 * If !CGROUP_WRITEBACK, all blkg's map to bdi->wb and we shouldn't
	 * flip its congestion state for events on other blkcgs.
	 */
	if (rl == &rl->q->root_rl)
		clear_wb_congested(rl->q->backing_dev_info.wb.congested, sync);
#endif
}

static void blk_set_congested(struct request_list *rl, int sync)
{
#ifdef CONFIG_CGROUP_WRITEBACK
	set_wb_congested(rl->blkg->wb_congested, sync);
#else
	/* see blk_clear_congested() */
	if (rl == &rl->q->root_rl)
		set_wb_congested(rl->q->backing_dev_info.wb.congested, sync);
#endif
}

void blk_queue_congestion_threshold(struct request_queue *q)
{
	int nr;

	nr = q->nr_requests - (q->nr_requests / 8) + 1;
	if (nr > q->nr_requests)
		nr = q->nr_requests;
	q->nr_congestion_on = nr;

	nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
	if (nr < 1)
		nr = 1;
	q->nr_congestion_off = nr;
}
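
/*
 * Worked example of the thresholds above (illustrative): with the default
 * q->nr_requests = 128, the queue is marked congested once
 * 128 - 128/8 + 1 = 113 requests are allocated, and the congested state
 * is not cleared until the count falls below
 * 128 - 128/8 - 128/16 - 1 = 103.  The gap between the two thresholds is
 * deliberate hysteresis, so a queue hovering around the limit does not
 * flip its congestion state on every allocation and free.
 */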

/**
 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
 * @bdev:	device
 *
 * Locates the passed device's request queue and returns the address of its
 * backing_dev_info.  This function can only be called if @bdev is opened
 * and the return value is never NULL.
 */
struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	return &q->backing_dev_info;
}
EXPORT_SYMBOL(blk_get_backing_dev_info);

void blk_rq_init(struct request_queue *q, struct request *rq)
{
	memset(rq, 0, sizeof(*rq));

	INIT_LIST_HEAD(&rq->queuelist);
	INIT_LIST_HEAD(&rq->timeout_list);
	rq->cpu = -1;
	rq->q = q;
	rq->__sector = (sector_t) -1;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->cmd = rq->__cmd;
	rq->cmd_len = BLK_MAX_CDB;
	rq->tag = -1;
	rq->start_time = jiffies;
	set_start_time_ns(rq);
	rq->part = NULL;
}
EXPORT_SYMBOL(blk_rq_init);

static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, int error)
{
	if (error)
		bio->bi_error = error;

	if (unlikely(rq->cmd_flags & REQ_QUIET))
		bio_set_flag(bio, BIO_QUIET);

	bio_advance(bio, nbytes);

	/* don't actually finish bio if it's part of flush sequence */
	if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
		bio_endio(bio);
}

void blk_dump_rq_flags(struct request *rq, char *msg)
{
	int bit;

	printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg,
		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
		(unsigned long long) rq->cmd_flags);

	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
	       (unsigned long long)blk_rq_pos(rq),
	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
	printk(KERN_INFO "  bio %p, biotail %p, len %u\n",
	       rq->bio, rq->biotail, blk_rq_bytes(rq));

	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
		printk(KERN_INFO "  cdb: ");
		for (bit = 0; bit < BLK_MAX_CDB; bit++)
			printk("%02x ", rq->cmd[bit]);
		printk("\n");
	}
}
EXPORT_SYMBOL(blk_dump_rq_flags);

static void blk_delay_work(struct work_struct *work)
{
	struct request_queue *q;

	q = container_of(work, struct request_queue, delay_work.work);
	spin_lock_irq(q->queue_lock);
	__blk_run_queue(q);
	spin_unlock_irq(q->queue_lock);
}

/**
 * blk_delay_queue - restart queueing after defined interval
 * @q:		The &struct request_queue in question
 * @msecs:	Delay in msecs
 *
 * Description:
 *   Sometimes queueing needs to be postponed for a little while, to allow
 *   resources to come back. This function will make sure that queueing is
 *   restarted around the specified time. Queue lock must be held.
 */
void blk_delay_queue(struct request_queue *q, unsigned long msecs)
{
	if (likely(!blk_queue_dead(q)))
		queue_delayed_work(kblockd_workqueue, &q->delay_work,
				   msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_delay_queue);

/**
 * blk_start_queue_async - asynchronously restart a previously stopped queue
 * @q:    The &struct request_queue in question
 *
 * Description:
 *   blk_start_queue_async() will clear the stop flag on the queue, and
 *   ensure that the request_fn for the queue is run from an async
 *   context.
 **/
void blk_start_queue_async(struct request_queue *q)
{
	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
	blk_run_queue_async(q);
}
EXPORT_SYMBOL(blk_start_queue_async);

/**
 * blk_start_queue - restart a previously stopped queue
 * @q:    The &struct request_queue in question
 *
 * Description:
 *   blk_start_queue() will clear the stop flag on the queue, and call
 *   the request_fn for the queue if it was in a stopped state when
 *   entered. Also see blk_stop_queue(). Queue lock must be held.
 **/
void blk_start_queue(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
	__blk_run_queue(q);
}
EXPORT_SYMBOL(blk_start_queue);

/**
 * blk_stop_queue - stop a queue
 * @q:    The &struct request_queue in question
 *
 * Description:
 *   The Linux block layer assumes that a block driver will consume all
 *   entries on the request queue when the request_fn strategy is called.
 *   Often this will not happen, because of hardware limitations (queue
 *   depth settings). If a device driver gets a 'queue full' response,
 *   or if it simply chooses not to queue more I/O at one point, it can
 *   call this function to prevent the request_fn from being called until
 *   the driver has signalled it's ready to go again. This happens by calling
 *   blk_start_queue() to restart queue operations. Queue lock must be held.
 **/
void blk_stop_queue(struct request_queue *q)
{
	cancel_delayed_work(&q->delay_work);
	queue_flag_set(QUEUE_FLAG_STOPPED, q);
}
EXPORT_SYMBOL(blk_stop_queue);

/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * Description:
 *     The block layer may perform asynchronous callback activity
 *     on a queue, such as calling the unplug function after a timeout.
 *     A block device may call blk_sync_queue to ensure that any
 *     such activity is cancelled, thus allowing it to release resources
 *     that the callbacks might use. The caller must already have made sure
 *     that its ->make_request_fn will not re-add plugging prior to calling
 *     this function.
 *
 *     This function does not cancel any asynchronous activity arising
 *     out of elevator or throttling code. That would require elevator_exit()
 *     and blkcg_exit_queue() to be called with queue lock initialized.
 */
void blk_sync_queue(struct request_queue *q)
{
	del_timer_sync(&q->timeout);

	if (q->mq_ops) {
		struct blk_mq_hw_ctx *hctx;
		int i;

		queue_for_each_hw_ctx(q, hctx, i) {
			cancel_work_sync(&hctx->run_work);
			cancel_delayed_work_sync(&hctx->delay_work);
		}
	} else {
		cancel_delayed_work_sync(&q->delay_work);
	}
}
EXPORT_SYMBOL(blk_sync_queue);

/**
 * __blk_run_queue_uncond - run a queue whether or not it has been stopped
 * @q:	The queue to run
 *
 * Description:
 *    Invoke request handling on a queue if there are any pending requests.
 *    May be used to restart request handling after a request has completed.
 *    This variant runs the queue whether or not the queue has been
 *    stopped. Must be called with the queue lock held and interrupts
 *    disabled. See also @blk_run_queue.
 */
inline void __blk_run_queue_uncond(struct request_queue *q)
{
	if (unlikely(blk_queue_dead(q)))
		return;

	/*
	 * Some request_fn implementations, e.g. scsi_request_fn(), unlock
	 * the queue lock internally. As a result multiple threads may be
	 * running such a request function concurrently. Keep track of the
	 * number of active request_fn invocations such that blk_drain_queue()
	 * can wait until all these request_fn calls have finished.
	 */
	q->request_fn_active++;
	q->request_fn(q);
	q->request_fn_active--;
}
EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);

/**
 * __blk_run_queue - run a single device queue
 * @q:	The queue to run
 *
 * Description:
 *    See @blk_run_queue. This variant must be called with the queue lock
 *    held and interrupts disabled.
 */
void __blk_run_queue(struct request_queue *q)
{
	if (unlikely(blk_queue_stopped(q)))
		return;

	__blk_run_queue_uncond(q);
}
EXPORT_SYMBOL(__blk_run_queue);

/**
 * blk_run_queue_async - run a single device queue in workqueue context
 * @q:	The queue to run
 *
 * Description:
 *    Tells kblockd to perform the equivalent of @blk_run_queue from an
 *    asynchronous context. The caller must hold the queue lock.
 */
void blk_run_queue_async(struct request_queue *q)
{
	if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
		mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
}
EXPORT_SYMBOL(blk_run_queue_async);

/**
 * blk_run_queue - run a single device queue
 * @q: The queue to run
 *
 * Description:
 *    Invoke request handling on this queue, if it has pending work to do.
 *    May be used to restart queueing when a request has completed.
 */
void blk_run_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__blk_run_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_run_queue);

void blk_put_queue(struct request_queue *q)
{
	kobject_put(&q->kobj);
}
EXPORT_SYMBOL(blk_put_queue);

/**
 * __blk_drain_queue - drain requests from request_queue
 * @q: queue to drain
 * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV
 *
 * Drain requests from @q.  If @drain_all is set, all requests are drained.
 * If not, only ELVPRIV requests are drained.  The caller is responsible
 * for ensuring that no new requests which need to be drained are queued.
 */
static void __blk_drain_queue(struct request_queue *q, bool drain_all)
	__releases(q->queue_lock)
	__acquires(q->queue_lock)
{
	int i;

	lockdep_assert_held(q->queue_lock);

	while (true) {
		bool drain = false;

		/*
		 * The caller might be trying to drain @q before its
		 * elevator is initialized.
		 */
		if (q->elevator)
			elv_drain_elevator(q);

		blkcg_drain_queue(q);

		/*
		 * This function might be called on a queue which failed
		 * driver init after queue creation or is not yet fully
		 * active yet.  Some drivers (e.g. fd and loop) get unhappy
		 * in such cases.  Kick queue iff dispatch queue has
		 * something on it and @q has request_fn set.
		 */
		if (!list_empty(&q->queue_head) && q->request_fn)
			__blk_run_queue(q);

		drain |= q->nr_rqs_elvpriv;
		drain |= q->request_fn_active;

		/*
		 * Unfortunately, requests are queued at and tracked from
		 * multiple places and there's no single counter which can
		 * be drained.  Check all the queues and counters.
		 */
		if (drain_all) {
			struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
			drain |= !list_empty(&q->queue_head);
			for (i = 0; i < 2; i++) {
				drain |= q->nr_rqs[i];
				drain |= q->in_flight[i];
				if (fq)
					drain |= !list_empty(&fq->flush_queue[i]);
			}
		}

		if (!drain)
			break;

		spin_unlock_irq(q->queue_lock);

		msleep(10);

		spin_lock_irq(q->queue_lock);
	}

	/*
	 * With queue marked dead, any woken up waiter will fail the
	 * allocation path, so the wakeup chaining is lost and we're
	 * left with hung waiters. We need to wake up those waiters.
	 */
	if (q->request_fn) {
		struct request_list *rl;

		blk_queue_for_each_rl(rl, q)
			for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
				wake_up_all(&rl->wait[i]);
	}
}

/**
 * blk_queue_bypass_start - enter queue bypass mode
 * @q: queue of interest
 *
 * In bypass mode, only the dispatch FIFO queue of @q is used.  This
 * function makes @q enter bypass mode and drains all requests which were
 * throttled or issued before.  On return, it's guaranteed that no request
 * is being throttled or has ELVPRIV set and blk_queue_bypass() is %true
 * inside queue or RCU read lock.
 */
void blk_queue_bypass_start(struct request_queue *q)
{
	spin_lock_irq(q->queue_lock);
	q->bypass_depth++;
	queue_flag_set(QUEUE_FLAG_BYPASS, q);
	spin_unlock_irq(q->queue_lock);

	/*
	 * Queues start drained.  Skip actual draining till init is
	 * complete.  This avoids lengthy delays during queue init which
	 * can happen many times during boot.
	 */
	if (blk_queue_init_done(q)) {
		spin_lock_irq(q->queue_lock);
		__blk_drain_queue(q, false);
		spin_unlock_irq(q->queue_lock);

		/* ensure blk_queue_bypass() is %true inside RCU read lock */
		synchronize_rcu();
	}
}
EXPORT_SYMBOL_GPL(blk_queue_bypass_start);

/**
 * blk_queue_bypass_end - leave queue bypass mode
 * @q: queue of interest
 *
 * Leave bypass mode and restore the normal queueing behavior.
 */
void blk_queue_bypass_end(struct request_queue *q)
{
	spin_lock_irq(q->queue_lock);
	if (!--q->bypass_depth)
		queue_flag_clear(QUEUE_FLAG_BYPASS, q);
	WARN_ON_ONCE(q->bypass_depth < 0);
	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blk_queue_bypass_end);

void blk_set_queue_dying(struct request_queue *q)
{
	spin_lock_irq(q->queue_lock);
	queue_flag_set(QUEUE_FLAG_DYING, q);
	spin_unlock_irq(q->queue_lock);

	if (q->mq_ops)
		blk_mq_wake_waiters(q);
	else {
		struct request_list *rl;

		blk_queue_for_each_rl(rl, q) {
			if (rl->rq_pool) {
				wake_up(&rl->wait[BLK_RW_SYNC]);
				wake_up(&rl->wait[BLK_RW_ASYNC]);
			}
		}
	}
}
EXPORT_SYMBOL_GPL(blk_set_queue_dying);

/**
 * blk_cleanup_queue - shutdown a request queue
 * @q: request queue to shutdown
 *
 * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
 * put it.  All future requests will be failed immediately with -ENODEV.
 */
void blk_cleanup_queue(struct request_queue *q)
{
	spinlock_t *lock = q->queue_lock;

	/* mark @q DYING, no new request or merges will be allowed afterwards */
	mutex_lock(&q->sysfs_lock);
	blk_set_queue_dying(q);
	spin_lock_irq(lock);

	/*
	 * A dying queue is permanently in bypass mode till released.  Note
	 * that, unlike blk_queue_bypass_start(), we aren't performing
	 * synchronize_rcu() after entering bypass mode to avoid the delay
	 * as some drivers create and destroy a lot of queues while
	 * probing.  This is still safe because blk_release_queue() will be
	 * called only after the queue refcnt drops to zero and nothing,
	 * RCU or not, would be traversing the queue by then.
	 */
	q->bypass_depth++;
	queue_flag_set(QUEUE_FLAG_BYPASS, q);

	queue_flag_set(QUEUE_FLAG_NOMERGES, q);
	queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
	queue_flag_set(QUEUE_FLAG_DYING, q);
	spin_unlock_irq(lock);
	mutex_unlock(&q->sysfs_lock);

	/*
	 * Drain all requests queued before DYING marking. Set DEAD flag to
	 * prevent that q->request_fn() gets invoked after draining finished.
	 */
	blk_freeze_queue(q);
	spin_lock_irq(lock);
	if (!q->mq_ops)
		__blk_drain_queue(q, true);
	queue_flag_set(QUEUE_FLAG_DEAD, q);
	spin_unlock_irq(lock);

	/* for synchronous bio-based driver finish in-flight integrity i/o */
	blk_flush_integrity();

	/* @q won't process any more request, flush async actions */
	del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
	blk_sync_queue(q);

	if (q->mq_ops)
		blk_mq_free_queue(q);
	percpu_ref_exit(&q->q_usage_counter);

	spin_lock_irq(lock);
	if (q->queue_lock != &q->__queue_lock)
		q->queue_lock = &q->__queue_lock;
	spin_unlock_irq(lock);

	bdi_unregister(&q->backing_dev_info);

	/* @q is and will stay empty, shutdown and put */
	blk_put_queue(q);
}
EXPORT_SYMBOL(blk_cleanup_queue);

/* Allocate memory local to the request queue */
static void *alloc_request_struct(gfp_t gfp_mask, void *data)
{
	int nid = (int)(long)data;
	return kmem_cache_alloc_node(request_cachep, gfp_mask, nid);
}

static void free_request_struct(void *element, void *unused)
{
	kmem_cache_free(request_cachep, element);
}

int blk_init_rl(struct request_list *rl, struct request_queue *q,
		gfp_t gfp_mask)
{
	if (unlikely(rl->rq_pool))
		return 0;

	rl->q = q;
	rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
	rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
	init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
	init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);

	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, alloc_request_struct,
					  free_request_struct,
					  (void *)(long)q->node, gfp_mask,
					  q->node);
	if (!rl->rq_pool)
		return -ENOMEM;

	return 0;
}

void blk_exit_rl(struct request_list *rl)
{
	if (rl->rq_pool)
		mempool_destroy(rl->rq_pool);
}

struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
	return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
}
EXPORT_SYMBOL(blk_alloc_queue);
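
/*
 * Example (sketch, not part of this file): a bio-based driver pairs
 * blk_alloc_queue() with blk_queue_make_request() instead of going through
 * the request_fn path below.  The "mydev" names are illustrative only.
 *
 *	static blk_qc_t mydev_make_request(struct request_queue *q,
 *					   struct bio *bio)
 *	{
 *		// handle the bio directly, then signal completion
 *		bio_endio(bio);
 *		return BLK_QC_T_NONE;
 *	}
 *
 *	q = blk_alloc_queue(GFP_KERNEL);
 *	if (!q)
 *		return -ENOMEM;
 *	blk_queue_make_request(q, mydev_make_request);
 */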

int blk_queue_enter(struct request_queue *q, bool nowait)
{
	while (true) {
		int ret;

		if (percpu_ref_tryget_live(&q->q_usage_counter))
			return 0;

		if (nowait)
			return -EBUSY;

		ret = wait_event_interruptible(q->mq_freeze_wq,
				!atomic_read(&q->mq_freeze_depth) ||
				blk_queue_dying(q));
		if (blk_queue_dying(q))
			return -ENODEV;
		if (ret)
			return ret;
	}
}

void blk_queue_exit(struct request_queue *q)
{
	percpu_ref_put(&q->q_usage_counter);
}

static void blk_queue_usage_counter_release(struct percpu_ref *ref)
{
	struct request_queue *q =
		container_of(ref, struct request_queue, q_usage_counter);

	wake_up_all(&q->mq_freeze_wq);
}

static void blk_rq_timed_out_timer(unsigned long data)
{
	struct request_queue *q = (struct request_queue *)data;

	kblockd_schedule_work(&q->timeout_work);
}

struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
	struct request_queue *q;
	int err;

	q = kmem_cache_alloc_node(blk_requestq_cachep,
				gfp_mask | __GFP_ZERO, node_id);
	if (!q)
		return NULL;

	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
	if (q->id < 0)
		goto fail_q;

	q->bio_split = bioset_create(BIO_POOL_SIZE, 0);
	if (!q->bio_split)
		goto fail_id;

	q->backing_dev_info.ra_pages =
			(VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
	q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK;
	q->backing_dev_info.name = "block";
	q->node = node_id;

	err = bdi_init(&q->backing_dev_info);
	if (err)
		goto fail_split;

	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
		    laptop_mode_timer_fn, (unsigned long) q);
	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
	INIT_LIST_HEAD(&q->queue_head);
	INIT_LIST_HEAD(&q->timeout_list);
	INIT_LIST_HEAD(&q->icq_list);
#ifdef CONFIG_BLK_CGROUP
	INIT_LIST_HEAD(&q->blkg_list);
#endif
	INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);

	kobject_init(&q->kobj, &blk_queue_ktype);

	mutex_init(&q->sysfs_lock);
	spin_lock_init(&q->__queue_lock);

	/*
	 * By default initialize queue_lock to internal lock and driver can
	 * override it later if need be.
	 */
	q->queue_lock = &q->__queue_lock;

	/*
	 * A queue starts its life with bypass turned on to avoid
	 * unnecessary bypass on/off overhead and nasty surprises during
	 * init.  The initial bypass will be finished when the queue is
	 * registered by blk_register_queue().
	 */
	q->bypass_depth = 1;
	__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);

	init_waitqueue_head(&q->mq_freeze_wq);

	/*
	 * Init percpu_ref in atomic mode so that it's faster to shutdown.
	 * See blk_register_queue() for details.
	 */
	if (percpu_ref_init(&q->q_usage_counter,
				blk_queue_usage_counter_release,
				PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
		goto fail_bdi;

	if (blkcg_init_queue(q))
		goto fail_ref;

	return q;

fail_ref:
	percpu_ref_exit(&q->q_usage_counter);
fail_bdi:
	bdi_destroy(&q->backing_dev_info);
fail_split:
	bioset_free(q->bio_split);
fail_id:
	ida_simple_remove(&blk_queue_ida, q->id);
fail_q:
	kmem_cache_free(blk_requestq_cachep, q);
	return NULL;
}
EXPORT_SYMBOL(blk_alloc_queue_node);

/**
 * blk_init_queue  - prepare a request queue for use with a block device
 * @rfn:  The function to be called to process requests that have been
 *        placed on the queue.
 * @lock: Request queue spin lock
 *
 * Description:
 *    If a block device wishes to use the standard request handling procedures,
 *    which sorts requests and coalesces adjacent requests, then it must
 *    call blk_init_queue().  The function @rfn will be called when there
 *    are requests on the queue that need to be processed.  If the device
 *    supports plugging, then @rfn may not be called immediately when requests
 *    are available on the queue, but may be called at some time later instead.
 *    Plugged queues are generally unplugged when a buffer belonging to one
 *    of the requests on the queue is needed, or due to memory pressure.
 *
 *    @rfn is not required, or even expected, to remove all requests off the
 *    queue, but only as many as it can handle at a time.  If it does leave
 *    requests on the queue, it is responsible for arranging that the requests
 *    get dealt with eventually.
 *
 *    The queue spin lock must be held while manipulating the requests on the
 *    request queue; this lock will be taken also from interrupt context, so irq
 *    disabling is needed for it.
 *
 *    Function returns a pointer to the initialized request queue, or %NULL if
 *    it didn't succeed.
 *
 * Note:
 *    blk_init_queue() must be paired with a blk_cleanup_queue() call
 *    when the block device is deactivated (such as at module unload).
 **/
struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
{
	return blk_init_queue_node(rfn, lock, NUMA_NO_NODE);
}
EXPORT_SYMBOL(blk_init_queue);
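
/*
 * Example (sketch, not part of this file): a request_fn-style driver
 * typically sets its queue up as shown; "mydev" names are illustrative.
 *
 *	static DEFINE_SPINLOCK(mydev_lock);
 *
 *	q = blk_init_queue(mydev_request_fn, &mydev_lock);
 *	if (!q)
 *		return -ENOMEM;
 *	...
 *	blk_cleanup_queue(q);	// on teardown
 */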

struct request_queue *
blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
	struct request_queue *uninit_q, *q;

	uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
	if (!uninit_q)
		return NULL;

	q = blk_init_allocated_queue(uninit_q, rfn, lock);
	if (!q)
		blk_cleanup_queue(uninit_q);

	return q;
}
EXPORT_SYMBOL(blk_init_queue_node);

static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);

struct request_queue *
blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
			 spinlock_t *lock)
{
	if (!q)
		return NULL;

	q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
	if (!q->fq)
		return NULL;

	if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
		goto fail;

	INIT_WORK(&q->timeout_work, blk_timeout_work);
	q->request_fn = rfn;
	q->prep_rq_fn = NULL;
	q->unprep_rq_fn = NULL;
	q->queue_flags |= QUEUE_FLAG_DEFAULT;

	/* Override internal queue lock with supplied lock pointer */
	if (lock)
		q->queue_lock = lock;

	/*
	 * This also sets hw/phys segments, boundary and size
	 */
	blk_queue_make_request(q, blk_queue_bio);

	q->sg_reserved_size = INT_MAX;

	/* Protect q->elevator from elevator_change */
	mutex_lock(&q->sysfs_lock);

	/* init elevator */
	if (elevator_init(q, NULL)) {
		mutex_unlock(&q->sysfs_lock);
		goto fail;
	}

	mutex_unlock(&q->sysfs_lock);

	return q;

fail:
	blk_free_flush_queue(q->fq);
	return NULL;
}
EXPORT_SYMBOL(blk_init_allocated_queue);

bool blk_get_queue(struct request_queue *q)
{
	if (likely(!blk_queue_dying(q))) {
		__blk_get_queue(q);
		return true;
	}

	return false;
}
EXPORT_SYMBOL(blk_get_queue);

static inline void blk_free_request(struct request_list *rl, struct request *rq)
{
	if (rq->cmd_flags & REQ_ELVPRIV) {
		elv_put_request(rl->q, rq);
		if (rq->elv.icq)
			put_io_context(rq->elv.icq->ioc);
	}

	mempool_free(rq, rl->rq_pool);
}

/*
 * ioc_batching returns true if the ioc is a valid batching request and
 * should be given priority access to a request.
 */
static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
{
	if (!ioc)
		return 0;

	/*
	 * Make sure the process is able to allocate at least 1 request
	 * even if the batch times out, otherwise we could theoretically
	 * lose wakeups.
	 */
	return ioc->nr_batch_requests == q->nr_batching ||
		(ioc->nr_batch_requests > 0
		&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
}

/*
 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
 * will cause the process to be a "batcher" on all queues in the system. This
 * is the behaviour we want though - once it gets a wakeup it should be given
 * a nice run.
 */
static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
{
	if (!ioc || ioc_batching(q, ioc))
		return;

	ioc->nr_batch_requests = q->nr_batching;
	ioc->last_waited = jiffies;
}

static void __freed_request(struct request_list *rl, int sync)
{
	struct request_queue *q = rl->q;

	if (rl->count[sync] < queue_congestion_off_threshold(q))
		blk_clear_congested(rl, sync);

	if (rl->count[sync] + 1 <= q->nr_requests) {
		if (waitqueue_active(&rl->wait[sync]))
			wake_up(&rl->wait[sync]);

		blk_clear_rl_full(rl, sync);
	}
}

/*
 * A request has just been released.  Account for it, update the full and
 * congestion status, wake up any waiters.   Called under q->queue_lock.
 */
static void freed_request(struct request_list *rl, int op, unsigned int flags)
{
	struct request_queue *q = rl->q;
	int sync = rw_is_sync(op, flags);

	q->nr_rqs[sync]--;
	rl->count[sync]--;
	if (flags & REQ_ELVPRIV)
		q->nr_rqs_elvpriv--;

	__freed_request(rl, sync);

	if (unlikely(rl->starved[sync ^ 1]))
		__freed_request(rl, sync ^ 1);
}

int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
{
	struct request_list *rl;
	int on_thresh, off_thresh;

	spin_lock_irq(q->queue_lock);
	q->nr_requests = nr;
	blk_queue_congestion_threshold(q);
	on_thresh = queue_congestion_on_threshold(q);
	off_thresh = queue_congestion_off_threshold(q);

	blk_queue_for_each_rl(rl, q) {
		if (rl->count[BLK_RW_SYNC] >= on_thresh)
			blk_set_congested(rl, BLK_RW_SYNC);
		else if (rl->count[BLK_RW_SYNC] < off_thresh)
			blk_clear_congested(rl, BLK_RW_SYNC);

		if (rl->count[BLK_RW_ASYNC] >= on_thresh)
			blk_set_congested(rl, BLK_RW_ASYNC);
		else if (rl->count[BLK_RW_ASYNC] < off_thresh)
			blk_clear_congested(rl, BLK_RW_ASYNC);

		if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
			blk_set_rl_full(rl, BLK_RW_SYNC);
		} else {
			blk_clear_rl_full(rl, BLK_RW_SYNC);
			wake_up(&rl->wait[BLK_RW_SYNC]);
		}

		if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
			blk_set_rl_full(rl, BLK_RW_ASYNC);
		} else {
			blk_clear_rl_full(rl, BLK_RW_ASYNC);
			wake_up(&rl->wait[BLK_RW_ASYNC]);
		}
	}

	spin_unlock_irq(q->queue_lock);
	return 0;
}

/*
 * Determine if elevator data should be initialized when allocating the
 * request associated with @bio.
 */
static bool blk_rq_should_init_elevator(struct bio *bio)
{
	if (!bio)
		return true;

	/*
	 * Flush requests do not use the elevator so skip initialization.
	 * This allows a request to share the flush and elevator data.
	 */
	if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA))
		return false;

	return true;
}

/**
 * rq_ioc - determine io_context for request allocation
 * @bio: request being allocated is for this bio (can be %NULL)
 *
 * Determine io_context to use for request allocation for @bio.  May return
 * %NULL if %current->io_context doesn't exist.
 */
static struct io_context *rq_ioc(struct bio *bio)
{
#ifdef CONFIG_BLK_CGROUP
	if (bio && bio->bi_ioc)
		return bio->bi_ioc;
#endif
	return current->io_context;
}

/**
 * __get_request - get a free request
 * @rl: request list to allocate from
 * @op: REQ_OP_READ/REQ_OP_WRITE
 * @op_flags: rq_flag_bits
 * @bio: bio to allocate request for (can be %NULL)
 * @gfp_mask: allocation mask
 *
 * Get a free request from @q.  This function may fail under memory
 * pressure or if @q is dead.
 *
 * Must be called with @q->queue_lock held and,
 * Returns ERR_PTR on failure, with @q->queue_lock held.
 * Returns request pointer on success, with @q->queue_lock *not held*.
 */
static struct request *__get_request(struct request_list *rl, int op,
				     int op_flags, struct bio *bio,
				     gfp_t gfp_mask)
{
	struct request_queue *q = rl->q;
	struct request *rq;
	struct elevator_type *et = q->elevator->type;
	struct io_context *ioc = rq_ioc(bio);
	struct io_cq *icq = NULL;
	const bool is_sync = rw_is_sync(op, op_flags) != 0;
	int may_queue;

	if (unlikely(blk_queue_dying(q)))
		return ERR_PTR(-ENODEV);

	may_queue = elv_may_queue(q, op, op_flags);
	if (may_queue == ELV_MQUEUE_NO)
		goto rq_starved;

	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
		if (rl->count[is_sync]+1 >= q->nr_requests) {
			/*
			 * The queue will fill after this allocation, so set
			 * it as full, and mark this process as "batching".
			 * This process will be allowed to complete a batch of
			 * requests, others will be blocked.
			 */
			if (!blk_rl_full(rl, is_sync)) {
				ioc_set_batching(q, ioc);
				blk_set_rl_full(rl, is_sync);
			} else {
				if (may_queue != ELV_MQUEUE_MUST
						&& !ioc_batching(q, ioc)) {
					/*
					 * The queue is full and the allocating
					 * process is not a "batcher", and not
					 * exempted by the IO scheduler
					 */
					return ERR_PTR(-ENOMEM);
				}
			}
		}
		blk_set_congested(rl, is_sync);
	}

	/*
	 * Only allow batching queuers to allocate up to 50% over the defined
	 * limit of requests, otherwise we could have thousands of requests
	 * allocated with any setting of ->nr_requests
	 */
	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
		return ERR_PTR(-ENOMEM);

	q->nr_rqs[is_sync]++;
	rl->count[is_sync]++;
	rl->starved[is_sync] = 0;

	/*
	 * Decide whether the new request will be managed by elevator.  If
	 * so, mark the request as ELVPRIV.  This lets the request stealing
	 * logic manipulate the request as it wishes.
	 *
	 * The following is slightly racy.  If the elevator is switched
	 * while the queue is bypassing, we may request elevator private
	 * data from a bypassing queue; however, this shouldn't cause any
	 * serious issue and bypassing is always safer than not.
	 */
	if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
		op_flags |= REQ_ELVPRIV;
		q->nr_rqs_elvpriv++;
		if (et->icq_cache && ioc)
			icq = ioc_lookup_icq(ioc, q);
	}

	if (blk_queue_io_stat(q))
		op_flags |= REQ_IO_STAT;
	spin_unlock_irq(q->queue_lock);

	/* allocate and init request */
	rq = mempool_alloc(rl->rq_pool, gfp_mask);
	if (!rq)
		goto fail_alloc;

	blk_rq_init(q, rq);
	blk_rq_set_rl(rq, rl);
	req_set_op_attrs(rq, op, op_flags | REQ_ALLOCED);

	/* init elvpriv */
	if (op_flags & REQ_ELVPRIV) {
		if (unlikely(et->icq_cache && !icq)) {
			if (ioc)
				icq = ioc_create_icq(ioc, q, gfp_mask);
			if (!icq)
				goto fail_elvpriv;
		}

		rq->elv.icq = icq;
		if (unlikely(elv_set_request(q, rq, bio, gfp_mask)))
			goto fail_elvpriv;

		/* @rq->elv.icq holds io_context until @rq is freed */
		if (icq)
			get_io_context(icq->ioc);
	}
out:
	/*
	 * ioc may be NULL here, and ioc_batching will be false. That's
	 * OK, if the queue is under the request limit then requests need
	 * not count toward the nr_batch_requests limit. There will always
	 * be some limit enforced by BLK_BATCH_TIME.
	 */
	if (ioc_batching(q, ioc))
		ioc->nr_batch_requests--;

	trace_block_getrq(q, bio, op);
	return rq;

fail_elvpriv:
	/*
	 * elvpriv data isn't available, fall back to a request without
	 * ELVPRIV.  The io scheduler loses track of this request, which
	 * may disturb scheduling, but that is better than failing the
	 * allocation outright.
	 */
	printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n",
			   __func__, dev_name(q->backing_dev_info.dev));

	rq->cmd_flags &= ~REQ_ELVPRIV;
	rq->elv.icq = NULL;

	spin_lock_irq(q->queue_lock);
	q->nr_rqs_elvpriv--;
	spin_unlock_irq(q->queue_lock);
	goto out;

fail_alloc:
	/*
	 * Allocation failed presumably due to memory. Undo anything we
	 * might have messed up.
	 *
	 * Allocating task should really be put onto the front of the wait
	 * queue, but this is pretty rare.
	 */
	spin_lock_irq(q->queue_lock);
	freed_request(rl, op, op_flags);

	/*
	 * in the very unlikely event that allocation failed and no
	 * requests for this direction was pending, mark us starved so that
	 * freeing of a request in the other direction will notice
	 * us. another possible fix would be to split the rq mempool into
	 * READ and WRITE
	 */
rq_starved:
	if (unlikely(rl->count[is_sync] == 0))
		rl->starved[is_sync] = 1;
	return ERR_PTR(-ENOMEM);
}

/**
 * get_request - get a free request
 * @q: request_queue to allocate request from
 * @op: REQ_OP_READ/REQ_OP_WRITE
 * @op_flags: rq_flag_bits
 * @bio: bio to allocate request for (can be %NULL)
 * @gfp_mask: allocation mask
 *
 * Get a free request from @q.  If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
 * this function keeps retrying under memory pressure and fails iff @q is dead.
 *
 * Must be called with @q->queue_lock held and,
 * Returns ERR_PTR on failure, with @q->queue_lock held.
 * Returns request pointer on success, with @q->queue_lock *not held*.
 */
static struct request *get_request(struct request_queue *q, int op,
				   int op_flags, struct bio *bio,
				   gfp_t gfp_mask)
{
	const bool is_sync = rw_is_sync(op, op_flags) != 0;
	DEFINE_WAIT(wait);
	struct request_list *rl;
	struct request *rq;

	rl = blk_get_rl(q, bio);	/* transferred to @rq on success */
retry:
	rq = __get_request(rl, op, op_flags, bio, gfp_mask);
	if (!IS_ERR(rq))
		return rq;

	if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
		blk_put_rl(rl);
		return rq;
	}

	/* wait on @rl and retry */
	prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
				  TASK_UNINTERRUPTIBLE);

	trace_block_sleeprq(q, bio, op);

	spin_unlock_irq(q->queue_lock);
	io_schedule();

	/*
	 * After sleeping, we become a "batching" process and will be able
	 * to allocate at least one request, and up to a big batch of them
	 * for a small period time.  See ioc_batching, ioc_set_batching
	 */
	ioc_set_batching(q, current->io_context);

	spin_lock_irq(q->queue_lock);
	finish_wait(&rl->wait[is_sync], &wait);

	goto retry;
}

static struct request *blk_old_get_request(struct request_queue *q, int rw,
		gfp_t gfp_mask)
{
	struct request *rq;

	BUG_ON(rw != READ && rw != WRITE);

	/* create ioc upfront */
	create_io_context(gfp_mask, q->node);

	spin_lock_irq(q->queue_lock);
	rq = get_request(q, rw, 0, NULL, gfp_mask);
	if (IS_ERR(rq)) {
		spin_unlock_irq(q->queue_lock);
		return rq;
	}

	/* q->queue_lock is unlocked at this point */
	rq->__data_len = 0;
	rq->__sector = (sector_t) -1;
	rq->bio = rq->biotail = NULL;
	return rq;
}

struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{
	if (q->mq_ops)
		return blk_mq_alloc_request(q, rw,
			(gfp_mask & __GFP_DIRECT_RECLAIM) ?
				0 : BLK_MQ_REQ_NOWAIT);
	else
		return blk_old_get_request(q, rw, gfp_mask);
}
EXPORT_SYMBOL(blk_get_request);

/**
 * blk_rq_set_block_pc - initialize a request to type BLOCK_PC
 * @rq:		request to be initialized
 *
 */
void blk_rq_set_block_pc(struct request *rq)
{
	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	memset(rq->__cmd, 0, sizeof(rq->__cmd));
}
EXPORT_SYMBOL(blk_rq_set_block_pc);
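
/*
 * Example (sketch, not part of this file): pass-through users allocate a
 * request, mark it BLOCK_PC, fill in the CDB and execute it synchronously.
 * Error handling is abbreviated; "disk" and the CDB below are illustrative.
 *
 *	struct request *rq;
 *
 *	rq = blk_get_request(q, READ, GFP_KERNEL);
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *	blk_rq_set_block_pc(rq);
 *	rq->cmd[0] = TEST_UNIT_READY;	// SCSI opcode, for example
 *	rq->cmd_len = 6;
 *	rq->timeout = 10 * HZ;
 *	blk_execute_rq(q, disk, rq, 0);
 *	blk_put_request(rq);
 */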

/**
 * blk_requeue_request - put a request back on queue
 * @q:		request queue where request should be inserted
 * @rq:		request to be inserted
 *
 * Description:
 *    Drivers often keep queueing requests until the hardware cannot accept
 *    more, when that condition happens we need to put the request back
 *    on the queue. Must be called with queue lock held.
 */
void blk_requeue_request(struct request_queue *q, struct request *rq)
{
	blk_delete_timer(rq);
	blk_clear_rq_complete(rq);
	trace_block_rq_requeue(q, rq);

	if (rq->cmd_flags & REQ_QUEUED)
		blk_queue_end_tag(q, rq);

	BUG_ON(blk_queued_rq(rq));

	elv_requeue_request(q, rq);
}
EXPORT_SYMBOL(blk_requeue_request);

static void add_acct_request(struct request_queue *q, struct request *rq,
			     int where)
{
	blk_account_io_start(rq, true);
	__elv_add_request(q, rq, where);
}

static void part_round_stats_single(int cpu, struct hd_struct *part,
				    unsigned long now)
{
	int inflight;

	if (now == part->stamp)
		return;

	inflight = part_in_flight(part);
	if (inflight) {
		__part_stat_add(cpu, part, time_in_queue,
				inflight * (now - part->stamp));
		__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
	}
	part->stamp = now;
}

/**
 * part_round_stats() - Round off the performance stats on a struct disk_stats.
 * @cpu: cpu number for stats access
 * @part: target partition
 *
 * The average IO queue length and utilisation statistics are maintained
 * by observing the current state of the queue length and the amount of
 * time it has been in this state for.
 *
 * Normally, that accounting is done on IO completion, but that can result
 * in more than a second's worth of IO being accounted for within any one
 * second, leading to >100% utilisation.  To deal with that, we call this
 * function to do a round-off before returning the results when reading
 * /proc/diskstats.  This accounts immediately for all queue usage up to
 * the current jiffies and restarts the counters again.
 */
void part_round_stats(int cpu, struct hd_struct *part)
{
	unsigned long now = jiffies;

	if (part->partno)
		part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
	part_round_stats_single(cpu, part, now);
}
EXPORT_SYMBOL_GPL(part_round_stats);
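
/*
 * Worked example (illustrative): if part->stamp was last updated 10
 * jiffies ago and two requests have been in flight the whole time, the
 * call above adds 2 * 10 = 20 to time_in_queue (queue-time summed over
 * all in-flight requests) and 10 to io_ticks (wall-clock time the
 * partition was busy), then resets part->stamp to the current jiffies.
 */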

#ifdef CONFIG_PM
static void blk_pm_put_request(struct request *rq)
{
	if (rq->q->dev && !(rq->cmd_flags & REQ_PM) && !--rq->q->nr_pending)
		pm_runtime_mark_last_busy(rq->q->dev);
}
#else
static inline void blk_pm_put_request(struct request *rq) {}
#endif

/*
 * queue lock must be held
 */
void __blk_put_request(struct request_queue *q, struct request *req)
{
	if (unlikely(!q))
		return;

	if (q->mq_ops) {
		blk_mq_free_request(req);
		return;
	}

	blk_pm_put_request(req);

	elv_completed_request(q, req);

	/* this is a bio leak */
	WARN_ON(req->bio != NULL);

	/*
	 * Request may not have originated from ll_rw_blk. if not,
	 * it didn't come out of our reserved rq pools
	 */
	if (req->cmd_flags & REQ_ALLOCED) {
		unsigned int flags = req->cmd_flags;
		int op = req_op(req);
		struct request_list *rl = blk_rq_rl(req);

		BUG_ON(!list_empty(&req->queuelist));
		BUG_ON(ELV_ON_HASH(req));

		blk_free_request(rl, req);
		freed_request(rl, op, flags);
		blk_put_rl(rl);
	}
}
EXPORT_SYMBOL_GPL(__blk_put_request);

void blk_put_request(struct request *req)
{
	struct request_queue *q = req->q;

	if (q->mq_ops)
		blk_mq_free_request(req);
	else {
		unsigned long flags;

		spin_lock_irqsave(q->queue_lock, flags);
		__blk_put_request(q, req);
		spin_unlock_irqrestore(q->queue_lock, flags);
	}
}
EXPORT_SYMBOL(blk_put_request);

/**
 * blk_add_request_payload - add a payload to a request
 * @rq: request to update
 * @page: page backing the payload
 * @offset: offset in page
 * @len: length of the payload.
 *
 * This allows to later add a payload to an already submitted request by
 * a block driver.  The driver needs to take care of freeing the payload
 * itself.
 *
 * Note that this is a quite horrible hack and nothing but handling of
 * discard requests should ever use it.
 */
void blk_add_request_payload(struct request *rq, struct page *page,
		int offset, unsigned int len)
{
	struct bio *bio = rq->bio;

	bio->bi_io_vec->bv_page = page;
	bio->bi_io_vec->bv_offset = offset;
	bio->bi_io_vec->bv_len = len;

	bio->bi_iter.bi_size = len;
	bio->bi_vcnt = 1;
	bio->bi_phys_segments = 1;

	rq->__data_len = rq->resid_len = len;
	rq->nr_phys_segments = 1;
}
EXPORT_SYMBOL_GPL(blk_add_request_payload);

bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
			    struct bio *bio)
{
	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;

	if (!ll_back_merge_fn(q, req, bio))
		return false;

	trace_block_bio_backmerge(q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;
	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

	blk_account_io_start(req, false);
	return true;
}

bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
			     struct bio *bio)
{
	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;

	if (!ll_front_merge_fn(q, req, bio))
		return false;

	trace_block_bio_frontmerge(q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	bio->bi_next = req->bio;
	req->bio = bio;

	req->__sector = bio->bi_iter.bi_sector;
	req->__data_len += bio->bi_iter.bi_size;
	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));

	blk_account_io_start(req, false);
	return true;
}

/**
 * blk_attempt_plug_merge - try to merge with %current's plugged list
 * @q: request_queue new bio is being queued at
 * @bio: new bio being queued
 * @request_count: out parameter for number of traversed plugged requests
 * @same_queue_rq: pointer to &struct request that gets filled in when
 * another request associated with @q is found on the plug list
 * (optional, may be %NULL)
 *
 * Determine whether @bio being queued on @q can be merged with a request
 * on %current's plugged list.  Returns %true if merge was successful,
 * otherwise %false.
 *
 * Plugging coalesces IOs from the same issuer for the same purpose without
 * going through @q->queue_lock.  As such it's more of an issuing mechanism
 * than scheduling, and the request is not added to the elevator at all.
 * In this way, plugging supports multiple devices from a single issuer,
 * for example a multipath setup with just one queue.
 */
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
			    unsigned int *request_count,
			    struct request **same_queue_rq)
{
	struct blk_plug *plug;
	struct request *rq;
	bool ret = false;
	struct list_head *plug_list;

	plug = current->plug;
	if (!plug)
		goto out;
	*request_count = 0;

	if (q->mq_ops)
		plug_list = &plug->mq_list;
	else
		plug_list = &plug->list;

	list_for_each_entry_reverse(rq, plug_list, queuelist) {
		int el_ret;

		if (rq->q == q) {
			(*request_count)++;
			/*
			 * Only blk-mq multiple hardware queues case checks the
			 * rq in the same queue, there should be only one such
			 * rq in a queue
			 */
			if (same_queue_rq)
				*same_queue_rq = rq;
		}

		if (rq->q != q || !blk_rq_merge_ok(rq, bio))
			continue;

		el_ret = blk_try_merge(rq, bio);
		if (el_ret == ELEVATOR_BACK_MERGE) {
			ret = bio_attempt_back_merge(q, rq, bio);
			if (ret)
				break;
		} else if (el_ret == ELEVATOR_FRONT_MERGE) {
			ret = bio_attempt_front_merge(q, rq, bio);
			if (ret)
				break;
		}
	}
out:
	return ret;
}

unsigned int blk_plug_queued_count(struct request_queue *q)
{
	struct blk_plug *plug;
	struct request *rq;
	struct list_head *plug_list;
	unsigned int ret = 0;

	plug = current->plug;
	if (!plug)
		goto out;

	if (q->mq_ops)
		plug_list = &plug->mq_list;
	else
		plug_list = &plug->list;

	list_for_each_entry(rq, plug_list, queuelist) {
		if (rq->q == q)
			ret++;
	}
out:
	return ret;
}

void init_request_from_bio(struct request *req, struct bio *bio)
{
	req->cmd_type = REQ_TYPE_FS;

	req->cmd_flags |= bio->bi_opf & REQ_COMMON_MASK;
	if (bio->bi_opf & REQ_RAHEAD)
		req->cmd_flags |= REQ_FAILFAST_MASK;

	req->errors = 0;
	req->__sector = bio->bi_iter.bi_sector;
	req->ioprio = bio_prio(bio);
	blk_rq_bio_prep(req->q, req, bio);
}

static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
{
	const bool sync = !!(bio->bi_opf & REQ_SYNC);
	struct blk_plug *plug;
	int el_ret, rw_flags = 0, where = ELEVATOR_INSERT_SORT;
	struct request *req;
	unsigned int request_count = 0;

	/*
	 * low level driver can indicate that it wants pages above a
	 * certain limit bounced to low memory (ie for highmem, or even
	 * ISA dma in theory)
	 */
	blk_queue_bounce(q, &bio);

	blk_queue_split(q, &bio, q->bio_split);

	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
		bio->bi_error = -EIO;
		bio_endio(bio);
		return BLK_QC_T_NONE;
	}

	if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) {
		spin_lock_irq(q->queue_lock);
		where = ELEVATOR_INSERT_FLUSH;
		goto get_rq;
	}

	/*
	 * Check if we can merge with the plugged list before grabbing
	 * any locks.
	 */
	if (!blk_queue_nomerges(q)) {
		if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
			return BLK_QC_T_NONE;
	} else
		request_count = blk_plug_queued_count(q);

	spin_lock_irq(q->queue_lock);

	el_ret = elv_merge(q, &req, bio);
	if (el_ret == ELEVATOR_BACK_MERGE) {
		if (bio_attempt_back_merge(q, req, bio)) {
			elv_bio_merged(q, req, bio);
			if (!attempt_back_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out_unlock;
		}
	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
		if (bio_attempt_front_merge(q, req, bio)) {
			elv_bio_merged(q, req, bio);
			if (!attempt_front_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out_unlock;
		}
	}

get_rq:
	/*
	 * This sync check and mask will be re-done in init_request_from_bio(),
	 * but we need to set it earlier to expose the sync flag to the
	 * rq allocator and io schedulers.
	 */
	if (sync)
		rw_flags |= REQ_SYNC;

	/*
	 * Add in META/PRIO flags, if set, before we get to the IO scheduler
	 */
	rw_flags |= (bio->bi_opf & (REQ_META | REQ_PRIO));

	/*
	 * Grab a free request. This might sleep but can not fail.
	 * Returns with the queue unlocked.
	 */
	req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO);
	if (IS_ERR(req)) {
		bio->bi_error = PTR_ERR(req);
		bio_endio(bio);
		goto out_unlock;
	}

	/*
	 * After dropping the lock and possibly sleeping here, our request
	 * may now be mergeable after it had proven unmergeable (above).
	 * We don't worry about that case for efficiency. It won't happen
	 * often, and the elevators are able to handle it.
	 */
	init_request_from_bio(req, bio);

	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
		req->cpu = raw_smp_processor_id();

	plug = current->plug;
	if (plug) {
		/*
		 * If this is the first request added after a plug, fire
		 * off a plug trace.
		 */
		if (!request_count)
			trace_block_plug(q);
		else {
			if (request_count >= BLK_MAX_REQUEST_COUNT) {
				blk_flush_plug_list(plug, false);
				trace_block_plug(q);
			}
		}
		list_add_tail(&req->queuelist, &plug->list);
		blk_account_io_start(req, true);
	} else {
		spin_lock_irq(q->queue_lock);
		add_acct_request(q, req, where);
		__blk_run_queue(q);
out_unlock:
		spin_unlock_irq(q->queue_lock);
	}

	return BLK_QC_T_NONE;
}

/*
 * If bio->bi_bdev is a partition, remap the location
 */
static inline void blk_partition_remap(struct bio *bio)
{
	struct block_device *bdev = bio->bi_bdev;

	if (bio_sectors(bio) && bdev != bdev->bd_contains) {
		struct hd_struct *p = bdev->bd_part;

		bio->bi_iter.bi_sector += p->start_sect;
		bio->bi_bdev = bdev->bd_contains;

		trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
				      bdev->bd_dev,
				      bio->bi_iter.bi_sector - p->start_sect);
	}
}

static void handle_bad_sector(struct bio *bio)
{
	char b[BDEVNAME_SIZE];

	printk(KERN_INFO "attempt to access beyond end of device\n");
	printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
			bdevname(bio->bi_bdev, b),
			bio->bi_opf,
			(unsigned long long)bio_end_sector(bio),
			(long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
}

#ifdef CONFIG_FAIL_MAKE_REQUEST

static DECLARE_FAULT_ATTR(fail_make_request);

static int __init setup_fail_make_request(char *str)
{
	return setup_fault_attr(&fail_make_request, str);
}
__setup("fail_make_request=", setup_fail_make_request);

static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
{
	return part->make_it_fail && should_fail(&fail_make_request, bytes);
}

static int __init fail_make_request_debugfs(void)
{
	struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
						NULL, &fail_make_request);

	return PTR_ERR_OR_ZERO(dir);
}

late_initcall(fail_make_request_debugfs);

#else /* CONFIG_FAIL_MAKE_REQUEST */

static inline bool should_fail_request(struct hd_struct *part,
					unsigned int bytes)
{
	return false;
}

#endif /* CONFIG_FAIL_MAKE_REQUEST */

/*
 * Check whether this bio extends beyond the end of the device.
 */
static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
{
	sector_t maxsector;

	if (!nr_sectors)
		return 0;

	/* Test device or partition size, when known. */
	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
	if (maxsector) {
		sector_t sector = bio->bi_iter.bi_sector;

		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
			/*
			 * This may well happen - the kernel calls bread()
			 * without checking the size of the device, e.g., when
			 * mounting a device.
			 */
			handle_bad_sector(bio);
			return 1;
		}
	}

	return 0;
}

static noinline_for_stack bool
generic_make_request_checks(struct bio *bio)
{
	struct request_queue *q;
	int nr_sectors = bio_sectors(bio);
	int err = -EIO;
	char b[BDEVNAME_SIZE];
	struct hd_struct *part;

	might_sleep();

	if (bio_check_eod(bio, nr_sectors))
		goto end_io;

	q = bdev_get_queue(bio->bi_bdev);
	if (unlikely(!q)) {
		printk(KERN_ERR
		       "generic_make_request: Trying to access "
			"nonexistent block-device %s (%Lu)\n",
			bdevname(bio->bi_bdev, b),
			(long long) bio->bi_iter.bi_sector);
		goto end_io;
	}

	part = bio->bi_bdev->bd_part;
	if (should_fail_request(part, bio->bi_iter.bi_size) ||
	    should_fail_request(&part_to_disk(part)->part0,
				bio->bi_iter.bi_size))
		goto end_io;

	/*
	 * If this device has partitions, remap block n
	 * of partition p to block n+start(p) of the disk.
	 */
	blk_partition_remap(bio);

	if (bio_check_eod(bio, nr_sectors))
		goto end_io;

	/*
	 * Filter flush bio's early so that make_request based
	 * drivers without flush support don't have to worry
	 * about them.
	 */
	if ((bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) &&
	    !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
		bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
		if (!nr_sectors) {
			err = 0;
			goto end_io;
		}
	}

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
		if (!blk_queue_discard(q))
			goto not_supported;
		break;
	case REQ_OP_SECURE_ERASE:
		if (!blk_queue_secure_erase(q))
			goto not_supported;
		break;
	case REQ_OP_WRITE_SAME:
		if (!bdev_write_same(bio->bi_bdev))
			goto not_supported;
		break;
	default:
		break;
	}

	/*
	 * Various block parts want %current->io_context; create it up
	 * front here rather than lazily on the issue path, where GFP
	 * restrictions make allocation awkward.
	 */
	create_io_context(GFP_ATOMIC, q->node);

	if (!blkcg_bio_issue_check(q, bio))
		return false;

	trace_block_bio_queue(q, bio);
	return true;

not_supported:
	err = -EOPNOTSUPP;
end_io:
	bio->bi_error = err;
	bio_endio(bio);
	return false;
}

/**
 * generic_make_request - hand a buffer to its device driver for I/O
 * @bio:  The bio describing the location in memory and on the device.
 *
 * generic_make_request() is used to make I/O requests of block
 * devices. It is passed a &struct bio, which describes the I/O that needs
 * to be done.
 *
 * generic_make_request() does not return any status.  The
 * success/failure status of the request, along with notification of
 * completion, is delivered asynchronously through the bio->bi_end_io
 * function described (one day) elsewhere.
 *
 * The caller of generic_make_request must make sure that bi_io_vec
 * are set to describe the memory buffer, and that bi_dev and bi_sector are
 * set to describe the device address, and the
 * bi_end_io and optionally bi_private are set to describe how
 * completion notification should be signaled.
 *
 * generic_make_request and the drivers it calls may use bi_next if this
 * bio happens to be merged with someone else, and may resubmit the bio to
 * a lower device by calling into generic_make_request recursively, which
 * means the bio should NOT be touched after the call to ->make_request_fn.
 */
blk_qc_t generic_make_request(struct bio *bio)
{
	struct bio_list bio_list_on_stack;
	blk_qc_t ret = BLK_QC_T_NONE;

	if (!generic_make_request_checks(bio))
		goto out;

	/*
	 * We only want one ->make_request_fn to be active at a time, else
	 * stack usage with stacked devices could be a problem.  So use
	 * current->bio_list to keep a list of requests submitted by a
	 * make_request_fn function.  current->bio_list is also used as a
	 * flag to say if generic_make_request is currently active in this
	 * task or not.  If it is NULL, then no make_request is active.  If
	 * it is non-NULL, then a make_request is active, and new requests
	 * should be added at the tail
	 */
	if (current->bio_list) {
		bio_list_add(current->bio_list, bio);
		goto out;
	}

	/* following loop may be a bit non-obvious, and so deserves some
	 * explanation.
	 * Before entering the loop, bio->bi_next is NULL (as all callers
	 * ensure that) so we have a list with a single bio.
	 * We pretend that we have just taken it off a longer list, so
	 * we assign bio_list to a pointer to the bio_list_on_stack,
	 * thus initialising the bio_list of new bios to be
	 * added.  ->make_request() may indeed add some more bios
	 * through a recursive call to generic_make_request.  If it
	 * did, we find a non-NULL value in bio_list and re-enter the loop
	 * from the top.  In this case we really did just take the bio
	 * of the top of the list (no pretending) and so remove it from
	 * bio_list, and call into ->make_request() again.
	 */
	BUG_ON(bio->bi_next);
	bio_list_init(&bio_list_on_stack);
	current->bio_list = &bio_list_on_stack;
	do {
		struct request_queue *q = bdev_get_queue(bio->bi_bdev);

		if (likely(blk_queue_enter(q, false) == 0)) {
			ret = q->make_request_fn(q, bio);

			blk_queue_exit(q);

			bio = bio_list_pop(current->bio_list);
		} else {
			struct bio *bio_next = bio_list_pop(current->bio_list);

			bio_io_error(bio);
			bio = bio_next;
		}
	} while (bio);
	current->bio_list = NULL;

out:
	return ret;
}
EXPORT_SYMBOL(generic_make_request);

/**
 * submit_bio - submit a bio to the block device layer for I/O
 * @bio: The &struct bio which describes the I/O
 *
 * submit_bio() is very similar in purpose to generic_make_request(), and
 * uses that function to do most of the work. Both are fairly rough
 * interfaces; @bio must be presetup and ready for I/O.
 */
blk_qc_t submit_bio(struct bio *bio)
{
	/*
	 * If it's a regular read/write or a barrier with data attached,
	 * go through the normal accounting stuff before submission.
	 */
	if (bio_has_data(bio)) {
		unsigned int count;

		if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
			count = bdev_logical_block_size(bio->bi_bdev) >> 9;
		else
			count = bio_sectors(bio);

		if (op_is_write(bio_op(bio))) {
			count_vm_events(PGPGOUT, count);
		} else {
			task_io_account_read(bio->bi_iter.bi_size);
			count_vm_events(PGPGIN, count);
		}

		if (unlikely(block_dump)) {
			char b[BDEVNAME_SIZE];
			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
				current->comm, task_pid_nr(current),
				op_is_write(bio_op(bio)) ? "WRITE" : "READ",
				(unsigned long long)bio->bi_iter.bi_sector,
				bdevname(bio->bi_bdev, b), count);
		}
	}

	return generic_make_request(bio);
}
EXPORT_SYMBOL(submit_bio);
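
/*
 * Example (sketch, not part of this file): reading one page synchronously
 * via submit_bio_wait(); "bdev" and "page" are assumed to be set up by the
 * caller, and error handling is abbreviated.
 *
 *	struct bio *bio;
 *	int err;
 *
 *	bio = bio_alloc(GFP_KERNEL, 1);
 *	bio->bi_bdev = bdev;
 *	bio->bi_iter.bi_sector = 0;
 *	bio_add_page(bio, page, PAGE_SIZE, 0);
 *	bio_set_op_attrs(bio, REQ_OP_READ, 0);
 *	err = submit_bio_wait(bio);
 *	bio_put(bio);
 */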

/**
 * blk_cloned_rq_check_limits - Helper function to check a cloned request
 *                              for the new queue limits
 * @q:  the queue
 * @rq: the request being checked
 *
 * Description:
 *    @rq may have been made based on weaker limitations of upper-level queues
 *    in request stacking drivers, and it may violate the limitation of @q.
 *    Since the block layer and the underlying device driver trust @rq
 *    after it is inserted to @q, it should be checked against @q before
 *    the insertion using this generic function.
 *
 *    Request stacking drivers like request-based dm may change the queue
 *    limits when retrying requests on other queues. Those requests need
 *    to be checked against the new queue limits again during dispatch.
 */
static int blk_cloned_rq_check_limits(struct request_queue *q,
				      struct request *rq)
{
	if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
		printk(KERN_ERR "%s: over max size limit.\n", __func__);
		return -EIO;
	}

	/*
	 * queue's settings related to segment counting like q->bounce_pfn
	 * may differ from that of other stacking queues.
	 * Recalculate it to check the request correctly on this queue's
	 * limitation.
	 */
	blk_recalc_rq_segments(rq);
	if (rq->nr_phys_segments > queue_max_segments(q)) {
		printk(KERN_ERR "%s: over max segments limit.\n", __func__);
		return -EIO;
	}

	return 0;
}

/**
 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
 * @q:  the queue to submit the request
 * @rq: the request being queued
 */
int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
	unsigned long flags;
	int where = ELEVATOR_INSERT_BACK;

	if (blk_cloned_rq_check_limits(q, rq))
		return -EIO;

	if (rq->rq_disk &&
	    should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
		return -EIO;

	if (q->mq_ops) {
		if (blk_queue_io_stat(q))
			blk_account_io_start(rq, true);
		blk_mq_insert_request(rq, false, true, false);
		return 0;
	}

	spin_lock_irqsave(q->queue_lock, flags);
	if (unlikely(blk_queue_dying(q))) {
		spin_unlock_irqrestore(q->queue_lock, flags);
		return -ENODEV;
	}

	/*
	 * Submitting request must be dequeued before calling this function
	 * because it will be linked to another request_queue
	 */
	BUG_ON(blk_queued_rq(rq));

	if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
		where = ELEVATOR_INSERT_FLUSH;

	add_acct_request(q, rq, where);
	if (where == ELEVATOR_INSERT_FLUSH)
		__blk_run_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);

/**
 * blk_rq_err_bytes - determine number of bytes till the next failure boundary
 * @rq: request to examine
 *
 * Description:
 *     A request could be merge of IOs which require different failure
 *     handling.  This function determines the number of bytes which
 *     can be failed from the beginning of the request without
 *     crossing into area which need to be retried further.
 *
 * Return:
 *     The number of bytes to fail.
 *
 * Context:
 *     queue_lock must be held.
 */
unsigned int blk_rq_err_bytes(const struct request *rq)
{
	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
	unsigned int bytes = 0;
	struct bio *bio;

	if (!(rq->cmd_flags & REQ_MIXED_MERGE))
		return blk_rq_bytes(rq);

	/*
	 * Currently the only 'mixing' which can happen is between
	 * different fastfail types.  We can safely fail portions
	 * which have all the failfast bits that the first one has -
	 * the ones which are at least as eager to fail as the first
	 * one.
	 */
	for (bio = rq->bio; bio; bio = bio->bi_next) {
		if ((bio->bi_opf & ff) != ff)
			break;
		bytes += bio->bi_iter.bi_size;
	}

	/* this could lead to infinite loop */
	BUG_ON(blk_rq_bytes(rq) && !bytes);
	return bytes;
}
EXPORT_SYMBOL_GPL(blk_rq_err_bytes);

void blk_account_io_completion(struct request *req, unsigned int bytes)
{
	if (blk_do_io_stat(req)) {
		const int rw = rq_data_dir(req);
		struct hd_struct *part;
		int cpu;

		cpu = part_stat_lock();
		part = req->part;
		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
		part_stat_unlock();
	}
}

void blk_account_io_done(struct request *req)
{
	/*
	 * Account IO completion.  flush_rq isn't accounted as a
	 * normal IO on queueing nor completion.  Accounting the
	 * containing request is enough.
	 */
	if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
		unsigned long duration = jiffies - req->start_time;
		const int rw = rq_data_dir(req);
		struct hd_struct *part;
		int cpu;

		cpu = part_stat_lock();
		part = req->part;

		part_stat_inc(cpu, part, ios[rw]);
		part_stat_add(cpu, part, ticks[rw], duration);
		part_round_stats(cpu, part);
		part_dec_in_flight(part, rw);

		hd_struct_put(part);
		part_stat_unlock();
	}
}

#ifdef CONFIG_PM
/*
 * Don't process normal requests when queue is suspended
 * or in the process of suspending/resuming
 */
static struct request *blk_pm_peek_request(struct request_queue *q,
					   struct request *rq)
{
	if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
	    (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM))))
		return NULL;
	else
		return rq;
}
#else
static inline struct request *blk_pm_peek_request(struct request_queue *q,
						  struct request *rq)
{
	return rq;
}
#endif

void blk_account_io_start(struct request *rq, bool new_io)
{
	struct hd_struct *part;
	int rw = rq_data_dir(rq);
	int cpu;

	if (!blk_do_io_stat(rq))
		return;

	cpu = part_stat_lock();

	if (!new_io) {
		part = rq->part;
		part_stat_inc(cpu, part, merges[rw]);
	} else {
		part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
		if (!hd_struct_try_get(part)) {
			/*
			 * The partition is already being removed,
			 * the request will be accounted on the disk only
			 *
			 * We take a reference on disk->part0 although that
			 * partition will never be deleted, so we can treat
			 * it as any other partition.
			 */
			part = &rq->rq_disk->part0;
			hd_struct_get(part);
		}
		part_round_stats(cpu, part);
		part_inc_in_flight(part, rw);
		rq->part = part;
	}

	part_stat_unlock();
}

/**
 * blk_peek_request - peek at the top of a request queue
 * @q: request queue to peek at
 *
 * Description:
 *     Return the request at the top of @q.  The returned request
 *     should be started using blk_start_request() before LLD starts
 *     processing it.
 *
 * Return:
 *     Pointer to the request at the top of @q if available.  Null
 *     otherwise.
 *
 * Context:
 *     queue_lock must be held.
 */
struct request *blk_peek_request(struct request_queue *q)
{
	struct request *rq;
	int ret;

	while ((rq = __elv_next_request(q)) != NULL) {

		rq = blk_pm_peek_request(q, rq);
		if (!rq)
			break;

		if (!(rq->cmd_flags & REQ_STARTED)) {
			/*
			 * This is the first time the device driver
			 * sees this request (possibly after
			 * requeueing).  Notify IO scheduler.
			 */
			if (rq->cmd_flags & REQ_SORTED)
				elv_activate_rq(q, rq);

			/*
			 * just mark as started even if we don't start
			 * it, a request that has been delayed should
			 * not be passed by new incoming requests
			 */
			rq->cmd_flags |= REQ_STARTED;
			trace_block_rq_issue(q, rq);
		}

		if (!q->boundary_rq || q->boundary_rq == rq) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = NULL;
		}

		if (rq->cmd_flags & REQ_DONTPREP)
			break;

		if (q->dma_drain_size && blk_rq_bytes(rq)) {
			/*
			 * make sure space for the drain appears.  we
			 * know we can do this because max_hw_segments
			 * has been adjusted to be one fewer than the
			 * device can handle
			 */
			rq->nr_phys_segments++;
		}

		if (!q->prep_rq_fn)
			break;

		ret = q->prep_rq_fn(q, rq);
		if (ret == BLKPREP_OK) {
			break;
		} else if (ret == BLKPREP_DEFER) {
			/*
			 * the request may have been (partially) prepped.
			 * we need to keep this request in the front to
			 * avoid resource deadlock.  REQ_STARTED will
			 * prevent other fs requests from passing this one.
			 */
			if (q->dma_drain_size && blk_rq_bytes(rq) &&
			    !(rq->cmd_flags & REQ_DONTPREP)) {
				/*
				 * remove the space for the drain we added
				 * so that we don't add it again
				 */
				--rq->nr_phys_segments;
			}

			rq = NULL;
			break;
		} else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
			int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;

			rq->cmd_flags |= REQ_QUIET;
			/*
			 * Mark this request as started so we don't trigger
			 * any debug logic in the end I/O path.
			 */
			blk_start_request(rq);
			__blk_end_request_all(rq, err);
		} else {
			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
			break;
		}
	}

	return rq;
}
EXPORT_SYMBOL(blk_peek_request);

void blk_dequeue_request(struct request *rq)
{
	struct request_queue *q = rq->q;

	BUG_ON(list_empty(&rq->queuelist));
	BUG_ON(ELV_ON_HASH(rq));

	list_del_init(&rq->queuelist);

	/*
	 * the time frame between a request being removed from the lists
	 * and to it is freed is accounted as io that is in progress at
	 * the driver side.
	 */
	if (blk_account_rq(rq)) {
		q->in_flight[rq_is_sync(rq)]++;
		set_io_start_time_ns(rq);
	}
}

/**
 * blk_start_request - start request processing on the driver
 * @req: request to dequeue
 *
 * Description:
 *     Dequeue @req and start timeout timer on it.  This hands off the
 *     request to the driver.
 *
 *     Block internal functions which don't want to start timer should
 *     call blk_dequeue_request().
 *
 * Context:
 *     queue_lock must be held.
 */
void blk_start_request(struct request *req)
{
	blk_dequeue_request(req);

	/*
	 * We are now handing the request to the driver, initialize
	 * resid_len to full count and add the timeout handler.
	 */
	req->resid_len = blk_rq_bytes(req);
	if (unlikely(blk_bidi_rq(req)))
		req->next_rq->resid_len = blk_rq_bytes(req->next_rq);

	BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
	blk_add_timer(req);
}
EXPORT_SYMBOL(blk_start_request);

/**
 * blk_fetch_request - fetch a request from a request queue
 * @q: request queue to fetch a request from
 *
 * Description:
 *     Return the request at the top of @q.  The request is started on
 *     return and LLD can start processing it immediately.
 *
 * Return:
 *     Pointer to the request at the top of @q if available.  Null
 *     otherwise.
 *
 * Context:
 *     queue_lock must be held.
 */
struct request *blk_fetch_request(struct request_queue *q)
{
	struct request *rq;

	rq = blk_peek_request(q);
	if (rq)
		blk_start_request(rq);
	return rq;
}
EXPORT_SYMBOL(blk_fetch_request);
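
/*
 * Example (sketch, not part of this file): the canonical request_fn for a
 * simple driver fetches started requests and completes them in full.  The
 * "mydev" names are illustrative; q->queue_lock is held on entry.
 *
 *	static void mydev_request_fn(struct request_queue *q)
 *	{
 *		struct request *rq;
 *
 *		while ((rq = blk_fetch_request(q)) != NULL) {
 *			int err = mydev_xfer(rq);	// do the I/O
 *
 *			__blk_end_request_all(rq, err);
 *		}
 *	}
 */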

/**
 * blk_update_request - Special helper function for request stacking drivers
 * @req:      the request being processed
 * @error:    %0 for success, < %0 for error
 * @nr_bytes: number of bytes to complete @req
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @req, but doesn't complete
 *     the request structure even if @req doesn't have leftover.
 *     If @req has leftover, sets it up for the next range of segments.
 *
 *     This special helper function is only for request stacking drivers
 *     (e.g. request-based dm) so that they can handle partial completion.
 *     Actual device drivers should use blk_end_request instead.
 *
 *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
 *     %false return from this function.
 *
 * Return:
 *     %false - this request doesn't have any more data
 *     %true  - this request has more data
 **/
bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
{
	int total_bytes;

	trace_block_rq_complete(req->q, req, nr_bytes);

	if (!req->bio)
		return false;

	/*
	 * For fs requests, rq is just carrier of independent bio's
	 * and each partial completion should be handled separately.
	 * Reset per-request error on each partial completion.
	 *
	 * TODO: tj: This is too subtle.  It would be better to let
	 * low level drivers do what they see fit.
	 */
	if (req->cmd_type == REQ_TYPE_FS)
		req->errors = 0;

	if (error && req->cmd_type == REQ_TYPE_FS &&
	    !(req->cmd_flags & REQ_QUIET)) {
		char *error_type;

		switch (error) {
		case -ENOLINK:
			error_type = "recoverable transport";
			break;
		case -EREMOTEIO:
			error_type = "critical target";
			break;
		case -EBADE:
			error_type = "critical nexus";
			break;
		case -ETIMEDOUT:
			error_type = "timeout";
			break;
		case -ENOSPC:
			error_type = "critical space allocation";
			break;
		case -ENODATA:
			error_type = "critical medium";
			break;
		case -EIO:
		default:
			error_type = "I/O";
			break;
		}
		printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
				   __func__, error_type, req->rq_disk ?
				   req->rq_disk->disk_name : "?",
				   (unsigned long long)blk_rq_pos(req));

	}

	blk_account_io_completion(req, nr_bytes);

	total_bytes = 0;
	while (req->bio) {
		struct bio *bio = req->bio;
		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);

		if (bio_bytes == bio->bi_iter.bi_size)
			req->bio = bio->bi_next;

		req_bio_endio(req, bio, bio_bytes, error);

		total_bytes += bio_bytes;
		nr_bytes -= bio_bytes;

		if (!nr_bytes)
			break;
	}

	/*
	 * completely done
	 */
	if (!req->bio) {
		/*
		 * Reset counters so that the request stacking driver
		 * can find how many bytes remain in the request
		 * later.
		 */
		req->__data_len = 0;
		return false;
	}

	req->__data_len -= total_bytes;

	/* update sector only for requests with clear definition of sector */
	if (req->cmd_type == REQ_TYPE_FS)
		req->__sector += total_bytes >> 9;

	/* mixed attributes always follow the first bio */
	if (req->cmd_flags & REQ_MIXED_MERGE) {
		req->cmd_flags &= ~REQ_FAILFAST_MASK;
		req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
	}

	/*
	 * If total number of sectors is less than the first segment
	 * size, something has gone terribly wrong.
	 */
	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
		blk_dump_rq_flags(req, "request botched");
		req->__data_len = blk_rq_cur_bytes(req);
	}

	/* recalculate the number of segments */
	blk_recalc_rq_segments(req);

	return true;
}
EXPORT_SYMBOL_GPL(blk_update_request);

static bool blk_update_bidi_request(struct request *rq, int error,
				    unsigned int nr_bytes,
				    unsigned int bidi_bytes)
{
	if (blk_update_request(rq, error, nr_bytes))
		return true;

	/* Bidi request must be completed as a whole */
	if (unlikely(blk_bidi_rq(rq)) &&
	    blk_update_request(rq->next_rq, error, bidi_bytes))
		return true;

	if (blk_queue_add_random(rq->q))
		add_disk_randomness(rq->rq_disk);

	return false;
}

/**
 * blk_unprep_request - unprepare a request
 * @req:	the request
 *
 * This function makes a request ready for complete resubmission (or
 * completion).  It happens only after all error handling is complete,
 * so represents the appropriate moment to deallocate any resources
 * that were allocated to the request in the prep_rq_fn.
 */
void blk_unprep_request(struct request *req)
{
	struct request_queue *q = req->q;

	req->cmd_flags &= ~REQ_DONTPREP;
	if (q->unprep_rq_fn)
		q->unprep_rq_fn(q, req);
}
EXPORT_SYMBOL_GPL(blk_unprep_request);

/*
 * queue lock must be held
 */
void blk_finish_request(struct request *req, int error)
{
	if (req->cmd_flags & REQ_QUEUED)
		blk_queue_end_tag(req->q, req);

	BUG_ON(blk_queued_rq(req));

	if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
		laptop_io_completion(&req->q->backing_dev_info);

	blk_delete_timer(req);

	if (req->cmd_flags & REQ_DONTPREP)
		blk_unprep_request(req);

	blk_account_io_done(req);

	if (req->end_io)
		req->end_io(req, error);
	else {
		if (blk_bidi_rq(req))
			__blk_put_request(req->next_rq->q, req->next_rq);

		__blk_put_request(req->q, req);
	}
}
EXPORT_SYMBOL(blk_finish_request);

/**
 * blk_end_bidi_request - Complete a bidi request
 * @rq:         the request to complete
 * @error:      %0 for success, < %0 for error
 * @nr_bytes:   number of bytes to complete @rq
 * @bidi_bytes: number of bytes to complete @rq->next_rq
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
 *     Drivers that supports bidi can safely call this member for any
 *     type of request, bidi or uni.  In the later case @bidi_bytes is
 *     just ignored.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
static bool blk_end_bidi_request(struct request *rq, int error,
				 unsigned int nr_bytes, unsigned int bidi_bytes)
{
	struct request_queue *q = rq->q;
	unsigned long flags;

	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
		return true;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_finish_request(rq, error);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return false;
}

/**
 * __blk_end_bidi_request - Complete a bidi request with queue lock held
 * @rq:         the request to complete
 * @error:      %0 for success, < %0 for error
 * @nr_bytes:   number of bytes to complete @rq
 * @bidi_bytes: number of bytes to complete @rq->next_rq
 *
 * Description:
 *     Identical to blk_end_bidi_request() except that queue lock is
 *     assumed to be locked on entry and remains so on return.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
bool __blk_end_bidi_request(struct request *rq, int error,
			    unsigned int nr_bytes, unsigned int bidi_bytes)
{
	if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
		return true;

	blk_finish_request(rq, error);

	return false;
}

/**
 * blk_end_request - Helper function for drivers to complete the request.
 * @rq:       the request being processed
 * @error:    %0 for success, < %0 for error
 * @nr_bytes: number of bytes to complete
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @rq.
 *     If @rq has leftover, sets it up for the next range of segments.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
{
	return blk_end_bidi_request(rq, error, nr_bytes, 0);
}
EXPORT_SYMBOL(blk_end_request);

/**
 * blk_end_request_all - Helper function for drives to finish the request.
 * @rq: the request to finish
 * @error: %0 for success, < %0 for error
 *
 * Description:
 *     Completely finish @rq.
 */
void blk_end_request_all(struct request *rq, int error)
{
	bool pending;
	unsigned int bidi_bytes = 0;

	if (unlikely(blk_bidi_rq(rq)))
		bidi_bytes = blk_rq_bytes(rq->next_rq);

	pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
	BUG_ON(pending);
}
EXPORT_SYMBOL(blk_end_request_all);

/**
 * blk_end_request_cur - Helper function to finish the current request chunk.
 * @rq: the request to finish the current chunk for
 * @error: %0 for success, < %0 for error
 *
 * Description:
 *     Complete the current consecutively mapped chunk from @rq.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 */
bool blk_end_request_cur(struct request *rq, int error)
{
	return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
}
EXPORT_SYMBOL(blk_end_request_cur);

/**
 * blk_end_request_err - Finish a request till the next failure boundary.
 * @rq: the request to finish till the next failure boundary for
 * @error: must be negative errno
 *
 * Description:
 *     Complete @rq till the next failure boundary.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 */
bool blk_end_request_err(struct request *rq, int error)
{
	WARN_ON(error >= 0);
	return blk_end_request(rq, error, blk_rq_err_bytes(rq));
}
EXPORT_SYMBOL_GPL(blk_end_request_err);

/**
 * __blk_end_request - Helper function for drivers to complete the request.
 * @rq:       the request being processed
 * @error:    %0 for success, < %0 for error
 * @nr_bytes: number of bytes to complete
 *
 * Description:
 *     Must be called with queue lock held unlike blk_end_request().
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
{
	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
}
EXPORT_SYMBOL(__blk_end_request);

/**
 * __blk_end_request_all - Helper function for drives to finish the request.
 * @rq: the request to finish
 * @error: %0 for success, < %0 for error
 *
 * Description:
 *     Completely finish @rq.  Must be called with queue lock held.
 */
void __blk_end_request_all(struct request *rq, int error)
{
	bool pending;
	unsigned int bidi_bytes = 0;

	if (unlikely(blk_bidi_rq(rq)))
		bidi_bytes = blk_rq_bytes(rq->next_rq);

	pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
	BUG_ON(pending);
}
EXPORT_SYMBOL(__blk_end_request_all);

/**
 * __blk_end_request_cur - Helper function to finish the current request chunk.
 * @rq: the request to finish the current chunk for
 * @error: %0 for success, < %0 for error
 *
 * Description:
 *     Complete the current consecutively mapped chunk from @rq.  Must
 *     be called with queue lock held.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 */
bool __blk_end_request_cur(struct request *rq, int error)
{
	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
}
EXPORT_SYMBOL(__blk_end_request_cur);

/**
 * __blk_end_request_err - Finish a request till the next failure boundary.
 * @rq: the request to finish till the next failure boundary for
 * @error: must be negative errno
 *
 * Description:
 *     Complete @rq till the next failure boundary.  Must be called
 *     with queue lock held.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 */
bool __blk_end_request_err(struct request *rq, int error)
{
	WARN_ON(error >= 0);
	return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
}
EXPORT_SYMBOL_GPL(__blk_end_request_err);

void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
		     struct bio *bio)
{
	req_set_op(rq, bio_op(bio));

	if (bio_has_data(bio))
		rq->nr_phys_segments = bio_phys_segments(q, bio);

	rq->__data_len = bio->bi_iter.bi_size;
	rq->bio = rq->biotail = bio;

	if (bio->bi_bdev)
		rq->rq_disk = bio->bi_bdev->bd_disk;
}

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/**
 * rq_flush_dcache_pages - Helper function to flush all pages in a request
 * @rq: the request to be flushed
 *
 * Description:
 *     Flush all pages in @rq.
 */
void rq_flush_dcache_pages(struct request *rq)
{
	struct req_iterator iter;
	struct bio_vec bvec;

	rq_for_each_segment(bvec, rq, iter)
		flush_dcache_page(bvec.bv_page);
}
EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
#endif

/**
 * blk_lld_busy - Check if underlying low-level drivers of a device are busy
 * @q : the queue of the device being checked
 *
 * Description:
 *    Check if underlying low-level drivers of a device are busy.
 *    If the drivers want to export their busy state, they must set own
 *    exporting function using blk_queue_lld_busy() first.
 *
 *    Basically, this function is used only by request stacking drivers
 *    to stop dispatching requests to underlying devices when underlying
 *    devices are busy.  This behavior helps more I/O merging on the queue
 *    of the request stacking driver and prevents I/O throughput regression
 *    on burst I/O load.
 *
 * Return:
 *    0 - Not busy (The request stacking driver should dispatch request)
 *    1 - Busy (The request stacking driver should stop dispatching request)
 */
int blk_lld_busy(struct request_queue *q)
{
	if (q->lld_busy_fn)
		return q->lld_busy_fn(q);

	return 0;
}
EXPORT_SYMBOL_GPL(blk_lld_busy);

/**
 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
 * @rq: the clone request to be cleaned up
 *
 * Description:
 *     Free all bios in @rq for a cloned request.
 */
void blk_rq_unprep_clone(struct request *rq)
{
	struct bio *bio;

	while ((bio = rq->bio) != NULL) {
		rq->bio = bio->bi_next;

		bio_put(bio);
	}
}
EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);

/*
 * Copy attributes of the original request to the clone request.
 * The actual data parts (e.g. ->cmd, ->sense) are not copied.
 */
static void __blk_rq_prep_clone(struct request *dst, struct request *src)
{
	dst->cpu = src->cpu;
	req_set_op_attrs(dst, req_op(src),
			 (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE);
	dst->cmd_type = src->cmd_type;
	dst->__sector = blk_rq_pos(src);
	dst->__data_len = blk_rq_bytes(src);
	dst->nr_phys_segments = src->nr_phys_segments;
	dst->ioprio = src->ioprio;
	dst->extra_len = src->extra_len;
}
3036
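/**
 * blk_rq_prep_clone - Helper function to setup clone request
 * @rq: the request to be setup
 * @rq_src: original request to be cloned
 * @bs: bio_set that bios for clone are allocated from
 * @gfp_mask: memory allocation mask for bio
 * @bio_ctr: setup function to be called for each clone bio.
 *           Returns %0 for success, non %0 for failure.
 * @data: private data to be passed to @bio_ctr
 *
 * Description:
 *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
 *     Pages which the original bios are pointing to are not copied; the
 *     cloned bios just point to the same pages.  So the cloned bios must
 *     be completed before the original bios, which means the caller must
 *     complete @rq before @rq_src.
 */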
int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
		      struct bio_set *bs, gfp_t gfp_mask,
		      int (*bio_ctr)(struct bio *, struct bio *, void *),
		      void *data)
{
	struct bio *bio, *bio_src;

	if (!bs)
		bs = fs_bio_set;

	__rq_for_each_bio(bio_src, rq_src) {
		bio = bio_clone_fast(bio_src, gfp_mask, bs);
		if (!bio)
			goto free_and_out;

		if (bio_ctr && bio_ctr(bio, bio_src, data))
			goto free_and_out;

		if (rq->bio) {
			rq->biotail->bi_next = bio;
			rq->biotail = bio;
		} else
			rq->bio = rq->biotail = bio;
	}

	__blk_rq_prep_clone(rq, rq_src);

	return 0;

free_and_out:
	if (bio)
		bio_put(bio);
	blk_rq_unprep_clone(rq);

	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
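
/*
 * Illustrative sketch only (hypothetical stacking driver, not part of
 * this file): a request-based stacking driver pairs the two helpers
 * above roughly like this, with @clone, @rq and @my_bio_set supplied
 * by the driver:
 *
 *	if (blk_rq_prep_clone(clone, rq, my_bio_set, GFP_ATOMIC,
 *			      NULL, NULL))
 *		goto requeue;		// clone setup failed
 *	...
 *	blk_rq_unprep_clone(clone);	// free clone bios before ending @rq
 */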

int kblockd_schedule_work(struct work_struct *work)
{
	return queue_work(kblockd_workqueue, work);
}
EXPORT_SYMBOL(kblockd_schedule_work);

int kblockd_schedule_work_on(int cpu, struct work_struct *work)
{
	return queue_work_on(cpu, kblockd_workqueue, work);
}
EXPORT_SYMBOL(kblockd_schedule_work_on);

int kblockd_schedule_delayed_work(struct delayed_work *dwork,
				  unsigned long delay)
{
	return queue_delayed_work(kblockd_workqueue, dwork, delay);
}
EXPORT_SYMBOL(kblockd_schedule_delayed_work);

int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
				     unsigned long delay)
{
	return queue_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
}
EXPORT_SYMBOL(kblockd_schedule_delayed_work_on);

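/**
 * blk_start_plug - initialize blk_plug and track it inside the task_struct
 * @plug:	The &struct blk_plug that needs to be initialized
 *
 * Description:
 *   Tracking blk_plug inside the task_struct will help with auto-flushing the
 *   pending I/O should the task end up blocking between blk_start_plug() and
 *   blk_finish_plug(). This is important from a performance perspective, but
 *   also ensures that we don't deadlock. For instance, if the task is blocking
 *   for a memory allocation, memory reclaim could end up wanting to free a
 *   page belonging to a request that is currently residing in our private
 *   plug. By flushing the pending I/O when the process goes to sleep, we avoid
 *   this kind of deadlock.
 */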
void blk_start_plug(struct blk_plug *plug)
{
	struct task_struct *tsk = current;

	/*
	 * If this is a nested plug, don't actually assign it.
	 */
	if (tsk->plug)
		return;

	INIT_LIST_HEAD(&plug->list);
	INIT_LIST_HEAD(&plug->mq_list);
	INIT_LIST_HEAD(&plug->cb_list);
	/*
	 * Store ordering should not be needed here, since a potential
	 * preempt will imply a full memory barrier.
	 */
	tsk->plug = plug;
}
EXPORT_SYMBOL(blk_start_plug);

static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct request *rqa = container_of(a, struct request, queuelist);
	struct request *rqb = container_of(b, struct request, queuelist);

	return !(rqa->q < rqb->q ||
		(rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb)));
}
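
/*
 * Dispatch the requests queued on @q and drop the queue lock.  When the
 * unplug happens from the scheduler (@from_schedule), the queue is run
 * asynchronously via kblockd rather than directly, to avoid doing heavy
 * dispatch work with the scheduler's stack already in use.
 */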
static void queue_unplugged(struct request_queue *q, unsigned int depth,
			    bool from_schedule)
	__releases(q->queue_lock)
{
	trace_block_unplug(q, depth, !from_schedule);

	if (from_schedule)
		blk_run_queue_async(q);
	else
		__blk_run_queue(q);
	spin_unlock(q->queue_lock);
}

static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
{
	LIST_HEAD(callbacks);

	while (!list_empty(&plug->cb_list)) {
		list_splice_init(&plug->cb_list, &callbacks);

		while (!list_empty(&callbacks)) {
			struct blk_plug_cb *cb = list_first_entry(&callbacks,
							  struct blk_plug_cb,
							  list);
			list_del(&cb->list);
			cb->callback(cb, from_schedule);
		}
	}
}

struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
				      int size)
{
	struct blk_plug *plug = current->plug;
	struct blk_plug_cb *cb;

	if (!plug)
		return NULL;

	list_for_each_entry(cb, &plug->cb_list, list)
		if (cb->callback == unplug && cb->data == data)
			return cb;

	/* Not currently on the callback list */
	BUG_ON(size < sizeof(*cb));
	cb = kzalloc(size, GFP_ATOMIC);
	if (cb) {
		cb->data = data;
		cb->callback = unplug;
		list_add(&cb->list, &plug->cb_list);
	}
	return cb;
}
EXPORT_SYMBOL(blk_check_plugged);
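
/*
 * Illustrative sketch only (hypothetical caller, not part of this
 * file): a driver can let blk_check_plugged() register a per-task
 * unplug callback, embedding blk_plug_cb as the first member of its
 * own state:
 *
 *	struct my_plug_cb {
 *		struct blk_plug_cb cb;
 *		struct my_dev *dev;
 *	};
 *
 *	static void my_unplug(struct blk_plug_cb *cb, bool from_schedule)
 *	{
 *		struct my_plug_cb *mcb =
 *			container_of(cb, struct my_plug_cb, cb);
 *
 *		// kick the pending work for mcb->dev, then free the cb
 *		kfree(mcb);
 *	}
 *
 *	cb = blk_check_plugged(my_unplug, dev, sizeof(struct my_plug_cb));
 *	if (!cb)
 *		// task is not plugged, submit directly
 */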

void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
	struct request_queue *q;
	unsigned long flags;
	struct request *rq;
	LIST_HEAD(list);
	unsigned int depth;

	flush_plug_callbacks(plug, from_schedule);

	if (!list_empty(&plug->mq_list))
		blk_mq_flush_plug_list(plug, from_schedule);

	if (list_empty(&plug->list))
		return;

	list_splice_init(&plug->list, &list);

	list_sort(NULL, &list, plug_rq_cmp);

	q = NULL;
	depth = 0;

	/*
	 * Save and disable interrupts here, to avoid doing it for every
	 * queue lock we have to take.
	 */
	local_irq_save(flags);
	while (!list_empty(&list)) {
		rq = list_entry_rq(list.next);
		list_del_init(&rq->queuelist);
		BUG_ON(!rq->q);
		if (rq->q != q) {
			/*
			 * This drops the queue lock
			 */
			if (q)
				queue_unplugged(q, depth, from_schedule);
			q = rq->q;
			depth = 0;
			spin_lock(q->queue_lock);
		}

		/*
		 * Short-circuit if @q is dead
		 */
		if (unlikely(blk_queue_dying(q))) {
			__blk_end_request_all(rq, -ENODEV);
			continue;
		}

		/*
		 * rq is already accounted, so use raw insert
		 */
		if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
			__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
		else
			__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);

		depth++;
	}

	/*
	 * This drops the queue lock
	 */
	if (q)
		queue_unplugged(q, depth, from_schedule);

	local_irq_restore(flags);
}

void blk_finish_plug(struct blk_plug *plug)
{
	if (plug != current->plug)
		return;
	blk_flush_plug_list(plug, false);

	current->plug = NULL;
}
EXPORT_SYMBOL(blk_finish_plug);
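
/*
 * Illustrative sketch only (not from this file): submitters batch I/O
 * between blk_start_plug() and blk_finish_plug().  Requests queued in
 * between may be parked in the on-stack plug and are flushed when the
 * plug is finished (or earlier, if the task schedules out):
 *
 *	struct blk_plug plug;
 *
 *	blk_start_plug(&plug);
 *	submit_bio(bio1);
 *	submit_bio(bio2);
 *	blk_finish_plug(&plug);
 */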

bool blk_poll(struct request_queue *q, blk_qc_t cookie)
{
	struct blk_plug *plug;
	long state;
	unsigned int queue_num;
	struct blk_mq_hw_ctx *hctx;

	if (!q->mq_ops || !q->mq_ops->poll || !blk_qc_t_valid(cookie) ||
	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
		return false;

	queue_num = blk_qc_t_to_queue_num(cookie);
	hctx = q->queue_hw_ctx[queue_num];
	hctx->poll_considered++;

	plug = current->plug;
	if (plug)
		blk_flush_plug_list(plug, false);

	state = current->state;
	while (!need_resched()) {
		int ret;

		hctx->poll_invoked++;

		ret = q->mq_ops->poll(hctx, blk_qc_t_to_tag(cookie));
		if (ret > 0) {
			hctx->poll_success++;
			set_current_state(TASK_RUNNING);
			return true;
		}

		if (signal_pending_state(state, current))
			set_current_state(TASK_RUNNING);

		if (current->state == TASK_RUNNING)
			return true;
		if (ret < 0)
			break;
		cpu_relax();
	}

	return false;
}
EXPORT_SYMBOL_GPL(blk_poll);

#ifdef CONFIG_PM
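/**
 * blk_pm_runtime_init - Block layer runtime PM initialization routine
 * @q: the queue of the device
 * @dev: the device the queue belongs to
 *
 * Description:
 *    Initialize runtime-PM-related fields for @q and start auto suspend for
 *    @dev. Drivers that want to take advantage of request-based runtime PM
 *    should call this function after @dev has been initialized and its
 *    request queue @q has been allocated, and while runtime PM cannot happen
 *    yet (either because it is disabled/forbidden or its usage_count > 0).
 *    In most cases, a driver should call this function before any I/O has
 *    taken place.
 *
 *    This function sets up autosuspend for the device; the autosuspend delay
 *    is set to -1 to make runtime suspend impossible until an updated value
 *    is set, either by user space or by the driver. Drivers do not need to
 *    touch other autosuspend settings.
 *
 *    Block layer runtime PM is request based, so it only works for drivers
 *    that use requests as their I/O unit instead of bios directly.
 */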
void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
{
	q->dev = dev;
	q->rpm_status = RPM_ACTIVE;
	pm_runtime_set_autosuspend_delay(q->dev, -1);
	pm_runtime_use_autosuspend(q->dev);
}
EXPORT_SYMBOL(blk_pm_runtime_init);
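
/*
 * Illustrative sketch only (hypothetical driver, not part of this
 * file): a low-level driver typically wires this up at probe time and
 * then picks a real autosuspend delay:
 *
 *	blk_pm_runtime_init(q, dev);
 *	pm_runtime_set_autosuspend_delay(dev, 5000);	// example: 5s
 *	pm_runtime_allow(dev);
 */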
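
/**
 * blk_pre_runtime_suspend - Pre runtime suspend check
 * @q: the queue of the device
 *
 * Description:
 *    Check if runtime suspend is allowed for the device by examining
 *    whether there are any requests pending in the queue. If there are,
 *    the device cannot be runtime suspended; otherwise, the queue's status
 *    is updated to SUSPENDING and the driver can proceed to suspend the
 *    device.
 *
 *    For the not-allowed case, we mark last busy for the device so that
 *    the runtime PM core will try to autosuspend it some time later.
 *
 *    This function should be called near the start of the device's
 *    runtime_suspend callback.
 *
 * Return:
 *    0		- OK to runtime suspend the device
 *    -EBUSY	- Device should not be runtime suspended
 */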
int blk_pre_runtime_suspend(struct request_queue *q)
{
	int ret = 0;

	if (!q->dev)
		return ret;

	spin_lock_irq(q->queue_lock);
	if (q->nr_pending) {
		ret = -EBUSY;
		pm_runtime_mark_last_busy(q->dev);
	} else {
		q->rpm_status = RPM_SUSPENDING;
	}
	spin_unlock_irq(q->queue_lock);
	return ret;
}
EXPORT_SYMBOL(blk_pre_runtime_suspend);
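
/**
 * blk_post_runtime_suspend - Post runtime suspend processing
 * @q: the queue of the device
 * @err: return value of the device's runtime_suspend function
 *
 * Description:
 *    Update the queue's runtime status according to the return value of the
 *    device's runtime_suspend function and mark last busy for the device so
 *    that the PM core will try to auto suspend the device at a later time.
 *
 *    This function should be called near the end of the device's
 *    runtime_suspend callback.
 */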
void blk_post_runtime_suspend(struct request_queue *q, int err)
{
	if (!q->dev)
		return;

	spin_lock_irq(q->queue_lock);
	if (!err) {
		q->rpm_status = RPM_SUSPENDED;
	} else {
		q->rpm_status = RPM_ACTIVE;
		pm_runtime_mark_last_busy(q->dev);
	}
	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_post_runtime_suspend);
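
/**
 * blk_pre_runtime_resume - Pre runtime resume processing
 * @q: the queue of the device
 *
 * Description:
 *    Update the queue's runtime status to RESUMING in preparation for the
 *    runtime resume of the device.
 *
 *    This function should be called near the start of the device's
 *    runtime_resume callback.
 */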
void blk_pre_runtime_resume(struct request_queue *q)
{
	if (!q->dev)
		return;

	spin_lock_irq(q->queue_lock);
	q->rpm_status = RPM_RESUMING;
	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_pre_runtime_resume);
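
/**
 * blk_post_runtime_resume - Post runtime resume processing
 * @q: the queue of the device
 * @err: return value of the device's runtime_resume function
 *
 * Description:
 *    Update the queue's runtime status according to the return value of the
 *    device's runtime_resume function. If it is successfully resumed, process
 *    the requests that were queued onto the device's queue while it was
 *    resuming, then mark last busy and initiate autosuspend for it.
 *
 *    This function should be called near the end of the device's
 *    runtime_resume callback.
 */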
void blk_post_runtime_resume(struct request_queue *q, int err)
{
	if (!q->dev)
		return;

	spin_lock_irq(q->queue_lock);
	if (!err) {
		q->rpm_status = RPM_ACTIVE;
		__blk_run_queue(q);
		pm_runtime_mark_last_busy(q->dev);
		pm_request_autosuspend(q->dev);
	} else {
		q->rpm_status = RPM_SUSPENDED;
	}
	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_post_runtime_resume);
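
/**
 * blk_set_runtime_active - Force runtime status of the queue to be active
 * @q: the queue of the device
 *
 * If the device is left runtime suspended during system suspend, the resume
 * hook typically resumes the device and corrects the runtime status
 * accordingly. However, that does not affect the queue runtime PM status,
 * which is still "suspended". This prevents processing requests from the
 * queue.
 *
 * This function can be used in a driver's resume hook to correct the queue's
 * runtime PM status and re-enable peeking requests from the queue. It should
 * be called before the first request is added to the queue.
 */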
void blk_set_runtime_active(struct request_queue *q)
{
	spin_lock_irq(q->queue_lock);
	q->rpm_status = RPM_ACTIVE;
	pm_runtime_mark_last_busy(q->dev);
	pm_request_autosuspend(q->dev);
	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_set_runtime_active);
#endif

int __init blk_dev_init(void)
{
	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
			FIELD_SIZEOF(struct request, cmd_flags));

	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
	kblockd_workqueue = alloc_workqueue("kblockd",
					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
	if (!kblockd_workqueue)
		panic("Failed to create kblockd\n");

	request_cachep = kmem_cache_create("blkdev_requests",
			sizeof(struct request), 0, SLAB_PANIC, NULL);

	blk_requestq_cachep = kmem_cache_create("request_queue",
			sizeof(struct request_queue), 0, SLAB_PANIC, NULL);

	return 0;
}