/*
 * blk-core.c - handles all read/write requests to block devices
 *
 * Legacy (single-queue) request queue management plus the parts of the
 * bio submission path that are shared with blk-mq.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>
#include <linux/list_sort.h>
#include <linux/delay.h>
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-wbt.h"
46
47#ifdef CONFIG_DEBUG_FS
48struct dentry *blk_debugfs_root;
49#endif
50
51EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
52EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
53EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
54EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
55EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
56
57DEFINE_IDA(blk_queue_ida);
58
/*
 * For the allocated request tables
 */
62struct kmem_cache *request_cachep;

/*
 * For queue allocation
 */
67struct kmem_cache *blk_requestq_cachep;

/*
 * Controlling structure to kblockd
 */
72static struct workqueue_struct *kblockd_workqueue;
73
/**
 * blk_queue_flag_set - atomically set a queue flag
 * @flag: flag to be set
 * @q: request queue
 */
79void blk_queue_flag_set(unsigned int flag, struct request_queue *q)
80{
81 unsigned long flags;
82
83 spin_lock_irqsave(q->queue_lock, flags);
84 queue_flag_set(flag, q);
85 spin_unlock_irqrestore(q->queue_lock, flags);
86}
87EXPORT_SYMBOL(blk_queue_flag_set);
88
/**
 * blk_queue_flag_clear - atomically clear a queue flag
 * @flag: flag to be cleared
 * @q: request queue
 */
94void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
95{
96 unsigned long flags;
97
98 spin_lock_irqsave(q->queue_lock, flags);
99 queue_flag_clear(flag, q);
100 spin_unlock_irqrestore(q->queue_lock, flags);
101}
102EXPORT_SYMBOL(blk_queue_flag_clear);
103
/**
 * blk_queue_flag_test_and_set - atomically test and set a queue flag
 * @flag: flag to be set
 * @q: request queue
 *
 * Returns the previous value of @flag - 0 if the flag was not set and 1 if
 * the flag was already set.
 */
112bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
113{
114 unsigned long flags;
115 bool res;
116
117 spin_lock_irqsave(q->queue_lock, flags);
118 res = queue_flag_test_and_set(flag, q);
119 spin_unlock_irqrestore(q->queue_lock, flags);
120
121 return res;
122}
123EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
124
/**
 * blk_queue_flag_test_and_clear - atomically test and clear a queue flag
 * @flag: flag to be cleared
 * @q: request queue
 *
 * Returns the previous value of @flag - 0 if the flag was not set and 1 if
 * the flag was set.
 */
133bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q)
134{
135 unsigned long flags;
136 bool res;
137
138 spin_lock_irqsave(q->queue_lock, flags);
139 res = queue_flag_test_and_clear(flag, q);
140 spin_unlock_irqrestore(q->queue_lock, flags);
141
142 return res;
143}
144EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear);
145
146static void blk_clear_congested(struct request_list *rl, int sync)
147{
148#ifdef CONFIG_CGROUP_WRITEBACK
149 clear_wb_congested(rl->blkg->wb_congested, sync);
150#else
	/*
	 * Without cgroup writeback all blkg's share bdi->wb; only flip
	 * the bdi congestion state for the root request_list.
	 */
155 if (rl == &rl->q->root_rl)
156 clear_wb_congested(rl->q->backing_dev_info->wb.congested, sync);
157#endif
158}
159
160static void blk_set_congested(struct request_list *rl, int sync)
161{
162#ifdef CONFIG_CGROUP_WRITEBACK
163 set_wb_congested(rl->blkg->wb_congested, sync);
164#else
	/* see blk_clear_congested() */
166 if (rl == &rl->q->root_rl)
167 set_wb_congested(rl->q->backing_dev_info->wb.congested, sync);
168#endif
169}
170
171void blk_queue_congestion_threshold(struct request_queue *q)
172{
173 int nr;
174
175 nr = q->nr_requests - (q->nr_requests / 8) + 1;
176 if (nr > q->nr_requests)
177 nr = q->nr_requests;
178 q->nr_congestion_on = nr;
179
180 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
181 if (nr < 1)
182 nr = 1;
183 q->nr_congestion_off = nr;
184}
185
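/*
 * Initialize a newly allocated request to default/idle values before the
 * caller fills in the operation-specific fields.
 */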
186void blk_rq_init(struct request_queue *q, struct request *rq)
187{
188 memset(rq, 0, sizeof(*rq));
189
190 INIT_LIST_HEAD(&rq->queuelist);
191 INIT_LIST_HEAD(&rq->timeout_list);
192 rq->cpu = -1;
193 rq->q = q;
194 rq->__sector = (sector_t) -1;
195 INIT_HLIST_NODE(&rq->hash);
196 RB_CLEAR_NODE(&rq->rb_node);
197 rq->tag = -1;
198 rq->internal_tag = -1;
199 rq->start_time = jiffies;
200 set_start_time_ns(rq);
201 rq->part = NULL;
202 seqcount_init(&rq->gstate_seq);
203 u64_stats_init(&rq->aborted_gstate_sync);
204
205
206
207 WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
208}
209EXPORT_SYMBOL(blk_rq_init);
210
211static const struct {
212 int errno;
213 const char *name;
214} blk_errors[] = {
215 [BLK_STS_OK] = { 0, "" },
216 [BLK_STS_NOTSUPP] = { -EOPNOTSUPP, "operation not supported" },
217 [BLK_STS_TIMEOUT] = { -ETIMEDOUT, "timeout" },
218 [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" },
219 [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" },
220 [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" },
221 [BLK_STS_NEXUS] = { -EBADE, "critical nexus" },
222 [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" },
223 [BLK_STS_PROTECTION] = { -EILSEQ, "protection" },
224 [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" },
225 [BLK_STS_DEV_RESOURCE] = { -EBUSY, "device resource" },
226 [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" },

	/* device mapper special case, should not leak out: */
229 [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" },

	/* everything else not covered above: */
232 [BLK_STS_IOERR] = { -EIO, "I/O" },
233};
234
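/*
 * Translate a Linux errno to the closest BLK_STS_* code; anything without a
 * direct mapping falls back to BLK_STS_IOERR.
 */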
235blk_status_t errno_to_blk_status(int errno)
236{
237 int i;
238
239 for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
240 if (blk_errors[i].errno == errno)
241 return (__force blk_status_t)i;
242 }
243
244 return BLK_STS_IOERR;
245}
246EXPORT_SYMBOL_GPL(errno_to_blk_status);
247
248int blk_status_to_errno(blk_status_t status)
249{
250 int idx = (__force int)status;
251
252 if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
253 return -EIO;
254 return blk_errors[idx].errno;
255}
256EXPORT_SYMBOL_GPL(blk_status_to_errno);
257
258static void print_req_error(struct request *req, blk_status_t status)
259{
260 int idx = (__force int)status;
261
262 if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
263 return;
264
265 printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
266 __func__, blk_errors[idx].name, req->rq_disk ?
267 req->rq_disk->disk_name : "?",
268 (unsigned long long)blk_rq_pos(req));
269}
270
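/*
 * Complete @nbytes of @bio on behalf of @rq and end the bio once it has no
 * data left, unless the request is part of a flush sequence.
 */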
271static void req_bio_endio(struct request *rq, struct bio *bio,
272 unsigned int nbytes, blk_status_t error)
273{
274 if (error)
275 bio->bi_status = error;
276
277 if (unlikely(rq->rq_flags & RQF_QUIET))
278 bio_set_flag(bio, BIO_QUIET);
279
280 bio_advance(bio, nbytes);

	/* don't actually finish bio if it's part of flush sequence */
283 if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
284 bio_endio(bio);
285}
286
287void blk_dump_rq_flags(struct request *rq, char *msg)
288{
289 printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg,
290 rq->rq_disk ? rq->rq_disk->disk_name : "?",
291 (unsigned long long) rq->cmd_flags);
292
293 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
294 (unsigned long long)blk_rq_pos(rq),
295 blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
296 printk(KERN_INFO " bio %p, biotail %p, len %u\n",
297 rq->bio, rq->biotail, blk_rq_bytes(rq));
298}
299EXPORT_SYMBOL(blk_dump_rq_flags);
300
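/* Delayed work handler used by blk_delay_queue() to re-run a legacy queue. */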
301static void blk_delay_work(struct work_struct *work)
302{
303 struct request_queue *q;
304
305 q = container_of(work, struct request_queue, delay_work.work);
306 spin_lock_irq(q->queue_lock);
307 __blk_run_queue(q);
308 spin_unlock_irq(q->queue_lock);
309}
310
/**
 * blk_delay_queue - restart queueing after defined interval
 * @q:		The &struct request_queue in question
 * @msecs:	Delay in msecs
 *
 * Description:
 *   Sometimes queueing needs to be postponed for a little while, to allow
 *   resources to come back. This function will make sure that queueing is
 *   restarted around the specified time.
 */
321void blk_delay_queue(struct request_queue *q, unsigned long msecs)
322{
323 lockdep_assert_held(q->queue_lock);
324 WARN_ON_ONCE(q->mq_ops);
325
326 if (likely(!blk_queue_dead(q)))
327 queue_delayed_work(kblockd_workqueue, &q->delay_work,
328 msecs_to_jiffies(msecs));
329}
330EXPORT_SYMBOL(blk_delay_queue);
331
/**
 * blk_start_queue_async - asynchronously restart a previously stopped queue
 * @q:    The &struct request_queue in question
 *
 * Description:
 *   blk_start_queue_async() will clear the stop flag on the queue, and
 *   ensure that the request_fn for the queue is run from an async
 *   context.
 */
341void blk_start_queue_async(struct request_queue *q)
342{
343 lockdep_assert_held(q->queue_lock);
344 WARN_ON_ONCE(q->mq_ops);
345
346 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
347 blk_run_queue_async(q);
348}
349EXPORT_SYMBOL(blk_start_queue_async);
350
/**
 * blk_start_queue - restart a previously stopped queue
 * @q:    The &struct request_queue in question
 *
 * Description:
 *   blk_start_queue() will clear the stop flag on the queue, and call
 *   the request_fn for the queue if it was in a stopped state when
 *   entered. Also see blk_stop_queue().
 */
360void blk_start_queue(struct request_queue *q)
361{
362 lockdep_assert_held(q->queue_lock);
363 WARN_ON(!in_interrupt() && !irqs_disabled());
364 WARN_ON_ONCE(q->mq_ops);
365
366 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
367 __blk_run_queue(q);
368}
369EXPORT_SYMBOL(blk_start_queue);
370
/**
 * blk_stop_queue - stop a queue
 * @q:    The &struct request_queue in question
 *
 * Description:
 *   The Linux block layer assumes that a block driver will consume all
 *   entries on the request queue when the request_fn strategy is called.
 *   Often this will not happen, because of hardware limitations (queue
 *   depth settings). If a device driver gets a 'queue full' response,
 *   or if it simply chooses not to queue more I/O at one point, it can
 *   call this function to prevent the request_fn from being called until
 *   the driver has signalled it's ready to go again. This happens by
 *   calling blk_start_queue() to restart queue operations.
 */
385void blk_stop_queue(struct request_queue *q)
386{
387 lockdep_assert_held(q->queue_lock);
388 WARN_ON_ONCE(q->mq_ops);
389
390 cancel_delayed_work(&q->delay_work);
391 queue_flag_set(QUEUE_FLAG_STOPPED, q);
392}
393EXPORT_SYMBOL(blk_stop_queue);
394
/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * Description:
 *     The block layer may perform asynchronous callback activity
 *     on a queue, such as calling the unplug function after a timeout.
 *     A block device may call blk_sync_queue to ensure that any
 *     such activity is cancelled, thus allowing it to release resources
 *     that the callbacks might use. The caller must already have made sure
 *     that its ->make_request_fn will not re-add plugging prior to calling
 *     this function.
 *
 *     This function does not cancel any asynchronous activity arising
 *     out of elevator or throttling code. That would require elevator_exit()
 *     and blkcg_exit_queue() to be called with queue lock initialized.
 */
413void blk_sync_queue(struct request_queue *q)
414{
415 del_timer_sync(&q->timeout);
416 cancel_work_sync(&q->timeout_work);
417
418 if (q->mq_ops) {
419 struct blk_mq_hw_ctx *hctx;
420 int i;
421
422 cancel_delayed_work_sync(&q->requeue_work);
423 queue_for_each_hw_ctx(q, hctx, i)
424 cancel_delayed_work_sync(&hctx->run_work);
425 } else {
426 cancel_delayed_work_sync(&q->delay_work);
427 }
428}
429EXPORT_SYMBOL(blk_sync_queue);
430
/**
 * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY
 * @q: request queue pointer
 *
 * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not
 * set and 1 if the flag was already set.
 */
438int blk_set_preempt_only(struct request_queue *q)
439{
440 return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
441}
442EXPORT_SYMBOL_GPL(blk_set_preempt_only);
443
444void blk_clear_preempt_only(struct request_queue *q)
445{
446 blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
447 wake_up_all(&q->mq_freeze_wq);
448}
449EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
450
/**
 * __blk_run_queue_uncond - run a queue whether or not it has been stopped
 * @q:	The queue to run
 *
 * Description:
 *    Invoke request handling on a queue if there are any pending requests.
 *    May be used to restart request handling after a request has completed.
 *    This variant runs the queue whether or not the queue has been
 *    stopped. Must be called with the queue lock held and interrupts
 *    disabled. See also @blk_run_queue.
 */
462inline void __blk_run_queue_uncond(struct request_queue *q)
463{
464 lockdep_assert_held(q->queue_lock);
465 WARN_ON_ONCE(q->mq_ops);
466
467 if (unlikely(blk_queue_dead(q)))
468 return;
469
	/*
	 * Some request_fn implementations, e.g. scsi_request_fn(), unlock
	 * the queue lock internally. As a result multiple threads may be
	 * running such a request function concurrently. Keep track of the
	 * number of active request_fn invocations such that blk_drain_queue()
	 * can wait until all these request_fn calls have finished.
	 */
477 q->request_fn_active++;
478 q->request_fn(q);
479 q->request_fn_active--;
480}
481EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
482
/**
 * __blk_run_queue - run a single device queue
 * @q:	The queue to run
 *
 * Description:
 *    See @blk_run_queue.
 */
490void __blk_run_queue(struct request_queue *q)
491{
492 lockdep_assert_held(q->queue_lock);
493 WARN_ON_ONCE(q->mq_ops);
494
495 if (unlikely(blk_queue_stopped(q)))
496 return;
497
498 __blk_run_queue_uncond(q);
499}
500EXPORT_SYMBOL(__blk_run_queue);
501
/**
 * blk_run_queue_async - run a single device queue in workqueue context
 * @q:	The queue to run
 *
 * Description:
 *    Tells kblockd to perform the equivalent of @blk_run_queue. The caller
 *    of this function must hold the queue lock. If the queue is stopped
 *    or dead, kblockd is not woken up; otherwise any pending delayed run
 *    of the queue is replaced with an immediate one.
 */
515void blk_run_queue_async(struct request_queue *q)
516{
517 lockdep_assert_held(q->queue_lock);
518 WARN_ON_ONCE(q->mq_ops);
519
520 if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
521 mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
522}
523EXPORT_SYMBOL(blk_run_queue_async);
524
/**
 * blk_run_queue - run a single device queue
 * @q: The queue to run
 *
 * Description:
 *    Invoke request handling on this queue, if it has pending work to do.
 *    May be used to restart queueing when a request has completed.
 */
533void blk_run_queue(struct request_queue *q)
534{
535 unsigned long flags;
536
537 WARN_ON_ONCE(q->mq_ops);
538
539 spin_lock_irqsave(q->queue_lock, flags);
540 __blk_run_queue(q);
541 spin_unlock_irqrestore(q->queue_lock, flags);
542}
543EXPORT_SYMBOL(blk_run_queue);
544
545void blk_put_queue(struct request_queue *q)
546{
547 kobject_put(&q->kobj);
548}
549EXPORT_SYMBOL(blk_put_queue);
550
/**
 * __blk_drain_queue - drain requests from request_queue
 * @q: queue to drain
 * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV
 *
 * Drain requests from @q.  If @drain_all is set, all requests are drained.
 * If not, only ELVPRIV requests are drained.  The caller is responsible
 * for ensuring that no new requests which need to be drained are queued.
 */
560static void __blk_drain_queue(struct request_queue *q, bool drain_all)
561 __releases(q->queue_lock)
562 __acquires(q->queue_lock)
563{
564 int i;
565
566 lockdep_assert_held(q->queue_lock);
567 WARN_ON_ONCE(q->mq_ops);
568
569 while (true) {
570 bool drain = false;
571
572
573
574
575
576 if (q->elevator)
577 elv_drain_elevator(q);
578
579 blkcg_drain_queue(q);
580
581
582
583
584
585
586
587
588 if (!list_empty(&q->queue_head) && q->request_fn)
589 __blk_run_queue(q);
590
591 drain |= q->nr_rqs_elvpriv;
592 drain |= q->request_fn_active;
593
594
595
596
597
598
599 if (drain_all) {
600 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
601 drain |= !list_empty(&q->queue_head);
602 for (i = 0; i < 2; i++) {
603 drain |= q->nr_rqs[i];
604 drain |= q->in_flight[i];
605 if (fq)
606 drain |= !list_empty(&fq->flush_queue[i]);
607 }
608 }
609
610 if (!drain)
611 break;
612
613 spin_unlock_irq(q->queue_lock);
614
615 msleep(10);
616
617 spin_lock_irq(q->queue_lock);
618 }
619
	/*
	 * With queue marked dead, any woken up waiter will fail the
	 * allocation path, so the wakeup chaining is lost and we're
	 * left with hung waiters. We need to wake up those waiters.
	 */
625 if (q->request_fn) {
626 struct request_list *rl;
627
628 blk_queue_for_each_rl(rl, q)
629 for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
630 wake_up_all(&rl->wait[i]);
631 }
632}
633
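/*
 * Drain all requests from @q, including ELVPRIV ones; wrapper around
 * __blk_drain_queue() that takes the queue lock.
 */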
634void blk_drain_queue(struct request_queue *q)
635{
636 spin_lock_irq(q->queue_lock);
637 __blk_drain_queue(q, true);
638 spin_unlock_irq(q->queue_lock);
639}
640
/**
 * blk_queue_bypass_start - enter queue bypass mode
 * @q: queue of interest
 *
 * In bypass mode, only the dispatch FIFO queue of @q is used.  This
 * function makes @q enter bypass mode and drains all requests which were
 * throttled or issued before.  On return, it's guaranteed that no request
 * is being throttled or has ELVPRIV set and blk_queue_bypass() is %true
 * inside queue or RCU read lock.
 */
651void blk_queue_bypass_start(struct request_queue *q)
652{
653 WARN_ON_ONCE(q->mq_ops);
654
655 spin_lock_irq(q->queue_lock);
656 q->bypass_depth++;
657 queue_flag_set(QUEUE_FLAG_BYPASS, q);
658 spin_unlock_irq(q->queue_lock);
659
660
661
662
663
664
665 if (blk_queue_init_done(q)) {
666 spin_lock_irq(q->queue_lock);
667 __blk_drain_queue(q, false);
668 spin_unlock_irq(q->queue_lock);
669
670
671 synchronize_rcu();
672 }
673}
674EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
675
/**
 * blk_queue_bypass_end - leave queue bypass mode
 * @q: queue of interest
 *
 * Leave bypass mode and restore the normal queueing behavior.
 *
 * Note: although blk_queue_bypass_start() is only called for blk-sq queues,
 * this function is called for both blk-sq and blk-mq queues.
 */
685void blk_queue_bypass_end(struct request_queue *q)
686{
687 spin_lock_irq(q->queue_lock);
688 if (!--q->bypass_depth)
689 queue_flag_clear(QUEUE_FLAG_BYPASS, q);
690 WARN_ON_ONCE(q->bypass_depth < 0);
691 spin_unlock_irq(q->queue_lock);
692}
693EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
694
695void blk_set_queue_dying(struct request_queue *q)
696{
697 blk_queue_flag_set(QUEUE_FLAG_DYING, q);
698
699
700
701
702
703
704 blk_freeze_queue_start(q);
705
706 if (q->mq_ops)
707 blk_mq_wake_waiters(q);
708 else {
709 struct request_list *rl;
710
711 spin_lock_irq(q->queue_lock);
712 blk_queue_for_each_rl(rl, q) {
713 if (rl->rq_pool) {
714 wake_up_all(&rl->wait[BLK_RW_SYNC]);
715 wake_up_all(&rl->wait[BLK_RW_ASYNC]);
716 }
717 }
718 spin_unlock_irq(q->queue_lock);
719 }
720
721
722 wake_up_all(&q->mq_freeze_wq);
723}
724EXPORT_SYMBOL_GPL(blk_set_queue_dying);
725
/**
 * blk_cleanup_queue - shutdown a request queue
 * @q: request queue to shutdown
 *
 * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
 * put it.  All future requests will be failed immediately with -ENODEV.
 */
733void blk_cleanup_queue(struct request_queue *q)
734{
735 spinlock_t *lock = q->queue_lock;
736
737
738 mutex_lock(&q->sysfs_lock);
739 blk_set_queue_dying(q);
740 spin_lock_irq(lock);
741
742
743
744
745
746
747
748
749
750
751 q->bypass_depth++;
752 queue_flag_set(QUEUE_FLAG_BYPASS, q);
753
754 queue_flag_set(QUEUE_FLAG_NOMERGES, q);
755 queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
756 queue_flag_set(QUEUE_FLAG_DYING, q);
757 spin_unlock_irq(lock);
758 mutex_unlock(&q->sysfs_lock);
759
760
761
762
763
764 blk_freeze_queue(q);
765 spin_lock_irq(lock);
766 queue_flag_set(QUEUE_FLAG_DEAD, q);
767 spin_unlock_irq(lock);
768
769
770
771
772
773
774
775 if (q->mq_ops)
776 blk_mq_quiesce_queue(q);
777
778
779 blk_flush_integrity();
780
781
782 del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
783 blk_sync_queue(q);
784
785
786
787
788
789 WARN_ON_ONCE(q->kobj.state_in_sysfs);
790
791
792
793
794
795
796 if (q->elevator) {
797 ioc_clear_queue(q);
798 elevator_exit(q, q->elevator);
799 q->elevator = NULL;
800 }
801
802
803
804
805
806
807 blkcg_exit_queue(q);
808
809
810
811
812
813
814 bdi_put(q->backing_dev_info);
815
816 if (q->mq_ops)
817 blk_mq_free_queue(q);
818 percpu_ref_exit(&q->q_usage_counter);
819
820 spin_lock_irq(lock);
821 if (q->queue_lock != &q->__queue_lock)
822 q->queue_lock = &q->__queue_lock;
823 spin_unlock_irq(lock);
824
825
826 blk_put_queue(q);
827}
828EXPORT_SYMBOL(blk_cleanup_queue);
829
/* Allocate memory local to the request queue */
831static void *alloc_request_simple(gfp_t gfp_mask, void *data)
832{
833 struct request_queue *q = data;
834
835 return kmem_cache_alloc_node(request_cachep, gfp_mask, q->node);
836}
837
838static void free_request_simple(void *element, void *data)
839{
840 kmem_cache_free(request_cachep, element);
841}
842
843static void *alloc_request_size(gfp_t gfp_mask, void *data)
844{
845 struct request_queue *q = data;
846 struct request *rq;
847
848 rq = kmalloc_node(sizeof(struct request) + q->cmd_size, gfp_mask,
849 q->node);
850 if (rq && q->init_rq_fn && q->init_rq_fn(q, rq, gfp_mask) < 0) {
851 kfree(rq);
852 rq = NULL;
853 }
854 return rq;
855}
856
857static void free_request_size(void *element, void *data)
858{
859 struct request_queue *q = data;
860
861 if (q->exit_rq_fn)
862 q->exit_rq_fn(q, element);
863 kfree(element);
864}
865
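/*
 * Initialize a request_list: counters, wait queues and the request mempool
 * (sized by q->cmd_size for drivers with per-request payloads).
 */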
866int blk_init_rl(struct request_list *rl, struct request_queue *q,
867 gfp_t gfp_mask)
868{
869 if (unlikely(rl->rq_pool) || q->mq_ops)
870 return 0;
871
872 rl->q = q;
873 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
874 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
875 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
876 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
877
878 if (q->cmd_size) {
879 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ,
880 alloc_request_size, free_request_size,
881 q, gfp_mask, q->node);
882 } else {
883 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ,
884 alloc_request_simple, free_request_simple,
885 q, gfp_mask, q->node);
886 }
887 if (!rl->rq_pool)
888 return -ENOMEM;
889
890 if (rl != &q->root_rl)
891 WARN_ON_ONCE(!blk_get_queue(q));
892
893 return 0;
894}
895
896void blk_exit_rl(struct request_queue *q, struct request_list *rl)
897{
898 if (rl->rq_pool) {
899 mempool_destroy(rl->rq_pool);
900 if (rl != &q->root_rl)
901 blk_put_queue(q);
902 }
903}
904
905struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
906{
907 return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL);
908}
909EXPORT_SYMBOL(blk_alloc_queue);
910
/**
 * blk_queue_enter() - try to increase q->q_usage_counter
 * @q: request queue pointer
 * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT
 */
916int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
917{
918 const bool preempt = flags & BLK_MQ_REQ_PREEMPT;
919
920 while (true) {
921 bool success = false;
922
923 rcu_read_lock();
924 if (percpu_ref_tryget_live(&q->q_usage_counter)) {
925
926
927
928
929
930 if (preempt || !blk_queue_preempt_only(q)) {
931 success = true;
932 } else {
933 percpu_ref_put(&q->q_usage_counter);
934 }
935 }
936 rcu_read_unlock();
937
938 if (success)
939 return 0;
940
941 if (flags & BLK_MQ_REQ_NOWAIT)
942 return -EBUSY;
943
944
945
946
947
948
949
950
951 smp_rmb();
952
953 wait_event(q->mq_freeze_wq,
954 (atomic_read(&q->mq_freeze_depth) == 0 &&
955 (preempt || !blk_queue_preempt_only(q))) ||
956 blk_queue_dying(q));
957 if (blk_queue_dying(q))
958 return -ENODEV;
959 }
960}
961
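/*
 * blk_queue_exit() - drop the usage-counter reference taken by
 * blk_queue_enter()
 */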
962void blk_queue_exit(struct request_queue *q)
963{
964 percpu_ref_put(&q->q_usage_counter);
965}
966
967static void blk_queue_usage_counter_release(struct percpu_ref *ref)
968{
969 struct request_queue *q =
970 container_of(ref, struct request_queue, q_usage_counter);
971
972 wake_up_all(&q->mq_freeze_wq);
973}
974
975static void blk_rq_timed_out_timer(struct timer_list *t)
976{
977 struct request_queue *q = from_timer(q, t, timeout);
978
979 kblockd_schedule_work(&q->timeout_work);
980}
981
/**
 * blk_alloc_queue_node - allocate a request queue
 * @gfp_mask: memory allocation flags
 * @node_id: NUMA node to allocate memory from
 * @lock: For legacy queues, pointer to a spinlock that will be used to e.g.
 *        serialize calls to the legacy .request_fn() callback. Ignored for
 *        blk-mq request queues.
 *
 * Note: pass the queue lock as the third argument to this function instead of
 * setting the queue lock pointer explicitly to avoid triggering a sporadic
 * crash in the blkcg throttling code. This function namely calls
 * blkcg_init_queue() and the queue lock pointer must be set before
 * blkcg_exit_queue() is called to avoid that crash.
 */
995struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
996 spinlock_t *lock)
997{
998 struct request_queue *q;
999
1000 q = kmem_cache_alloc_node(blk_requestq_cachep,
1001 gfp_mask | __GFP_ZERO, node_id);
1002 if (!q)
1003 return NULL;
1004
1005 q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
1006 if (q->id < 0)
1007 goto fail_q;
1008
1009 q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
1010 if (!q->bio_split)
1011 goto fail_id;
1012
1013 q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
1014 if (!q->backing_dev_info)
1015 goto fail_split;
1016
1017 q->stats = blk_alloc_queue_stats();
1018 if (!q->stats)
1019 goto fail_stats;
1020
1021 q->backing_dev_info->ra_pages =
1022 (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
1023 q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
1024 q->backing_dev_info->name = "block";
1025 q->node = node_id;
1026
1027 timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
1028 laptop_mode_timer_fn, 0);
1029 timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
1030 INIT_WORK(&q->timeout_work, NULL);
1031 INIT_LIST_HEAD(&q->queue_head);
1032 INIT_LIST_HEAD(&q->timeout_list);
1033 INIT_LIST_HEAD(&q->icq_list);
1034#ifdef CONFIG_BLK_CGROUP
1035 INIT_LIST_HEAD(&q->blkg_list);
1036#endif
1037 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
1038
1039 kobject_init(&q->kobj, &blk_queue_ktype);
1040
1041#ifdef CONFIG_BLK_DEV_IO_TRACE
1042 mutex_init(&q->blk_trace_mutex);
1043#endif
1044 mutex_init(&q->sysfs_lock);
1045 spin_lock_init(&q->__queue_lock);
1046
1047 if (!q->mq_ops)
1048 q->queue_lock = lock ? : &q->__queue_lock;
1049
1050
1051
1052
1053
1054
1055
1056 q->bypass_depth = 1;
1057 queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
1058
1059 init_waitqueue_head(&q->mq_freeze_wq);
1060
1061
1062
1063
1064
1065 if (percpu_ref_init(&q->q_usage_counter,
1066 blk_queue_usage_counter_release,
1067 PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
1068 goto fail_bdi;
1069
1070 if (blkcg_init_queue(q))
1071 goto fail_ref;
1072
1073 return q;
1074
1075fail_ref:
1076 percpu_ref_exit(&q->q_usage_counter);
1077fail_bdi:
1078 blk_free_queue_stats(q->stats);
1079fail_stats:
1080 bdi_put(q->backing_dev_info);
1081fail_split:
1082 bioset_free(q->bio_split);
1083fail_id:
1084 ida_simple_remove(&blk_queue_ida, q->id);
1085fail_q:
1086 kmem_cache_free(blk_requestq_cachep, q);
1087 return NULL;
1088}
1089EXPORT_SYMBOL(blk_alloc_queue_node);
1090
/**
 * blk_init_queue  - prepare a request queue for use with a block device
 * @rfn:  The function to be called to process requests that have been
 *        placed on the queue.
 * @lock: Request queue spin lock
 *
 * Description:
 *    If a block device wishes to use the standard request handling procedures,
 *    which sorts requests and coalesces adjacent requests, then it must
 *    call blk_init_queue().  The function @rfn will be called when there
 *    are requests on the queue that need to be processed.  If the device
 *    supports plugging, then @rfn may not be called immediately when requests
 *    are available on the queue, but may be called at some time later instead.
 *    Plugged queues are generally unplugged when a buffer belonging to one
 *    of the requests on the queue is needed, or due to memory pressure.
 *
 *    @rfn is not required, or even expected, to remove all requests off the
 *    queue, but only as many as it can handle at a time.  If it does leave
 *    requests on the queue, it is responsible for arranging that the requests
 *    get dealt with eventually.
 *
 *    The queue spin lock must be held while manipulating the requests on the
 *    request queue; this lock will be taken also from interrupt context, so
 *    irq disabling is needed for it.
 *
 *    Function returns a pointer to the initialized request queue, or %NULL if
 *    it didn't succeed.
 *
 * Note:
 *    blk_init_queue() must be paired with a blk_cleanup_queue() call
 *    when the block device is deactivated (such as at module unload).
 */
1124struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
1125{
1126 return blk_init_queue_node(rfn, lock, NUMA_NO_NODE);
1127}
1128EXPORT_SYMBOL(blk_init_queue);
1129
1130struct request_queue *
1131blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
1132{
1133 struct request_queue *q;
1134
1135 q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
1136 if (!q)
1137 return NULL;
1138
1139 q->request_fn = rfn;
1140 if (blk_init_allocated_queue(q) < 0) {
1141 blk_cleanup_queue(q);
1142 return NULL;
1143 }
1144
1145 return q;
1146}
1147EXPORT_SYMBOL(blk_init_queue_node);
1148
1149static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
1150
1151
1152int blk_init_allocated_queue(struct request_queue *q)
1153{
1154 WARN_ON_ONCE(q->mq_ops);
1155
1156 q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
1157 if (!q->fq)
1158 return -ENOMEM;
1159
1160 if (q->init_rq_fn && q->init_rq_fn(q, q->fq->flush_rq, GFP_KERNEL))
1161 goto out_free_flush_queue;
1162
1163 if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
1164 goto out_exit_flush_rq;
1165
1166 INIT_WORK(&q->timeout_work, blk_timeout_work);
1167 q->queue_flags |= QUEUE_FLAG_DEFAULT;
1168
1169
1170
1171
1172 blk_queue_make_request(q, blk_queue_bio);
1173
1174 q->sg_reserved_size = INT_MAX;
1175
1176
1177 mutex_lock(&q->sysfs_lock);
1178
1179
1180 if (elevator_init(q, NULL)) {
1181 mutex_unlock(&q->sysfs_lock);
1182 goto out_exit_flush_rq;
1183 }
1184
1185 mutex_unlock(&q->sysfs_lock);
1186 return 0;
1187
1188out_exit_flush_rq:
1189 if (q->exit_rq_fn)
1190 q->exit_rq_fn(q, q->fq->flush_rq);
1191out_free_flush_queue:
1192 blk_free_flush_queue(q->fq);
1193 return -ENOMEM;
1194}
1195EXPORT_SYMBOL(blk_init_allocated_queue);
1196
1197bool blk_get_queue(struct request_queue *q)
1198{
1199 if (likely(!blk_queue_dying(q))) {
1200 __blk_get_queue(q);
1201 return true;
1202 }
1203
1204 return false;
1205}
1206EXPORT_SYMBOL(blk_get_queue);
1207
1208static inline void blk_free_request(struct request_list *rl, struct request *rq)
1209{
1210 if (rq->rq_flags & RQF_ELVPRIV) {
1211 elv_put_request(rl->q, rq);
1212 if (rq->elv.icq)
1213 put_io_context(rq->elv.icq->ioc);
1214 }
1215
1216 mempool_free(rq, rl->rq_pool);
1217}
1218
/*
 * ioc_batching returns true if the ioc is a valid batching request and
 * should be given priority access to a request.
 */
1223static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
1224{
1225 if (!ioc)
1226 return 0;
1227
1228
1229
1230
1231
1232
1233 return ioc->nr_batch_requests == q->nr_batching ||
1234 (ioc->nr_batch_requests > 0
1235 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
1236}
1237
/*
 * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
 * will cause the process to be a "batcher" on all queues in the system. This
 * is the behaviour we want though - once it gets a wakeup it should be given
 * a nice run.
 */
1244static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
1245{
1246 if (!ioc || ioc_batching(q, ioc))
1247 return;
1248
1249 ioc->nr_batch_requests = q->nr_batching;
1250 ioc->last_waited = jiffies;
1251}
1252
1253static void __freed_request(struct request_list *rl, int sync)
1254{
1255 struct request_queue *q = rl->q;
1256
1257 if (rl->count[sync] < queue_congestion_off_threshold(q))
1258 blk_clear_congested(rl, sync);
1259
1260 if (rl->count[sync] + 1 <= q->nr_requests) {
1261 if (waitqueue_active(&rl->wait[sync]))
1262 wake_up(&rl->wait[sync]);
1263
1264 blk_clear_rl_full(rl, sync);
1265 }
1266}
1267
/*
 * A request has just been released.  Account for it, update the full and
 * congestion status, wake up any waiters.  Called under q->queue_lock.
 */
1272static void freed_request(struct request_list *rl, bool sync,
1273 req_flags_t rq_flags)
1274{
1275 struct request_queue *q = rl->q;
1276
1277 q->nr_rqs[sync]--;
1278 rl->count[sync]--;
1279 if (rq_flags & RQF_ELVPRIV)
1280 q->nr_rqs_elvpriv--;
1281
1282 __freed_request(rl, sync);
1283
1284 if (unlikely(rl->starved[sync ^ 1]))
1285 __freed_request(rl, sync ^ 1);
1286}
1287
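/*
 * Update q->nr_requests for a legacy queue and recompute the congestion and
 * "full" state of every request_list to match the new limit.
 */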
1288int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
1289{
1290 struct request_list *rl;
1291 int on_thresh, off_thresh;
1292
1293 WARN_ON_ONCE(q->mq_ops);
1294
1295 spin_lock_irq(q->queue_lock);
1296 q->nr_requests = nr;
1297 blk_queue_congestion_threshold(q);
1298 on_thresh = queue_congestion_on_threshold(q);
1299 off_thresh = queue_congestion_off_threshold(q);
1300
1301 blk_queue_for_each_rl(rl, q) {
1302 if (rl->count[BLK_RW_SYNC] >= on_thresh)
1303 blk_set_congested(rl, BLK_RW_SYNC);
1304 else if (rl->count[BLK_RW_SYNC] < off_thresh)
1305 blk_clear_congested(rl, BLK_RW_SYNC);
1306
1307 if (rl->count[BLK_RW_ASYNC] >= on_thresh)
1308 blk_set_congested(rl, BLK_RW_ASYNC);
1309 else if (rl->count[BLK_RW_ASYNC] < off_thresh)
1310 blk_clear_congested(rl, BLK_RW_ASYNC);
1311
1312 if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
1313 blk_set_rl_full(rl, BLK_RW_SYNC);
1314 } else {
1315 blk_clear_rl_full(rl, BLK_RW_SYNC);
1316 wake_up(&rl->wait[BLK_RW_SYNC]);
1317 }
1318
1319 if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
1320 blk_set_rl_full(rl, BLK_RW_ASYNC);
1321 } else {
1322 blk_clear_rl_full(rl, BLK_RW_ASYNC);
1323 wake_up(&rl->wait[BLK_RW_ASYNC]);
1324 }
1325 }
1326
1327 spin_unlock_irq(q->queue_lock);
1328 return 0;
1329}
1330
/**
 * __get_request - get a free request
 * @rl: request list to allocate from
 * @op: operation and flags
 * @bio: bio to allocate request for (can be %NULL)
 * @flags: BLK_MQ_REQ_* flags
 *
 * Get a free request from @q.  This function may fail under memory
 * pressure or if @q is dead.
 *
 * Must be called with @q->queue_lock held and,
 * Returns ERR_PTR on failure, with @q->queue_lock held.
 * Returns request pointer on success, with @q->queue_lock *not held*.
 */
1345static struct request *__get_request(struct request_list *rl, unsigned int op,
1346 struct bio *bio, blk_mq_req_flags_t flags)
1347{
1348 struct request_queue *q = rl->q;
1349 struct request *rq;
1350 struct elevator_type *et = q->elevator->type;
1351 struct io_context *ioc = rq_ioc(bio);
1352 struct io_cq *icq = NULL;
1353 const bool is_sync = op_is_sync(op);
1354 int may_queue;
1355 gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
1356 __GFP_DIRECT_RECLAIM;
1357 req_flags_t rq_flags = RQF_ALLOCED;
1358
1359 lockdep_assert_held(q->queue_lock);
1360
1361 if (unlikely(blk_queue_dying(q)))
1362 return ERR_PTR(-ENODEV);
1363
1364 may_queue = elv_may_queue(q, op);
1365 if (may_queue == ELV_MQUEUE_NO)
1366 goto rq_starved;
1367
1368 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
1369 if (rl->count[is_sync]+1 >= q->nr_requests) {
1370
1371
1372
1373
1374
1375
1376 if (!blk_rl_full(rl, is_sync)) {
1377 ioc_set_batching(q, ioc);
1378 blk_set_rl_full(rl, is_sync);
1379 } else {
1380 if (may_queue != ELV_MQUEUE_MUST
1381 && !ioc_batching(q, ioc)) {
1382
1383
1384
1385
1386
1387 return ERR_PTR(-ENOMEM);
1388 }
1389 }
1390 }
1391 blk_set_congested(rl, is_sync);
1392 }
1393
1394
1395
1396
1397
1398
1399 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
1400 return ERR_PTR(-ENOMEM);
1401
1402 q->nr_rqs[is_sync]++;
1403 rl->count[is_sync]++;
1404 rl->starved[is_sync] = 0;
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419 if (!op_is_flush(op) && !blk_queue_bypass(q)) {
1420 rq_flags |= RQF_ELVPRIV;
1421 q->nr_rqs_elvpriv++;
1422 if (et->icq_cache && ioc)
1423 icq = ioc_lookup_icq(ioc, q);
1424 }
1425
1426 if (blk_queue_io_stat(q))
1427 rq_flags |= RQF_IO_STAT;
1428 spin_unlock_irq(q->queue_lock);
1429
1430
1431 rq = mempool_alloc(rl->rq_pool, gfp_mask);
1432 if (!rq)
1433 goto fail_alloc;
1434
1435 blk_rq_init(q, rq);
1436 blk_rq_set_rl(rq, rl);
1437 rq->cmd_flags = op;
1438 rq->rq_flags = rq_flags;
1439 if (flags & BLK_MQ_REQ_PREEMPT)
1440 rq->rq_flags |= RQF_PREEMPT;
1441
1442
1443 if (rq_flags & RQF_ELVPRIV) {
1444 if (unlikely(et->icq_cache && !icq)) {
1445 if (ioc)
1446 icq = ioc_create_icq(ioc, q, gfp_mask);
1447 if (!icq)
1448 goto fail_elvpriv;
1449 }
1450
1451 rq->elv.icq = icq;
1452 if (unlikely(elv_set_request(q, rq, bio, gfp_mask)))
1453 goto fail_elvpriv;
1454
1455
1456 if (icq)
1457 get_io_context(icq->ioc);
1458 }
1459out:
1460
1461
1462
1463
1464
1465
1466 if (ioc_batching(q, ioc))
1467 ioc->nr_batch_requests--;
1468
1469 trace_block_getrq(q, bio, op);
1470 return rq;
1471
1472fail_elvpriv:
1473
1474
1475
1476
1477
1478
1479 printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n",
1480 __func__, dev_name(q->backing_dev_info->dev));
1481
1482 rq->rq_flags &= ~RQF_ELVPRIV;
1483 rq->elv.icq = NULL;
1484
1485 spin_lock_irq(q->queue_lock);
1486 q->nr_rqs_elvpriv--;
1487 spin_unlock_irq(q->queue_lock);
1488 goto out;
1489
1490fail_alloc:
1491
1492
1493
1494
1495
1496
1497
1498 spin_lock_irq(q->queue_lock);
1499 freed_request(rl, is_sync, rq_flags);
1500
1501
1502
1503
1504
1505
1506
1507
1508rq_starved:
1509 if (unlikely(rl->count[is_sync] == 0))
1510 rl->starved[is_sync] = 1;
1511 return ERR_PTR(-ENOMEM);
1512}
1513
/**
 * get_request - get a free request
 * @q: request_queue to allocate request from
 * @op: operation and flags
 * @bio: bio to allocate request for (can be %NULL)
 * @flags: BLK_MQ_REQ_* flags
 *
 * Get a free request from @q.  If %BLK_MQ_REQ_NOWAIT is set in @flags,
 * this function keeps retrying under memory pressure and fails iff @q is dead.
 *
 * Must be called with @q->queue_lock held and,
 * Returns ERR_PTR on failure, with @q->queue_lock held.
 * Returns request pointer on success, with @q->queue_lock *not held*.
 */
1528static struct request *get_request(struct request_queue *q, unsigned int op,
1529 struct bio *bio, blk_mq_req_flags_t flags)
1530{
1531 const bool is_sync = op_is_sync(op);
1532 DEFINE_WAIT(wait);
1533 struct request_list *rl;
1534 struct request *rq;
1535
1536 lockdep_assert_held(q->queue_lock);
1537 WARN_ON_ONCE(q->mq_ops);
1538
1539 rl = blk_get_rl(q, bio);
1540retry:
1541 rq = __get_request(rl, op, bio, flags);
1542 if (!IS_ERR(rq))
1543 return rq;
1544
1545 if (op & REQ_NOWAIT) {
1546 blk_put_rl(rl);
1547 return ERR_PTR(-EAGAIN);
1548 }
1549
1550 if ((flags & BLK_MQ_REQ_NOWAIT) || unlikely(blk_queue_dying(q))) {
1551 blk_put_rl(rl);
1552 return rq;
1553 }
1554
1555
1556 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
1557 TASK_UNINTERRUPTIBLE);
1558
1559 trace_block_sleeprq(q, bio, op);
1560
1561 spin_unlock_irq(q->queue_lock);
1562 io_schedule();
1563
1564
1565
1566
1567
1568
1569 ioc_set_batching(q, current->io_context);
1570
1571 spin_lock_irq(q->queue_lock);
1572 finish_wait(&rl->wait[is_sync], &wait);
1573
1574 goto retry;
1575}
1576
1577
1578static struct request *blk_old_get_request(struct request_queue *q,
1579 unsigned int op, blk_mq_req_flags_t flags)
1580{
1581 struct request *rq;
1582 gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
1583 __GFP_DIRECT_RECLAIM;
1584 int ret = 0;
1585
1586 WARN_ON_ONCE(q->mq_ops);
1587
1588
1589 create_io_context(gfp_mask, q->node);
1590
1591 ret = blk_queue_enter(q, flags);
1592 if (ret)
1593 return ERR_PTR(ret);
1594 spin_lock_irq(q->queue_lock);
1595 rq = get_request(q, op, NULL, flags);
1596 if (IS_ERR(rq)) {
1597 spin_unlock_irq(q->queue_lock);
1598 blk_queue_exit(q);
1599 return rq;
1600 }
1601
1602
1603 rq->__data_len = 0;
1604 rq->__sector = (sector_t) -1;
1605 rq->bio = rq->biotail = NULL;
1606 return rq;
1607}
1608
/**
 * blk_get_request_flags - allocate a request
 * @q: request queue to allocate a request for
 * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
 * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
 */
1615struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
1616 blk_mq_req_flags_t flags)
1617{
1618 struct request *req;
1619
1620 WARN_ON_ONCE(op & REQ_NOWAIT);
1621 WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT));
1622
1623 if (q->mq_ops) {
1624 req = blk_mq_alloc_request(q, op, flags);
1625 if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
1626 q->mq_ops->initialize_rq_fn(req);
1627 } else {
1628 req = blk_old_get_request(q, op, flags);
1629 if (!IS_ERR(req) && q->initialize_rq_fn)
1630 q->initialize_rq_fn(req);
1631 }
1632
1633 return req;
1634}
1635EXPORT_SYMBOL(blk_get_request_flags);
1636
1637struct request *blk_get_request(struct request_queue *q, unsigned int op,
1638 gfp_t gfp_mask)
1639{
1640 return blk_get_request_flags(q, op, gfp_mask & __GFP_DIRECT_RECLAIM ?
1641 0 : BLK_MQ_REQ_NOWAIT);
1642}
1643EXPORT_SYMBOL(blk_get_request);
1644
/**
 * blk_requeue_request - put a request back on queue
 * @q:		request queue where request should be inserted
 * @rq:		request to be inserted
 *
 * Description:
 *    Drivers often keep queueing requests until the hardware cannot accept
 *    more, when that condition happens we need to put the request back
 *    on the queue. Must be called with queue lock held.
 */
1655void blk_requeue_request(struct request_queue *q, struct request *rq)
1656{
1657 lockdep_assert_held(q->queue_lock);
1658 WARN_ON_ONCE(q->mq_ops);
1659
1660 blk_delete_timer(rq);
1661 blk_clear_rq_complete(rq);
1662 trace_block_rq_requeue(q, rq);
1663 wbt_requeue(q->rq_wb, &rq->issue_stat);
1664
1665 if (rq->rq_flags & RQF_QUEUED)
1666 blk_queue_end_tag(q, rq);
1667
1668 BUG_ON(blk_queued_rq(rq));
1669
1670 elv_requeue_request(q, rq);
1671}
1672EXPORT_SYMBOL(blk_requeue_request);
1673
1674static void add_acct_request(struct request_queue *q, struct request *rq,
1675 int where)
1676{
1677 blk_account_io_start(rq, true);
1678 __elv_add_request(q, rq, where);
1679}
1680
1681static void part_round_stats_single(struct request_queue *q, int cpu,
1682 struct hd_struct *part, unsigned long now,
1683 unsigned int inflight)
1684{
1685 if (inflight) {
1686 __part_stat_add(cpu, part, time_in_queue,
1687 inflight * (now - part->stamp));
1688 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1689 }
1690 part->stamp = now;
1691}
1692
/**
 * part_round_stats() - Round off the performance stats on all partitions.
 * @q: target block queue
 * @cpu: cpu number for stats access
 * @part: target partition
 *
 * The average IO queue length and utilisation statistics are maintained
 * by observing the current state of the queue length and the amount of
 * time it has been in this state for.
 *
 * Normally, that accounting is done on IO completion, but that can result
 * in more than a second's worth of IO being accounted for within any one
 * second, leading to >100% utilisation.  To deal with that, we call this
 * function to do a round-off before returning the results when reading
 * /proc/diskstats.  This accounts immediately for all queue usage up to
 * the current jiffies and restarts the counters again.
 */
1710void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part)
1711{
1712 struct hd_struct *part2 = NULL;
1713 unsigned long now = jiffies;
1714 unsigned int inflight[2];
1715 int stats = 0;
1716
1717 if (part->stamp != now)
1718 stats |= 1;
1719
1720 if (part->partno) {
1721 part2 = &part_to_disk(part)->part0;
1722 if (part2->stamp != now)
1723 stats |= 2;
1724 }
1725
1726 if (!stats)
1727 return;
1728
1729 part_in_flight(q, part, inflight);
1730
1731 if (stats & 2)
1732 part_round_stats_single(q, cpu, part2, now, inflight[1]);
1733 if (stats & 1)
1734 part_round_stats_single(q, cpu, part, now, inflight[0]);
1735}
1736EXPORT_SYMBOL_GPL(part_round_stats);
1737
1738#ifdef CONFIG_PM
1739static void blk_pm_put_request(struct request *rq)
1740{
1741 if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending)
1742 pm_runtime_mark_last_busy(rq->q->dev);
1743}
1744#else
1745static inline void blk_pm_put_request(struct request *rq) {}
1746#endif
1747
1748void __blk_put_request(struct request_queue *q, struct request *req)
1749{
1750 req_flags_t rq_flags = req->rq_flags;
1751
1752 if (unlikely(!q))
1753 return;
1754
1755 if (q->mq_ops) {
1756 blk_mq_free_request(req);
1757 return;
1758 }
1759
1760 lockdep_assert_held(q->queue_lock);
1761
1762 blk_req_zone_write_unlock(req);
1763 blk_pm_put_request(req);
1764
1765 elv_completed_request(q, req);
1766
1767
1768 WARN_ON(req->bio != NULL);
1769
1770 wbt_done(q->rq_wb, &req->issue_stat);
1771
1772
1773
1774
1775
1776 if (rq_flags & RQF_ALLOCED) {
1777 struct request_list *rl = blk_rq_rl(req);
1778 bool sync = op_is_sync(req->cmd_flags);
1779
1780 BUG_ON(!list_empty(&req->queuelist));
1781 BUG_ON(ELV_ON_HASH(req));
1782
1783 blk_free_request(rl, req);
1784 freed_request(rl, sync, rq_flags);
1785 blk_put_rl(rl);
1786 blk_queue_exit(q);
1787 }
1788}
1789EXPORT_SYMBOL_GPL(__blk_put_request);
1790
1791void blk_put_request(struct request *req)
1792{
1793 struct request_queue *q = req->q;
1794
1795 if (q->mq_ops)
1796 blk_mq_free_request(req);
1797 else {
1798 unsigned long flags;
1799
1800 spin_lock_irqsave(q->queue_lock, flags);
1801 __blk_put_request(q, req);
1802 spin_unlock_irqrestore(q->queue_lock, flags);
1803 }
1804}
1805EXPORT_SYMBOL(blk_put_request);
1806
1807bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
1808 struct bio *bio)
1809{
1810 const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
1811
1812 if (!ll_back_merge_fn(q, req, bio))
1813 return false;
1814
1815 trace_block_bio_backmerge(q, req, bio);
1816
1817 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1818 blk_rq_set_mixed_merge(req);
1819
1820 req->biotail->bi_next = bio;
1821 req->biotail = bio;
1822 req->__data_len += bio->bi_iter.bi_size;
1823 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1824
1825 blk_account_io_start(req, false);
1826 return true;
1827}
1828
1829bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
1830 struct bio *bio)
1831{
1832 const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
1833
1834 if (!ll_front_merge_fn(q, req, bio))
1835 return false;
1836
1837 trace_block_bio_frontmerge(q, req, bio);
1838
1839 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1840 blk_rq_set_mixed_merge(req);
1841
1842 bio->bi_next = req->bio;
1843 req->bio = bio;
1844
1845 req->__sector = bio->bi_iter.bi_sector;
1846 req->__data_len += bio->bi_iter.bi_size;
1847 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1848
1849 blk_account_io_start(req, false);
1850 return true;
1851}
1852
1853bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
1854 struct bio *bio)
1855{
1856 unsigned short segments = blk_rq_nr_discard_segments(req);
1857
1858 if (segments >= queue_max_discard_segments(q))
1859 goto no_merge;
1860 if (blk_rq_sectors(req) + bio_sectors(bio) >
1861 blk_rq_get_max_sectors(req, blk_rq_pos(req)))
1862 goto no_merge;
1863
1864 req->biotail->bi_next = bio;
1865 req->biotail = bio;
1866 req->__data_len += bio->bi_iter.bi_size;
1867 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1868 req->nr_phys_segments = segments + 1;
1869
1870 blk_account_io_start(req, false);
1871 return true;
1872no_merge:
1873 req_set_nomerge(q, req);
1874 return false;
1875}
1876
/**
 * blk_attempt_plug_merge - try to merge with %current's plugged list
 * @q: request_queue new bio is being queued at
 * @bio: new bio being queued
 * @request_count: out parameter for number of traversed plugged requests
 * @same_queue_rq: pointer to &struct request that gets filled in when
 * another request associated with @q is found on the plug list
 * (optional, may be %NULL)
 *
 * Determine whether @bio being queued on @q can be merged with a request
 * on %current's plugged list.  Returns %true if merge was successful,
 * otherwise %false.
 *
 * Plugging coalesces IOs from the same issuer for the same purpose without
 * going through @q->queue_lock.  As such it's more of an issuing mechanism
 * than scheduling, and the request, while may have elvpriv data, is not
 * added on the elevator at this point.  In addition, we don't have
 * reliable access to the elevator outside the queue lock.  Only check basic
 * merging parameters without querying the elevator.
 *
 * Caller must ensure !blk_queue_nomerges(q) beforehand.
 */
1899bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1900 unsigned int *request_count,
1901 struct request **same_queue_rq)
1902{
1903 struct blk_plug *plug;
1904 struct request *rq;
1905 struct list_head *plug_list;
1906
1907 plug = current->plug;
1908 if (!plug)
1909 return false;
1910 *request_count = 0;
1911
1912 if (q->mq_ops)
1913 plug_list = &plug->mq_list;
1914 else
1915 plug_list = &plug->list;
1916
1917 list_for_each_entry_reverse(rq, plug_list, queuelist) {
1918 bool merged = false;
1919
1920 if (rq->q == q) {
1921 (*request_count)++;
1922
1923
1924
1925
1926
1927 if (same_queue_rq)
1928 *same_queue_rq = rq;
1929 }
1930
1931 if (rq->q != q || !blk_rq_merge_ok(rq, bio))
1932 continue;
1933
1934 switch (blk_try_merge(rq, bio)) {
1935 case ELEVATOR_BACK_MERGE:
1936 merged = bio_attempt_back_merge(q, rq, bio);
1937 break;
1938 case ELEVATOR_FRONT_MERGE:
1939 merged = bio_attempt_front_merge(q, rq, bio);
1940 break;
1941 case ELEVATOR_DISCARD_MERGE:
1942 merged = bio_attempt_discard_merge(q, rq, bio);
1943 break;
1944 default:
1945 break;
1946 }
1947
1948 if (merged)
1949 return true;
1950 }
1951
1952 return false;
1953}
1954
1955unsigned int blk_plug_queued_count(struct request_queue *q)
1956{
1957 struct blk_plug *plug;
1958 struct request *rq;
1959 struct list_head *plug_list;
1960 unsigned int ret = 0;
1961
1962 plug = current->plug;
1963 if (!plug)
1964 goto out;
1965
1966 if (q->mq_ops)
1967 plug_list = &plug->mq_list;
1968 else
1969 plug_list = &plug->list;
1970
1971 list_for_each_entry(rq, plug_list, queuelist) {
1972 if (rq->q == q)
1973 ret++;
1974 }
1975out:
1976 return ret;
1977}
1978
1979void blk_init_request_from_bio(struct request *req, struct bio *bio)
1980{
1981 struct io_context *ioc = rq_ioc(bio);
1982
1983 if (bio->bi_opf & REQ_RAHEAD)
1984 req->cmd_flags |= REQ_FAILFAST_MASK;
1985
1986 req->__sector = bio->bi_iter.bi_sector;
1987 if (ioprio_valid(bio_prio(bio)))
1988 req->ioprio = bio_prio(bio);
1989 else if (ioc)
1990 req->ioprio = ioc->ioprio;
1991 else
1992 req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
1993 req->write_hint = bio->bi_write_hint;
1994 blk_rq_bio_prep(req->q, req, bio);
1995}
1996EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
1997
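/*
 * Legacy ->make_request_fn: try to merge @bio into a plugged or queued
 * request first, otherwise allocate a new request and plug or dispatch it.
 */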
1998static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1999{
2000 struct blk_plug *plug;
2001 int where = ELEVATOR_INSERT_SORT;
2002 struct request *req, *free;
2003 unsigned int request_count = 0;
2004 unsigned int wb_acct;
2005
2006
2007
2008
2009
2010
2011 blk_queue_bounce(q, &bio);
2012
2013 blk_queue_split(q, &bio);
2014
2015 if (!bio_integrity_prep(bio))
2016 return BLK_QC_T_NONE;
2017
2018 if (op_is_flush(bio->bi_opf)) {
2019 spin_lock_irq(q->queue_lock);
2020 where = ELEVATOR_INSERT_FLUSH;
2021 goto get_rq;
2022 }
2023
2024
2025
2026
2027
2028 if (!blk_queue_nomerges(q)) {
2029 if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
2030 return BLK_QC_T_NONE;
2031 } else
2032 request_count = blk_plug_queued_count(q);
2033
2034 spin_lock_irq(q->queue_lock);
2035
2036 switch (elv_merge(q, &req, bio)) {
2037 case ELEVATOR_BACK_MERGE:
2038 if (!bio_attempt_back_merge(q, req, bio))
2039 break;
2040 elv_bio_merged(q, req, bio);
2041 free = attempt_back_merge(q, req);
2042 if (free)
2043 __blk_put_request(q, free);
2044 else
2045 elv_merged_request(q, req, ELEVATOR_BACK_MERGE);
2046 goto out_unlock;
2047 case ELEVATOR_FRONT_MERGE:
2048 if (!bio_attempt_front_merge(q, req, bio))
2049 break;
2050 elv_bio_merged(q, req, bio);
2051 free = attempt_front_merge(q, req);
2052 if (free)
2053 __blk_put_request(q, free);
2054 else
2055 elv_merged_request(q, req, ELEVATOR_FRONT_MERGE);
2056 goto out_unlock;
2057 default:
2058 break;
2059 }
2060
2061get_rq:
2062 wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock);
2063
2064
2065
2066
2067
2068 blk_queue_enter_live(q);
2069 req = get_request(q, bio->bi_opf, bio, 0);
2070 if (IS_ERR(req)) {
2071 blk_queue_exit(q);
2072 __wbt_done(q->rq_wb, wb_acct);
2073 if (PTR_ERR(req) == -ENOMEM)
2074 bio->bi_status = BLK_STS_RESOURCE;
2075 else
2076 bio->bi_status = BLK_STS_IOERR;
2077 bio_endio(bio);
2078 goto out_unlock;
2079 }
2080
2081 wbt_track(&req->issue_stat, wb_acct);
2082
2083
2084
2085
2086
2087
2088
2089 blk_init_request_from_bio(req, bio);
2090
2091 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
2092 req->cpu = raw_smp_processor_id();
2093
2094 plug = current->plug;
2095 if (plug) {
2096
2097
2098
2099
2100
2101
2102
2103 if (!request_count || list_empty(&plug->list))
2104 trace_block_plug(q);
2105 else {
2106 struct request *last = list_entry_rq(plug->list.prev);
2107 if (request_count >= BLK_MAX_REQUEST_COUNT ||
2108 blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE) {
2109 blk_flush_plug_list(plug, false);
2110 trace_block_plug(q);
2111 }
2112 }
2113 list_add_tail(&req->queuelist, &plug->list);
2114 blk_account_io_start(req, true);
2115 } else {
2116 spin_lock_irq(q->queue_lock);
2117 add_acct_request(q, req, where);
2118 __blk_run_queue(q);
2119out_unlock:
2120 spin_unlock_irq(q->queue_lock);
2121 }
2122
2123 return BLK_QC_T_NONE;
2124}
2125
2126static void handle_bad_sector(struct bio *bio, sector_t maxsector)
2127{
2128 char b[BDEVNAME_SIZE];
2129
2130 printk(KERN_INFO "attempt to access beyond end of device\n");
2131 printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
2132 bio_devname(bio, b), bio->bi_opf,
2133 (unsigned long long)bio_end_sector(bio),
2134 (long long)maxsector);
2135}
2136
2137#ifdef CONFIG_FAIL_MAKE_REQUEST
2138
2139static DECLARE_FAULT_ATTR(fail_make_request);
2140
2141static int __init setup_fail_make_request(char *str)
2142{
2143 return setup_fault_attr(&fail_make_request, str);
2144}
2145__setup("fail_make_request=", setup_fail_make_request);
2146
2147static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
2148{
2149 return part->make_it_fail && should_fail(&fail_make_request, bytes);
2150}
2151
2152static int __init fail_make_request_debugfs(void)
2153{
2154 struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
2155 NULL, &fail_make_request);
2156
2157 return PTR_ERR_OR_ZERO(dir);
2158}
2159
2160late_initcall(fail_make_request_debugfs);
2161
2162#else
2163
2164static inline bool should_fail_request(struct hd_struct *part,
2165 unsigned int bytes)
2166{
2167 return false;
2168}
2169
2170#endif
2171
2172static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
2173{
2174 if (part->policy && op_is_write(bio_op(bio))) {
2175 char b[BDEVNAME_SIZE];
2176
2177 printk(KERN_ERR
2178 "generic_make_request: Trying to write "
2179 "to read-only block-device %s (partno %d)\n",
2180 bio_devname(bio, b), part->partno);
2181 return true;
2182 }
2183
2184 return false;
2185}
2186
2187static noinline int should_fail_bio(struct bio *bio)
2188{
2189 if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
2190 return -EIO;
2191 return 0;
2192}
2193ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
2194
2195
2196
2197
2198
2199
2200static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
2201{
2202 unsigned int nr_sectors = bio_sectors(bio);
2203
2204 if (nr_sectors && maxsector &&
2205 (nr_sectors > maxsector ||
2206 bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
2207 handle_bad_sector(bio, maxsector);
2208 return -EIO;
2209 }
2210 return 0;
2211}
2212
2213
2214
2215
2216static inline int blk_partition_remap(struct bio *bio)
2217{
2218 struct hd_struct *p;
2219 int ret = -EIO;
2220
2221 rcu_read_lock();
2222 p = __disk_get_part(bio->bi_disk, bio->bi_partno);
2223 if (unlikely(!p))
2224 goto out;
2225 if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
2226 goto out;
2227 if (unlikely(bio_check_ro(bio, p)))
2228 goto out;
2229
2230
2231
2232
2233
2234 if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
2235 if (bio_check_eod(bio, part_nr_sects_read(p)))
2236 goto out;
2237 bio->bi_iter.bi_sector += p->start_sect;
2238 bio->bi_partno = 0;
2239 trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
2240 bio->bi_iter.bi_sector - p->start_sect);
2241 }
2242 ret = 0;
2243out:
2244 rcu_read_unlock();
2245 return ret;
2246}
2247
2248static noinline_for_stack bool
2249generic_make_request_checks(struct bio *bio)
2250{
2251 struct request_queue *q;
2252 int nr_sectors = bio_sectors(bio);
2253 blk_status_t status = BLK_STS_IOERR;
2254 char b[BDEVNAME_SIZE];
2255
2256 might_sleep();
2257
2258 q = bio->bi_disk->queue;
2259 if (unlikely(!q)) {
2260 printk(KERN_ERR
2261 "generic_make_request: Trying to access "
2262 "nonexistent block-device %s (%Lu)\n",
2263 bio_devname(bio, b), (long long)bio->bi_iter.bi_sector);
2264 goto end_io;
2265 }
2266
2267
2268
2269
2270
2271 if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
2272 goto not_supported;
2273
2274 if (should_fail_bio(bio))
2275 goto end_io;
2276
2277 if (bio->bi_partno) {
2278 if (unlikely(blk_partition_remap(bio)))
2279 goto end_io;
2280 } else {
2281 if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
2282 goto end_io;
2283 if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
2284 goto end_io;
2285 }
2286
2287
2288
2289
2290
2291
2292 if (op_is_flush(bio->bi_opf) &&
2293 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
2294 bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
2295 if (!nr_sectors) {
2296 status = BLK_STS_OK;
2297 goto end_io;
2298 }
2299 }
2300
2301 switch (bio_op(bio)) {
2302 case REQ_OP_DISCARD:
2303 if (!blk_queue_discard(q))
2304 goto not_supported;
2305 break;
2306 case REQ_OP_SECURE_ERASE:
2307 if (!blk_queue_secure_erase(q))
2308 goto not_supported;
2309 break;
2310 case REQ_OP_WRITE_SAME:
2311 if (!q->limits.max_write_same_sectors)
2312 goto not_supported;
2313 break;
2314 case REQ_OP_ZONE_REPORT:
2315 case REQ_OP_ZONE_RESET:
2316 if (!blk_queue_is_zoned(q))
2317 goto not_supported;
2318 break;
2319 case REQ_OP_WRITE_ZEROES:
2320 if (!q->limits.max_write_zeroes_sectors)
2321 goto not_supported;
2322 break;
2323 default:
2324 break;
2325 }
2326
2327
2328
2329
2330
2331
2332
2333 create_io_context(GFP_ATOMIC, q->node);
2334
2335 if (!blkcg_bio_issue_check(q, bio))
2336 return false;
2337
2338 if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
2339 trace_block_bio_queue(q, bio);
2340
2341
2342
2343 bio_set_flag(bio, BIO_TRACE_COMPLETION);
2344 }
2345 return true;
2346
2347not_supported:
2348 status = BLK_STS_NOTSUPP;
2349end_io:
2350 bio->bi_status = status;
2351 bio_endio(bio);
2352 return false;
2353}
2354
/**
 * generic_make_request - hand a buffer to its device driver for I/O
 * @bio:  The bio describing the location in memory and on the device.
 *
 * generic_make_request() is used to make I/O requests of block
 * devices. It is passed a &struct bio, which describes the I/O that needs
 * to be done.
 *
 * generic_make_request() does not return any status.  The
 * success/failure status of the request, along with notification of
 * completion, is delivered asynchronously through the ->bi_end_io()
 * callback in @bio.
 *
 * The caller of generic_make_request must make sure that bi_io_vec
 * are set to describe the memory buffer, and that bi_disk and bi_sector are
 * set to describe the device address, and the
 * bi_end_io and optionally bi_private are set to describe how
 * completion notification should be signaled.
 *
 * generic_make_request and the drivers it calls may use bi_next if this
 * bio happens to be merged with someone else, and may resubmit the bio to
 * a lower device by calling into generic_make_request recursively, which
 * means the bio should NOT be touched after the call to ->make_request_fn.
 */
2379blk_qc_t generic_make_request(struct bio *bio)
2380{
	/*
	 * bio_list_on_stack[0] contains bios submitted by the current
	 * make_request_fn.
	 * bio_list_on_stack[1] contains bios that were submitted before
	 * the current make_request_fn, but that haven't been processed
	 * yet.
	 */
2388 struct bio_list bio_list_on_stack[2];
2389 blk_mq_req_flags_t flags = 0;
2390 struct request_queue *q = bio->bi_disk->queue;
2391 blk_qc_t ret = BLK_QC_T_NONE;
2392
2393 if (bio->bi_opf & REQ_NOWAIT)
2394 flags = BLK_MQ_REQ_NOWAIT;
2395 if (blk_queue_enter(q, flags) < 0) {
2396 if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT))
2397 bio_wouldblock_error(bio);
2398 else
2399 bio_io_error(bio);
2400 return ret;
2401 }
2402
2403 if (!generic_make_request_checks(bio))
2404 goto out;
2405
	/*
	 * We only want one ->make_request_fn to be active at a time, else
	 * stack usage with stacked devices could be a problem.  So use
	 * current->bio_list to keep a list of requests submitted by a
	 * make_request_fn function.  current->bio_list is also used as a
	 * flag to say if generic_make_request is currently active in this
	 * task or not.  If it is NULL, then no make_request is active.  If
	 * it is non-NULL, then a make_request is active, and new requests
	 * should be added at the tail.
	 */
2416 if (current->bio_list) {
		bio_list_add(&current->bio_list[0], bio);
2418 goto out;
2419 }
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435 BUG_ON(bio->bi_next);
2436 bio_list_init(&bio_list_on_stack[0]);
2437 current->bio_list = bio_list_on_stack;
2438 do {
2439 bool enter_succeeded = true;
2440
2441 if (unlikely(q != bio->bi_disk->queue)) {
2442 if (q)
2443 blk_queue_exit(q);
2444 q = bio->bi_disk->queue;
2445 flags = 0;
2446 if (bio->bi_opf & REQ_NOWAIT)
2447 flags = BLK_MQ_REQ_NOWAIT;
2448 if (blk_queue_enter(q, flags) < 0) {
2449 enter_succeeded = false;
2450 q = NULL;
2451 }
2452 }
2453
2454 if (enter_succeeded) {
2455 struct bio_list lower, same;
2456
2457
2458 bio_list_on_stack[1] = bio_list_on_stack[0];
2459 bio_list_init(&bio_list_on_stack[0]);
2460 ret = q->make_request_fn(q, bio);
2461
2462
2463
2464
2465 bio_list_init(&lower);
2466 bio_list_init(&same);
2467 while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
2468 if (q == bio->bi_disk->queue)
2469 bio_list_add(&same, bio);
2470 else
2471 bio_list_add(&lower, bio);
2472
2473 bio_list_merge(&bio_list_on_stack[0], &lower);
2474 bio_list_merge(&bio_list_on_stack[0], &same);
2475 bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
2476 } else {
2477 if (unlikely(!blk_queue_dying(q) &&
2478 (bio->bi_opf & REQ_NOWAIT)))
2479 bio_wouldblock_error(bio);
2480 else
2481 bio_io_error(bio);
2482 }
2483 bio = bio_list_pop(&bio_list_on_stack[0]);
2484 } while (bio);
2485 current->bio_list = NULL;
2486
2487out:
2488 if (q)
2489 blk_queue_exit(q);
2490 return ret;
2491}
2492EXPORT_SYMBOL(generic_make_request);
2493
/**
 * direct_make_request - hand a buffer directly to its device driver for I/O
 * @bio:  The bio describing the location in memory and on the device.
 *
 * This function behaves like generic_make_request(), but does not protect
 * against recursion.  Must only be used if the called driver is known
 * to not call generic_make_request (or direct_make_request) again from
 * its make_request function.  (Calling direct_make_request again from
 * a workqueue is perfectly fine as that doesn't recurse.)
 */
2504blk_qc_t direct_make_request(struct bio *bio)
2505{
2506 struct request_queue *q = bio->bi_disk->queue;
2507 bool nowait = bio->bi_opf & REQ_NOWAIT;
2508 blk_qc_t ret;
2509
2510 if (!generic_make_request_checks(bio))
2511 return BLK_QC_T_NONE;
2512
2513 if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
2514 if (nowait && !blk_queue_dying(q))
2515 bio->bi_status = BLK_STS_AGAIN;
2516 else
2517 bio->bi_status = BLK_STS_IOERR;
2518 bio_endio(bio);
2519 return BLK_QC_T_NONE;
2520 }
2521
2522 ret = q->make_request_fn(q, bio);
2523 blk_queue_exit(q);
2524 return ret;
2525}
2526EXPORT_SYMBOL_GPL(direct_make_request);
2527
/**
 * submit_bio - submit a bio to the block device layer for I/O
 * @bio: The &struct bio which describes the I/O
 *
 * submit_bio() is very similar in purpose to generic_make_request(), and
 * uses that function to do most of the work. Both are always inlined.
 */
2537blk_qc_t submit_bio(struct bio *bio)
2538{
	/*
	 * If it's a regular read/write or a barrier with data attached,
	 * go through the normal accounting stuff before submission.
	 */
2543 if (bio_has_data(bio)) {
2544 unsigned int count;
2545
2546 if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
2547 count = queue_logical_block_size(bio->bi_disk->queue) >> 9;
2548 else
2549 count = bio_sectors(bio);
2550
2551 if (op_is_write(bio_op(bio))) {
2552 count_vm_events(PGPGOUT, count);
2553 } else {
2554 task_io_account_read(bio->bi_iter.bi_size);
2555 count_vm_events(PGPGIN, count);
2556 }
2557
2558 if (unlikely(block_dump)) {
2559 char b[BDEVNAME_SIZE];
2560 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
2561 current->comm, task_pid_nr(current),
2562 op_is_write(bio_op(bio)) ? "WRITE" : "READ",
2563 (unsigned long long)bio->bi_iter.bi_sector,
2564 bio_devname(bio, b), count);
2565 }
2566 }
2567
2568 return generic_make_request(bio);
2569}
2570EXPORT_SYMBOL(submit_bio);
2571
2572bool blk_poll(struct request_queue *q, blk_qc_t cookie)
2573{
2574 if (!q->poll_fn || !blk_qc_t_valid(cookie))
2575 return false;
2576
2577 if (current->plug)
2578 blk_flush_plug_list(current->plug, false);
2579 return q->poll_fn(q, cookie);
2580}
2581EXPORT_SYMBOL_GPL(blk_poll);
2582
/**
 * blk_cloned_rq_check_limits - Helper function to check a cloned request
 *                              against the new queue limits
 * @q:  the queue
 * @rq: the request being checked
 *
 * Description:
 *    @rq may have been made based on weaker limitations of upper-level queues
 *    in request stacking drivers, and it may violate the limitation of @q.
 *    Since the block layer and the underlying device driver trust @rq
 *    after it is inserted to @q, it should be checked against @q before
 *    the insertion using this generic function.
 *
 *    Request stacking drivers like request-based dm may change the queue
 *    limits when retrying requests on other queues. Those requests need
 *    to be checked against the new queue limits again during dispatch.
 */
2600static int blk_cloned_rq_check_limits(struct request_queue *q,
2601 struct request *rq)
2602{
2603 if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
2604 printk(KERN_ERR "%s: over max size limit.\n", __func__);
2605 return -EIO;
2606 }
2607
2608
2609
2610
2611
2612
2613
2614 blk_recalc_rq_segments(rq);
2615 if (rq->nr_phys_segments > queue_max_segments(q)) {
2616 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
2617 return -EIO;
2618 }
2619
2620 return 0;
2621}
2622
/**
 * blk_insert_cloned_request - Helper for stacking drivers to submit a request
 * @q:  the queue to submit the request
 * @rq: the request being queued
 */
2628blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2629{
2630 unsigned long flags;
2631 int where = ELEVATOR_INSERT_BACK;
2632
2633 if (blk_cloned_rq_check_limits(q, rq))
2634 return BLK_STS_IOERR;
2635
2636 if (rq->rq_disk &&
2637 should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
2638 return BLK_STS_IOERR;
2639
2640 if (q->mq_ops) {
2641 if (blk_queue_io_stat(q))
2642 blk_account_io_start(rq, true);
2643
2644
2645
2646
2647
2648 return blk_mq_request_issue_directly(rq);
2649 }
2650
2651 spin_lock_irqsave(q->queue_lock, flags);
2652 if (unlikely(blk_queue_dying(q))) {
2653 spin_unlock_irqrestore(q->queue_lock, flags);
2654 return BLK_STS_IOERR;
2655 }
2656
2657
2658
2659
2660
2661 BUG_ON(blk_queued_rq(rq));
2662
2663 if (op_is_flush(rq->cmd_flags))
2664 where = ELEVATOR_INSERT_FLUSH;
2665
2666 add_acct_request(q, rq, where);
2667 if (where == ELEVATOR_INSERT_FLUSH)
2668 __blk_run_queue(q);
2669 spin_unlock_irqrestore(q->queue_lock, flags);
2670
2671 return BLK_STS_OK;
2672}
2673EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
2674
/**
 * blk_rq_err_bytes - determine number of bytes till the next failure boundary
 * @rq: request to examine
 *
 * Description:
 *     A request could be merge of IOs which require different failure
 *     handling.  This function determines the number of bytes which
 *     can be failed from the beginning of the request without
 *     crossing into an area which needs to be retried further.
 *
 * Return:
 *     The number of bytes to fail.
 */
2688unsigned int blk_rq_err_bytes(const struct request *rq)
2689{
2690 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
2691 unsigned int bytes = 0;
2692 struct bio *bio;
2693
2694 if (!(rq->rq_flags & RQF_MIXED_MERGE))
2695 return blk_rq_bytes(rq);
2696
2697
2698
2699
2700
2701
2702
2703
2704 for (bio = rq->bio; bio; bio = bio->bi_next) {
2705 if ((bio->bi_opf & ff) != ff)
2706 break;
2707 bytes += bio->bi_iter.bi_size;
2708 }
2709
2710
2711 BUG_ON(blk_rq_bytes(rq) && !bytes);
2712 return bytes;
2713}
2714EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
2715
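/* Account @bytes completed on @req against the owning partition's stats. */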
2716void blk_account_io_completion(struct request *req, unsigned int bytes)
2717{
2718 if (blk_do_io_stat(req)) {
2719 const int rw = rq_data_dir(req);
2720 struct hd_struct *part;
2721 int cpu;
2722
2723 cpu = part_stat_lock();
2724 part = req->part;
2725 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
2726 part_stat_unlock();
2727 }
2728}
2729
2730void blk_account_io_done(struct request *req)
2731{
2732
2733
2734
2735
2736
2737 if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
2738 unsigned long duration = jiffies - req->start_time;
2739 const int rw = rq_data_dir(req);
2740 struct hd_struct *part;
2741 int cpu;
2742
2743 cpu = part_stat_lock();
2744 part = req->part;
2745
2746 part_stat_inc(cpu, part, ios[rw]);
2747 part_stat_add(cpu, part, ticks[rw], duration);
2748 part_round_stats(req->q, cpu, part);
2749 part_dec_in_flight(req->q, part, rw);
2750
2751 hd_struct_put(part);
2752 part_stat_unlock();
2753 }
2754}
2755
2756#ifdef CONFIG_PM
2757
2758
2759
2760
2761static bool blk_pm_allow_request(struct request *rq)
2762{
2763 switch (rq->q->rpm_status) {
2764 case RPM_RESUMING:
2765 case RPM_SUSPENDING:
2766 return rq->rq_flags & RQF_PM;
2767 case RPM_SUSPENDED:
2768 return false;
2769 }
2770
2771 return true;
2772}
2773#else
2774static bool blk_pm_allow_request(struct request *rq)
2775{
2776 return true;
2777}
2778#endif
2779
2780void blk_account_io_start(struct request *rq, bool new_io)
2781{
2782 struct hd_struct *part;
2783 int rw = rq_data_dir(rq);
2784 int cpu;
2785
2786 if (!blk_do_io_stat(rq))
2787 return;
2788
2789 cpu = part_stat_lock();
2790
2791 if (!new_io) {
2792 part = rq->part;
2793 part_stat_inc(cpu, part, merges[rw]);
2794 } else {
2795 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
2796 if (!hd_struct_try_get(part)) {
2797
2798
2799
2800
2801
2802
2803
2804
2805 part = &rq->rq_disk->part0;
2806 hd_struct_get(part);
2807 }
2808 part_round_stats(rq->q, cpu, part);
2809 part_inc_in_flight(rq->q, part, rw);
2810 rq->part = part;
2811 }
2812
2813 part_stat_unlock();
2814}
2815
2816static struct request *elv_next_request(struct request_queue *q)
2817{
2818 struct request *rq;
2819 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
2820
2821 WARN_ON_ONCE(q->mq_ops);
2822
2823 while (1) {
2824 list_for_each_entry(rq, &q->queue_head, queuelist) {
2825 if (blk_pm_allow_request(rq))
2826 return rq;
2827
2828 if (rq->rq_flags & RQF_SOFTBARRIER)
2829 break;
2830 }
2831
 /*
  * A flush request is running but the flush request isn't
  * queueable in the drive, so hold the queue until the flush
  * request is finished.  Even if we didn't do this, the driver
  * couldn't dispatch the next requests and would have to requeue
  * them, and holding the queue can improve throughput too.  For
  * example, with requests flush1, write1, flush2: flush1 is
  * dispatched and the queue is held, so write1 isn't inserted.
  * After flush1 finishes, flush2 is dispatched; since the disk
  * cache is already clean it finishes almost immediately, so
  * flush2 is effectively folded into flush1.
  * Because the queue is held, a flag is set to indicate that it
  * should be restarted later.  See flush_end_io() for details.
  */
2847 if (fq->flush_pending_idx != fq->flush_running_idx &&
2848 !queue_flush_queueable(q)) {
2849 fq->flush_queue_delayed = 1;
2850 return NULL;
2851 }
2852 if (unlikely(blk_queue_bypass(q)) ||
2853 !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
2854 return NULL;
2855 }
2856}
2857
2858
/**
 * blk_peek_request - peek at the top of a request queue
 * @q: request queue to peek at
 *
 * Description:
 *     Return the request at the top of @q.  The returned request
 *     should be started using blk_start_request() before the LLD starts
 *     processing it.
 *
 * Return:
 *     Pointer to the request at the top of @q if available.  Null
 *     otherwise.
 */
2871struct request *blk_peek_request(struct request_queue *q)
2872{
2873 struct request *rq;
2874 int ret;
2875
2876 lockdep_assert_held(q->queue_lock);
2877 WARN_ON_ONCE(q->mq_ops);
2878
2879 while ((rq = elv_next_request(q)) != NULL) {
2880 if (!(rq->rq_flags & RQF_STARTED)) {
 /*
  * This is the first time the device driver
  * sees this request (possibly after
  * requeueing).  Notify the IO scheduler.
  */
2886 if (rq->rq_flags & RQF_SORTED)
2887 elv_activate_rq(q, rq);
2888
 /*
  * Just mark it as started even if we don't start
  * it; a request that has been delayed should
  * not be passed by new incoming requests.
  */
2894 rq->rq_flags |= RQF_STARTED;
2895 trace_block_rq_issue(q, rq);
2896 }
2897
2898 if (!q->boundary_rq || q->boundary_rq == rq) {
2899 q->end_sector = rq_end_sector(rq);
2900 q->boundary_rq = NULL;
2901 }
2902
2903 if (rq->rq_flags & RQF_DONTPREP)
2904 break;
2905
2906 if (q->dma_drain_size && blk_rq_bytes(rq)) {
 /*
  * Make sure space for the drain appears.  We
  * know we can do this because max_hw_segments
  * has been adjusted to be one fewer than the
  * device can handle.
  */
2913 rq->nr_phys_segments++;
2914 }
2915
2916 if (!q->prep_rq_fn)
2917 break;
2918
2919 ret = q->prep_rq_fn(q, rq);
2920 if (ret == BLKPREP_OK) {
2921 break;
2922 } else if (ret == BLKPREP_DEFER) {
 /*
  * The request may have been (partially) prepped.
  * We need to keep this request at the front to
  * avoid resource deadlock.  RQF_STARTED will
  * prevent other fs requests from passing this one.
  */
2929 if (q->dma_drain_size && blk_rq_bytes(rq) &&
2930 !(rq->rq_flags & RQF_DONTPREP)) {
 /*
  * Remove the space for the drain we added
  * so that we don't add it again.
  */
2935 --rq->nr_phys_segments;
2936 }
2937
2938 rq = NULL;
2939 break;
2940 } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
2941 rq->rq_flags |= RQF_QUIET;
 /*
  * Mark this request as started so we don't trigger
  * any debug logic in the end I/O path.
  */
2946 blk_start_request(rq);
2947 __blk_end_request_all(rq, ret == BLKPREP_INVALID ?
2948 BLK_STS_TARGET : BLK_STS_IOERR);
2949 } else {
2950 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
2951 break;
2952 }
2953 }
2954
2955 return rq;
2956}
2957EXPORT_SYMBOL(blk_peek_request);
2958
2959static void blk_dequeue_request(struct request *rq)
2960{
2961 struct request_queue *q = rq->q;
2962
2963 BUG_ON(list_empty(&rq->queuelist));
2964 BUG_ON(ELV_ON_HASH(rq));
2965
2966 list_del_init(&rq->queuelist);
2967
 /*
  * The time frame between a request being removed from the lists
  * and when it is freed is accounted as I/O that is in progress
  * on the driver side.
  */
2973 if (blk_account_rq(rq)) {
2974 q->in_flight[rq_is_sync(rq)]++;
2975 set_io_start_time_ns(rq);
2976 }
2977}
2978
2979
/**
 * blk_start_request - start request processing on the driver
 * @req: request to dequeue
 *
 * Description:
 *     Dequeue @req and start the timeout timer on it.  This hands off the
 *     request to the driver.
 */
2987void blk_start_request(struct request *req)
2988{
2989 lockdep_assert_held(req->q->queue_lock);
2990 WARN_ON_ONCE(req->q->mq_ops);
2991
2992 blk_dequeue_request(req);
2993
2994 if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
2995 blk_stat_set_issue(&req->issue_stat, blk_rq_sectors(req));
2996 req->rq_flags |= RQF_STATS;
2997 wbt_issue(req->q->rq_wb, &req->issue_stat);
2998 }
2999
3000 BUG_ON(blk_rq_is_complete(req));
3001 blk_add_timer(req);
3002}
3003EXPORT_SYMBOL(blk_start_request);
3004
3005
/**
 * blk_fetch_request - fetch a request from a request queue
 * @q: request queue to fetch a request from
 *
 * Description:
 *     Return the request at the top of @q.  The request is started on
 *     return and the LLD can start processing it immediately.
 *
 * Return:
 *     Pointer to the request at the top of @q if available.  Null
 *     otherwise.
 */
3017struct request *blk_fetch_request(struct request_queue *q)
3018{
3019 struct request *rq;
3020
3021 lockdep_assert_held(q->queue_lock);
3022 WARN_ON_ONCE(q->mq_ops);
3023
3024 rq = blk_peek_request(q);
3025 if (rq)
3026 blk_start_request(rq);
3027 return rq;
3028}
3029EXPORT_SYMBOL(blk_fetch_request);
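
/*
 * For reference, the canonical single-queue ->request_fn() dispatch loop
 * built on the helpers above looks roughly like this (illustrative sketch
 * only; my_handle_cmd() is a hypothetical driver function, and real drivers
 * add error handling and may drop the queue lock while talking to hardware):
 *
 *	static void my_request_fn(struct request_queue *q)
 *	{
 *		struct request *rq;
 *
 *		while ((rq = blk_fetch_request(q)) != NULL) {
 *			blk_status_t sts = my_handle_cmd(rq);
 *
 *			__blk_end_request_all(rq, sts);
 *		}
 *	}
 */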
3030
/*
 * Steal bios from a request and add them to a bio list.
 * The request must not have been partially completed before.
 */
3035void blk_steal_bios(struct bio_list *list, struct request *rq)
3036{
3037 if (rq->bio) {
3038 if (list->tail)
3039 list->tail->bi_next = rq->bio;
3040 else
3041 list->head = rq->bio;
3042 list->tail = rq->biotail;
3043
3044 rq->bio = NULL;
3045 rq->biotail = NULL;
3046 }
3047
3048 rq->__data_len = 0;
3049}
3050EXPORT_SYMBOL_GPL(blk_steal_bios);
3051
3052
/**
 * blk_update_request - Special helper function for request stacking drivers
 * @req:      the request being processed
 * @error:    block status code
 * @nr_bytes: number of bytes to complete @req
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @req, but doesn't complete
 *     the request structure even if @req doesn't have leftover.
 *     If @req has leftover, sets it up for the next range of segments.
 *
 *     This special helper function is only for request stacking drivers
 *     (e.g. request-based dm) so that they can handle partial completion.
 *     Actual device drivers should use blk_end_request instead.
 *
 *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
 *     %false return from this function.
 *
 * Return:
 *     %false - this request doesn't have any more data
 *     %true  - this request has more data
 **/
3074bool blk_update_request(struct request *req, blk_status_t error,
3075 unsigned int nr_bytes)
3076{
3077 int total_bytes;
3078
3079 trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
3080
3081 if (!req->bio)
3082 return false;
3083
3084 if (unlikely(error && !blk_rq_is_passthrough(req) &&
3085 !(req->rq_flags & RQF_QUIET)))
3086 print_req_error(req, error);
3087
3088 blk_account_io_completion(req, nr_bytes);
3089
3090 total_bytes = 0;
3091 while (req->bio) {
3092 struct bio *bio = req->bio;
3093 unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
3094
3095 if (bio_bytes == bio->bi_iter.bi_size)
3096 req->bio = bio->bi_next;
3097
 /* Completion has already been traced */
3099 bio_clear_flag(bio, BIO_TRACE_COMPLETION);
3100 req_bio_endio(req, bio, bio_bytes, error);
3101
3102 total_bytes += bio_bytes;
3103 nr_bytes -= bio_bytes;
3104
3105 if (!nr_bytes)
3106 break;
3107 }
3108
 /*
  * completely done
  */
3112 if (!req->bio) {
 /*
  * Reset counters so that the request stacking driver
  * can find how many bytes remain in the request
  * later.
  */
3118 req->__data_len = 0;
3119 return false;
3120 }
3121
3122 req->__data_len -= total_bytes;
3123
 /* update sector only for requests with a clear definition of sector */
3125 if (!blk_rq_is_passthrough(req))
3126 req->__sector += total_bytes >> 9;
3127
 /* mixed attributes always follow the first bio */
3129 if (req->rq_flags & RQF_MIXED_MERGE) {
3130 req->cmd_flags &= ~REQ_FAILFAST_MASK;
3131 req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
3132 }
3133
3134 if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
 /*
  * If the total number of sectors is less than the first segment
  * size, something has gone terribly wrong.
  */
3139 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
3140 blk_dump_rq_flags(req, "request botched");
3141 req->__data_len = blk_rq_cur_bytes(req);
3142 }
3143
 /* recalculate the number of segments */
3145 blk_recalc_rq_segments(req);
3146 }
3147
3148 return true;
3149}
3150EXPORT_SYMBOL_GPL(blk_update_request);
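
/*
 * Rough usage sketch for partial completion (assumes the caller tracks
 * @done_bytes itself; my_complete_rq() is a hypothetical driver helper):
 *
 *	if (blk_update_request(rq, BLK_STS_OK, done_bytes))
 *		return;			(more data remains, rq is set up)
 *	my_complete_rq(rq);		(all bios done, finish accounting)
 */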
3151
3152static bool blk_update_bidi_request(struct request *rq, blk_status_t error,
3153 unsigned int nr_bytes,
3154 unsigned int bidi_bytes)
3155{
3156 if (blk_update_request(rq, error, nr_bytes))
3157 return true;
3158
 /* Bidi request must be completed as a whole */
3160 if (unlikely(blk_bidi_rq(rq)) &&
3161 blk_update_request(rq->next_rq, error, bidi_bytes))
3162 return true;
3163
3164 if (blk_queue_add_random(rq->q))
3165 add_disk_randomness(rq->rq_disk);
3166
3167 return false;
3168}
3169
3170
/**
 * blk_unprep_request - unprepare a request
 * @req:	the request
 *
 * This function makes a request ready for complete resubmission (or
 * completion).  It happens only after all error handling is complete,
 * so it represents the appropriate moment to deallocate any resources
 * that were allocated to the request in the prep_rq_fn.  The queue
 * lock is held when calling this.
 */
3180void blk_unprep_request(struct request *req)
3181{
3182 struct request_queue *q = req->q;
3183
3184 req->rq_flags &= ~RQF_DONTPREP;
3185 if (q->unprep_rq_fn)
3186 q->unprep_rq_fn(q, req);
3187}
3188EXPORT_SYMBOL_GPL(blk_unprep_request);
3189
3190void blk_finish_request(struct request *req, blk_status_t error)
3191{
3192 struct request_queue *q = req->q;
3193
3194 lockdep_assert_held(req->q->queue_lock);
3195 WARN_ON_ONCE(q->mq_ops);
3196
3197 if (req->rq_flags & RQF_STATS)
3198 blk_stat_add(req);
3199
3200 if (req->rq_flags & RQF_QUEUED)
3201 blk_queue_end_tag(q, req);
3202
3203 BUG_ON(blk_queued_rq(req));
3204
3205 if (unlikely(laptop_mode) && !blk_rq_is_passthrough(req))
3206 laptop_io_completion(req->q->backing_dev_info);
3207
3208 blk_delete_timer(req);
3209
3210 if (req->rq_flags & RQF_DONTPREP)
3211 blk_unprep_request(req);
3212
3213 blk_account_io_done(req);
3214
3215 if (req->end_io) {
3216 wbt_done(req->q->rq_wb, &req->issue_stat);
3217 req->end_io(req, error);
3218 } else {
3219 if (blk_bidi_rq(req))
3220 __blk_put_request(req->next_rq->q, req->next_rq);
3221
3222 __blk_put_request(q, req);
3223 }
3224}
3225EXPORT_SYMBOL(blk_finish_request);
3226
3227
/**
 * blk_end_bidi_request - Complete a bidi request
 * @rq:         the request to complete
 * @error:      block status code
 * @nr_bytes:   number of bytes to complete @rq
 * @bidi_bytes: number of bytes to complete @rq->next_rq
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
 *     Drivers that support bidi can safely call this member for any
 *     type of request, bidi or uni.  In the latter case @bidi_bytes is
 *     just ignored.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
3244static bool blk_end_bidi_request(struct request *rq, blk_status_t error,
3245 unsigned int nr_bytes, unsigned int bidi_bytes)
3246{
3247 struct request_queue *q = rq->q;
3248 unsigned long flags;
3249
3250 WARN_ON_ONCE(q->mq_ops);
3251
3252 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
3253 return true;
3254
3255 spin_lock_irqsave(q->queue_lock, flags);
3256 blk_finish_request(rq, error);
3257 spin_unlock_irqrestore(q->queue_lock, flags);
3258
3259 return false;
3260}
3261
3262
/**
 * __blk_end_bidi_request - Complete a bidi request with queue lock held
 * @rq:         the request to complete
 * @error:      block status code
 * @nr_bytes:   number of bytes to complete @rq
 * @bidi_bytes: number of bytes to complete @rq->next_rq
 *
 * Description:
 *     Identical to blk_end_bidi_request() except that the queue lock is
 *     assumed to be held on entry and remains so on return.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
3277static bool __blk_end_bidi_request(struct request *rq, blk_status_t error,
3278 unsigned int nr_bytes, unsigned int bidi_bytes)
3279{
3280 lockdep_assert_held(rq->q->queue_lock);
3281 WARN_ON_ONCE(rq->q->mq_ops);
3282
3283 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
3284 return true;
3285
3286 blk_finish_request(rq, error);
3287
3288 return false;
3289}
3290
3291
/**
 * blk_end_request - Helper function for drivers to complete the request.
 * @rq:       the request being processed
 * @error:    block status code
 * @nr_bytes: number of bytes to complete
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @rq.
 *     If @rq has leftover, sets it up for the next range of segments.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
3305bool blk_end_request(struct request *rq, blk_status_t error,
3306 unsigned int nr_bytes)
3307{
3308 WARN_ON_ONCE(rq->q->mq_ops);
3309 return blk_end_bidi_request(rq, error, nr_bytes, 0);
3310}
3311EXPORT_SYMBOL(blk_end_request);
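
/*
 * Note: blk_end_request() and blk_end_request_all() take the queue lock
 * themselves (with IRQs saved), while the __blk_end_request*() variants
 * below expect the caller to already hold q->queue_lock.
 */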
3312
3313
/**
 * blk_end_request_all - Helper function for drivers to finish the request.
 * @rq: the request to finish
 * @error: block status code
 *
 * Description:
 *     Completely finish @rq.
 */
3321void blk_end_request_all(struct request *rq, blk_status_t error)
3322{
3323 bool pending;
3324 unsigned int bidi_bytes = 0;
3325
3326 if (unlikely(blk_bidi_rq(rq)))
3327 bidi_bytes = blk_rq_bytes(rq->next_rq);
3328
3329 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
3330 BUG_ON(pending);
3331}
3332EXPORT_SYMBOL(blk_end_request_all);
3333
3334
/**
 * __blk_end_request - Helper function for drivers to complete the request.
 * @rq:       the request being processed
 * @error:    block status code
 * @nr_bytes: number of bytes to complete
 *
 * Description:
 *     Must be called with the queue lock held, unlike blk_end_request().
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
3347bool __blk_end_request(struct request *rq, blk_status_t error,
3348 unsigned int nr_bytes)
3349{
3350 lockdep_assert_held(rq->q->queue_lock);
3351 WARN_ON_ONCE(rq->q->mq_ops);
3352
3353 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
3354}
3355EXPORT_SYMBOL(__blk_end_request);
3356
3357
/**
 * __blk_end_request_all - Helper function for drivers to finish the request.
 * @rq: the request to finish
 * @error: block status code
 *
 * Description:
 *     Completely finish @rq.  Must be called with the queue lock held.
 */
3365void __blk_end_request_all(struct request *rq, blk_status_t error)
3366{
3367 bool pending;
3368 unsigned int bidi_bytes = 0;
3369
3370 lockdep_assert_held(rq->q->queue_lock);
3371 WARN_ON_ONCE(rq->q->mq_ops);
3372
3373 if (unlikely(blk_bidi_rq(rq)))
3374 bidi_bytes = blk_rq_bytes(rq->next_rq);
3375
3376 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
3377 BUG_ON(pending);
3378}
3379EXPORT_SYMBOL(__blk_end_request_all);
3380
3381
/**
 * __blk_end_request_cur - Helper function to finish the current request chunk.
 * @rq: the request to finish the current chunk for
 * @error: block status code
 *
 * Description:
 *     Complete the current consecutively mapped chunk from @rq.  Must
 *     be called with the queue lock held.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 */
3394bool __blk_end_request_cur(struct request *rq, blk_status_t error)
3395{
3396 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
3397}
3398EXPORT_SYMBOL(__blk_end_request_cur);
3399
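/*
 * Initialize a request from a single bio: the segment count, data length,
 * bio list and target disk are all taken from @bio.
 */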
3400void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
3401 struct bio *bio)
3402{
3403 if (bio_has_data(bio))
3404 rq->nr_phys_segments = bio_phys_segments(q, bio);
3405 else if (bio_op(bio) == REQ_OP_DISCARD)
3406 rq->nr_phys_segments = 1;
3407
3408 rq->__data_len = bio->bi_iter.bi_size;
3409 rq->bio = rq->biotail = bio;
3410
3411 if (bio->bi_disk)
3412 rq->rq_disk = bio->bi_disk;
3413}
3414
3415#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/**
 * rq_flush_dcache_pages - Helper function to flush all pages in a request
 * @rq: the request to be flushed
 *
 * Description:
 *     Flush all pages in @rq.
 */
3423void rq_flush_dcache_pages(struct request *rq)
3424{
3425 struct req_iterator iter;
3426 struct bio_vec bvec;
3427
3428 rq_for_each_segment(bvec, rq, iter)
3429 flush_dcache_page(bvec.bv_page);
3430}
3431EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
3432#endif
3433
3434
/**
 * blk_lld_busy - Check if underlying low-level drivers of a device are busy
 * @q : the queue of the device being checked
 *
 * Description:
 *    Check if underlying low-level drivers of a device are busy.
 *    If a driver wants to export its busy state, it must set its own
 *    exporting function using blk_queue_lld_busy() first.
 *
 *    Basically, this function is used only by request stacking drivers
 *    to stop dispatching requests to underlying devices when those
 *    devices are busy.  This behavior helps more I/O merging on the queue
 *    of the request stacking driver and prevents I/O throughput regression
 *    under burst I/O load.
 *
 * Return:
 *    0 - Not busy (The request stacking driver should dispatch requests)
 *    1 - Busy (The request stacking driver should stop dispatching requests)
 */
3453int blk_lld_busy(struct request_queue *q)
3454{
3455 if (q->lld_busy_fn)
3456 return q->lld_busy_fn(q);
3457
3458 return 0;
3459}
3460EXPORT_SYMBOL_GPL(blk_lld_busy);
3461
/**
 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
 * @rq: the clone request to be cleaned up
 *
 * Description:
 *     Free all bios in @rq for a cloned request.
 */
3469void blk_rq_unprep_clone(struct request *rq)
3470{
3471 struct bio *bio;
3472
3473 while ((bio = rq->bio) != NULL) {
3474 rq->bio = bio->bi_next;
3475
3476 bio_put(bio);
3477 }
3478}
3479EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
3480
/*
 * Copy attributes of the original request to the clone request.
 * The bios and actual data are not copied here; blk_rq_prep_clone()
 * clones the bios separately.
 */
3485static void __blk_rq_prep_clone(struct request *dst, struct request *src)
3486{
3487 dst->cpu = src->cpu;
3488 dst->__sector = blk_rq_pos(src);
3489 dst->__data_len = blk_rq_bytes(src);
3490 dst->nr_phys_segments = src->nr_phys_segments;
3491 dst->ioprio = src->ioprio;
3492 dst->extra_len = src->extra_len;
3493}
3494
3495
/**
 * blk_rq_prep_clone - Helper function to set up a clone request
 * @rq: the request to be set up
 * @rq_src: original request to be cloned
 * @bs: bio_set that bios for the clone are allocated from
 * @gfp_mask: memory allocation mask for bio
 * @bio_ctr: setup function to be called for each clone bio.
 *           Returns %0 for success, non-%0 for failure.
 * @data: private data to be passed to @bio_ctr
 *
 * Description:
 *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
 *     Pages that the original bios point to are not copied; the cloned bios
 *     just point to the same pages.  Cloned bios must therefore be completed
 *     before the original bios, which means the caller must complete @rq
 *     before @rq_src.
 */
3514int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
3515 struct bio_set *bs, gfp_t gfp_mask,
3516 int (*bio_ctr)(struct bio *, struct bio *, void *),
3517 void *data)
3518{
3519 struct bio *bio, *bio_src;
3520
3521 if (!bs)
3522 bs = fs_bio_set;
3523
3524 __rq_for_each_bio(bio_src, rq_src) {
3525 bio = bio_clone_fast(bio_src, gfp_mask, bs);
3526 if (!bio)
3527 goto free_and_out;
3528
3529 if (bio_ctr && bio_ctr(bio, bio_src, data))
3530 goto free_and_out;
3531
3532 if (rq->bio) {
3533 rq->biotail->bi_next = bio;
3534 rq->biotail = bio;
3535 } else
3536 rq->bio = rq->biotail = bio;
3537 }
3538
3539 __blk_rq_prep_clone(rq, rq_src);
3540
3541 return 0;
3542
3543free_and_out:
3544 if (bio)
3545 bio_put(bio);
3546 blk_rq_unprep_clone(rq);
3547
3548 return -ENOMEM;
3549}
3550EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
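
/*
 * Putting the cloning helpers together, a request stacking driver typically
 * does something like the following (sketch only; @clone allocation and the
 * my_bio_ctr() callback are hypothetical, driver-specific pieces):
 *
 *	if (blk_rq_prep_clone(clone, rq, bs, GFP_ATOMIC, my_bio_ctr, priv))
 *		return BLK_STS_RESOURCE;
 *	ret = blk_insert_cloned_request(clone->q, clone);
 */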
3551
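/*
 * kblockd is the block layer's private workqueue (allocated in
 * blk_dev_init() with WQ_MEM_RECLAIM | WQ_HIGHPRI); the helpers below are
 * thin wrappers used to run queues and other block work asynchronously.
 */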
3552int kblockd_schedule_work(struct work_struct *work)
3553{
3554 return queue_work(kblockd_workqueue, work);
3555}
3556EXPORT_SYMBOL(kblockd_schedule_work);
3557
3558int kblockd_schedule_work_on(int cpu, struct work_struct *work)
3559{
3560 return queue_work_on(cpu, kblockd_workqueue, work);
3561}
3562EXPORT_SYMBOL(kblockd_schedule_work_on);
3563
3564int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
3565 unsigned long delay)
3566{
3567 return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
3568}
3569EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
3570
3571
/**
 * blk_start_plug - initialize blk_plug and track it inside the task_struct
 * @plug:	The &struct blk_plug that needs to be initialized
 *
 * Description:
 *   Tracking blk_plug inside the task_struct will help with auto-flushing the
 *   pending I/O should the task end up blocking between blk_start_plug() and
 *   blk_finish_plug().  This is important from a performance perspective, but
 *   it also ensures that we don't deadlock.  For instance, if the task is
 *   blocking on a memory allocation, memory reclaim could end up wanting to
 *   free a page belonging to a request that is currently residing in our
 *   private plug.  By flushing the pending I/O when the process goes to
 *   sleep, we avoid this kind of deadlock.
 */
3585void blk_start_plug(struct blk_plug *plug)
3586{
3587 struct task_struct *tsk = current;
3588
 /*
  * If this is a nested plug, don't actually assign it.
  */
3592 if (tsk->plug)
3593 return;
3594
3595 INIT_LIST_HEAD(&plug->list);
3596 INIT_LIST_HEAD(&plug->mq_list);
3597 INIT_LIST_HEAD(&plug->cb_list);
3598
 /*
  * Store ordering should not be needed here, since a potential
  * preempt will imply a full memory barrier.
  */
3602 tsk->plug = plug;
3603}
3604EXPORT_SYMBOL(blk_start_plug);
3605
3606static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
3607{
3608 struct request *rqa = container_of(a, struct request, queuelist);
3609 struct request *rqb = container_of(b, struct request, queuelist);
3610
3611 return !(rqa->q < rqb->q ||
3612 (rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb)));
3613}
3614
/*
 * If 'from_schedule' is true, then postpone the dispatch of requests
 * until a safe kblockd context.  We do this to avoid accidental big
 * additional stack usage in driver dispatch, in places where the original
 * plugger did not intend it.
 */
3621static void queue_unplugged(struct request_queue *q, unsigned int depth,
3622 bool from_schedule)
3623 __releases(q->queue_lock)
3624{
3625 lockdep_assert_held(q->queue_lock);
3626
3627 trace_block_unplug(q, depth, !from_schedule);
3628
3629 if (from_schedule)
3630 blk_run_queue_async(q);
3631 else
3632 __blk_run_queue(q);
3633 spin_unlock(q->queue_lock);
3634}
3635
3636static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
3637{
3638 LIST_HEAD(callbacks);
3639
3640 while (!list_empty(&plug->cb_list)) {
3641 list_splice_init(&plug->cb_list, &callbacks);
3642
3643 while (!list_empty(&callbacks)) {
3644 struct blk_plug_cb *cb = list_first_entry(&callbacks,
3645 struct blk_plug_cb,
3646 list);
3647 list_del(&cb->list);
3648 cb->callback(cb, from_schedule);
3649 }
3650 }
3651}
3652
3653struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
3654 int size)
3655{
3656 struct blk_plug *plug = current->plug;
3657 struct blk_plug_cb *cb;
3658
3659 if (!plug)
3660 return NULL;
3661
3662 list_for_each_entry(cb, &plug->cb_list, list)
3663 if (cb->callback == unplug && cb->data == data)
3664 return cb;
3665
 /* Not currently on the callback list */
3667 BUG_ON(size < sizeof(*cb));
3668 cb = kzalloc(size, GFP_ATOMIC);
3669 if (cb) {
3670 cb->data = data;
3671 cb->callback = unplug;
3672 list_add(&cb->list, &plug->cb_list);
3673 }
3674 return cb;
3675}
3676EXPORT_SYMBOL(blk_check_plugged);
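
/*
 * blk_check_plugged() lets code that does its own batching (e.g. md/raid)
 * attach per-task state to the current plug.  Illustrative sketch only; the
 * callback and containing structure below are hypothetical:
 *
 *	struct my_plug_cb {
 *		struct blk_plug_cb cb;
 *		struct list_head pending;
 *	};
 *
 *	cb = blk_check_plugged(my_unplug, dev, sizeof(struct my_plug_cb));
 *	if (cb)
 *		my_add_pending(container_of(cb, struct my_plug_cb, cb), bio);
 *
 * my_unplug(cb, from_schedule) is then invoked when the plug is flushed.
 */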
3677
3678void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3679{
3680 struct request_queue *q;
3681 unsigned long flags;
3682 struct request *rq;
3683 LIST_HEAD(list);
3684 unsigned int depth;
3685
3686 flush_plug_callbacks(plug, from_schedule);
3687
3688 if (!list_empty(&plug->mq_list))
3689 blk_mq_flush_plug_list(plug, from_schedule);
3690
3691 if (list_empty(&plug->list))
3692 return;
3693
3694 list_splice_init(&plug->list, &list);
3695
3696 list_sort(NULL, &list, plug_rq_cmp);
3697
3698 q = NULL;
3699 depth = 0;
3700
 /*
  * Save and disable interrupts here, to avoid doing it for every
  * queue lock we have to take.
  */
3705 local_irq_save(flags);
3706 while (!list_empty(&list)) {
3707 rq = list_entry_rq(list.next);
3708 list_del_init(&rq->queuelist);
3709 BUG_ON(!rq->q);
3710 if (rq->q != q) {
 /*
  * This drops the queue lock
  */
3714 if (q)
3715 queue_unplugged(q, depth, from_schedule);
3716 q = rq->q;
3717 depth = 0;
3718 spin_lock(q->queue_lock);
3719 }
3720
 /*
  * Short-circuit if @q is dead
  */
3724 if (unlikely(blk_queue_dying(q))) {
3725 __blk_end_request_all(rq, BLK_STS_IOERR);
3726 continue;
3727 }
3728
 /*
  * rq is already accounted, so use raw insert
  */
3732 if (op_is_flush(rq->cmd_flags))
3733 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
3734 else
3735 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
3736
3737 depth++;
3738 }
3739
 /*
  * This drops the queue lock
  */
3743 if (q)
3744 queue_unplugged(q, depth, from_schedule);
3745
3746 local_irq_restore(flags);
3747}
3748
3749void blk_finish_plug(struct blk_plug *plug)
3750{
3751 if (plug != current->plug)
3752 return;
3753 blk_flush_plug_list(plug, false);
3754
3755 current->plug = NULL;
3756}
3757EXPORT_SYMBOL(blk_finish_plug);
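
/*
 * Typical plugging pattern for a submitter that issues a burst of I/O
 * (sketch only; submit_one_bio() stands in for any bio submission path):
 *
 *	struct blk_plug plug;
 *
 *	blk_start_plug(&plug);
 *	for (i = 0; i < nr; i++)
 *		submit_one_bio(bios[i]);
 *	blk_finish_plug(&plug);
 *
 * Requests queued while the plug is active are held in the per-task lists
 * and dispatched in sorted batches by blk_flush_plug_list() above.
 */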
3758
3759#ifdef CONFIG_PM
3760
/**
 * blk_pm_runtime_init - Block layer runtime PM initialization routine
 * @q: the queue of the device
 * @dev: the device the queue belongs to
 *
 * Description:
 *    Initialize the runtime-PM-related fields for @q and start auto suspend
 *    for @dev.  Drivers that want to take advantage of request-based runtime
 *    PM should call this function after @dev has been initialized and its
 *    request queue @q has been allocated, and while runtime PM for it cannot
 *    happen yet (either because it is disabled/forbidden or because its
 *    usage_count > 0).  In most cases, drivers should call this function
 *    before any I/O has taken place.
 *
 *    This function sets the device up for autosuspend; the autosuspend delay
 *    is set to -1 to make runtime suspend impossible until an updated value
 *    is set either by the user or by the driver.  Drivers do not need to
 *    touch other autosuspend settings.
 *
 *    Block layer runtime PM is request based, so it only works for drivers
 *    that use requests as their I/O unit instead of those that use bios
 *    directly.
 */
3781void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
3782{
 /* Runtime PM is not supported for blk-mq queues yet */
3784 if (q->mq_ops)
3785 return;
3786
3787 q->dev = dev;
3788 q->rpm_status = RPM_ACTIVE;
3789 pm_runtime_set_autosuspend_delay(q->dev, -1);
3790 pm_runtime_use_autosuspend(q->dev);
3791}
3792EXPORT_SYMBOL(blk_pm_runtime_init);
3793
3794
/**
 * blk_pre_runtime_suspend - Pre runtime suspend check
 * @q: the queue of the device
 *
 * Description:
 *    This function checks if runtime suspend is allowed for the device by
 *    examining whether there are any requests pending in the queue.  If
 *    there are requests pending, the device cannot be runtime suspended;
 *    otherwise, the queue's status is updated to SUSPENDING and the driver
 *    can proceed to suspend the device.
 *
 *    In the not-allowed case, we mark last busy for the device so that the
 *    runtime PM core will try to autosuspend it some time later.
 *
 *    This function should be called near the start of the device's
 *    runtime_suspend callback.
 *
 * Return:
 *    0		- OK to runtime suspend the device
 *    -EBUSY	- Device should not be runtime suspended
 */
3815int blk_pre_runtime_suspend(struct request_queue *q)
3816{
3817 int ret = 0;
3818
3819 if (!q->dev)
3820 return ret;
3821
3822 spin_lock_irq(q->queue_lock);
3823 if (q->nr_pending) {
3824 ret = -EBUSY;
3825 pm_runtime_mark_last_busy(q->dev);
3826 } else {
3827 q->rpm_status = RPM_SUSPENDING;
3828 }
3829 spin_unlock_irq(q->queue_lock);
3830 return ret;
3831}
3832EXPORT_SYMBOL(blk_pre_runtime_suspend);
3833
3834
/**
 * blk_post_runtime_suspend - Post runtime suspend processing
 * @q: the queue of the device
 * @err: return value of the device's runtime_suspend function
 *
 * Description:
 *    Update the queue's runtime status according to the return value of the
 *    device's runtime suspend function, and mark last busy for the device so
 *    that the PM core will try to auto suspend the device at a later time.
 *
 *    This function should be called near the end of the device's
 *    runtime_suspend callback.
 */
3847void blk_post_runtime_suspend(struct request_queue *q, int err)
3848{
3849 if (!q->dev)
3850 return;
3851
3852 spin_lock_irq(q->queue_lock);
3853 if (!err) {
3854 q->rpm_status = RPM_SUSPENDED;
3855 } else {
3856 q->rpm_status = RPM_ACTIVE;
3857 pm_runtime_mark_last_busy(q->dev);
3858 }
3859 spin_unlock_irq(q->queue_lock);
3860}
3861EXPORT_SYMBOL(blk_post_runtime_suspend);
3862
3863
/**
 * blk_pre_runtime_resume - Pre runtime resume processing
 * @q: the queue of the device
 *
 * Description:
 *    Update the queue's runtime status to RESUMING in preparation for the
 *    runtime resume of the device.
 *
 *    This function should be called near the start of the device's
 *    runtime_resume callback.
 */
3874void blk_pre_runtime_resume(struct request_queue *q)
3875{
3876 if (!q->dev)
3877 return;
3878
3879 spin_lock_irq(q->queue_lock);
3880 q->rpm_status = RPM_RESUMING;
3881 spin_unlock_irq(q->queue_lock);
3882}
3883EXPORT_SYMBOL(blk_pre_runtime_resume);
3884
3885
/**
 * blk_post_runtime_resume - Post runtime resume processing
 * @q: the queue of the device
 * @err: return value of the device's runtime_resume function
 *
 * Description:
 *    Update the queue's runtime status according to the return value of the
 *    device's runtime_resume function.  If it resumed successfully, process
 *    the requests that were queued while the device was resuming, then mark
 *    last busy and initiate autosuspend for it.
 *
 *    This function should be called near the end of the device's
 *    runtime_resume callback.
 */
3899void blk_post_runtime_resume(struct request_queue *q, int err)
3900{
3901 if (!q->dev)
3902 return;
3903
3904 spin_lock_irq(q->queue_lock);
3905 if (!err) {
3906 q->rpm_status = RPM_ACTIVE;
3907 __blk_run_queue(q);
3908 pm_runtime_mark_last_busy(q->dev);
3909 pm_request_autosuspend(q->dev);
3910 } else {
3911 q->rpm_status = RPM_SUSPENDED;
3912 }
3913 spin_unlock_irq(q->queue_lock);
3914}
3915EXPORT_SYMBOL(blk_post_runtime_resume);
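
/*
 * The helpers above are meant to bracket a driver's runtime PM callbacks.
 * Illustrative sketch only; dev_to_queue() and my_device_quiesce() are
 * hypothetical driver pieces:
 *
 *	static int my_runtime_suspend(struct device *dev)
 *	{
 *		struct request_queue *q = dev_to_queue(dev);
 *		int err = blk_pre_runtime_suspend(q);
 *
 *		if (err)
 *			return err;
 *		err = my_device_quiesce(dev);
 *		blk_post_runtime_suspend(q, err);
 *		return err;
 *	}
 *
 * blk_pre_runtime_resume()/blk_post_runtime_resume() wrap the corresponding
 * runtime_resume callback in the same way.
 */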
3916
3917
/**
 * blk_set_runtime_active - Force runtime status of the queue to be active
 * @q: the queue of the device
 *
 * If the device is left runtime suspended during system suspend, the resume
 * hook typically resumes the device and corrects the runtime status
 * accordingly.  However, that does not affect the queue runtime PM status,
 * which is still "suspended".  This prevents processing requests from the
 * queue.
 *
 * This function can be used in a driver's resume hook to correct the queue's
 * runtime PM status and re-enable peeking requests from the queue.  It
 * should be called before the first request is added to the queue.
 */
3931void blk_set_runtime_active(struct request_queue *q)
3932{
3933 spin_lock_irq(q->queue_lock);
3934 q->rpm_status = RPM_ACTIVE;
3935 pm_runtime_mark_last_busy(q->dev);
3936 pm_request_autosuspend(q->dev);
3937 spin_unlock_irq(q->queue_lock);
3938}
3939EXPORT_SYMBOL(blk_set_runtime_active);
3940#endif
3941
3942int __init blk_dev_init(void)
3943{
3944 BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
3945 BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
3946 FIELD_SIZEOF(struct request, cmd_flags));
3947 BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
3948 FIELD_SIZEOF(struct bio, bi_opf));
3949
 /* used for unplugging and affects IO latency/throughput - HIGHPRI */
3951 kblockd_workqueue = alloc_workqueue("kblockd",
3952 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
3953 if (!kblockd_workqueue)
3954 panic("Failed to create kblockd\n");
3955
3956 request_cachep = kmem_cache_create("blkdev_requests",
3957 sizeof(struct request), 0, SLAB_PANIC, NULL);
3958
3959 blk_requestq_cachep = kmem_cache_create("request_queue",
3960 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
3961
3962#ifdef CONFIG_DEBUG_FS
3963 blk_debugfs_root = debugfs_create_dir("block", NULL);
3964#endif
3965
3966 return 0;
3967}
3968