/*
 * Block layer core: request_queue setup and teardown, request allocation,
 * and the bio submission paths for both the legacy request_fn model and
 * blk-mq.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>
#include <linux/list_sort.h>
#include <linux/delay.h>
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"

#ifdef CONFIG_DEBUG_FS
struct dentry *blk_debugfs_root;
#endif

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);

DEFINE_IDA(blk_queue_ida);

/*
 * For the allocated request tables
 */
struct kmem_cache *request_cachep;

/*
 * For queue allocation
 */
struct kmem_cache *blk_requestq_cachep;

/*
 * Controlling structure to kblockd
 */
static struct workqueue_struct *kblockd_workqueue;

/**
 * blk_queue_flag_set - atomically set a queue flag
 * @flag: flag to be set
 * @q: request queue
 */
void blk_queue_flag_set(unsigned int flag, struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	queue_flag_set(flag, q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_queue_flag_set);

/**
 * blk_queue_flag_clear - atomically clear a queue flag
 * @flag: flag to be cleared
 * @q: request queue
 */
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	queue_flag_clear(flag, q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_queue_flag_clear);

/**
 * blk_queue_flag_test_and_set - atomically test and set a queue flag
 * @flag: flag to be set
 * @q: request queue
 *
 * Returns the previous value of @flag - 0 if the flag was not set and 1 if
 * the flag was already set.
 */
bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
{
	unsigned long flags;
	bool res;

	spin_lock_irqsave(q->queue_lock, flags);
	res = queue_flag_test_and_set(flag, q);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return res;
}
EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);

/**
 * blk_queue_flag_test_and_clear - atomically test and clear a queue flag
 * @flag: flag to be cleared
 * @q: request queue
 *
 * Returns the previous value of @flag - 0 if the flag was not set and 1 if
 * the flag was set.
 */
bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q)
{
	unsigned long flags;
	bool res;

	spin_lock_irqsave(q->queue_lock, flags);
	res = queue_flag_test_and_clear(flag, q);
	spin_unlock_irqrestore(q->queue_lock, flags);

	return res;
}
EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear);
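
/*
 * Typical use of the helpers above (illustrative sketch only, not taken
 * from a specific driver): a driver that knows its device is
 * non-rotational and should not feed the entropy pool would do, after
 * allocating @q:
 *
 *	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
 *	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
 *
 * Both helpers acquire q->queue_lock internally, so they are called
 * without the lock held.
 */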
145
146static void blk_clear_congested(struct request_list *rl, int sync)
147{
148#ifdef CONFIG_CGROUP_WRITEBACK
149 clear_wb_congested(rl->blkg->wb_congested, sync);
150#else
151
152
153
154
155 if (rl == &rl->q->root_rl)
156 clear_wb_congested(rl->q->backing_dev_info->wb.congested, sync);
157#endif
158}
159
160static void blk_set_congested(struct request_list *rl, int sync)
161{
162#ifdef CONFIG_CGROUP_WRITEBACK
163 set_wb_congested(rl->blkg->wb_congested, sync);
164#else
165
166 if (rl == &rl->q->root_rl)
167 set_wb_congested(rl->q->backing_dev_info->wb.congested, sync);
168#endif
169}
170
171void blk_queue_congestion_threshold(struct request_queue *q)
172{
173 int nr;
174
175 nr = q->nr_requests - (q->nr_requests / 8) + 1;
176 if (nr > q->nr_requests)
177 nr = q->nr_requests;
178 q->nr_congestion_on = nr;
179
180 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
181 if (nr < 1)
182 nr = 1;
183 q->nr_congestion_off = nr;
184}
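
/*
 * Worked example for the thresholds above (assuming the default
 * q->nr_requests of 128, i.e. BLKDEV_MAX_RQ): congestion turns on at
 * 128 - 128/8 + 1 = 113 queued requests and turns back off at
 * 128 - 128/8 - 128/16 - 1 = 103, giving a small hysteresis window so the
 * congested state does not flap around a single boundary value.
 */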
185
186void blk_rq_init(struct request_queue *q, struct request *rq)
187{
188 memset(rq, 0, sizeof(*rq));
189
190 INIT_LIST_HEAD(&rq->queuelist);
191 INIT_LIST_HEAD(&rq->timeout_list);
192 rq->cpu = -1;
193 rq->q = q;
194 rq->__sector = (sector_t) -1;
195 INIT_HLIST_NODE(&rq->hash);
196 RB_CLEAR_NODE(&rq->rb_node);
197 rq->tag = -1;
198 rq->internal_tag = -1;
199 rq->start_time_ns = ktime_get_ns();
200 rq->part = NULL;
201}
202EXPORT_SYMBOL(blk_rq_init);
203
static const struct {
	int		errno;
	const char	*name;
} blk_errors[] = {
	[BLK_STS_OK]		= { 0,		"" },
	[BLK_STS_NOTSUPP]	= { -EOPNOTSUPP, "operation not supported" },
	[BLK_STS_TIMEOUT]	= { -ETIMEDOUT,	"timeout" },
	[BLK_STS_NOSPC]		= { -ENOSPC,	"critical space allocation" },
	[BLK_STS_TRANSPORT]	= { -ENOLINK,	"recoverable transport" },
	[BLK_STS_TARGET]	= { -EREMOTEIO,	"critical target" },
	[BLK_STS_NEXUS]		= { -EBADE,	"critical nexus" },
	[BLK_STS_MEDIUM]	= { -ENODATA,	"critical medium" },
	[BLK_STS_PROTECTION]	= { -EILSEQ,	"protection" },
	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
	[BLK_STS_DEV_RESOURCE]	= { -EBUSY,	"device resource" },
	[BLK_STS_AGAIN]		= { -EAGAIN,	"nonblocking retry" },

	/* device mapper special case, should not leak out: */
	[BLK_STS_DM_REQUEUE]	= { -EREMCHG,	"dm internal retry" },

	/* everything else not covered above: */
	[BLK_STS_IOERR]		= { -EIO,	"I/O" },
};
227
228blk_status_t errno_to_blk_status(int errno)
229{
230 int i;
231
232 for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
233 if (blk_errors[i].errno == errno)
234 return (__force blk_status_t)i;
235 }
236
237 return BLK_STS_IOERR;
238}
239EXPORT_SYMBOL_GPL(errno_to_blk_status);
240
241int blk_status_to_errno(blk_status_t status)
242{
243 int idx = (__force int)status;
244
245 if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
246 return -EIO;
247 return blk_errors[idx].errno;
248}
249EXPORT_SYMBOL_GPL(blk_status_to_errno);
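
/*
 * Note on the two conversion helpers above: the mapping is driven entirely
 * by the blk_errors[] table, so e.g. errno_to_blk_status(-ENOSPC) yields
 * BLK_STS_NOSPC and blk_status_to_errno(BLK_STS_NOSPC) yields -ENOSPC
 * again.  Any errno without a table entry collapses to BLK_STS_IOERR, so
 * the round trip is lossy for unlisted error codes.
 */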
250
251static void print_req_error(struct request *req, blk_status_t status)
252{
253 int idx = (__force int)status;
254
255 if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
256 return;
257
258 printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
259 __func__, blk_errors[idx].name, req->rq_disk ?
260 req->rq_disk->disk_name : "?",
261 (unsigned long long)blk_rq_pos(req));
262}
263
264static void req_bio_endio(struct request *rq, struct bio *bio,
265 unsigned int nbytes, blk_status_t error)
266{
267 if (error)
268 bio->bi_status = error;
269
270 if (unlikely(rq->rq_flags & RQF_QUIET))
271 bio_set_flag(bio, BIO_QUIET);
272
273 bio_advance(bio, nbytes);
274
275
276 if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
277 bio_endio(bio);
278}
279
280void blk_dump_rq_flags(struct request *rq, char *msg)
281{
282 printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg,
283 rq->rq_disk ? rq->rq_disk->disk_name : "?",
284 (unsigned long long) rq->cmd_flags);
285
286 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
287 (unsigned long long)blk_rq_pos(rq),
288 blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
289 printk(KERN_INFO " bio %p, biotail %p, len %u\n",
290 rq->bio, rq->biotail, blk_rq_bytes(rq));
291}
292EXPORT_SYMBOL(blk_dump_rq_flags);
293
294static void blk_delay_work(struct work_struct *work)
295{
296 struct request_queue *q;
297
298 q = container_of(work, struct request_queue, delay_work.work);
299 spin_lock_irq(q->queue_lock);
300 __blk_run_queue(q);
301 spin_unlock_irq(q->queue_lock);
302}
303
304
305
306
307
308
309
310
311
312
313
314void blk_delay_queue(struct request_queue *q, unsigned long msecs)
315{
316 lockdep_assert_held(q->queue_lock);
317 WARN_ON_ONCE(q->mq_ops);
318
319 if (likely(!blk_queue_dead(q)))
320 queue_delayed_work(kblockd_workqueue, &q->delay_work,
321 msecs_to_jiffies(msecs));
322}
323EXPORT_SYMBOL(blk_delay_queue);
324
325
326
327
328
329
330
331
332
333
334void blk_start_queue_async(struct request_queue *q)
335{
336 lockdep_assert_held(q->queue_lock);
337 WARN_ON_ONCE(q->mq_ops);
338
339 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
340 blk_run_queue_async(q);
341}
342EXPORT_SYMBOL(blk_start_queue_async);
343
344
345
346
347
348
349
350
351
352
353void blk_start_queue(struct request_queue *q)
354{
355 lockdep_assert_held(q->queue_lock);
356 WARN_ON_ONCE(q->mq_ops);
357
358 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
359 __blk_run_queue(q);
360}
361EXPORT_SYMBOL(blk_start_queue);
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377void blk_stop_queue(struct request_queue *q)
378{
379 lockdep_assert_held(q->queue_lock);
380 WARN_ON_ONCE(q->mq_ops);
381
382 cancel_delayed_work(&q->delay_work);
383 queue_flag_set(QUEUE_FLAG_STOPPED, q);
384}
385EXPORT_SYMBOL(blk_stop_queue);
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
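/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * The block layer may perform asynchronous callback activity on a queue,
 * such as running the timeout work or delayed queue runs.  A block device
 * may call blk_sync_queue() to ensure that any such activity has been
 * cancelled before it releases the resources those callbacks use.  For
 * blk-mq queues this also cancels the requeue work and the per-hctx run
 * work; for legacy queues it cancels the delay work.
 */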
405void blk_sync_queue(struct request_queue *q)
406{
407 del_timer_sync(&q->timeout);
408 cancel_work_sync(&q->timeout_work);
409
410 if (q->mq_ops) {
411 struct blk_mq_hw_ctx *hctx;
412 int i;
413
414 cancel_delayed_work_sync(&q->requeue_work);
415 queue_for_each_hw_ctx(q, hctx, i)
416 cancel_delayed_work_sync(&hctx->run_work);
417 } else {
418 cancel_delayed_work_sync(&q->delay_work);
419 }
420}
421EXPORT_SYMBOL(blk_sync_queue);
422
423
424
425
426
427
428
429
430int blk_set_preempt_only(struct request_queue *q)
431{
432 return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
433}
434EXPORT_SYMBOL_GPL(blk_set_preempt_only);
435
436void blk_clear_preempt_only(struct request_queue *q)
437{
438 blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
439 wake_up_all(&q->mq_freeze_wq);
440}
441EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
442
443
444
445
446
447
448
449
450
451
452
453
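/**
 * __blk_run_queue_uncond - run a queue whether or not it has been stopped
 * @q: the queue to run
 *
 * Invoke request handling on a queue if there are any pending requests,
 * regardless of whether the queue has been stopped.  Must be called with
 * the queue lock held and interrupts disabled.  The request_fn_active
 * counter bracketing the ->request_fn() call lets the drain code detect
 * request_fn() invocations that are still in progress.
 */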
454inline void __blk_run_queue_uncond(struct request_queue *q)
455{
456 lockdep_assert_held(q->queue_lock);
457 WARN_ON_ONCE(q->mq_ops);
458
459 if (unlikely(blk_queue_dead(q)))
460 return;
461
462
463
464
465
466
467
468
469 q->request_fn_active++;
470 q->request_fn(q);
471 q->request_fn_active--;
472}
473EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
474
475
476
477
478
479
480
481
482void __blk_run_queue(struct request_queue *q)
483{
484 lockdep_assert_held(q->queue_lock);
485 WARN_ON_ONCE(q->mq_ops);
486
487 if (unlikely(blk_queue_stopped(q)))
488 return;
489
490 __blk_run_queue_uncond(q);
491}
492EXPORT_SYMBOL(__blk_run_queue);
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507void blk_run_queue_async(struct request_queue *q)
508{
509 lockdep_assert_held(q->queue_lock);
510 WARN_ON_ONCE(q->mq_ops);
511
512 if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
513 mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
514}
515EXPORT_SYMBOL(blk_run_queue_async);
516
517
518
519
520
521
522
523
524
525void blk_run_queue(struct request_queue *q)
526{
527 unsigned long flags;
528
529 WARN_ON_ONCE(q->mq_ops);
530
531 spin_lock_irqsave(q->queue_lock, flags);
532 __blk_run_queue(q);
533 spin_unlock_irqrestore(q->queue_lock, flags);
534}
535EXPORT_SYMBOL(blk_run_queue);
536
537void blk_put_queue(struct request_queue *q)
538{
539 kobject_put(&q->kobj);
540}
541EXPORT_SYMBOL(blk_put_queue);
542
543
544
545
546
547
548
549
550
551
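/**
 * __blk_drain_queue - drain requests from request_queue
 * @q: queue to drain
 * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV
 *
 * Drain requests from @q.  If @drain_all is set, all requests are drained.
 * If not, only ELVPRIV requests are drained.  The caller is responsible
 * for ensuring that no new requests which need to be drained are queued.
 */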
552static void __blk_drain_queue(struct request_queue *q, bool drain_all)
553 __releases(q->queue_lock)
554 __acquires(q->queue_lock)
555{
556 int i;
557
558 lockdep_assert_held(q->queue_lock);
559 WARN_ON_ONCE(q->mq_ops);
560
561 while (true) {
562 bool drain = false;
563
564
565
566
567
568 if (q->elevator)
569 elv_drain_elevator(q);
570
571 blkcg_drain_queue(q);
572
573
574
575
576
577
578
579
580 if (!list_empty(&q->queue_head) && q->request_fn)
581 __blk_run_queue(q);
582
583 drain |= q->nr_rqs_elvpriv;
584 drain |= q->request_fn_active;
585
586
587
588
589
590
591 if (drain_all) {
592 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
593 drain |= !list_empty(&q->queue_head);
594 for (i = 0; i < 2; i++) {
595 drain |= q->nr_rqs[i];
596 drain |= q->in_flight[i];
597 if (fq)
598 drain |= !list_empty(&fq->flush_queue[i]);
599 }
600 }
601
602 if (!drain)
603 break;
604
605 spin_unlock_irq(q->queue_lock);
606
607 msleep(10);
608
609 spin_lock_irq(q->queue_lock);
610 }
611
612
613
614
615
616
617 if (q->request_fn) {
618 struct request_list *rl;
619
620 blk_queue_for_each_rl(rl, q)
621 for (i = 0; i < ARRAY_SIZE(rl->wait); i++)
622 wake_up_all(&rl->wait[i]);
623 }
624}
625
626void blk_drain_queue(struct request_queue *q)
627{
628 spin_lock_irq(q->queue_lock);
629 __blk_drain_queue(q, true);
630 spin_unlock_irq(q->queue_lock);
631}
632
633
634
635
636
637
638
639
640
641
642
643void blk_queue_bypass_start(struct request_queue *q)
644{
645 WARN_ON_ONCE(q->mq_ops);
646
647 spin_lock_irq(q->queue_lock);
648 q->bypass_depth++;
649 queue_flag_set(QUEUE_FLAG_BYPASS, q);
650 spin_unlock_irq(q->queue_lock);
651
652
653
654
655
656
657 if (blk_queue_init_done(q)) {
658 spin_lock_irq(q->queue_lock);
659 __blk_drain_queue(q, false);
660 spin_unlock_irq(q->queue_lock);
661
662
663 synchronize_rcu();
664 }
665}
666EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
667
668
669
670
671
672
673
674
675
676
677void blk_queue_bypass_end(struct request_queue *q)
678{
679 spin_lock_irq(q->queue_lock);
680 if (!--q->bypass_depth)
681 queue_flag_clear(QUEUE_FLAG_BYPASS, q);
682 WARN_ON_ONCE(q->bypass_depth < 0);
683 spin_unlock_irq(q->queue_lock);
684}
685EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
686
void blk_set_queue_dying(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_DYING, q);

	/*
	 * When the DYING flag is set we must block new requests from
	 * entering the queue, so start freezing it to prevent I/O from
	 * crossing blk_queue_enter().
	 */
	blk_freeze_queue_start(q);

	if (q->mq_ops)
		blk_mq_wake_waiters(q);
	else {
		struct request_list *rl;

		spin_lock_irq(q->queue_lock);
		blk_queue_for_each_rl(rl, q) {
			if (rl->rq_pool) {
				wake_up_all(&rl->wait[BLK_RW_SYNC]);
				wake_up_all(&rl->wait[BLK_RW_ASYNC]);
			}
		}
		spin_unlock_irq(q->queue_lock);
	}

	/* Make blk_queue_enter() reexamine the DYING flag. */
	wake_up_all(&q->mq_freeze_wq);
}
EXPORT_SYMBOL_GPL(blk_set_queue_dying);
717
718
719void blk_exit_queue(struct request_queue *q)
720{
721
722
723
724
725
726 if (q->elevator) {
727 ioc_clear_queue(q);
728 elevator_exit(q, q->elevator);
729 q->elevator = NULL;
730 }
731
732
733
734
735
736
737 blkcg_exit_queue(q);
738
739
740
741
742
743
744 bdi_put(q->backing_dev_info);
745}
746
747
748
749
750
751
752
753
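/**
 * blk_cleanup_queue - shutdown a request queue
 * @q: request queue to shutdown
 *
 * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and
 * put it.  All future requests will be failed immediately with -ENODEV.
 */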
754void blk_cleanup_queue(struct request_queue *q)
755{
756 spinlock_t *lock = q->queue_lock;
757
758
759 mutex_lock(&q->sysfs_lock);
760 blk_set_queue_dying(q);
761 spin_lock_irq(lock);
762
763
764
765
766
767
768
769
770
771
772 q->bypass_depth++;
773 queue_flag_set(QUEUE_FLAG_BYPASS, q);
774
775 queue_flag_set(QUEUE_FLAG_NOMERGES, q);
776 queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
777 queue_flag_set(QUEUE_FLAG_DYING, q);
778 spin_unlock_irq(lock);
779 mutex_unlock(&q->sysfs_lock);
780
781
782
783
784
785 blk_freeze_queue(q);
786 spin_lock_irq(lock);
787 queue_flag_set(QUEUE_FLAG_DEAD, q);
788 spin_unlock_irq(lock);
789
790
791
792
793
794
795
796
797
798
799
800 if (q->mq_ops && blk_queue_init_done(q))
801 blk_mq_quiesce_queue(q);
802
803
804 blk_flush_integrity();
805
806
807 del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
808 blk_sync_queue(q);
809
810
811
812
813
814 WARN_ON_ONCE(q->kobj.state_in_sysfs);
815
816 blk_exit_queue(q);
817
818 if (q->mq_ops)
819 blk_mq_free_queue(q);
820 percpu_ref_exit(&q->q_usage_counter);
821
822 spin_lock_irq(lock);
823 if (q->queue_lock != &q->__queue_lock)
824 q->queue_lock = &q->__queue_lock;
825 spin_unlock_irq(lock);
826
827
828 blk_put_queue(q);
829}
830EXPORT_SYMBOL(blk_cleanup_queue);
831
832
833static void *alloc_request_simple(gfp_t gfp_mask, void *data)
834{
835 struct request_queue *q = data;
836
837 return kmem_cache_alloc_node(request_cachep, gfp_mask, q->node);
838}
839
840static void free_request_simple(void *element, void *data)
841{
842 kmem_cache_free(request_cachep, element);
843}
844
845static void *alloc_request_size(gfp_t gfp_mask, void *data)
846{
847 struct request_queue *q = data;
848 struct request *rq;
849
850 rq = kmalloc_node(sizeof(struct request) + q->cmd_size, gfp_mask,
851 q->node);
852 if (rq && q->init_rq_fn && q->init_rq_fn(q, rq, gfp_mask) < 0) {
853 kfree(rq);
854 rq = NULL;
855 }
856 return rq;
857}
858
859static void free_request_size(void *element, void *data)
860{
861 struct request_queue *q = data;
862
863 if (q->exit_rq_fn)
864 q->exit_rq_fn(q, element);
865 kfree(element);
866}
867
868int blk_init_rl(struct request_list *rl, struct request_queue *q,
869 gfp_t gfp_mask)
870{
871 if (unlikely(rl->rq_pool) || q->mq_ops)
872 return 0;
873
874 rl->q = q;
875 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
876 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
877 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
878 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
879
880 if (q->cmd_size) {
881 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ,
882 alloc_request_size, free_request_size,
883 q, gfp_mask, q->node);
884 } else {
885 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ,
886 alloc_request_simple, free_request_simple,
887 q, gfp_mask, q->node);
888 }
889 if (!rl->rq_pool)
890 return -ENOMEM;
891
892 if (rl != &q->root_rl)
893 WARN_ON_ONCE(!blk_get_queue(q));
894
895 return 0;
896}
897
898void blk_exit_rl(struct request_queue *q, struct request_list *rl)
899{
900 if (rl->rq_pool) {
901 mempool_destroy(rl->rq_pool);
902 if (rl != &q->root_rl)
903 blk_put_queue(q);
904 }
905}
906
907struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
908{
909 return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL);
910}
911EXPORT_SYMBOL(blk_alloc_queue);
/**
 * blk_queue_enter() - try to increase q->q_usage_counter
 * @q: request queue pointer
 * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT
 */
int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
{
	const bool preempt = flags & BLK_MQ_REQ_PREEMPT;

	while (true) {
		bool success = false;

		rcu_read_lock();
		if (percpu_ref_tryget_live(&q->q_usage_counter)) {
			/*
			 * A queue in "preempt only" mode admits only requests
			 * that carry BLK_MQ_REQ_PREEMPT; everybody else drops
			 * the reference again and waits below for the flag to
			 * be cleared or for the queue to die.
			 */
			if (preempt || !blk_queue_preempt_only(q)) {
				success = true;
			} else {
				percpu_ref_put(&q->q_usage_counter);
			}
		}
		rcu_read_unlock();

		if (success)
			return 0;

		if (flags & BLK_MQ_REQ_NOWAIT)
			return -EBUSY;

		/*
		 * Pairs with the barrier in blk_freeze_queue_start(): the
		 * read of the percpu ref state above must be ordered before
		 * the reads of mq_freeze_depth and the dying flag in the
		 * wait condition below, otherwise the wait could miss a
		 * wakeup and never return.
		 */
		smp_rmb();

		wait_event(q->mq_freeze_wq,
			   (atomic_read(&q->mq_freeze_depth) == 0 &&
			    (preempt || !blk_queue_preempt_only(q))) ||
			   blk_queue_dying(q));
		if (blk_queue_dying(q))
			return -ENODEV;
	}
}
963
964void blk_queue_exit(struct request_queue *q)
965{
966 percpu_ref_put(&q->q_usage_counter);
967}
968
969static void blk_queue_usage_counter_release(struct percpu_ref *ref)
970{
971 struct request_queue *q =
972 container_of(ref, struct request_queue, q_usage_counter);
973
974 wake_up_all(&q->mq_freeze_wq);
975}
976
977static void blk_rq_timed_out_timer(struct timer_list *t)
978{
979 struct request_queue *q = from_timer(q, t, timeout);
980
981 kblockd_schedule_work(&q->timeout_work);
982}
983
984
985
986
987
988
989
990
991
992
993
994
995
996
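/**
 * blk_alloc_queue_node - allocate a request queue
 * @gfp_mask: memory allocation flags
 * @node_id: NUMA node to allocate memory from
 * @lock: for legacy queues, pointer to the spinlock that will be used to
 *	serialize calls to the legacy .request_fn() callback; ignored for
 *	blk-mq request queues
 *
 * Note: pass the queue lock as the third argument to this function instead
 * of setting the queue lock pointer explicitly afterwards, so that the lock
 * pointer is already in place by the time blkcg_init_queue() runs.
 */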
997struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
998 spinlock_t *lock)
999{
1000 struct request_queue *q;
1001 int ret;
1002
1003 q = kmem_cache_alloc_node(blk_requestq_cachep,
1004 gfp_mask | __GFP_ZERO, node_id);
1005 if (!q)
1006 return NULL;
1007
1008 INIT_LIST_HEAD(&q->queue_head);
1009 q->last_merge = NULL;
1010 q->end_sector = 0;
1011 q->boundary_rq = NULL;
1012
1013 q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
1014 if (q->id < 0)
1015 goto fail_q;
1016
1017 ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
1018 if (ret)
1019 goto fail_id;
1020
1021 q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
1022 if (!q->backing_dev_info)
1023 goto fail_split;
1024
1025 q->stats = blk_alloc_queue_stats();
1026 if (!q->stats)
1027 goto fail_stats;
1028
1029 q->backing_dev_info->ra_pages =
1030 (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
1031 q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
1032 q->backing_dev_info->name = "block";
1033 q->node = node_id;
1034
1035 timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
1036 laptop_mode_timer_fn, 0);
1037 timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
1038 INIT_WORK(&q->timeout_work, NULL);
1039 INIT_LIST_HEAD(&q->timeout_list);
1040 INIT_LIST_HEAD(&q->icq_list);
1041#ifdef CONFIG_BLK_CGROUP
1042 INIT_LIST_HEAD(&q->blkg_list);
1043#endif
1044 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
1045
1046 kobject_init(&q->kobj, &blk_queue_ktype);
1047
1048#ifdef CONFIG_BLK_DEV_IO_TRACE
1049 mutex_init(&q->blk_trace_mutex);
1050#endif
1051 mutex_init(&q->sysfs_lock);
1052 spin_lock_init(&q->__queue_lock);
1053
1054 if (!q->mq_ops)
1055 q->queue_lock = lock ? : &q->__queue_lock;
1056
1057
1058
1059
1060
1061
1062
1063 q->bypass_depth = 1;
1064 queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
1065
1066 init_waitqueue_head(&q->mq_freeze_wq);
1067
1068
1069
1070
1071
1072 if (percpu_ref_init(&q->q_usage_counter,
1073 blk_queue_usage_counter_release,
1074 PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
1075 goto fail_bdi;
1076
1077 if (blkcg_init_queue(q))
1078 goto fail_ref;
1079
1080 return q;
1081
1082fail_ref:
1083 percpu_ref_exit(&q->q_usage_counter);
1084fail_bdi:
1085 blk_free_queue_stats(q->stats);
1086fail_stats:
1087 bdi_put(q->backing_dev_info);
1088fail_split:
1089 bioset_exit(&q->bio_split);
1090fail_id:
1091 ida_simple_remove(&blk_queue_ida, q->id);
1092fail_q:
1093 kmem_cache_free(blk_requestq_cachep, q);
1094 return NULL;
1095}
1096EXPORT_SYMBOL(blk_alloc_queue_node);
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
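/**
 * blk_init_queue - prepare a request queue for use with a block device
 * @rfn: the function to be called to process requests placed on the queue
 * @lock: request queue spin lock
 *
 * Allocates a request_queue and sets it up for the legacy single-queue I/O
 * path: @rfn is invoked, with @lock held, whenever there are requests to be
 * processed, and the driver dequeues them with blk_fetch_request() or
 * blk_peek_request()/blk_start_request().
 *
 * The returned queue must be released with blk_cleanup_queue() when it is
 * no longer needed.  Returns %NULL if the queue could not be allocated or
 * initialized.
 */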
1131struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
1132{
1133 return blk_init_queue_node(rfn, lock, NUMA_NO_NODE);
1134}
1135EXPORT_SYMBOL(blk_init_queue);
1136
1137struct request_queue *
1138blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
1139{
1140 struct request_queue *q;
1141
1142 q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
1143 if (!q)
1144 return NULL;
1145
1146 q->request_fn = rfn;
1147 if (blk_init_allocated_queue(q) < 0) {
1148 blk_cleanup_queue(q);
1149 return NULL;
1150 }
1151
1152 return q;
1153}
1154EXPORT_SYMBOL(blk_init_queue_node);
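
/*
 * Minimal usage sketch for the legacy setup path above.  The device name,
 * lock and handler below are hypothetical and only illustrate the calling
 * convention; they are not part of this file:
 *
 *	static DEFINE_SPINLOCK(mydev_lock);
 *
 *	static void mydev_request_fn(struct request_queue *q)
 *	{
 *		struct request *rq;
 *
 *		while ((rq = blk_fetch_request(q)) != NULL)
 *			__blk_end_request_all(rq, mydev_do_io(rq) ?
 *					      BLK_STS_IOERR : BLK_STS_OK);
 *	}
 *
 *	q = blk_init_queue(mydev_request_fn, &mydev_lock);
 *	if (!q)
 *		return -ENOMEM;
 *
 * The request_fn runs with mydev_lock held, and __blk_end_request_all()
 * also expects that lock to be held, so the simplest drivers need no extra
 * locking of their own around request completion.
 */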
1155
1156static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
1157
1158
1159int blk_init_allocated_queue(struct request_queue *q)
1160{
1161 WARN_ON_ONCE(q->mq_ops);
1162
1163 q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
1164 if (!q->fq)
1165 return -ENOMEM;
1166
1167 if (q->init_rq_fn && q->init_rq_fn(q, q->fq->flush_rq, GFP_KERNEL))
1168 goto out_free_flush_queue;
1169
1170 if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
1171 goto out_exit_flush_rq;
1172
1173 INIT_WORK(&q->timeout_work, blk_timeout_work);
1174 q->queue_flags |= QUEUE_FLAG_DEFAULT;
1175
1176
1177
1178
1179 blk_queue_make_request(q, blk_queue_bio);
1180
1181 q->sg_reserved_size = INT_MAX;
1182
1183 if (elevator_init(q))
1184 goto out_exit_flush_rq;
1185 return 0;
1186
1187out_exit_flush_rq:
1188 if (q->exit_rq_fn)
1189 q->exit_rq_fn(q, q->fq->flush_rq);
1190out_free_flush_queue:
1191 blk_free_flush_queue(q->fq);
1192 q->fq = NULL;
1193 return -ENOMEM;
1194}
1195EXPORT_SYMBOL(blk_init_allocated_queue);
1196
1197bool blk_get_queue(struct request_queue *q)
1198{
1199 if (likely(!blk_queue_dying(q))) {
1200 __blk_get_queue(q);
1201 return true;
1202 }
1203
1204 return false;
1205}
1206EXPORT_SYMBOL(blk_get_queue);
1207
1208static inline void blk_free_request(struct request_list *rl, struct request *rq)
1209{
1210 if (rq->rq_flags & RQF_ELVPRIV) {
1211 elv_put_request(rl->q, rq);
1212 if (rq->elv.icq)
1213 put_io_context(rq->elv.icq->ioc);
1214 }
1215
1216 mempool_free(rq, rl->rq_pool);
1217}
1218
1219
1220
1221
1222
1223static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
1224{
1225 if (!ioc)
1226 return 0;
1227
1228
1229
1230
1231
1232
1233 return ioc->nr_batch_requests == q->nr_batching ||
1234 (ioc->nr_batch_requests > 0
1235 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
1236}
1237
1238
1239
1240
1241
1242
1243
1244static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
1245{
1246 if (!ioc || ioc_batching(q, ioc))
1247 return;
1248
1249 ioc->nr_batch_requests = q->nr_batching;
1250 ioc->last_waited = jiffies;
1251}
1252
1253static void __freed_request(struct request_list *rl, int sync)
1254{
1255 struct request_queue *q = rl->q;
1256
1257 if (rl->count[sync] < queue_congestion_off_threshold(q))
1258 blk_clear_congested(rl, sync);
1259
1260 if (rl->count[sync] + 1 <= q->nr_requests) {
1261 if (waitqueue_active(&rl->wait[sync]))
1262 wake_up(&rl->wait[sync]);
1263
1264 blk_clear_rl_full(rl, sync);
1265 }
1266}
1267
1268
1269
1270
1271
1272static void freed_request(struct request_list *rl, bool sync,
1273 req_flags_t rq_flags)
1274{
1275 struct request_queue *q = rl->q;
1276
1277 q->nr_rqs[sync]--;
1278 rl->count[sync]--;
1279 if (rq_flags & RQF_ELVPRIV)
1280 q->nr_rqs_elvpriv--;
1281
1282 __freed_request(rl, sync);
1283
1284 if (unlikely(rl->starved[sync ^ 1]))
1285 __freed_request(rl, sync ^ 1);
1286}
1287
1288int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
1289{
1290 struct request_list *rl;
1291 int on_thresh, off_thresh;
1292
1293 WARN_ON_ONCE(q->mq_ops);
1294
1295 spin_lock_irq(q->queue_lock);
1296 q->nr_requests = nr;
1297 blk_queue_congestion_threshold(q);
1298 on_thresh = queue_congestion_on_threshold(q);
1299 off_thresh = queue_congestion_off_threshold(q);
1300
1301 blk_queue_for_each_rl(rl, q) {
1302 if (rl->count[BLK_RW_SYNC] >= on_thresh)
1303 blk_set_congested(rl, BLK_RW_SYNC);
1304 else if (rl->count[BLK_RW_SYNC] < off_thresh)
1305 blk_clear_congested(rl, BLK_RW_SYNC);
1306
1307 if (rl->count[BLK_RW_ASYNC] >= on_thresh)
1308 blk_set_congested(rl, BLK_RW_ASYNC);
1309 else if (rl->count[BLK_RW_ASYNC] < off_thresh)
1310 blk_clear_congested(rl, BLK_RW_ASYNC);
1311
1312 if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
1313 blk_set_rl_full(rl, BLK_RW_SYNC);
1314 } else {
1315 blk_clear_rl_full(rl, BLK_RW_SYNC);
1316 wake_up(&rl->wait[BLK_RW_SYNC]);
1317 }
1318
1319 if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
1320 blk_set_rl_full(rl, BLK_RW_ASYNC);
1321 } else {
1322 blk_clear_rl_full(rl, BLK_RW_ASYNC);
1323 wake_up(&rl->wait[BLK_RW_ASYNC]);
1324 }
1325 }
1326
1327 spin_unlock_irq(q->queue_lock);
1328 return 0;
1329}
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
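/**
 * __get_request - get a free request
 * @rl: request list to allocate from
 * @op: operation and flags
 * @bio: bio to allocate request for (can be %NULL)
 * @flags: BLK_MQ_REQ_* flags
 * @gfp_mask: allocator flags
 *
 * Get a free request from @q.  This function may fail under memory
 * pressure or if @q is dead.
 *
 * Must be called with @q->queue_lock held.
 * Returns ERR_PTR on failure, with @q->queue_lock held.
 * Returns request pointer on success, with @q->queue_lock *not held*.
 */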
1346static struct request *__get_request(struct request_list *rl, unsigned int op,
1347 struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp_mask)
1348{
1349 struct request_queue *q = rl->q;
1350 struct request *rq;
1351 struct elevator_type *et = q->elevator->type;
1352 struct io_context *ioc = rq_ioc(bio);
1353 struct io_cq *icq = NULL;
1354 const bool is_sync = op_is_sync(op);
1355 int may_queue;
1356 req_flags_t rq_flags = RQF_ALLOCED;
1357
1358 lockdep_assert_held(q->queue_lock);
1359
1360 if (unlikely(blk_queue_dying(q)))
1361 return ERR_PTR(-ENODEV);
1362
1363 may_queue = elv_may_queue(q, op);
1364 if (may_queue == ELV_MQUEUE_NO)
1365 goto rq_starved;
1366
1367 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
1368 if (rl->count[is_sync]+1 >= q->nr_requests) {
1369
1370
1371
1372
1373
1374
1375 if (!blk_rl_full(rl, is_sync)) {
1376 ioc_set_batching(q, ioc);
1377 blk_set_rl_full(rl, is_sync);
1378 } else {
1379 if (may_queue != ELV_MQUEUE_MUST
1380 && !ioc_batching(q, ioc)) {
1381
1382
1383
1384
1385
1386 return ERR_PTR(-ENOMEM);
1387 }
1388 }
1389 }
1390 blk_set_congested(rl, is_sync);
1391 }
1392
1393
1394
1395
1396
1397
1398 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
1399 return ERR_PTR(-ENOMEM);
1400
1401 q->nr_rqs[is_sync]++;
1402 rl->count[is_sync]++;
1403 rl->starved[is_sync] = 0;
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418 if (!op_is_flush(op) && !blk_queue_bypass(q)) {
1419 rq_flags |= RQF_ELVPRIV;
1420 q->nr_rqs_elvpriv++;
1421 if (et->icq_cache && ioc)
1422 icq = ioc_lookup_icq(ioc, q);
1423 }
1424
1425 if (blk_queue_io_stat(q))
1426 rq_flags |= RQF_IO_STAT;
1427 spin_unlock_irq(q->queue_lock);
1428
1429
1430 rq = mempool_alloc(rl->rq_pool, gfp_mask);
1431 if (!rq)
1432 goto fail_alloc;
1433
1434 blk_rq_init(q, rq);
1435 blk_rq_set_rl(rq, rl);
1436 rq->cmd_flags = op;
1437 rq->rq_flags = rq_flags;
1438 if (flags & BLK_MQ_REQ_PREEMPT)
1439 rq->rq_flags |= RQF_PREEMPT;
1440
1441
1442 if (rq_flags & RQF_ELVPRIV) {
1443 if (unlikely(et->icq_cache && !icq)) {
1444 if (ioc)
1445 icq = ioc_create_icq(ioc, q, gfp_mask);
1446 if (!icq)
1447 goto fail_elvpriv;
1448 }
1449
1450 rq->elv.icq = icq;
1451 if (unlikely(elv_set_request(q, rq, bio, gfp_mask)))
1452 goto fail_elvpriv;
1453
1454
1455 if (icq)
1456 get_io_context(icq->ioc);
1457 }
1458out:
1459
1460
1461
1462
1463
1464
1465 if (ioc_batching(q, ioc))
1466 ioc->nr_batch_requests--;
1467
1468 trace_block_getrq(q, bio, op);
1469 return rq;
1470
1471fail_elvpriv:
1472
1473
1474
1475
1476
1477
1478 printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n",
1479 __func__, dev_name(q->backing_dev_info->dev));
1480
1481 rq->rq_flags &= ~RQF_ELVPRIV;
1482 rq->elv.icq = NULL;
1483
1484 spin_lock_irq(q->queue_lock);
1485 q->nr_rqs_elvpriv--;
1486 spin_unlock_irq(q->queue_lock);
1487 goto out;
1488
1489fail_alloc:
1490
1491
1492
1493
1494
1495
1496
1497 spin_lock_irq(q->queue_lock);
1498 freed_request(rl, is_sync, rq_flags);
1499
1500
1501
1502
1503
1504
1505
1506
1507rq_starved:
1508 if (unlikely(rl->count[is_sync] == 0))
1509 rl->starved[is_sync] = 1;
1510 return ERR_PTR(-ENOMEM);
1511}
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528static struct request *get_request(struct request_queue *q, unsigned int op,
1529 struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp)
1530{
1531 const bool is_sync = op_is_sync(op);
1532 DEFINE_WAIT(wait);
1533 struct request_list *rl;
1534 struct request *rq;
1535
1536 lockdep_assert_held(q->queue_lock);
1537 WARN_ON_ONCE(q->mq_ops);
1538
1539 rl = blk_get_rl(q, bio);
1540retry:
1541 rq = __get_request(rl, op, bio, flags, gfp);
1542 if (!IS_ERR(rq))
1543 return rq;
1544
1545 if (op & REQ_NOWAIT) {
1546 blk_put_rl(rl);
1547 return ERR_PTR(-EAGAIN);
1548 }
1549
1550 if ((flags & BLK_MQ_REQ_NOWAIT) || unlikely(blk_queue_dying(q))) {
1551 blk_put_rl(rl);
1552 return rq;
1553 }
1554
1555
1556 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
1557 TASK_UNINTERRUPTIBLE);
1558
1559 trace_block_sleeprq(q, bio, op);
1560
1561 spin_unlock_irq(q->queue_lock);
1562 io_schedule();
1563
1564
1565
1566
1567
1568
1569 ioc_set_batching(q, current->io_context);
1570
1571 spin_lock_irq(q->queue_lock);
1572 finish_wait(&rl->wait[is_sync], &wait);
1573
1574 goto retry;
1575}
1576
1577
1578static struct request *blk_old_get_request(struct request_queue *q,
1579 unsigned int op, blk_mq_req_flags_t flags)
1580{
1581 struct request *rq;
1582 gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : GFP_NOIO;
1583 int ret = 0;
1584
1585 WARN_ON_ONCE(q->mq_ops);
1586
1587
1588 create_io_context(gfp_mask, q->node);
1589
1590 ret = blk_queue_enter(q, flags);
1591 if (ret)
1592 return ERR_PTR(ret);
1593 spin_lock_irq(q->queue_lock);
1594 rq = get_request(q, op, NULL, flags, gfp_mask);
1595 if (IS_ERR(rq)) {
1596 spin_unlock_irq(q->queue_lock);
1597 blk_queue_exit(q);
1598 return rq;
1599 }
1600
1601
1602 rq->__data_len = 0;
1603 rq->__sector = (sector_t) -1;
1604 rq->bio = rq->biotail = NULL;
1605 return rq;
1606}
1607
1608
1609
1610
1611
1612
1613
1614struct request *blk_get_request(struct request_queue *q, unsigned int op,
1615 blk_mq_req_flags_t flags)
1616{
1617 struct request *req;
1618
1619 WARN_ON_ONCE(op & REQ_NOWAIT);
1620 WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT));
1621
1622 if (q->mq_ops) {
1623 req = blk_mq_alloc_request(q, op, flags);
1624 if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
1625 q->mq_ops->initialize_rq_fn(req);
1626 } else {
1627 req = blk_old_get_request(q, op, flags);
1628 if (!IS_ERR(req) && q->initialize_rq_fn)
1629 q->initialize_rq_fn(req);
1630 }
1631
1632 return req;
1633}
1634EXPORT_SYMBOL(blk_get_request);
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646void blk_requeue_request(struct request_queue *q, struct request *rq)
1647{
1648 lockdep_assert_held(q->queue_lock);
1649 WARN_ON_ONCE(q->mq_ops);
1650
1651 blk_delete_timer(rq);
1652 blk_clear_rq_complete(rq);
1653 trace_block_rq_requeue(q, rq);
1654 rq_qos_requeue(q, rq);
1655
1656 if (rq->rq_flags & RQF_QUEUED)
1657 blk_queue_end_tag(q, rq);
1658
1659 BUG_ON(blk_queued_rq(rq));
1660
1661 elv_requeue_request(q, rq);
1662}
1663EXPORT_SYMBOL(blk_requeue_request);
1664
1665static void add_acct_request(struct request_queue *q, struct request *rq,
1666 int where)
1667{
1668 blk_account_io_start(rq, true);
1669 __elv_add_request(q, rq, where);
1670}
1671
1672static void part_round_stats_single(struct request_queue *q, int cpu,
1673 struct hd_struct *part, unsigned long now,
1674 unsigned int inflight)
1675{
1676 if (inflight) {
1677 __part_stat_add(cpu, part, time_in_queue,
1678 inflight * (now - part->stamp));
1679 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1680 }
1681 part->stamp = now;
1682}
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part)
1702{
1703 struct hd_struct *part2 = NULL;
1704 unsigned long now = jiffies;
1705 unsigned int inflight[2];
1706 int stats = 0;
1707
1708 if (part->stamp != now)
1709 stats |= 1;
1710
1711 if (part->partno) {
1712 part2 = &part_to_disk(part)->part0;
1713 if (part2->stamp != now)
1714 stats |= 2;
1715 }
1716
1717 if (!stats)
1718 return;
1719
1720 part_in_flight(q, part, inflight);
1721
1722 if (stats & 2)
1723 part_round_stats_single(q, cpu, part2, now, inflight[1]);
1724 if (stats & 1)
1725 part_round_stats_single(q, cpu, part, now, inflight[0]);
1726}
1727EXPORT_SYMBOL_GPL(part_round_stats);
1728
1729#ifdef CONFIG_PM
1730static void blk_pm_put_request(struct request *rq)
1731{
1732 if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending)
1733 pm_runtime_mark_last_busy(rq->q->dev);
1734}
1735#else
1736static inline void blk_pm_put_request(struct request *rq) {}
1737#endif
1738
1739void __blk_put_request(struct request_queue *q, struct request *req)
1740{
1741 req_flags_t rq_flags = req->rq_flags;
1742
1743 if (unlikely(!q))
1744 return;
1745
1746 if (q->mq_ops) {
1747 blk_mq_free_request(req);
1748 return;
1749 }
1750
1751 lockdep_assert_held(q->queue_lock);
1752
1753 blk_req_zone_write_unlock(req);
1754 blk_pm_put_request(req);
1755
1756 elv_completed_request(q, req);
1757
1758
1759 WARN_ON(req->bio != NULL);
1760
1761 rq_qos_done(q, req);
1762
1763
1764
1765
1766
1767 if (rq_flags & RQF_ALLOCED) {
1768 struct request_list *rl = blk_rq_rl(req);
1769 bool sync = op_is_sync(req->cmd_flags);
1770
1771 BUG_ON(!list_empty(&req->queuelist));
1772 BUG_ON(ELV_ON_HASH(req));
1773
1774 blk_free_request(rl, req);
1775 freed_request(rl, sync, rq_flags);
1776 blk_put_rl(rl);
1777 blk_queue_exit(q);
1778 }
1779}
1780EXPORT_SYMBOL_GPL(__blk_put_request);
1781
1782void blk_put_request(struct request *req)
1783{
1784 struct request_queue *q = req->q;
1785
1786 if (q->mq_ops)
1787 blk_mq_free_request(req);
1788 else {
1789 unsigned long flags;
1790
1791 spin_lock_irqsave(q->queue_lock, flags);
1792 __blk_put_request(q, req);
1793 spin_unlock_irqrestore(q->queue_lock, flags);
1794 }
1795}
1796EXPORT_SYMBOL(blk_put_request);
1797
1798bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
1799 struct bio *bio)
1800{
1801 const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
1802
1803 if (!ll_back_merge_fn(q, req, bio))
1804 return false;
1805
1806 trace_block_bio_backmerge(q, req, bio);
1807
1808 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1809 blk_rq_set_mixed_merge(req);
1810
1811 req->biotail->bi_next = bio;
1812 req->biotail = bio;
1813 req->__data_len += bio->bi_iter.bi_size;
1814 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1815
1816 blk_account_io_start(req, false);
1817 return true;
1818}
1819
1820bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
1821 struct bio *bio)
1822{
1823 const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
1824
1825 if (!ll_front_merge_fn(q, req, bio))
1826 return false;
1827
1828 trace_block_bio_frontmerge(q, req, bio);
1829
1830 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1831 blk_rq_set_mixed_merge(req);
1832
1833 bio->bi_next = req->bio;
1834 req->bio = bio;
1835
1836 req->__sector = bio->bi_iter.bi_sector;
1837 req->__data_len += bio->bi_iter.bi_size;
1838 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1839
1840 blk_account_io_start(req, false);
1841 return true;
1842}
1843
1844bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
1845 struct bio *bio)
1846{
1847 unsigned short segments = blk_rq_nr_discard_segments(req);
1848
1849 if (segments >= queue_max_discard_segments(q))
1850 goto no_merge;
1851 if (blk_rq_sectors(req) + bio_sectors(bio) >
1852 blk_rq_get_max_sectors(req, blk_rq_pos(req)))
1853 goto no_merge;
1854
1855 req->biotail->bi_next = bio;
1856 req->biotail = bio;
1857 req->__data_len += bio->bi_iter.bi_size;
1858 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1859 req->nr_phys_segments = segments + 1;
1860
1861 blk_account_io_start(req, false);
1862 return true;
1863no_merge:
1864 req_set_nomerge(q, req);
1865 return false;
1866}
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
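/**
 * blk_attempt_plug_merge - try to merge @bio into %current's plugged list
 * @q: request_queue the new bio is being queued at
 * @bio: new bio being queued
 * @request_count: out parameter for the number of traversed plugged requests
 * @same_queue_rq: optional out parameter, set to a request on the plug list
 *	that belongs to @q (may be %NULL)
 *
 * Determine whether @bio can be merged with a request already sitting on
 * %current's plug list and, if so, merge it.  Returns %true on a successful
 * merge, %false otherwise.  Only basic merging parameters are checked here;
 * the elevator is not consulted, because the plug list is private to the
 * submitting task and is handled without @q->queue_lock.  Callers must
 * ensure !blk_queue_nomerges(q) before calling.
 */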
1890bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1891 unsigned int *request_count,
1892 struct request **same_queue_rq)
1893{
1894 struct blk_plug *plug;
1895 struct request *rq;
1896 struct list_head *plug_list;
1897
1898 plug = current->plug;
1899 if (!plug)
1900 return false;
1901 *request_count = 0;
1902
1903 if (q->mq_ops)
1904 plug_list = &plug->mq_list;
1905 else
1906 plug_list = &plug->list;
1907
1908 list_for_each_entry_reverse(rq, plug_list, queuelist) {
1909 bool merged = false;
1910
1911 if (rq->q == q) {
1912 (*request_count)++;
1913
1914
1915
1916
1917
1918 if (same_queue_rq)
1919 *same_queue_rq = rq;
1920 }
1921
1922 if (rq->q != q || !blk_rq_merge_ok(rq, bio))
1923 continue;
1924
1925 switch (blk_try_merge(rq, bio)) {
1926 case ELEVATOR_BACK_MERGE:
1927 merged = bio_attempt_back_merge(q, rq, bio);
1928 break;
1929 case ELEVATOR_FRONT_MERGE:
1930 merged = bio_attempt_front_merge(q, rq, bio);
1931 break;
1932 case ELEVATOR_DISCARD_MERGE:
1933 merged = bio_attempt_discard_merge(q, rq, bio);
1934 break;
1935 default:
1936 break;
1937 }
1938
1939 if (merged)
1940 return true;
1941 }
1942
1943 return false;
1944}
1945
1946unsigned int blk_plug_queued_count(struct request_queue *q)
1947{
1948 struct blk_plug *plug;
1949 struct request *rq;
1950 struct list_head *plug_list;
1951 unsigned int ret = 0;
1952
1953 plug = current->plug;
1954 if (!plug)
1955 goto out;
1956
1957 if (q->mq_ops)
1958 plug_list = &plug->mq_list;
1959 else
1960 plug_list = &plug->list;
1961
1962 list_for_each_entry(rq, plug_list, queuelist) {
1963 if (rq->q == q)
1964 ret++;
1965 }
1966out:
1967 return ret;
1968}
1969
1970void blk_init_request_from_bio(struct request *req, struct bio *bio)
1971{
1972 struct io_context *ioc = rq_ioc(bio);
1973
1974 if (bio->bi_opf & REQ_RAHEAD)
1975 req->cmd_flags |= REQ_FAILFAST_MASK;
1976
1977 req->__sector = bio->bi_iter.bi_sector;
1978 if (ioprio_valid(bio_prio(bio)))
1979 req->ioprio = bio_prio(bio);
1980 else if (ioc)
1981 req->ioprio = ioc->ioprio;
1982 else
1983 req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
1984 req->write_hint = bio->bi_write_hint;
1985 blk_rq_bio_prep(req->q, req, bio);
1986}
1987EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
1988
1989static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1990{
1991 struct blk_plug *plug;
1992 int where = ELEVATOR_INSERT_SORT;
1993 struct request *req, *free;
1994 unsigned int request_count = 0;
1995
1996
1997
1998
1999
2000
2001 blk_queue_bounce(q, &bio);
2002
2003 blk_queue_split(q, &bio);
2004
2005 if (!bio_integrity_prep(bio))
2006 return BLK_QC_T_NONE;
2007
2008 if (op_is_flush(bio->bi_opf)) {
2009 spin_lock_irq(q->queue_lock);
2010 where = ELEVATOR_INSERT_FLUSH;
2011 goto get_rq;
2012 }
2013
2014
2015
2016
2017
2018 if (!blk_queue_nomerges(q)) {
2019 if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
2020 return BLK_QC_T_NONE;
2021 } else
2022 request_count = blk_plug_queued_count(q);
2023
2024 spin_lock_irq(q->queue_lock);
2025
2026 switch (elv_merge(q, &req, bio)) {
2027 case ELEVATOR_BACK_MERGE:
2028 if (!bio_attempt_back_merge(q, req, bio))
2029 break;
2030 elv_bio_merged(q, req, bio);
2031 free = attempt_back_merge(q, req);
2032 if (free)
2033 __blk_put_request(q, free);
2034 else
2035 elv_merged_request(q, req, ELEVATOR_BACK_MERGE);
2036 goto out_unlock;
2037 case ELEVATOR_FRONT_MERGE:
2038 if (!bio_attempt_front_merge(q, req, bio))
2039 break;
2040 elv_bio_merged(q, req, bio);
2041 free = attempt_front_merge(q, req);
2042 if (free)
2043 __blk_put_request(q, free);
2044 else
2045 elv_merged_request(q, req, ELEVATOR_FRONT_MERGE);
2046 goto out_unlock;
2047 default:
2048 break;
2049 }
2050
2051get_rq:
2052 rq_qos_throttle(q, bio, q->queue_lock);
2053
2054
2055
2056
2057
2058 blk_queue_enter_live(q);
2059 req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
2060 if (IS_ERR(req)) {
2061 blk_queue_exit(q);
2062 rq_qos_cleanup(q, bio);
2063 if (PTR_ERR(req) == -ENOMEM)
2064 bio->bi_status = BLK_STS_RESOURCE;
2065 else
2066 bio->bi_status = BLK_STS_IOERR;
2067 bio_endio(bio);
2068 goto out_unlock;
2069 }
2070
2071 rq_qos_track(q, req, bio);
2072
2073
2074
2075
2076
2077
2078
2079 blk_init_request_from_bio(req, bio);
2080
2081 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
2082 req->cpu = raw_smp_processor_id();
2083
2084 plug = current->plug;
2085 if (plug) {
2086
2087
2088
2089
2090
2091
2092
2093 if (!request_count || list_empty(&plug->list))
2094 trace_block_plug(q);
2095 else {
2096 struct request *last = list_entry_rq(plug->list.prev);
2097 if (request_count >= BLK_MAX_REQUEST_COUNT ||
2098 blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE) {
2099 blk_flush_plug_list(plug, false);
2100 trace_block_plug(q);
2101 }
2102 }
2103 list_add_tail(&req->queuelist, &plug->list);
2104 blk_account_io_start(req, true);
2105 } else {
2106 spin_lock_irq(q->queue_lock);
2107 add_acct_request(q, req, where);
2108 __blk_run_queue(q);
2109out_unlock:
2110 spin_unlock_irq(q->queue_lock);
2111 }
2112
2113 return BLK_QC_T_NONE;
2114}
2115
2116static void handle_bad_sector(struct bio *bio, sector_t maxsector)
2117{
2118 char b[BDEVNAME_SIZE];
2119
2120 printk(KERN_INFO "attempt to access beyond end of device\n");
2121 printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
2122 bio_devname(bio, b), bio->bi_opf,
2123 (unsigned long long)bio_end_sector(bio),
2124 (long long)maxsector);
2125}
2126
2127#ifdef CONFIG_FAIL_MAKE_REQUEST
2128
2129static DECLARE_FAULT_ATTR(fail_make_request);
2130
2131static int __init setup_fail_make_request(char *str)
2132{
2133 return setup_fault_attr(&fail_make_request, str);
2134}
2135__setup("fail_make_request=", setup_fail_make_request);
2136
2137static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
2138{
2139 return part->make_it_fail && should_fail(&fail_make_request, bytes);
2140}
2141
2142static int __init fail_make_request_debugfs(void)
2143{
2144 struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
2145 NULL, &fail_make_request);
2146
2147 return PTR_ERR_OR_ZERO(dir);
2148}
2149
2150late_initcall(fail_make_request_debugfs);
2151
2152#else
2153
2154static inline bool should_fail_request(struct hd_struct *part,
2155 unsigned int bytes)
2156{
2157 return false;
2158}
2159
2160#endif
2161
static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
{
	const int op = bio_op(bio);

	if (part->policy && op_is_write(op)) {
		char b[BDEVNAME_SIZE];

		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
			return false;

		WARN_ONCE(1,
		       "generic_make_request: Trying to write "
			"to read-only block-device %s (partno %d)\n",
			bio_devname(bio, b), part->partno);
		/* Older lvm-tools actually trigger this */
		return false;
	}

	return false;
}
2182
2183static noinline int should_fail_bio(struct bio *bio)
2184{
2185 if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
2186 return -EIO;
2187 return 0;
2188}
2189ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
2190
2191
2192
2193
2194
2195
2196static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
2197{
2198 unsigned int nr_sectors = bio_sectors(bio);
2199
2200 if (nr_sectors && maxsector &&
2201 (nr_sectors > maxsector ||
2202 bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
2203 handle_bad_sector(bio, maxsector);
2204 return -EIO;
2205 }
2206 return 0;
2207}
2208
2209
2210
2211
2212static inline int blk_partition_remap(struct bio *bio)
2213{
2214 struct hd_struct *p;
2215 int ret = -EIO;
2216
2217 rcu_read_lock();
2218 p = __disk_get_part(bio->bi_disk, bio->bi_partno);
2219 if (unlikely(!p))
2220 goto out;
2221 if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
2222 goto out;
2223 if (unlikely(bio_check_ro(bio, p)))
2224 goto out;
2225
2226
2227
2228
2229
2230 if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
2231 if (bio_check_eod(bio, part_nr_sects_read(p)))
2232 goto out;
2233 bio->bi_iter.bi_sector += p->start_sect;
2234 trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
2235 bio->bi_iter.bi_sector - p->start_sect);
2236 }
2237 bio->bi_partno = 0;
2238 ret = 0;
2239out:
2240 rcu_read_unlock();
2241 return ret;
2242}
2243
2244static noinline_for_stack bool
2245generic_make_request_checks(struct bio *bio)
2246{
2247 struct request_queue *q;
2248 int nr_sectors = bio_sectors(bio);
2249 blk_status_t status = BLK_STS_IOERR;
2250 char b[BDEVNAME_SIZE];
2251
2252 might_sleep();
2253
2254 q = bio->bi_disk->queue;
2255 if (unlikely(!q)) {
2256 printk(KERN_ERR
2257 "generic_make_request: Trying to access "
2258 "nonexistent block-device %s (%Lu)\n",
2259 bio_devname(bio, b), (long long)bio->bi_iter.bi_sector);
2260 goto end_io;
2261 }
2262
2263
2264
2265
2266
2267 if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
2268 goto not_supported;
2269
2270 if (should_fail_bio(bio))
2271 goto end_io;
2272
2273 if (bio->bi_partno) {
2274 if (unlikely(blk_partition_remap(bio)))
2275 goto end_io;
2276 } else {
2277 if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
2278 goto end_io;
2279 if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
2280 goto end_io;
2281 }
2282
2283
2284
2285
2286
2287
2288 if (op_is_flush(bio->bi_opf) &&
2289 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
2290 bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
2291 if (!nr_sectors) {
2292 status = BLK_STS_OK;
2293 goto end_io;
2294 }
2295 }
2296
2297 switch (bio_op(bio)) {
2298 case REQ_OP_DISCARD:
2299 if (!blk_queue_discard(q))
2300 goto not_supported;
2301 break;
2302 case REQ_OP_SECURE_ERASE:
2303 if (!blk_queue_secure_erase(q))
2304 goto not_supported;
2305 break;
2306 case REQ_OP_WRITE_SAME:
2307 if (!q->limits.max_write_same_sectors)
2308 goto not_supported;
2309 break;
2310 case REQ_OP_ZONE_REPORT:
2311 case REQ_OP_ZONE_RESET:
2312 if (!blk_queue_is_zoned(q))
2313 goto not_supported;
2314 break;
2315 case REQ_OP_WRITE_ZEROES:
2316 if (!q->limits.max_write_zeroes_sectors)
2317 goto not_supported;
2318 break;
2319 default:
2320 break;
2321 }
2322
2323
2324
2325
2326
2327
2328
2329 create_io_context(GFP_ATOMIC, q->node);
2330
2331 if (!blkcg_bio_issue_check(q, bio))
2332 return false;
2333
2334 if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
2335 trace_block_bio_queue(q, bio);
2336
2337
2338
2339 bio_set_flag(bio, BIO_TRACE_COMPLETION);
2340 }
2341 return true;
2342
2343not_supported:
2344 status = BLK_STS_NOTSUPP;
2345end_io:
2346 bio->bi_status = status;
2347 bio_endio(bio);
2348 return false;
2349}
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
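/**
 * generic_make_request - hand a buffer to its device driver for I/O
 * @bio: the bio describing the location in memory and on the device
 *
 * This is the main entry point used to make I/O requests of block devices.
 * The caller must have set up bio->bi_disk, the starting sector and the
 * iterator so that the bio describes a contiguous area on the device;
 * bio_endio() will be called on completion.
 *
 * generic_make_request() may be called recursively when a ->make_request_fn
 * (e.g. a stacking driver such as md or dm) submits new bios.  To keep the
 * stack depth bounded, recursive submissions are not dispatched directly:
 * they are queued on current->bio_list and processed iteratively by the
 * outermost invocation, with bios for lower devices handled before further
 * bios for the current device.
 */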
2375blk_qc_t generic_make_request(struct bio *bio)
2376{
2377
2378
2379
2380
2381
2382
2383
2384 struct bio_list bio_list_on_stack[2];
2385 blk_mq_req_flags_t flags = 0;
2386 struct request_queue *q = bio->bi_disk->queue;
2387 blk_qc_t ret = BLK_QC_T_NONE;
2388
2389 if (bio->bi_opf & REQ_NOWAIT)
2390 flags = BLK_MQ_REQ_NOWAIT;
2391 if (bio_flagged(bio, BIO_QUEUE_ENTERED))
2392 blk_queue_enter_live(q);
2393 else if (blk_queue_enter(q, flags) < 0) {
2394 if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT))
2395 bio_wouldblock_error(bio);
2396 else
2397 bio_io_error(bio);
2398 return ret;
2399 }
2400
2401 if (!generic_make_request_checks(bio))
2402 goto out;
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414 if (current->bio_list) {
		bio_list_add(&current->bio_list[0], bio);

2416 goto out;
2417 }
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433 BUG_ON(bio->bi_next);
2434 bio_list_init(&bio_list_on_stack[0]);
2435 current->bio_list = bio_list_on_stack;
2436 do {
2437 bool enter_succeeded = true;
2438
2439 if (unlikely(q != bio->bi_disk->queue)) {
2440 if (q)
2441 blk_queue_exit(q);
2442 q = bio->bi_disk->queue;
2443 flags = 0;
2444 if (bio->bi_opf & REQ_NOWAIT)
2445 flags = BLK_MQ_REQ_NOWAIT;
2446 if (blk_queue_enter(q, flags) < 0) {
2447 enter_succeeded = false;
2448 q = NULL;
2449 }
2450 }
2451
2452 if (enter_succeeded) {
2453 struct bio_list lower, same;
2454
2455
2456 bio_list_on_stack[1] = bio_list_on_stack[0];
2457 bio_list_init(&bio_list_on_stack[0]);
2458 ret = q->make_request_fn(q, bio);
2459
2460
2461
2462
2463 bio_list_init(&lower);
2464 bio_list_init(&same);
2465 while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
2466 if (q == bio->bi_disk->queue)
2467 bio_list_add(&same, bio);
2468 else
2469 bio_list_add(&lower, bio);
2470
2471 bio_list_merge(&bio_list_on_stack[0], &lower);
2472 bio_list_merge(&bio_list_on_stack[0], &same);
2473 bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
2474 } else {
2475 if (unlikely(!blk_queue_dying(q) &&
2476 (bio->bi_opf & REQ_NOWAIT)))
2477 bio_wouldblock_error(bio);
2478 else
2479 bio_io_error(bio);
2480 }
2481 bio = bio_list_pop(&bio_list_on_stack[0]);
2482 } while (bio);
2483 current->bio_list = NULL;
2484
2485out:
2486 if (q)
2487 blk_queue_exit(q);
2488 return ret;
2489}
2490EXPORT_SYMBOL(generic_make_request);
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502blk_qc_t direct_make_request(struct bio *bio)
2503{
2504 struct request_queue *q = bio->bi_disk->queue;
2505 bool nowait = bio->bi_opf & REQ_NOWAIT;
2506 blk_qc_t ret;
2507
2508 if (!generic_make_request_checks(bio))
2509 return BLK_QC_T_NONE;
2510
2511 if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
2512 if (nowait && !blk_queue_dying(q))
2513 bio->bi_status = BLK_STS_AGAIN;
2514 else
2515 bio->bi_status = BLK_STS_IOERR;
2516 bio_endio(bio);
2517 return BLK_QC_T_NONE;
2518 }
2519
2520 ret = q->make_request_fn(q, bio);
2521 blk_queue_exit(q);
2522 return ret;
2523}
2524EXPORT_SYMBOL_GPL(direct_make_request);
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
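/**
 * submit_bio - submit a bio to the block device layer for I/O
 * @bio: the &struct bio which describes the I/O
 *
 * submit_bio() is very often used by filesystems and other upper-level
 * users of the block layer to hand I/O down to generic_make_request(); it
 * adds the per-task and VM accounting (PGPGIN/PGPGOUT and task read
 * accounting) plus the optional block_dump logging before submission.
 */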
2535blk_qc_t submit_bio(struct bio *bio)
2536{
2537
2538
2539
2540
2541 if (bio_has_data(bio)) {
2542 unsigned int count;
2543
2544 if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
2545 count = queue_logical_block_size(bio->bi_disk->queue) >> 9;
2546 else
2547 count = bio_sectors(bio);
2548
2549 if (op_is_write(bio_op(bio))) {
2550 count_vm_events(PGPGOUT, count);
2551 } else {
2552 task_io_account_read(bio->bi_iter.bi_size);
2553 count_vm_events(PGPGIN, count);
2554 }
2555
2556 if (unlikely(block_dump)) {
2557 char b[BDEVNAME_SIZE];
2558 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
2559 current->comm, task_pid_nr(current),
2560 op_is_write(bio_op(bio)) ? "WRITE" : "READ",
2561 (unsigned long long)bio->bi_iter.bi_sector,
2562 bio_devname(bio, b), count);
2563 }
2564 }
2565
2566 return generic_make_request(bio);
2567}
2568EXPORT_SYMBOL(submit_bio);
2569
2570bool blk_poll(struct request_queue *q, blk_qc_t cookie)
2571{
2572 if (!q->poll_fn || !blk_qc_t_valid(cookie))
2573 return false;
2574
2575 if (current->plug)
2576 blk_flush_plug_list(current->plug, false);
2577 return q->poll_fn(q, cookie);
2578}
2579EXPORT_SYMBOL_GPL(blk_poll);
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598static int blk_cloned_rq_check_limits(struct request_queue *q,
2599 struct request *rq)
2600{
2601 if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
2602 printk(KERN_ERR "%s: over max size limit.\n", __func__);
2603 return -EIO;
2604 }
2605
2606
2607
2608
2609
2610
2611
2612 blk_recalc_rq_segments(rq);
2613 if (rq->nr_phys_segments > queue_max_segments(q)) {
2614 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
2615 return -EIO;
2616 }
2617
2618 return 0;
2619}
2620
2621
2622
2623
2624
2625
2626blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2627{
2628 unsigned long flags;
2629 int where = ELEVATOR_INSERT_BACK;
2630
2631 if (blk_cloned_rq_check_limits(q, rq))
2632 return BLK_STS_IOERR;
2633
2634 if (rq->rq_disk &&
2635 should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
2636 return BLK_STS_IOERR;
2637
2638 if (q->mq_ops) {
2639 if (blk_queue_io_stat(q))
2640 blk_account_io_start(rq, true);
2641
2642
2643
2644
2645
2646 return blk_mq_request_issue_directly(rq);
2647 }
2648
2649 spin_lock_irqsave(q->queue_lock, flags);
2650 if (unlikely(blk_queue_dying(q))) {
2651 spin_unlock_irqrestore(q->queue_lock, flags);
2652 return BLK_STS_IOERR;
2653 }
2654
2655
2656
2657
2658
2659 BUG_ON(blk_queued_rq(rq));
2660
2661 if (op_is_flush(rq->cmd_flags))
2662 where = ELEVATOR_INSERT_FLUSH;
2663
2664 add_acct_request(q, rq, where);
2665 if (where == ELEVATOR_INSERT_FLUSH)
2666 __blk_run_queue(q);
2667 spin_unlock_irqrestore(q->queue_lock, flags);
2668
2669 return BLK_STS_OK;
2670}
2671EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686unsigned int blk_rq_err_bytes(const struct request *rq)
2687{
2688 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
2689 unsigned int bytes = 0;
2690 struct bio *bio;
2691
2692 if (!(rq->rq_flags & RQF_MIXED_MERGE))
2693 return blk_rq_bytes(rq);
2694
2695
2696
2697
2698
2699
2700
2701
2702 for (bio = rq->bio; bio; bio = bio->bi_next) {
2703 if ((bio->bi_opf & ff) != ff)
2704 break;
2705 bytes += bio->bi_iter.bi_size;
2706 }
2707
2708
2709 BUG_ON(blk_rq_bytes(rq) && !bytes);
2710 return bytes;
2711}
2712EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
2713
2714void blk_account_io_completion(struct request *req, unsigned int bytes)
2715{
2716 if (blk_do_io_stat(req)) {
2717 const int sgrp = op_stat_group(req_op(req));
2718 struct hd_struct *part;
2719 int cpu;
2720
2721 cpu = part_stat_lock();
2722 part = req->part;
2723 part_stat_add(cpu, part, sectors[sgrp], bytes >> 9);
2724 part_stat_unlock();
2725 }
2726}
2727
2728void blk_account_io_done(struct request *req, u64 now)
2729{
2730
2731
2732
2733
2734
2735 if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
2736 const int sgrp = op_stat_group(req_op(req));
2737 struct hd_struct *part;
2738 int cpu;
2739
2740 cpu = part_stat_lock();
2741 part = req->part;
2742
2743 part_stat_inc(cpu, part, ios[sgrp]);
2744 part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns);
2745 part_round_stats(req->q, cpu, part);
2746 part_dec_in_flight(req->q, part, rq_data_dir(req));
2747
2748 hd_struct_put(part);
2749 part_stat_unlock();
2750 }
2751}
2752
2753#ifdef CONFIG_PM
2754
2755
2756
2757
2758static bool blk_pm_allow_request(struct request *rq)
2759{
2760 switch (rq->q->rpm_status) {
2761 case RPM_RESUMING:
2762 case RPM_SUSPENDING:
2763 return rq->rq_flags & RQF_PM;
2764 case RPM_SUSPENDED:
2765 return false;
2766 default:
2767 return true;
2768 }
2769}
2770#else
2771static bool blk_pm_allow_request(struct request *rq)
2772{
2773 return true;
2774}
2775#endif
2776
2777void blk_account_io_start(struct request *rq, bool new_io)
2778{
2779 struct hd_struct *part;
2780 int rw = rq_data_dir(rq);
2781 int cpu;
2782
2783 if (!blk_do_io_stat(rq))
2784 return;
2785
2786 cpu = part_stat_lock();
2787
2788 if (!new_io) {
2789 part = rq->part;
2790 part_stat_inc(cpu, part, merges[rw]);
2791 } else {
2792 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
2793 if (!hd_struct_try_get(part)) {
2794
2795
2796
2797
2798
2799
2800
2801
2802 part = &rq->rq_disk->part0;
2803 hd_struct_get(part);
2804 }
2805 part_round_stats(rq->q, cpu, part);
2806 part_inc_in_flight(rq->q, part, rw);
2807 rq->part = part;
2808 }
2809
2810 part_stat_unlock();
2811}
2812
2813static struct request *elv_next_request(struct request_queue *q)
2814{
2815 struct request *rq;
2816 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
2817
2818 WARN_ON_ONCE(q->mq_ops);
2819
2820 while (1) {
2821 list_for_each_entry(rq, &q->queue_head, queuelist) {
2822 if (blk_pm_allow_request(rq))
2823 return rq;
2824
2825 if (rq->rq_flags & RQF_SOFTBARRIER)
2826 break;
2827 }
2828
 /*
  * A flush request is running but the drive cannot queue further
  * flushes, so hold the queue until the flush finishes.  Drivers
  * could not dispatch the following requests anyway and would only
  * requeue them, and holding back can even improve throughput: if
  * flush1, write1 and flush2 are queued, flush2 completes almost
  * immediately after flush1 because the cache is already clean, so
  * it is effectively folded into flush1.
  * Since the queue is held, set a flag so it is restarted later;
  * see flush_end_io() for details.
  */
2844 if (fq->flush_pending_idx != fq->flush_running_idx &&
2845 !queue_flush_queueable(q)) {
2846 fq->flush_queue_delayed = 1;
2847 return NULL;
2848 }
2849 if (unlikely(blk_queue_bypass(q)) ||
2850 !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
2851 return NULL;
2852 }
2853}
2854
/**
 * blk_peek_request - peek at the top of a request queue
 * @q: request queue to peek at
 *
 * Description:
 *     Return the request at the top of @q.  The returned request
 *     should be started using blk_start_request() before the LLD starts
 *     processing it.
 *
 * Return:
 *     Pointer to the request at the top of @q if available.  Null
 *     otherwise.
 */
2868struct request *blk_peek_request(struct request_queue *q)
2869{
2870 struct request *rq;
2871 int ret;
2872
2873 lockdep_assert_held(q->queue_lock);
2874 WARN_ON_ONCE(q->mq_ops);
2875
2876 while ((rq = elv_next_request(q)) != NULL) {
2877 if (!(rq->rq_flags & RQF_STARTED)) {
 /*
  * This is the first time the device driver
  * sees this request (possibly after
  * requeueing).  Notify the IO scheduler.
  */
2883 if (rq->rq_flags & RQF_SORTED)
2884 elv_activate_rq(q, rq);
2885
 /*
  * Just mark it as started even if we don't start it:
  * a request that has been delayed should not be passed
  * by new incoming requests.
  */
2891 rq->rq_flags |= RQF_STARTED;
2892 trace_block_rq_issue(q, rq);
2893 }
2894
2895 if (!q->boundary_rq || q->boundary_rq == rq) {
2896 q->end_sector = rq_end_sector(rq);
2897 q->boundary_rq = NULL;
2898 }
2899
2900 if (rq->rq_flags & RQF_DONTPREP)
2901 break;
2902
2903 if (q->dma_drain_size && blk_rq_bytes(rq)) {
 /*
  * Make sure space for the drain appears.  We know
  * we can do this because max_hw_segments has been
  * adjusted to be one fewer than the device can
  * handle.
  */
2910 rq->nr_phys_segments++;
2911 }
2912
2913 if (!q->prep_rq_fn)
2914 break;
2915
2916 ret = q->prep_rq_fn(q, rq);
2917 if (ret == BLKPREP_OK) {
2918 break;
2919 } else if (ret == BLKPREP_DEFER) {
 /*
  * The request may have been (partially) prepped.
  * We need to keep this request at the front to
  * avoid resource deadlock.  RQF_STARTED will
  * prevent other fs requests from passing this one.
  */
2926 if (q->dma_drain_size && blk_rq_bytes(rq) &&
2927 !(rq->rq_flags & RQF_DONTPREP)) {
 /*
  * Remove the space for the drain we added
  * so that we don't add it again.
  */
2932 --rq->nr_phys_segments;
2933 }
2934
2935 rq = NULL;
2936 break;
2937 } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
2938 rq->rq_flags |= RQF_QUIET;
 /*
  * Mark this request as started so we don't trigger
  * any debug logic in the end I/O path.
  */
2943 blk_start_request(rq);
2944 __blk_end_request_all(rq, ret == BLKPREP_INVALID ?
2945 BLK_STS_TARGET : BLK_STS_IOERR);
2946 } else {
2947 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
2948 break;
2949 }
2950 }
2951
2952 return rq;
2953}
2954EXPORT_SYMBOL(blk_peek_request);
2955
2956static void blk_dequeue_request(struct request *rq)
2957{
2958 struct request_queue *q = rq->q;
2959
2960 BUG_ON(list_empty(&rq->queuelist));
2961 BUG_ON(ELV_ON_HASH(rq));
2962
2963 list_del_init(&rq->queuelist);
2964
 /*
  * The time frame between a request being removed from the lists
  * and it being freed is accounted as I/O that is in progress at
  * the driver side.
  */
2970 if (blk_account_rq(rq))
2971 q->in_flight[rq_is_sync(rq)]++;
2972}
2973
/**
 * blk_start_request - start request processing on the driver
 * @req: request to dequeue
 *
 * Description:
 *     Dequeue @req and start the timeout timer on it.  This hands off
 *     the request to the driver.
 */
2982void blk_start_request(struct request *req)
2983{
2984 lockdep_assert_held(req->q->queue_lock);
2985 WARN_ON_ONCE(req->q->mq_ops);
2986
2987 blk_dequeue_request(req);
2988
2989 if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
2990 req->io_start_time_ns = ktime_get_ns();
2991#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
2992 req->throtl_size = blk_rq_sectors(req);
2993#endif
2994 req->rq_flags |= RQF_STATS;
2995 rq_qos_issue(req->q, req);
2996 }
2997
2998 BUG_ON(blk_rq_is_complete(req));
2999 blk_add_timer(req);
3000}
3001EXPORT_SYMBOL(blk_start_request);
3002
/**
 * blk_fetch_request - fetch a request from a request queue
 * @q: request queue to fetch a request from
 *
 * Description:
 *     Return the request at the top of @q.  The request is started on
 *     return and the LLD can start processing it immediately.
 *
 * Return:
 *     Pointer to the request at the top of @q if available.  Null
 *     otherwise.
 */
3015struct request *blk_fetch_request(struct request_queue *q)
3016{
3017 struct request *rq;
3018
3019 lockdep_assert_held(q->queue_lock);
3020 WARN_ON_ONCE(q->mq_ops);
3021
3022 rq = blk_peek_request(q);
3023 if (rq)
3024 blk_start_request(rq);
3025 return rq;
3026}
3027EXPORT_SYMBOL(blk_fetch_request);
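
/*
 * Illustrative sketch (not part of this file): the classic single-queue
 * driver loop built on blk_fetch_request().  The request_fn below is
 * hypothetical; it runs with the queue lock held and, for simplicity,
 * performs the I/O synchronously before completing each request.
 *
 *	static void example_request_fn(struct request_queue *q)
 *	{
 *		struct request *rq;
 *
 *		while ((rq = blk_fetch_request(q)) != NULL) {
 *			blk_status_t sts = example_do_io(rq);	// driver-specific
 *
 *			__blk_end_request_all(rq, sts);		// lock still held
 *		}
 *	}
 */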
3028
/*
 * Steal bios from a request and add them to a bio list.
 * The request must not have been partially completed before.
 */
3033void blk_steal_bios(struct bio_list *list, struct request *rq)
3034{
3035 if (rq->bio) {
3036 if (list->tail)
3037 list->tail->bi_next = rq->bio;
3038 else
3039 list->head = rq->bio;
3040 list->tail = rq->biotail;
3041
3042 rq->bio = NULL;
3043 rq->biotail = NULL;
3044 }
3045
3046 rq->__data_len = 0;
3047}
3048EXPORT_SYMBOL_GPL(blk_steal_bios);
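
/*
 * Illustrative sketch (not part of this file): a multipath-style driver
 * can use blk_steal_bios() to move the bios off a request that failed on
 * one path and resubmit them elsewhere.  "example_resubmit" is a
 * hypothetical helper; the emptied request is then completed through the
 * driver's normal completion call.
 *
 *	struct bio_list requeue_list = BIO_EMPTY_LIST;
 *
 *	blk_steal_bios(&requeue_list, rq);	// rq now carries no data
 *	// complete the now-empty request as usual, then push the stolen
 *	// bios down another path
 *	example_resubmit(&requeue_list);
 */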
3049
/**
 * blk_update_request - Special helper function for request stacking drivers
 * @req:      the request being processed
 * @error:    block status code
 * @nr_bytes: number of bytes to complete @req
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @req, but doesn't complete
 *     the request structure even if @req doesn't have leftover.
 *     If @req has leftover, sets it up for the next range of segments.
 *
 *     This special helper function is only for request stacking drivers
 *     (e.g. request-based dm) so that they can handle partial completion.
 *     Actual device drivers should use blk_end_request instead.
 *
 *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
 *     %false return from this function.
 *
 * Return:
 *     %false - this request doesn't have any more data
 *     %true  - this request has more data
 **/
3076bool blk_update_request(struct request *req, blk_status_t error,
3077 unsigned int nr_bytes)
3078{
3079 int total_bytes;
3080
3081 trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
3082
3083 if (!req->bio)
3084 return false;
3085
3086 if (unlikely(error && !blk_rq_is_passthrough(req) &&
3087 !(req->rq_flags & RQF_QUIET)))
3088 print_req_error(req, error);
3089
3090 blk_account_io_completion(req, nr_bytes);
3091
3092 total_bytes = 0;
3093 while (req->bio) {
3094 struct bio *bio = req->bio;
3095 unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
3096
3097 if (bio_bytes == bio->bi_iter.bi_size)
3098 req->bio = bio->bi_next;
3099
3100
3101 bio_clear_flag(bio, BIO_TRACE_COMPLETION);
3102 req_bio_endio(req, bio, bio_bytes, error);
3103
3104 total_bytes += bio_bytes;
3105 nr_bytes -= bio_bytes;
3106
3107 if (!nr_bytes)
3108 break;
3109 }
3110
 /*
  * Completely done.
  */
3114 if (!req->bio) {
 /*
  * Reset counters so that the request stacking driver
  * can find how many bytes remain in the request
  * later.
  */
3120 req->__data_len = 0;
3121 return false;
3122 }
3123
3124 req->__data_len -= total_bytes;
3125
 /* update sector only for requests with a clear definition of sector */
3127 if (!blk_rq_is_passthrough(req))
3128 req->__sector += total_bytes >> 9;
3129
 /* mixed attributes always follow the first bio */
3131 if (req->rq_flags & RQF_MIXED_MERGE) {
3132 req->cmd_flags &= ~REQ_FAILFAST_MASK;
3133 req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
3134 }
3135
3136 if (!(req->rq_flags & RQF_SPECIAL_PAYLOAD)) {
 /*
  * If the total number of sectors is less than the first segment
  * size, something has gone terribly wrong.
  */
3141 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
3142 blk_dump_rq_flags(req, "request botched");
3143 req->__data_len = blk_rq_cur_bytes(req);
3144 }
3145
 /* recalculate the number of segments */
3147 blk_recalc_rq_segments(req);
3148 }
3149
3150 return true;
3151}
3152EXPORT_SYMBOL_GPL(blk_update_request);
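
/*
 * Illustrative sketch (not part of this file): how a request stacking
 * driver might consume partial completions from the device below it.
 * "orig" is the stacked (original) request and "nr_bytes" is whatever
 * the lower device just finished; both are assumptions of this sketch.
 *
 *	// Advance the original request by what the lower device completed.
 *	// A %false return means nothing is left and the original can be
 *	// finished through the driver's usual completion path.
 *	if (!blk_update_request(orig, error, nr_bytes))
 *		example_complete_original(orig, error);
 */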
3153
3154static bool blk_update_bidi_request(struct request *rq, blk_status_t error,
3155 unsigned int nr_bytes,
3156 unsigned int bidi_bytes)
3157{
3158 if (blk_update_request(rq, error, nr_bytes))
3159 return true;
3160
 /* Bidi request must be completed as a whole */
3162 if (unlikely(blk_bidi_rq(rq)) &&
3163 blk_update_request(rq->next_rq, error, bidi_bytes))
3164 return true;
3165
3166 if (blk_queue_add_random(rq->q))
3167 add_disk_randomness(rq->rq_disk);
3168
3169 return false;
3170}
3171
/**
 * blk_unprep_request - unprepare a request
 * @req:	the request
 *
 * This function makes a request ready for complete resubmission (or
 * completion).  It happens only after all error handling is complete,
 * so it represents the appropriate moment to deallocate any resources
 * that were allocated to the request in the prep_rq_fn.
 */
3182void blk_unprep_request(struct request *req)
3183{
3184 struct request_queue *q = req->q;
3185
3186 req->rq_flags &= ~RQF_DONTPREP;
3187 if (q->unprep_rq_fn)
3188 q->unprep_rq_fn(q, req);
3189}
3190EXPORT_SYMBOL_GPL(blk_unprep_request);
3191
3192void blk_finish_request(struct request *req, blk_status_t error)
3193{
3194 struct request_queue *q = req->q;
3195 u64 now = ktime_get_ns();
3196
3197 lockdep_assert_held(req->q->queue_lock);
3198 WARN_ON_ONCE(q->mq_ops);
3199
3200 if (req->rq_flags & RQF_STATS)
3201 blk_stat_add(req, now);
3202
3203 if (req->rq_flags & RQF_QUEUED)
3204 blk_queue_end_tag(q, req);
3205
3206 BUG_ON(blk_queued_rq(req));
3207
3208 if (unlikely(laptop_mode) && !blk_rq_is_passthrough(req))
3209 laptop_io_completion(req->q->backing_dev_info);
3210
3211 blk_delete_timer(req);
3212
3213 if (req->rq_flags & RQF_DONTPREP)
3214 blk_unprep_request(req);
3215
3216 blk_account_io_done(req, now);
3217
3218 if (req->end_io) {
3219 rq_qos_done(q, req);
3220 req->end_io(req, error);
3221 } else {
3222 if (blk_bidi_rq(req))
3223 __blk_put_request(req->next_rq->q, req->next_rq);
3224
3225 __blk_put_request(q, req);
3226 }
3227}
3228EXPORT_SYMBOL(blk_finish_request);
3229
/**
 * blk_end_bidi_request - Complete a bidi request
 * @rq:         the request to complete
 * @error:      block status code
 * @nr_bytes:   number of bytes to complete @rq
 * @bidi_bytes: number of bytes to complete @rq->next_rq
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
 *     Drivers that support bidi can safely call this member for any
 *     type of request, bidi or uni.  In the latter case @bidi_bytes is
 *     just ignored.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
3247static bool blk_end_bidi_request(struct request *rq, blk_status_t error,
3248 unsigned int nr_bytes, unsigned int bidi_bytes)
3249{
3250 struct request_queue *q = rq->q;
3251 unsigned long flags;
3252
3253 WARN_ON_ONCE(q->mq_ops);
3254
3255 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
3256 return true;
3257
3258 spin_lock_irqsave(q->queue_lock, flags);
3259 blk_finish_request(rq, error);
3260 spin_unlock_irqrestore(q->queue_lock, flags);
3261
3262 return false;
3263}
3264
/**
 * __blk_end_bidi_request - Complete a bidi request with queue lock held
 * @rq:         the request to complete
 * @error:      block status code
 * @nr_bytes:   number of bytes to complete @rq
 * @bidi_bytes: number of bytes to complete @rq->next_rq
 *
 * Description:
 *     Identical to blk_end_bidi_request() except that the queue lock is
 *     assumed to be held on entry and remains so on return.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
3280static bool __blk_end_bidi_request(struct request *rq, blk_status_t error,
3281 unsigned int nr_bytes, unsigned int bidi_bytes)
3282{
3283 lockdep_assert_held(rq->q->queue_lock);
3284 WARN_ON_ONCE(rq->q->mq_ops);
3285
3286 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
3287 return true;
3288
3289 blk_finish_request(rq, error);
3290
3291 return false;
3292}
3293
/**
 * blk_end_request - Helper function for drivers to complete the request.
 * @rq:       the request being processed
 * @error:    block status code
 * @nr_bytes: number of bytes to complete
 *
 * Description:
 *     Ends I/O on a number of bytes attached to @rq.
 *     If @rq has leftover, sets it up for the next range of segments.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
3308bool blk_end_request(struct request *rq, blk_status_t error,
3309 unsigned int nr_bytes)
3310{
3311 WARN_ON_ONCE(rq->q->mq_ops);
3312 return blk_end_bidi_request(rq, error, nr_bytes, 0);
3313}
3314EXPORT_SYMBOL(blk_end_request);
3315
/**
 * blk_end_request_all - Helper function for drivers to finish the request.
 * @rq: the request to finish
 * @error: block status code
 *
 * Description:
 *     Completely finish @rq.
 */
3324void blk_end_request_all(struct request *rq, blk_status_t error)
3325{
3326 bool pending;
3327 unsigned int bidi_bytes = 0;
3328
3329 if (unlikely(blk_bidi_rq(rq)))
3330 bidi_bytes = blk_rq_bytes(rq->next_rq);
3331
3332 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
3333 BUG_ON(pending);
3334}
3335EXPORT_SYMBOL(blk_end_request_all);
3336
/**
 * __blk_end_request - Helper function for drivers to complete the request.
 * @rq:       the request being processed
 * @error:    block status code
 * @nr_bytes: number of bytes to complete
 *
 * Description:
 *     Must be called with queue lock held, unlike blk_end_request().
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 **/
3350bool __blk_end_request(struct request *rq, blk_status_t error,
3351 unsigned int nr_bytes)
3352{
3353 lockdep_assert_held(rq->q->queue_lock);
3354 WARN_ON_ONCE(rq->q->mq_ops);
3355
3356 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
3357}
3358EXPORT_SYMBOL(__blk_end_request);
3359
/**
 * __blk_end_request_all - Helper function for drivers to finish the request.
 * @rq: the request to finish
 * @error: block status code
 *
 * Description:
 *     Completely finish @rq.  Must be called with queue lock held.
 */
3368void __blk_end_request_all(struct request *rq, blk_status_t error)
3369{
3370 bool pending;
3371 unsigned int bidi_bytes = 0;
3372
3373 lockdep_assert_held(rq->q->queue_lock);
3374 WARN_ON_ONCE(rq->q->mq_ops);
3375
3376 if (unlikely(blk_bidi_rq(rq)))
3377 bidi_bytes = blk_rq_bytes(rq->next_rq);
3378
3379 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
3380 BUG_ON(pending);
3381}
3382EXPORT_SYMBOL(__blk_end_request_all);
3383
/**
 * __blk_end_request_cur - Helper function to finish the current request chunk.
 * @rq: the request to finish the current chunk for
 * @error: block status code
 *
 * Description:
 *     Complete the current consecutively mapped chunk from @rq.  Must
 *     be called with queue lock held.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 */
3397bool __blk_end_request_cur(struct request *rq, blk_status_t error)
3398{
3399 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
3400}
3401EXPORT_SYMBOL(__blk_end_request_cur);
3402
3403void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
3404 struct bio *bio)
3405{
3406 if (bio_has_data(bio))
3407 rq->nr_phys_segments = bio_phys_segments(q, bio);
3408 else if (bio_op(bio) == REQ_OP_DISCARD)
3409 rq->nr_phys_segments = 1;
3410
3411 rq->__data_len = bio->bi_iter.bi_size;
3412 rq->bio = rq->biotail = bio;
3413
3414 if (bio->bi_disk)
3415 rq->rq_disk = bio->bi_disk;
3416}
3417
3418#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/**
 * rq_flush_dcache_pages - Helper function to flush all pages in a request
 * @rq: the request to be flushed
 *
 * Description:
 *     Flush all pages in @rq.
 */
3426void rq_flush_dcache_pages(struct request *rq)
3427{
3428 struct req_iterator iter;
3429 struct bio_vec bvec;
3430
3431 rq_for_each_segment(bvec, rq, iter)
3432 flush_dcache_page(bvec.bv_page);
3433}
3434EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
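
/*
 * Illustrative sketch (not part of this file): a PIO-style driver that
 * fills request pages through kernel mappings would call the helper above
 * after a read so that user mappings see the new data on architectures
 * with aliasing D-caches.  "example_read_into_pages" is hypothetical.
 *
 *	example_read_into_pages(rq);	// copy data into rq's pages
 *	rq_flush_dcache_pages(rq);	// keep the D-cache coherent
 */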
3435#endif
3436
/**
 * blk_lld_busy - Check if underlying low-level drivers of a device are busy
 * @q : the queue of the device being checked
 *
 * Description:
 *    Check if underlying low-level drivers of a device are busy.
 *    If the drivers want to export their busy state, they must set their
 *    own exporting function using blk_queue_lld_busy() first.
 *
 *    Basically, this function is used only by request stacking drivers
 *    to stop dispatching requests to underlying devices when the underlying
 *    devices are busy.  This behavior helps more I/O merging on the queue
 *    of the request stacking driver and prevents I/O throughput regression
 *    on burst I/O load.
 *
 * Return:
 *    0 - Not busy (The request stacking driver should dispatch requests)
 *    1 - Busy (The request stacking driver should stop dispatching requests)
 */
3456int blk_lld_busy(struct request_queue *q)
3457{
3458 if (q->lld_busy_fn)
3459 return q->lld_busy_fn(q);
3460
3461 return 0;
3462}
3463EXPORT_SYMBOL_GPL(blk_lld_busy);
3464
/**
 * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
 * @rq: the clone request to be cleaned up
 *
 * Description:
 *     Free all bios in @rq for a cloned request.
 */
3472void blk_rq_unprep_clone(struct request *rq)
3473{
3474 struct bio *bio;
3475
3476 while ((bio = rq->bio) != NULL) {
3477 rq->bio = bio->bi_next;
3478
3479 bio_put(bio);
3480 }
3481}
3482EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
3483
/*
 * Copy attributes of the original request to the clone request.
 * The actual data parts (e.g. ->cmd, ->sense) are not copied.
 */
3488static void __blk_rq_prep_clone(struct request *dst, struct request *src)
3489{
3490 dst->cpu = src->cpu;
3491 dst->__sector = blk_rq_pos(src);
3492 dst->__data_len = blk_rq_bytes(src);
3493 if (src->rq_flags & RQF_SPECIAL_PAYLOAD) {
3494 dst->rq_flags |= RQF_SPECIAL_PAYLOAD;
3495 dst->special_vec = src->special_vec;
3496 }
3497 dst->nr_phys_segments = src->nr_phys_segments;
3498 dst->ioprio = src->ioprio;
3499 dst->extra_len = src->extra_len;
3500}
3501
/**
 * blk_rq_prep_clone - Helper function to setup clone request
 * @rq: the request to be setup
 * @rq_src: original request to be cloned
 * @bs: bio_set that bios for clone are allocated from
 * @gfp_mask: memory allocation mask for bio
 * @bio_ctr: setup function to be called for each clone bio.
 *           Returns %0 for success, non %0 for failure.
 * @data: private data to be passed to @bio_ctr
 *
 * Description:
 *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
 *     The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
 *     are not copied, and copying such parts is the caller's responsibility.
 *     Also, pages which the original bios are pointing to are not copied
 *     and the cloned bios just point to the same pages.  So cloned bios
 *     must be completed before original bios, which means the caller must
 *     complete @rq before @rq_src.
 */
3521int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
3522 struct bio_set *bs, gfp_t gfp_mask,
3523 int (*bio_ctr)(struct bio *, struct bio *, void *),
3524 void *data)
3525{
3526 struct bio *bio, *bio_src;
3527
3528 if (!bs)
3529 bs = &fs_bio_set;
3530
3531 __rq_for_each_bio(bio_src, rq_src) {
3532 bio = bio_clone_fast(bio_src, gfp_mask, bs);
3533 if (!bio)
3534 goto free_and_out;
3535
3536 if (bio_ctr && bio_ctr(bio, bio_src, data))
3537 goto free_and_out;
3538
3539 if (rq->bio) {
3540 rq->biotail->bi_next = bio;
3541 rq->biotail = bio;
3542 } else
3543 rq->bio = rq->biotail = bio;
3544 }
3545
3546 __blk_rq_prep_clone(rq, rq_src);
3547
3548 return 0;
3549
3550free_and_out:
3551 if (bio)
3552 bio_put(bio);
3553 blk_rq_unprep_clone(rq);
3554
3555 return -ENOMEM;
3556}
3557EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
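
/*
 * Illustrative sketch (not part of this file): the typical stacking-driver
 * flow that pairs blk_rq_prep_clone() with blk_insert_cloned_request().
 * "clone" is assumed to be a request allocated on the lower device's queue
 * and "example_bio_ctr"/"example_clone_endio" are hypothetical callbacks.
 *
 *	if (blk_rq_prep_clone(clone, rq, NULL, GFP_ATOMIC,
 *			      example_bio_ctr, private_data))
 *		goto fail;			// uses fs_bio_set when @bs is NULL
 *
 *	clone->end_io = example_clone_endio;	// completes and unpreps the clone
 *	if (blk_insert_cloned_request(clone->q, clone) != BLK_STS_OK)
 *		goto requeue;
 */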
3558
3559int kblockd_schedule_work(struct work_struct *work)
3560{
3561 return queue_work(kblockd_workqueue, work);
3562}
3563EXPORT_SYMBOL(kblockd_schedule_work);
3564
3565int kblockd_schedule_work_on(int cpu, struct work_struct *work)
3566{
3567 return queue_work_on(cpu, kblockd_workqueue, work);
3568}
3569EXPORT_SYMBOL(kblockd_schedule_work_on);
3570
3571int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
3572 unsigned long delay)
3573{
3574 return mod_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
3575}
3576EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
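
/*
 * Illustrative sketch (not part of this file): block-layer code defers work
 * to kblockd rather than the system workqueue so it runs on a dedicated,
 * WQ_MEM_RECLAIM workqueue.  "example_work_fn" is a hypothetical handler.
 *
 *	static void example_work_fn(struct work_struct *work);
 *	static DECLARE_WORK(example_work, example_work_fn);
 *	...
 *	kblockd_schedule_work(&example_work);
 */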
3577
/**
 * blk_start_plug - initialize blk_plug and track it inside the task_struct
 * @plug:	The &struct blk_plug that needs to be initialized
 *
 * Description:
 *   Tracking blk_plug inside the task_struct will help with auto-flushing the
 *   pending I/O should the task end up blocking between blk_start_plug() and
 *   blk_finish_plug(). This is important from a performance perspective, but
 *   also ensures that we don't deadlock. For instance, if the task is blocking
 *   for a memory allocation, memory reclaim could end up wanting to free a
 *   page belonging to a request that is currently residing in our private
 *   plug. By flushing the pending I/O when the process goes to sleep, we avoid
 *   this kind of deadlock.
 */
3592void blk_start_plug(struct blk_plug *plug)
3593{
3594 struct task_struct *tsk = current;
3595
 /*
  * If this is a nested plug, don't actually assign it.
  */
3599 if (tsk->plug)
3600 return;
3601
3602 INIT_LIST_HEAD(&plug->list);
3603 INIT_LIST_HEAD(&plug->mq_list);
3604 INIT_LIST_HEAD(&plug->cb_list);
3605
 /*
  * Store ordering should not be needed here, since a potential
  * preempt will imply a full memory barrier.
  */
3609 tsk->plug = plug;
3610}
3611EXPORT_SYMBOL(blk_start_plug);
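
/*
 * Illustrative sketch (not part of this file): a caller about to submit a
 * batch of I/O plugs the task so the requests can be merged and dispatched
 * together when the plug is finished (or when the task sleeps).
 *
 *	struct blk_plug plug;
 *	int i;
 *
 *	blk_start_plug(&plug);
 *	for (i = 0; i < nr_bios; i++)
 *		submit_bio(bios[i]);	// held back in the per-task plug
 *	blk_finish_plug(&plug);		// flushes the plugged requests
 */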
3612
3613static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
3614{
3615 struct request *rqa = container_of(a, struct request, queuelist);
3616 struct request *rqb = container_of(b, struct request, queuelist);
3617
3618 return !(rqa->q < rqb->q ||
3619 (rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb)));
3620}
3621
/*
 * If 'from_schedule' is true, then postpone the dispatch of requests
 * until a safe kblockd context.  We do this to avoid accidental big
 * additional stack usage in driver dispatch, in places where the original
 * plugger did not intend it.
 */
3628static void queue_unplugged(struct request_queue *q, unsigned int depth,
3629 bool from_schedule)
3630 __releases(q->queue_lock)
3631{
3632 lockdep_assert_held(q->queue_lock);
3633
3634 trace_block_unplug(q, depth, !from_schedule);
3635
3636 if (from_schedule)
3637 blk_run_queue_async(q);
3638 else
3639 __blk_run_queue(q);
3640 spin_unlock_irq(q->queue_lock);
3641}
3642
3643static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
3644{
3645 LIST_HEAD(callbacks);
3646
3647 while (!list_empty(&plug->cb_list)) {
3648 list_splice_init(&plug->cb_list, &callbacks);
3649
3650 while (!list_empty(&callbacks)) {
3651 struct blk_plug_cb *cb = list_first_entry(&callbacks,
3652 struct blk_plug_cb,
3653 list);
3654 list_del(&cb->list);
3655 cb->callback(cb, from_schedule);
3656 }
3657 }
3658}
3659
3660struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
3661 int size)
3662{
3663 struct blk_plug *plug = current->plug;
3664 struct blk_plug_cb *cb;
3665
3666 if (!plug)
3667 return NULL;
3668
3669 list_for_each_entry(cb, &plug->cb_list, list)
3670 if (cb->callback == unplug && cb->data == data)
3671 return cb;
3672
 /* Not currently on the callback list */
3674 BUG_ON(size < sizeof(*cb));
3675 cb = kzalloc(size, GFP_ATOMIC);
3676 if (cb) {
3677 cb->data = data;
3678 cb->callback = unplug;
3679 list_add(&cb->list, &plug->cb_list);
3680 }
3681 return cb;
3682}
3683EXPORT_SYMBOL(blk_check_plugged);
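
/*
 * Illustrative sketch (not part of this file): a driver can attach its own
 * per-task callback to the current plug so that private batching state is
 * flushed together with the plug (md does something similar).  The
 * structure and callback below are hypothetical.
 *
 *	struct example_plug_cb {
 *		struct blk_plug_cb cb;		// embedded list/callback handle
 *		struct list_head pending;	// driver-private batch
 *	};
 *
 *	static void example_unplug(struct blk_plug_cb *cb, bool from_schedule)
 *	{
 *		struct example_plug_cb *ecb =
 *			container_of(cb, struct example_plug_cb, cb);
 *		// dispatch everything gathered on ecb->pending
 *	}
 *	...
 *	cb = blk_check_plugged(example_unplug, dev,
 *			       sizeof(struct example_plug_cb));
 *	if (cb)
 *		;	// add work to container_of(cb, ...)->pending
 */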
3684
3685void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3686{
3687 struct request_queue *q;
3688 struct request *rq;
3689 LIST_HEAD(list);
3690 unsigned int depth;
3691
3692 flush_plug_callbacks(plug, from_schedule);
3693
3694 if (!list_empty(&plug->mq_list))
3695 blk_mq_flush_plug_list(plug, from_schedule);
3696
3697 if (list_empty(&plug->list))
3698 return;
3699
3700 list_splice_init(&plug->list, &list);
3701
3702 list_sort(NULL, &list, plug_rq_cmp);
3703
3704 q = NULL;
3705 depth = 0;
3706
3707 while (!list_empty(&list)) {
3708 rq = list_entry_rq(list.next);
3709 list_del_init(&rq->queuelist);
3710 BUG_ON(!rq->q);
3711 if (rq->q != q) {
 /*
  * This drops the queue lock.
  */
3715 if (q)
3716 queue_unplugged(q, depth, from_schedule);
3717 q = rq->q;
3718 depth = 0;
3719 spin_lock_irq(q->queue_lock);
3720 }
3721
 /*
  * Short-circuit if @q is dead.
  */
3725 if (unlikely(blk_queue_dying(q))) {
3726 __blk_end_request_all(rq, BLK_STS_IOERR);
3727 continue;
3728 }
3729
 /*
  * rq is already accounted, so use raw insert.
  */
3733 if (op_is_flush(rq->cmd_flags))
3734 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
3735 else
3736 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
3737
3738 depth++;
3739 }
3740
 /*
  * This drops the queue lock.
  */
3744 if (q)
3745 queue_unplugged(q, depth, from_schedule);
3746}
3747
3748void blk_finish_plug(struct blk_plug *plug)
3749{
3750 if (plug != current->plug)
3751 return;
3752 blk_flush_plug_list(plug, false);
3753
3754 current->plug = NULL;
3755}
3756EXPORT_SYMBOL(blk_finish_plug);
3757
3758#ifdef CONFIG_PM
/**
 * blk_pm_runtime_init - Block layer runtime PM initialization routine
 * @q: the queue of the device
 * @dev: the device the queue belongs to
 *
 * Description:
 *    Initialize runtime-PM-related fields for @q and start auto suspend for
 *    @dev. Drivers that want to take advantage of request-based runtime PM
 *    should call this function after @q has been initialized, and its
 *    request_fn has been set up, so that requests can be managed properly.
 *    The device owner is responsible for calling pm_runtime_enable on the
 *    device.
 *
 *    This function takes care of setting up auto suspend for the device;
 *    the autosuspend delay is set to -1 to make runtime suspend impossible
 *    until an updated value is set either by the user or by the driver.
 *    Drivers do not need to touch other autosuspend settings.
 *
 *    The block layer runtime PM is request based, so it only works for
 *    drivers that use requests as their IO unit instead of those directly
 *    using bios.
 */
3780void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
3781{
 /* Don't enable runtime PM for blk-mq until it is ready */
3783 if (q->mq_ops) {
3784 pm_runtime_disable(dev);
3785 return;
3786 }
3787
3788 q->dev = dev;
3789 q->rpm_status = RPM_ACTIVE;
3790 pm_runtime_set_autosuspend_delay(q->dev, -1);
3791 pm_runtime_use_autosuspend(q->dev);
3792}
3793EXPORT_SYMBOL(blk_pm_runtime_init);
3794
/**
 * blk_pre_runtime_suspend - Pre runtime suspend check
 * @q: the queue of the device
 *
 * Description:
 *    This function will check if runtime suspend is allowed for the device
 *    by examining if there are any requests pending in the queue. If there
 *    are requests pending, the device can not be runtime suspended; otherwise,
 *    the queue's status will be updated to SUSPENDING and the driver can
 *    proceed to suspend the device.
 *
 *    For the not-allowed case, we mark last busy for the device so that
 *    the runtime PM core will try to autosuspend it some time later.
 *
 *    This function should be called near the start of the device's
 *    runtime_suspend callback.
 *
 * Return:
 *    0		- OK to runtime suspend the device
 *    -EBUSY	- Device should not be runtime suspended
 */
3816int blk_pre_runtime_suspend(struct request_queue *q)
3817{
3818 int ret = 0;
3819
3820 if (!q->dev)
3821 return ret;
3822
3823 spin_lock_irq(q->queue_lock);
3824 if (q->nr_pending) {
3825 ret = -EBUSY;
3826 pm_runtime_mark_last_busy(q->dev);
3827 } else {
3828 q->rpm_status = RPM_SUSPENDING;
3829 }
3830 spin_unlock_irq(q->queue_lock);
3831 return ret;
3832}
3833EXPORT_SYMBOL(blk_pre_runtime_suspend);
3834
/**
 * blk_post_runtime_suspend - Post runtime suspend processing
 * @q: the queue of the device
 * @err: return value of the device's runtime_suspend function
 *
 * Description:
 *    Update the queue's runtime status according to the return value of the
 *    device's runtime suspend function and mark last busy for the device so
 *    that the PM core will try to auto suspend the device at a later time.
 *
 *    This function should be called near the end of the device's
 *    runtime_suspend callback.
 */
3848void blk_post_runtime_suspend(struct request_queue *q, int err)
3849{
3850 if (!q->dev)
3851 return;
3852
3853 spin_lock_irq(q->queue_lock);
3854 if (!err) {
3855 q->rpm_status = RPM_SUSPENDED;
3856 } else {
3857 q->rpm_status = RPM_ACTIVE;
3858 pm_runtime_mark_last_busy(q->dev);
3859 }
3860 spin_unlock_irq(q->queue_lock);
3861}
3862EXPORT_SYMBOL(blk_post_runtime_suspend);
3863
/**
 * blk_pre_runtime_resume - Pre runtime resume processing
 * @q: the queue of the device
 *
 * Description:
 *    Update the queue's runtime status to RESUMING in preparation for the
 *    runtime resume of the device.
 *
 *    This function should be called near the start of the device's
 *    runtime_resume callback.
 */
3875void blk_pre_runtime_resume(struct request_queue *q)
3876{
3877 if (!q->dev)
3878 return;
3879
3880 spin_lock_irq(q->queue_lock);
3881 q->rpm_status = RPM_RESUMING;
3882 spin_unlock_irq(q->queue_lock);
3883}
3884EXPORT_SYMBOL(blk_pre_runtime_resume);
3885
/**
 * blk_post_runtime_resume - Post runtime resume processing
 * @q: the queue of the device
 * @err: return value of the device's runtime_resume function
 *
 * Description:
 *    Update the queue's runtime status according to the return value of the
 *    device's runtime_resume function. If it was successfully resumed,
 *    process the requests that were queued into the device's queue while it
 *    was resuming, then mark last busy and initiate autosuspend for it.
 *
 *    This function should be called near the end of the device's
 *    runtime_resume callback.
 */
3900void blk_post_runtime_resume(struct request_queue *q, int err)
3901{
3902 if (!q->dev)
3903 return;
3904
3905 spin_lock_irq(q->queue_lock);
3906 if (!err) {
3907 q->rpm_status = RPM_ACTIVE;
3908 __blk_run_queue(q);
3909 pm_runtime_mark_last_busy(q->dev);
3910 pm_request_autosuspend(q->dev);
3911 } else {
3912 q->rpm_status = RPM_SUSPENDED;
3913 }
3914 spin_unlock_irq(q->queue_lock);
3915}
3916EXPORT_SYMBOL(blk_post_runtime_resume);
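
/*
 * Illustrative sketch (not part of this file): how a legacy request-based
 * driver might wire the helpers above into its runtime PM callbacks after
 * calling blk_pm_runtime_init().  "example_dev_to_queue" and the device
 * suspend/resume helpers are hypothetical.
 *
 *	static int example_runtime_suspend(struct device *dev)
 *	{
 *		struct request_queue *q = example_dev_to_queue(dev);
 *		int err;
 *
 *		err = blk_pre_runtime_suspend(q);
 *		if (err)
 *			return err;		// requests pending, stay active
 *		err = example_device_suspend(dev);
 *		blk_post_runtime_suspend(q, err);
 *		return err;
 *	}
 *
 *	static int example_runtime_resume(struct device *dev)
 *	{
 *		struct request_queue *q = example_dev_to_queue(dev);
 *		int err;
 *
 *		blk_pre_runtime_resume(q);
 *		err = example_device_resume(dev);
 *		blk_post_runtime_resume(q, err);
 *		return err;
 *	}
 */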
3917
/**
 * blk_set_runtime_active - Force runtime status of the queue to be active
 * @q: the queue of the device
 *
 * If the device is left runtime suspended during system suspend, the resume
 * hook typically resumes the device and corrects runtime status
 * accordingly. However, that does not affect the queue runtime PM status
 * which is still "suspended". This prevents processing requests from the
 * queue.
 *
 * This function can be used in the driver's resume hook to correct the queue
 * runtime PM status and re-enable peeking requests from the queue. It
 * should be called before the first request is added to the queue.
 */
3932void blk_set_runtime_active(struct request_queue *q)
3933{
3934 spin_lock_irq(q->queue_lock);
3935 q->rpm_status = RPM_ACTIVE;
3936 pm_runtime_mark_last_busy(q->dev);
3937 pm_request_autosuspend(q->dev);
3938 spin_unlock_irq(q->queue_lock);
3939}
3940EXPORT_SYMBOL(blk_set_runtime_active);
3941#endif
3942
3943int __init blk_dev_init(void)
3944{
3945 BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
3946 BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
3947 FIELD_SIZEOF(struct request, cmd_flags));
3948 BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
3949 FIELD_SIZEOF(struct bio, bi_opf));
3950
 /* used for unplugging and affects IO latency/throughput - HIGHPRI */
3952 kblockd_workqueue = alloc_workqueue("kblockd",
3953 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
3954 if (!kblockd_workqueue)
3955 panic("Failed to create kblockd\n");
3956
3957 request_cachep = kmem_cache_create("blkdev_requests",
3958 sizeof(struct request), 0, SLAB_PANIC, NULL);
3959
3960 blk_requestq_cachep = kmem_cache_create("request_queue",
3961 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
3962
3963#ifdef CONFIG_DEBUG_FS
3964 blk_debugfs_root = debugfs_create_dir("block", NULL);
3965#endif
3966
3967 return 0;
3968}
3969