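/*
 * cgroups support for the BFQ I/O scheduler.
 */
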
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/cgroup.h>
#include <linux/elevator.h>
#include <linux/ktime.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
#include <linux/sbitmap.h>
#include <linux/delay.h>

#include "bfq-iosched.h"

#ifdef CONFIG_BFQ_CGROUP_DEBUG
static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp)
{
	int ret;

	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
	if (ret)
		return ret;

	atomic64_set(&stat->aux_cnt, 0);
	return 0;
}

static void bfq_stat_exit(struct bfq_stat *stat)
{
	percpu_counter_destroy(&stat->cpu_cnt);
}
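
/*
 * bfq_stat_add - add a value to a bfq_stat
 * @stat: target bfq_stat
 * @val: value to add
 *
 * Add @val to @stat, using a per-cpu batched counter.
 */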
static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val)
{
	percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
}
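
/*
 * bfq_stat_read - read the current value of a bfq_stat
 * @stat: bfq_stat to read
 */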
static inline uint64_t bfq_stat_read(struct bfq_stat *stat)
{
	return percpu_counter_sum_positive(&stat->cpu_cnt);
}
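
/*
 * bfq_stat_reset - reset a bfq_stat
 * @stat: bfq_stat to reset
 */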
static inline void bfq_stat_reset(struct bfq_stat *stat)
{
	percpu_counter_set(&stat->cpu_cnt, 0);
	atomic64_set(&stat->aux_cnt, 0);
}
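
/*
 * bfq_stat_add_aux - add a bfq_stat into another's aux count
 * @to: the destination bfq_stat
 * @from: the source
 *
 * Add @from's count, including its aux one, to @to's aux count.
 */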
static inline void bfq_stat_add_aux(struct bfq_stat *to,
				    struct bfq_stat *from)
{
	atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt),
		     &to->aux_cnt);
}
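
/*
 * blkg_prfill_stat - prfill callback for printing a bfq_stat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @off: offset of the bfq_stat within @pd
 */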
static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
			    int off)
{
	return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off));
}
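
/* bfqg stats flags */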
enum bfqg_stats_flags {
	BFQG_stats_waiting = 0,
	BFQG_stats_idling,
	BFQG_stats_empty,
};

#define BFQG_FLAG_FNS(name)						\
static void bfqg_stats_mark_##name(struct bfqg_stats *stats)		\
{									\
	stats->flags |= (1 << BFQG_stats_##name);			\
}									\
static void bfqg_stats_clear_##name(struct bfqg_stats *stats)		\
{									\
	stats->flags &= ~(1 << BFQG_stats_##name);			\
}									\
static int bfqg_stats_##name(struct bfqg_stats *stats)			\
{									\
	return (stats->flags & (1 << BFQG_stats_##name)) != 0;		\
}									\

BFQG_FLAG_FNS(waiting)
BFQG_FLAG_FNS(idling)
BFQG_FLAG_FNS(empty)
#undef BFQG_FLAG_FNS
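
/* This should be called with the scheduler lock held. */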
static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
{
	u64 now;

	if (!bfqg_stats_waiting(stats))
		return;

	now = ktime_get_ns();
	if (now > stats->start_group_wait_time)
		bfq_stat_add(&stats->group_wait_time,
			     now - stats->start_group_wait_time);
	bfqg_stats_clear_waiting(stats);
}
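
/* This should be called with the scheduler lock held. */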
static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
						 struct bfq_group *curr_bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (bfqg_stats_waiting(stats))
		return;
	if (bfqg == curr_bfqg)
		return;
	stats->start_group_wait_time = ktime_get_ns();
	bfqg_stats_mark_waiting(stats);
}
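
/* This should be called with the scheduler lock held. */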
static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
{
	u64 now;

	if (!bfqg_stats_empty(stats))
		return;

	now = ktime_get_ns();
	if (now > stats->start_empty_time)
		bfq_stat_add(&stats->empty_time,
			     now - stats->start_empty_time);
	bfqg_stats_clear_empty(stats);
}

void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
{
	bfq_stat_add(&bfqg->stats.dequeue, 1);
}

void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (blkg_rwstat_total(&stats->queued))
		return;

	/*
	 * The group is already marked empty. This can happen if bfqq got
	 * a new request in the parent group and moved to this group while
	 * being added to the service tree. Just ignore the event.
	 */
	if (bfqg_stats_empty(stats))
		return;

	stats->start_empty_time = ktime_get_ns();
	bfqg_stats_mark_empty(stats);
}

void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (bfqg_stats_idling(stats)) {
		u64 now = ktime_get_ns();

		if (now > stats->start_idle_time)
			bfq_stat_add(&stats->idle_time,
				     now - stats->start_idle_time);
		bfqg_stats_clear_idling(stats);
	}
}

void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	stats->start_idle_time = ktime_get_ns();
	bfqg_stats_mark_idling(stats);
}

void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	bfq_stat_add(&stats->avg_queue_size_sum,
		     blkg_rwstat_total(&stats->queued));
	bfq_stat_add(&stats->avg_queue_size_samples, 1);
	bfqg_stats_update_group_wait_time(stats);
}

void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
			      unsigned int op)
{
	blkg_rwstat_add(&bfqg->stats.queued, op, 1);
	bfqg_stats_end_empty_time(&bfqg->stats);
	if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
}

void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
{
	blkg_rwstat_add(&bfqg->stats.queued, op, -1);
}

void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
{
	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
}

void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
				  u64 io_start_time_ns, unsigned int op)
{
	struct bfqg_stats *stats = &bfqg->stats;
	u64 now = ktime_get_ns();

	if (now > io_start_time_ns)
		blkg_rwstat_add(&stats->service_time, op,
				now - io_start_time_ns);
	if (io_start_time_ns > start_time_ns)
		blkg_rwstat_add(&stats->wait_time, op,
				io_start_time_ns - start_time_ns);
}

#else /* CONFIG_BFQ_CGROUP_DEBUG */

void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
			      unsigned int op) { }
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
				  u64 io_start_time_ns, unsigned int op) { }
void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }

#endif /* CONFIG_BFQ_CGROUP_DEBUG */

#ifdef CONFIG_BFQ_GROUP_IOSCHED
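
/*
 * blk-cgroup policy-related handlers
 * The following functions help in converting between blk-cgroup
 * internal structures and BFQ-specific structures.
 */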
static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct bfq_group, pd) : NULL;
}

struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
{
	return pd_to_blkg(&bfqg->pd);
}

static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
{
	return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq));
}
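
/*
 * bfq_group handlers
 * The following functions help in navigating the bfq_group hierarchy
 * by allowing to find the parent of a bfq_group or the bfq_group
 * associated to a bfq_queue.
 */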
static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
{
	struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;

	return pblkg ? blkg_to_bfqg(pblkg) : NULL;
}

struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
{
	struct bfq_entity *group_entity = bfqq->entity.parent;

	return group_entity ? container_of(group_entity, struct bfq_group,
					   entity) :
			      bfqq->bfqd->root_group;
}
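
/*
 * A bfq_group has its own reference counter, on top of the one of the
 * associated blkg, so that the group can outlive its blkg as long as
 * BFQ still holds pointers to it.
 */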
static void bfqg_get(struct bfq_group *bfqg)
{
	bfqg->ref++;
}

static void bfqg_put(struct bfq_group *bfqg)
{
	bfqg->ref--;

	if (bfqg->ref == 0)
		kfree(bfqg);
}

void bfqg_and_blkg_get(struct bfq_group *bfqg)
{
	/* grab a reference on the bfqg itself, besides the blkg one below */
	bfqg_get(bfqg);

	blkg_get(bfqg_to_blkg(bfqg));
}

void bfqg_and_blkg_put(struct bfq_group *bfqg)
{
	blkg_put(bfqg_to_blkg(bfqg));

	bfqg_put(bfqg);
}

void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
{
	struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);

	if (!bfqg)
		return;

	blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
	blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
}
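
/* @stats = 0 */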
static void bfqg_stats_reset(struct bfqg_stats *stats)
{
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	/* queued stats shouldn't be cleared */
	blkg_rwstat_reset(&stats->merged);
	blkg_rwstat_reset(&stats->service_time);
	blkg_rwstat_reset(&stats->wait_time);
	bfq_stat_reset(&stats->time);
	bfq_stat_reset(&stats->avg_queue_size_sum);
	bfq_stat_reset(&stats->avg_queue_size_samples);
	bfq_stat_reset(&stats->dequeue);
	bfq_stat_reset(&stats->group_wait_time);
	bfq_stat_reset(&stats->idle_time);
	bfq_stat_reset(&stats->empty_time);
#endif
}
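
/* @to += @from */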
static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
{
	if (!to || !from)
		return;

#ifdef CONFIG_BFQ_CGROUP_DEBUG
	/* queued stats are not transferred */
	blkg_rwstat_add_aux(&to->merged, &from->merged);
	blkg_rwstat_add_aux(&to->service_time, &from->service_time);
	blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
	bfq_stat_add_aux(&to->time, &from->time);
	bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
	bfq_stat_add_aux(&to->avg_queue_size_samples,
			 &from->avg_queue_size_samples);
	bfq_stat_add_aux(&to->dequeue, &from->dequeue);
	bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
	bfq_stat_add_aux(&to->idle_time, &from->idle_time);
	bfq_stat_add_aux(&to->empty_time, &from->empty_time);
#endif
}
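
/*
 * Transfer @bfqg's stats to its parent's aux counts so that the ancestors'
 * recursive stats can still account for the group after it goes away.
 */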
static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
{
	struct bfq_group *parent;

	if (!bfqg)
		return;

	parent = bfqg_parent(bfqg);

	lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock);

	if (unlikely(!parent))
		return;

	bfqg_stats_add_aux(&parent->stats, &bfqg->stats);
	bfqg_stats_reset(&bfqg->stats);
}

void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->weight = entity->new_weight;
	entity->orig_weight = entity->new_weight;
	if (bfqq) {
		bfqq->ioprio = bfqq->new_ioprio;
		bfqq->ioprio_class = bfqq->new_ioprio_class;
		/*
		 * Make sure that bfqg and its associated blkg do not
		 * disappear before entity.
		 */
		bfqg_and_blkg_get(bfqg);
	}
	entity->parent = bfqg->my_entity;
	entity->sched_data = &bfqg->sched_data;
}

static void bfqg_stats_exit(struct bfqg_stats *stats)
{
	blkg_rwstat_exit(&stats->bytes);
	blkg_rwstat_exit(&stats->ios);
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	blkg_rwstat_exit(&stats->merged);
	blkg_rwstat_exit(&stats->service_time);
	blkg_rwstat_exit(&stats->wait_time);
	blkg_rwstat_exit(&stats->queued);
	bfq_stat_exit(&stats->time);
	bfq_stat_exit(&stats->avg_queue_size_sum);
	bfq_stat_exit(&stats->avg_queue_size_samples);
	bfq_stat_exit(&stats->dequeue);
	bfq_stat_exit(&stats->group_wait_time);
	bfq_stat_exit(&stats->idle_time);
	bfq_stat_exit(&stats->empty_time);
#endif
}

static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
{
	if (blkg_rwstat_init(&stats->bytes, gfp) ||
	    blkg_rwstat_init(&stats->ios, gfp))
		return -ENOMEM;

#ifdef CONFIG_BFQ_CGROUP_DEBUG
	if (blkg_rwstat_init(&stats->merged, gfp) ||
	    blkg_rwstat_init(&stats->service_time, gfp) ||
	    blkg_rwstat_init(&stats->wait_time, gfp) ||
	    blkg_rwstat_init(&stats->queued, gfp) ||
	    bfq_stat_init(&stats->time, gfp) ||
	    bfq_stat_init(&stats->avg_queue_size_sum, gfp) ||
	    bfq_stat_init(&stats->avg_queue_size_samples, gfp) ||
	    bfq_stat_init(&stats->dequeue, gfp) ||
	    bfq_stat_init(&stats->group_wait_time, gfp) ||
	    bfq_stat_init(&stats->idle_time, gfp) ||
	    bfq_stat_init(&stats->empty_time, gfp)) {
		bfqg_stats_exit(stats);
		return -ENOMEM;
	}
#endif

	return 0;
}

static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
{
	return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
}

static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
{
	return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
}

static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
{
	struct bfq_group_data *bgd;

	bgd = kzalloc(sizeof(*bgd), gfp);
	if (!bgd)
		return NULL;
	return &bgd->pd;
}

static void bfq_cpd_init(struct blkcg_policy_data *cpd)
{
	struct bfq_group_data *d = cpd_to_bfqgd(cpd);

	d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
		CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
}

static void bfq_cpd_free(struct blkcg_policy_data *cpd)
{
	kfree(cpd_to_bfqgd(cpd));
}

static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
					     struct blkcg *blkcg)
{
	struct bfq_group *bfqg;

	bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node);
	if (!bfqg)
		return NULL;

	if (bfqg_stats_init(&bfqg->stats, gfp)) {
		kfree(bfqg);
		return NULL;
	}

	/* take a reference on the bfqg itself; dropped in bfq_pd_free() */
	bfqg_get(bfqg);
	return &bfqg->pd;
}

static void bfq_pd_init(struct blkg_policy_data *pd)
{
	struct blkcg_gq *blkg = pd_to_blkg(pd);
	struct bfq_group *bfqg = blkg_to_bfqg(blkg);
	struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
	struct bfq_entity *entity = &bfqg->entity;
	struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);

	entity->orig_weight = entity->weight = entity->new_weight = d->weight;
	entity->my_sched_data = &bfqg->sched_data;
	bfqg->my_entity = entity;

	bfqg->bfqd = bfqd;
	bfqg->active_entities = 0;
	bfqg->rq_pos_tree = RB_ROOT;
}

static void bfq_pd_free(struct blkg_policy_data *pd)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	bfqg_stats_exit(&bfqg->stats);
	bfqg_put(bfqg);
}

static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	bfqg_stats_reset(&bfqg->stats);
}

static void bfq_group_set_parent(struct bfq_group *bfqg,
				 struct bfq_group *parent)
{
	struct bfq_entity *entity;

	entity = &bfqg->entity;
	entity->parent = parent->my_entity;
	entity->sched_data = &parent->sched_data;
}

static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
					 struct blkcg *blkcg)
{
	struct blkcg_gq *blkg;

	blkg = blkg_lookup(blkcg, bfqd->queue);
	if (likely(blkg))
		return blkg_to_bfqg(blkg);
	return NULL;
}

struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
				     struct blkcg *blkcg)
{
	struct bfq_group *bfqg, *parent;
	struct bfq_entity *entity;

	bfqg = bfq_lookup_bfqg(bfqd, blkcg);

	if (unlikely(!bfqg))
		return NULL;

	/*
	 * Update the chain of bfq_groups as we might be handling a leaf
	 * group which, along with some of its ancestors, has not yet been
	 * connected to the scheduling hierarchy.
	 */
	entity = &bfqg->entity;
	for_each_entity(entity) {
		struct bfq_group *curr_bfqg = container_of(entity,
						struct bfq_group, entity);
		if (curr_bfqg != bfqd->root_group) {
			parent = bfqg_parent(curr_bfqg);
			if (!parent)
				parent = bfqd->root_group;
			bfq_group_set_parent(curr_bfqg, parent);
		}
	}

	return bfqg;
}
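
/**
 * bfq_bfqq_move - migrate @bfqq to @bfqg.
 * @bfqd: queue descriptor.
 * @bfqq: the queue to move.
 * @bfqg: the group to move to.
 *
 * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
 * it on the new one. Avoid putting the entity on the old group idle tree.
 *
 * Must be called under the scheduler lock, to make sure that the blkg
 * owning @bfqg does not disappear (see comments in bfq_bic_update_cgroup
 * on guaranteeing the consistency of blkg objects).
 */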
void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		   struct bfq_group *bfqg)
{
	struct bfq_entity *entity = &bfqq->entity;

	/*
	 * Get an extra reference, to prevent bfqq from being freed by
	 * the expiration or deactivation performed below.
	 */
	bfqq->ref++;

	/*
	 * If bfqq is empty, then bfq_bfqq_expire also invokes
	 * bfq_del_bfqq_busy, thereby removing bfqq and its entity
	 * from the data structures of the current group. Otherwise
	 * bfqq is removed explicitly with bfq_deactivate_bfqq below.
	 */
	if (bfqq == bfqd->in_service_queue)
		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
				false, BFQQE_PREEMPTED);

	if (bfq_bfqq_busy(bfqq))
		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
	else if (entity->on_st_or_in_serv)
		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
	bfqg_and_blkg_put(bfqq_group(bfqq));

	entity->parent = bfqg->my_entity;
	entity->sched_data = &bfqg->sched_data;
	/* pin down bfqg and its associated blkg */
	bfqg_and_blkg_get(bfqg);

	if (bfq_bfqq_busy(bfqq)) {
		if (unlikely(!bfqd->nonrot_with_queueing))
			bfq_pos_tree_add_move(bfqd, bfqq);
		bfq_activate_bfqq(bfqd, bfqq);
	}

	if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
		bfq_schedule_dispatch(bfqd);

	/* release the extra reference taken above */
	bfq_put_queue(bfqq);
}
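
/**
 * __bfq_bic_change_cgroup - move @bic to @blkcg.
 * @bfqd: the queue descriptor.
 * @bic: the bic to move.
 * @blkcg: the blk-cgroup to move to.
 *
 * Move bic to blkcg, assuming that bfqd->lock is held; which makes
 * sure that the reference to the cgroup is valid across the call (see
 * comments in bfq_bic_update_cgroup on this issue).
 */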
static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
						 struct bfq_io_cq *bic,
						 struct blkcg *blkcg)
{
	struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
	struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
	struct bfq_group *bfqg;
	struct bfq_entity *entity;

	bfqg = bfq_find_set_group(bfqd, blkcg);

	if (unlikely(!bfqg))
		bfqg = bfqd->root_group;

	if (async_bfqq) {
		entity = &async_bfqq->entity;

		if (entity->sched_data != &bfqg->sched_data) {
			bic_set_bfqq(bic, NULL, 0);
			bfq_release_process_ref(bfqd, async_bfqq);
		}
	}

	if (sync_bfqq) {
		entity = &sync_bfqq->entity;
		if (entity->sched_data != &bfqg->sched_data)
			bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
	}

	return bfqg;
}

void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
	struct bfq_data *bfqd = bic_to_bfqd(bic);
	struct bfq_group *bfqg = NULL;
	uint64_t serial_nr;

	rcu_read_lock();
	serial_nr = __bio_blkcg(bio)->css.serial_nr;

	/*
	 * Check whether blkcg has changed. The condition may trigger
	 * spuriously on a newly created bic, but there's no harm.
	 */
	if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
		goto out;

	bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
	/*
	 * Update blkg_path for bfq_log_* functions. The path is cached
	 * here, while the blkg is certainly alive, because blkg objects
	 * are protected by the request_queue lock rather than by the
	 * scheduler lock (bfqd->lock): the blkg associated with this
	 * bfq_group may therefore be destroyed while BFQ still holds a
	 * reference to the bfq_group (BFQ takes a reference only on the
	 * bfq_group, not on the blkg). Caching the path lets the logging
	 * functions keep using it safely afterwards.
	 */
	blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
	bic->blkcg_serial_nr = serial_nr;
out:
	rcu_read_unlock();
}
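
/**
 * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
 * @st: the service tree being flushed.
 */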
static void bfq_flush_idle_tree(struct bfq_service_tree *st)
{
	struct bfq_entity *entity = st->first_idle;

	for (; entity ; entity = st->first_idle)
		__bfq_deactivate_entity(entity, false);
}
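
/**
 * bfq_reparent_leaf_entity - move leaf entity to the root_group.
 * @bfqd: the device data structure with the root group.
 * @entity: the entity to move, if entity is a leaf; or the parent entity
 *	    of an active leaf entity to move, if entity is not a leaf.
 * @ioprio_class: I/O priority class to reparent.
 */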
static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
				     struct bfq_entity *entity,
				     int ioprio_class)
{
	struct bfq_queue *bfqq;
	struct bfq_entity *child_entity = entity;

	while (child_entity->my_sched_data) {
		struct bfq_sched_data *child_sd = child_entity->my_sched_data;
		struct bfq_service_tree *child_st = child_sd->service_tree +
			ioprio_class;
		struct rb_root *child_active = &child_st->active;

		child_entity = bfq_entity_of(rb_first(child_active));

		if (!child_entity)
			child_entity = child_sd->in_service_entity;
	}

	bfqq = bfq_entity_to_bfqq(child_entity);
	bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
}
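
/**
 * bfq_reparent_active_queues - move to the root group all active queues.
 * @bfqd: the device data structure with the root group.
 * @bfqg: the group to move from.
 * @st: the service tree to start the search from.
 * @ioprio_class: I/O priority class to reparent.
 */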
static void bfq_reparent_active_queues(struct bfq_data *bfqd,
				       struct bfq_group *bfqg,
				       struct bfq_service_tree *st,
				       int ioprio_class)
{
	struct rb_root *active = &st->active;
	struct bfq_entity *entity;

	while ((entity = bfq_entity_of(rb_first(active))))
		bfq_reparent_leaf_entity(bfqd, entity, ioprio_class);

	if (bfqg->sched_data.in_service_entity)
		bfq_reparent_leaf_entity(bfqd,
					 bfqg->sched_data.in_service_entity,
					 ioprio_class);
}
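
/**
 * bfq_pd_offline - deactivate the entity associated with @pd
 *		    and reparent its children entities.
 * @pd: descriptor of the policy going offline.
 */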
static void bfq_pd_offline(struct blkg_policy_data *pd)
{
	struct bfq_service_tree *st;
	struct bfq_group *bfqg = pd_to_bfqg(pd);
	struct bfq_data *bfqd = bfqg->bfqd;
	struct bfq_entity *entity = bfqg->my_entity;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&bfqd->lock, flags);

	if (!entity)
		goto put_async_queues;

	/*
	 * Empty all service_trees belonging to this group before
	 * deactivating the group itself.
	 */
	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
		st = bfqg->sched_data.service_tree + i;

		/*
		 * Some queues may still be active (busy) upon group
		 * destruction, if the corresponding processes have been
		 * forced to terminate. Move all the leaf entities
		 * corresponding to these queues to the root_group. The
		 * group may also have an in-service entity, which is
		 * disconnected from the active tree: it must be moved too.
		 * There is no need to put the sync queues, as the
		 * scheduler has taken no reference.
		 */
		bfq_reparent_active_queues(bfqd, bfqg, st, i);

		/*
		 * The idle tree may still contain bfq_queues belonging
		 * to exited tasks, because they never migrated to a
		 * different cgroup from the one being destroyed now.
		 * In addition, bfq_reparent_active_queues() may itself
		 * add entities to the idle tree: this happens if, in
		 * some of the calls to bfq_bfqq_move() it performs, the
		 * queue to move is empty and gets expired.
		 */
		bfq_flush_idle_tree(st);
	}

	__bfq_deactivate_entity(entity, false);

put_async_queues:
	bfq_put_async_queues(bfqd, bfqg);

	spin_unlock_irqrestore(&bfqd->lock, flags);
	/*
	 * @blkg is going offline and will be ignored by
	 * blkg_[rw]stat_recursive_sum(). Transfer the stats to the
	 * parent so that they don't get lost.
	 */
	bfqg_stats_xfer_dead(bfqg);
}

void bfq_end_wr_async(struct bfq_data *bfqd)
{
	struct blkcg_gq *blkg;

	list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
		struct bfq_group *bfqg = blkg_to_bfqg(blkg);

		bfq_end_wr_async_queues(bfqd, bfqg);
	}
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
	unsigned int val = 0;

	if (bfqgd)
		val = bfqgd->weight;

	seq_printf(sf, "%u\n", val);

	return 0;
}

static u64 bfqg_prfill_weight_device(struct seq_file *sf,
				     struct blkg_policy_data *pd, int off)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	if (!bfqg->entity.dev_weight)
		return 0;
	return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight);
}

static int bfq_io_show_weight(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);

	seq_printf(sf, "default %u\n", bfqgd->weight);
	blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device,
			  &blkcg_policy_bfq, 0, false);
	return 0;
}

static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_weight)
{
	weight = dev_weight ?: weight;

	bfqg->entity.dev_weight = dev_weight;
	/*
	 * Setting the prio_changed flag of the entity to 1 with
	 * new_weight == weight would re-set the value of the weight to
	 * its ioprio mapping. Set the flag only if necessary.
	 */
	if ((unsigned short)weight != bfqg->entity.new_weight) {
		bfqg->entity.new_weight = (unsigned short)weight;
		/*
		 * Make sure that the above new value has been stored in
		 * bfqg->entity.new_weight before setting the
		 * prio_changed flag. In fact, this flag may be read
		 * asynchronously (in critical sections protected by a
		 * different lock than the one held here), and finding
		 * this flag set may trigger the code that updates
		 * parameters depending on bfqg->entity.new_weight (in
		 * __bfq_entity_update_weight_prio). This barrier makes
		 * sure the new value is seen there.
		 */
		smp_wmb();
		bfqg->entity.prio_changed = 1;
	}
}

static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
				    struct cftype *cftype,
				    u64 val)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
	struct blkcg_gq *blkg;
	int ret = -ERANGE;

	if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
		return ret;

	ret = 0;
	spin_lock_irq(&blkcg->lock);
	bfqgd->weight = (unsigned short)val;
	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
		struct bfq_group *bfqg = blkg_to_bfqg(blkg);

		if (bfqg)
			bfq_group_set_weight(bfqg, val, 0);
	}
	spin_unlock_irq(&blkcg->lock);

	return ret;
}

static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
					char *buf, size_t nbytes,
					loff_t off)
{
	int ret;
	struct blkg_conf_ctx ctx;
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct bfq_group *bfqg;
	u64 v;

	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
	if (ret)
		return ret;

	if (sscanf(ctx.body, "%llu", &v) == 1) {
		/* require "default" to reset the per-device weight */
		ret = -ERANGE;
		if (!v)
			goto out;
	} else if (!strcmp(strim(ctx.body), "default")) {
		v = 0;
	} else {
		ret = -EINVAL;
		goto out;
	}

	bfqg = blkg_to_bfqg(ctx.blkg);

	ret = -ERANGE;
	if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) {
		bfq_group_set_weight(bfqg, bfqg->entity.weight, v);
		ret = 0;
	}
out:
	blkg_conf_finish(&ctx);
	return ret ?: nbytes;
}

static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
				 char *buf, size_t nbytes,
				 loff_t off)
{
	char *endp;
	int ret;
	u64 v;

	buf = strim(buf);

	/* "WEIGHT" or "default WEIGHT" sets the default weight */
	v = simple_strtoull(buf, &endp, 0);
	if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
		ret = bfq_io_set_weight_legacy(of_css(of), NULL, v);
		return ret ?: nbytes;
	}

	return bfq_io_set_device_weight(of, buf, nbytes, off);
}

static int bfqg_print_rwstat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
			  &blkcg_policy_bfq, seq_cft(sf)->private, true);
	return 0;
}

static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
					struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat_sample sum;

	blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
	return __blkg_prfill_rwstat(sf, pd, &sum);
}

static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
			  seq_cft(sf)->private, true);
	return 0;
}

#ifdef CONFIG_BFQ_CGROUP_DEBUG
static int bfqg_print_stat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
			  &blkcg_policy_bfq, seq_cft(sf)->private, false);
	return 0;
}

static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	struct blkcg_gq *blkg = pd_to_blkg(pd);
	struct blkcg_gq *pos_blkg;
	struct cgroup_subsys_state *pos_css;
	u64 sum = 0;

	lockdep_assert_held(&blkg->q->queue_lock);

	rcu_read_lock();
	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
		struct bfq_stat *stat;

		if (!pos_blkg->online)
			continue;

		stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off;
		sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt);
	}
	rcu_read_unlock();

	return __blkg_prfill_u64(sf, pd, sum);
}

static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
			  seq_cft(sf)->private, false);
	return 0;
}

static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
			       int off)
{
	struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg);
	u64 sum = blkg_rwstat_total(&bfqg->stats.bytes);

	return __blkg_prfill_u64(sf, pd, sum >> 9);
}

static int bfqg_print_stat_sectors(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false);
	return 0;
}

static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
					 struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat_sample tmp;

	blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq,
			offsetof(struct bfq_group, stats.bytes), &tmp);

	return __blkg_prfill_u64(sf, pd,
		(tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9);
}

static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0,
			  false);
	return 0;
}

static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);
	u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples);
	u64 v = 0;

	if (samples) {
		v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum);
		v = div64_u64(v, samples);
	}
	__blkg_prfill_u64(sf, pd, v);
	return 0;
}

static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
			  0, false);
	return 0;
}
#endif /* CONFIG_BFQ_CGROUP_DEBUG */

struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
	int ret;

	ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
	if (ret)
		return NULL;

	return blkg_to_bfqg(bfqd->queue->root_blkg);
}

struct blkcg_policy blkcg_policy_bfq = {
	.dfl_cftypes		= bfq_blkg_files,
	.legacy_cftypes		= bfq_blkcg_legacy_files,

	.cpd_alloc_fn		= bfq_cpd_alloc,
	.cpd_init_fn		= bfq_cpd_init,
	.cpd_bind_fn		= bfq_cpd_init,
	.cpd_free_fn		= bfq_cpd_free,

	.pd_alloc_fn		= bfq_pd_alloc,
	.pd_init_fn		= bfq_pd_init,
	.pd_offline_fn		= bfq_pd_offline,
	.pd_free_fn		= bfq_pd_free,
	.pd_reset_stats_fn	= bfq_pd_reset_stats,
};

struct cftype bfq_blkcg_legacy_files[] = {
	{
		.name = "bfq.weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight_legacy,
		.write_u64 = bfq_io_set_weight_legacy,
	},
	{
		.name = "bfq.weight_device",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight,
		.write = bfq_io_set_weight,
	},
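
	/* statistics, covers only the tasks in the bfqg */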
	{
		.name = "bfq.io_service_bytes",
		.private = offsetof(struct bfq_group, stats.bytes),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_serviced",
		.private = offsetof(struct bfq_group, stats.ios),
		.seq_show = bfqg_print_rwstat,
	},
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	{
		.name = "bfq.time",
		.private = offsetof(struct bfq_group, stats.time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.sectors",
		.seq_show = bfqg_print_stat_sectors,
	},
	{
		.name = "bfq.io_service_time",
		.private = offsetof(struct bfq_group, stats.service_time),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_wait_time",
		.private = offsetof(struct bfq_group, stats.wait_time),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_merged",
		.private = offsetof(struct bfq_group, stats.merged),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_queued",
		.private = offsetof(struct bfq_group, stats.queued),
		.seq_show = bfqg_print_rwstat,
	},
#endif /* CONFIG_BFQ_CGROUP_DEBUG */
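
	/* the same statistics which cover the bfqg and its descendants */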
	{
		.name = "bfq.io_service_bytes_recursive",
		.private = offsetof(struct bfq_group, stats.bytes),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_serviced_recursive",
		.private = offsetof(struct bfq_group, stats.ios),
		.seq_show = bfqg_print_rwstat_recursive,
	},
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	{
		.name = "bfq.time_recursive",
		.private = offsetof(struct bfq_group, stats.time),
		.seq_show = bfqg_print_stat_recursive,
	},
	{
		.name = "bfq.sectors_recursive",
		.seq_show = bfqg_print_stat_sectors_recursive,
	},
	{
		.name = "bfq.io_service_time_recursive",
		.private = offsetof(struct bfq_group, stats.service_time),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_wait_time_recursive",
		.private = offsetof(struct bfq_group, stats.wait_time),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_merged_recursive",
		.private = offsetof(struct bfq_group, stats.merged),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_queued_recursive",
		.private = offsetof(struct bfq_group, stats.queued),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.avg_queue_size",
		.seq_show = bfqg_print_avg_queue_size,
	},
	{
		.name = "bfq.group_wait_time",
		.private = offsetof(struct bfq_group, stats.group_wait_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.idle_time",
		.private = offsetof(struct bfq_group, stats.idle_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.empty_time",
		.private = offsetof(struct bfq_group, stats.empty_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.dequeue",
		.private = offsetof(struct bfq_group, stats.dequeue),
		.seq_show = bfqg_print_stat,
	},
#endif /* CONFIG_BFQ_CGROUP_DEBUG */
	{ }
};

struct cftype bfq_blkg_files[] = {
	{
		.name = "bfq.weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight,
		.write = bfq_io_set_weight,
	},
	{}
};

#else	/* CONFIG_BFQ_GROUP_IOSCHED */

void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		   struct bfq_group *bfqg) {}

void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->weight = entity->new_weight;
	entity->orig_weight = entity->new_weight;
	if (bfqq) {
		bfqq->ioprio = bfqq->new_ioprio;
		bfqq->ioprio_class = bfqq->new_ioprio_class;
	}
	entity->sched_data = &bfqg->sched_data;
}

void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {}

void bfq_end_wr_async(struct bfq_data *bfqd)
{
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
{
	return bfqd->root_group;
}

struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
{
	return bfqq->bfqd->root_group;
}

void bfqg_and_blkg_get(struct bfq_group *bfqg) {}

void bfqg_and_blkg_put(struct bfq_group *bfqg) {}

struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
	struct bfq_group *bfqg;
	int i;

	bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
	if (!bfqg)
		return NULL;

	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
		bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;

	return bfqg;
}
#endif	/* CONFIG_BFQ_GROUP_IOSCHED */