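/*
 * cgroups support for the BFQ I/O scheduler.
 */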
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/cgroup.h>
#include <linux/elevator.h>
#include <linux/ktime.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
#include <linux/sbitmap.h>
#include <linux/delay.h>

#include "bfq-iosched.h"

#ifdef CONFIG_BFQ_CGROUP_DEBUG
static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp)
{
	int ret;

	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
	if (ret)
		return ret;

	atomic64_set(&stat->aux_cnt, 0);
	return 0;
}

static void bfq_stat_exit(struct bfq_stat *stat)
{
	percpu_counter_destroy(&stat->cpu_cnt);
}

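/*
 * bfq_stat_add - add a value to a bfq_stat
 * @stat: target bfq_stat
 * @val: value to add
 */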
static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val)
{
	percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
}

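/*
 * bfq_stat_read - read the current value of a bfq_stat
 * @stat: bfq_stat to read
 */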
static inline uint64_t bfq_stat_read(struct bfq_stat *stat)
{
	return percpu_counter_sum_positive(&stat->cpu_cnt);
}

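/*
 * bfq_stat_reset - reset a bfq_stat
 * @stat: bfq_stat to reset
 */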
static inline void bfq_stat_reset(struct bfq_stat *stat)
{
	percpu_counter_set(&stat->cpu_cnt, 0);
	atomic64_set(&stat->aux_cnt, 0);
}

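/*
 * bfq_stat_add_aux - add a bfq_stat into another's aux count
 * @to: the destination bfq_stat
 * @from: the source stat, whose sum (including its own aux count) is added
 */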
static inline void bfq_stat_add_aux(struct bfq_stat *to,
				    struct bfq_stat *from)
{
	atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt),
		     &to->aux_cnt);
}

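/*
 * blkg_prfill_stat - prfill callback that prints the bfq_stat located at
 * offset @off inside the policy data, for use with blkcg_print_blkgs()
 */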
static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
			    int off)
{
	return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off));
}

enum bfqg_stats_flags {
	BFQG_stats_waiting = 0,
	BFQG_stats_idling,
	BFQG_stats_empty,
};

#define BFQG_FLAG_FNS(name)						\
static void bfqg_stats_mark_##name(struct bfqg_stats *stats)		\
{									\
	stats->flags |= (1 << BFQG_stats_##name);			\
}									\
static void bfqg_stats_clear_##name(struct bfqg_stats *stats)		\
{									\
	stats->flags &= ~(1 << BFQG_stats_##name);			\
}									\
static int bfqg_stats_##name(struct bfqg_stats *stats)			\
{									\
	return (stats->flags & (1 << BFQG_stats_##name)) != 0;		\
}									\

BFQG_FLAG_FNS(waiting)
BFQG_FLAG_FNS(idling)
BFQG_FLAG_FNS(empty)
#undef BFQG_FLAG_FNS

static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
{
	u64 now;

	if (!bfqg_stats_waiting(stats))
		return;

	now = ktime_get_ns();
	if (now > stats->start_group_wait_time)
		bfq_stat_add(&stats->group_wait_time,
			     now - stats->start_group_wait_time);
	bfqg_stats_clear_waiting(stats);
}

static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
						 struct bfq_group *curr_bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (bfqg_stats_waiting(stats))
		return;
	if (bfqg == curr_bfqg)
		return;
	stats->start_group_wait_time = ktime_get_ns();
	bfqg_stats_mark_waiting(stats);
}

static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
{
	u64 now;

	if (!bfqg_stats_empty(stats))
		return;

	now = ktime_get_ns();
	if (now > stats->start_empty_time)
		bfq_stat_add(&stats->empty_time,
			     now - stats->start_empty_time);
	bfqg_stats_clear_empty(stats);
}

void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
{
	bfq_stat_add(&bfqg->stats.dequeue, 1);
}

void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (blkg_rwstat_total(&stats->queued))
		return;

	if (bfqg_stats_empty(stats))
		return;

	stats->start_empty_time = ktime_get_ns();
	bfqg_stats_mark_empty(stats);
}

void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (bfqg_stats_idling(stats)) {
		u64 now = ktime_get_ns();

		if (now > stats->start_idle_time)
			bfq_stat_add(&stats->idle_time,
				     now - stats->start_idle_time);
		bfqg_stats_clear_idling(stats);
	}
}

void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	stats->start_idle_time = ktime_get_ns();
	bfqg_stats_mark_idling(stats);
}

void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	bfq_stat_add(&stats->avg_queue_size_sum,
		     blkg_rwstat_total(&stats->queued));
	bfq_stat_add(&stats->avg_queue_size_samples, 1);
	bfqg_stats_update_group_wait_time(stats);
}

void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
			      unsigned int op)
{
	blkg_rwstat_add(&bfqg->stats.queued, op, 1);
	bfqg_stats_end_empty_time(&bfqg->stats);
	if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
}

void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
{
	blkg_rwstat_add(&bfqg->stats.queued, op, -1);
}

void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
{
	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
}

void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
				  u64 io_start_time_ns, unsigned int op)
{
	struct bfqg_stats *stats = &bfqg->stats;
	u64 now = ktime_get_ns();

	if (now > io_start_time_ns)
		blkg_rwstat_add(&stats->service_time, op,
				now - io_start_time_ns);
	if (io_start_time_ns > start_time_ns)
		blkg_rwstat_add(&stats->wait_time, op,
				io_start_time_ns - start_time_ns);
}

#else	/* CONFIG_BFQ_CGROUP_DEBUG */

void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
			      unsigned int op) { }
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
				  u64 io_start_time_ns, unsigned int op) { }
void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }

#endif	/* CONFIG_BFQ_CGROUP_DEBUG */

#ifdef CONFIG_BFQ_GROUP_IOSCHED

/*
 * Conversion helpers between the blk-cgroup policy data structures and the
 * bfq-specific bfq_group variants.
 */
static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct bfq_group, pd) : NULL;
}

struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
{
	return pd_to_blkg(&bfqg->pd);
}

static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
{
	return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq));
}

/*
 * Hierarchy helpers: bfqg_parent() returns the parent bfq_group of a group
 * (NULL for the root), and bfqq_group() returns the bfq_group a bfq_queue
 * belongs to (the root group if the queue has no parent entity).
 */
static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
{
	struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;

	return pblkg ? blkg_to_bfqg(pblkg) : NULL;
}

struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
{
	struct bfq_entity *group_entity = bfqq->entity.parent;

	return group_entity ? container_of(group_entity, struct bfq_group,
					   entity) :
			      bfqq->bfqd->root_group;
}

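/*
 * bfqg_get()/bfqg_put() manipulate the reference counter of the bfq_group
 * itself, while bfqg_and_blkg_get()/bfqg_and_blkg_put() additionally take
 * and release a reference on the associated blkcg_gq.
 */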
static void bfqg_get(struct bfq_group *bfqg)
{
	bfqg->ref++;
}

static void bfqg_put(struct bfq_group *bfqg)
{
	bfqg->ref--;

	if (bfqg->ref == 0)
		kfree(bfqg);
}

static void bfqg_and_blkg_get(struct bfq_group *bfqg)
{
	bfqg_get(bfqg);

	blkg_get(bfqg_to_blkg(bfqg));
}

void bfqg_and_blkg_put(struct bfq_group *bfqg)
{
	blkg_put(bfqg_to_blkg(bfqg));

	bfqg_put(bfqg);
}

void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
{
	struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);

	if (!bfqg)
		return;

	blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
	blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
}

/* @stats = 0 */
static void bfqg_stats_reset(struct bfqg_stats *stats)
{
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	/* queued stats shouldn't be cleared */
	blkg_rwstat_reset(&stats->merged);
	blkg_rwstat_reset(&stats->service_time);
	blkg_rwstat_reset(&stats->wait_time);
	bfq_stat_reset(&stats->time);
	bfq_stat_reset(&stats->avg_queue_size_sum);
	bfq_stat_reset(&stats->avg_queue_size_samples);
	bfq_stat_reset(&stats->dequeue);
	bfq_stat_reset(&stats->group_wait_time);
	bfq_stat_reset(&stats->idle_time);
	bfq_stat_reset(&stats->empty_time);
#endif
}

/* @to += @from */
static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
{
	if (!to || !from)
		return;

#ifdef CONFIG_BFQ_CGROUP_DEBUG
	/* queued stats shouldn't be cleared */
	blkg_rwstat_add_aux(&to->merged, &from->merged);
	blkg_rwstat_add_aux(&to->service_time, &from->service_time);
	blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
	bfq_stat_add_aux(&to->time, &from->time);
	bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
	bfq_stat_add_aux(&to->avg_queue_size_samples,
			 &from->avg_queue_size_samples);
	bfq_stat_add_aux(&to->dequeue, &from->dequeue);
	bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
	bfq_stat_add_aux(&to->idle_time, &from->idle_time);
	bfq_stat_add_aux(&to->empty_time, &from->empty_time);
#endif
}

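/*
 * Transfer @bfqg's stats to its parent's aux counts, so that the ancestors'
 * recursive stats keep accounting for this group after it goes away.
 */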
static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
{
	struct bfq_group *parent;

	if (!bfqg)
		return;

	parent = bfqg_parent(bfqg);

	lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock);

	if (unlikely(!parent))
		return;

	bfqg_stats_add_aux(&parent->stats, &bfqg->stats);
	bfqg_stats_reset(&bfqg->stats);
}

void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->weight = entity->new_weight;
	entity->orig_weight = entity->new_weight;
	if (bfqq) {
		bfqq->ioprio = bfqq->new_ioprio;
		bfqq->ioprio_class = bfqq->new_ioprio_class;
		/*
		 * Make sure that bfqg and its associated blkg do not
		 * disappear before the entity.
		 */
		bfqg_and_blkg_get(bfqg);
	}
	entity->parent = bfqg->my_entity;
	entity->sched_data = &bfqg->sched_data;
}

static void bfqg_stats_exit(struct bfqg_stats *stats)
{
	blkg_rwstat_exit(&stats->bytes);
	blkg_rwstat_exit(&stats->ios);
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	blkg_rwstat_exit(&stats->merged);
	blkg_rwstat_exit(&stats->service_time);
	blkg_rwstat_exit(&stats->wait_time);
	blkg_rwstat_exit(&stats->queued);
	bfq_stat_exit(&stats->time);
	bfq_stat_exit(&stats->avg_queue_size_sum);
	bfq_stat_exit(&stats->avg_queue_size_samples);
	bfq_stat_exit(&stats->dequeue);
	bfq_stat_exit(&stats->group_wait_time);
	bfq_stat_exit(&stats->idle_time);
	bfq_stat_exit(&stats->empty_time);
#endif
}

static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
{
	if (blkg_rwstat_init(&stats->bytes, gfp) ||
	    blkg_rwstat_init(&stats->ios, gfp))
		return -ENOMEM;

#ifdef CONFIG_BFQ_CGROUP_DEBUG
	if (blkg_rwstat_init(&stats->merged, gfp) ||
	    blkg_rwstat_init(&stats->service_time, gfp) ||
	    blkg_rwstat_init(&stats->wait_time, gfp) ||
	    blkg_rwstat_init(&stats->queued, gfp) ||
	    bfq_stat_init(&stats->time, gfp) ||
	    bfq_stat_init(&stats->avg_queue_size_sum, gfp) ||
	    bfq_stat_init(&stats->avg_queue_size_samples, gfp) ||
	    bfq_stat_init(&stats->dequeue, gfp) ||
	    bfq_stat_init(&stats->group_wait_time, gfp) ||
	    bfq_stat_init(&stats->idle_time, gfp) ||
	    bfq_stat_init(&stats->empty_time, gfp)) {
		bfqg_stats_exit(stats);
		return -ENOMEM;
	}
#endif

	return 0;
}

static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
{
	return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
}

static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
{
	return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
}

static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
{
	struct bfq_group_data *bgd;

	bgd = kzalloc(sizeof(*bgd), gfp);
	if (!bgd)
		return NULL;
	return &bgd->pd;
}

static void bfq_cpd_init(struct blkcg_policy_data *cpd)
{
	struct bfq_group_data *d = cpd_to_bfqgd(cpd);

	d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
		CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
}

static void bfq_cpd_free(struct blkcg_policy_data *cpd)
{
	kfree(cpd_to_bfqgd(cpd));
}

static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
					     struct blkcg *blkcg)
{
	struct bfq_group *bfqg;

	bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node);
	if (!bfqg)
		return NULL;

	if (bfqg_stats_init(&bfqg->stats, gfp)) {
		kfree(bfqg);
		return NULL;
	}

	/* take the initial reference on the group; dropped in bfq_pd_free() */
	bfqg_get(bfqg);
	return &bfqg->pd;
}

static void bfq_pd_init(struct blkg_policy_data *pd)
{
	struct blkcg_gq *blkg = pd_to_blkg(pd);
	struct bfq_group *bfqg = blkg_to_bfqg(blkg);
	struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
	struct bfq_entity *entity = &bfqg->entity;
	struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);

	entity->orig_weight = entity->weight = entity->new_weight = d->weight;
	entity->my_sched_data = &bfqg->sched_data;
	bfqg->my_entity = entity;

	bfqg->bfqd = bfqd;
	bfqg->active_entities = 0;
	bfqg->rq_pos_tree = RB_ROOT;
}

static void bfq_pd_free(struct blkg_policy_data *pd)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	bfqg_stats_exit(&bfqg->stats);
	bfqg_put(bfqg);
}

static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	bfqg_stats_reset(&bfqg->stats);
}

static void bfq_group_set_parent(struct bfq_group *bfqg,
				 struct bfq_group *parent)
{
	struct bfq_entity *entity;

	entity = &bfqg->entity;
	entity->parent = parent->my_entity;
	entity->sched_data = &parent->sched_data;
}

static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
					 struct blkcg *blkcg)
{
	struct blkcg_gq *blkg;

	blkg = blkg_lookup(blkcg, bfqd->queue);
	if (likely(blkg))
		return blkg_to_bfqg(blkg);
	return NULL;
}

struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
				     struct blkcg *blkcg)
{
	struct bfq_group *bfqg, *parent;
	struct bfq_entity *entity;

	bfqg = bfq_lookup_bfqg(bfqd, blkcg);

	if (unlikely(!bfqg))
		return NULL;

	/*
	 * Walk up the group hierarchy and make sure every non-root group
	 * found on the way has its parent link set up.
	 */
	entity = &bfqg->entity;
	for_each_entity(entity) {
		struct bfq_group *curr_bfqg = container_of(entity,
						struct bfq_group, entity);
		if (curr_bfqg != bfqd->root_group) {
			parent = bfqg_parent(curr_bfqg);
			if (!parent)
				parent = bfqd->root_group;
			bfq_group_set_parent(curr_bfqg, parent);
		}
	}

	return bfqg;
}

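/**
 * bfq_bfqq_move - migrate @bfqq to @bfqg.
 * @bfqd: queue descriptor.
 * @bfqq: the queue to move.
 * @bfqg: the group to move to.
 *
 * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
 * it on the new one.
 */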
void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		   struct bfq_group *bfqg)
{
	struct bfq_entity *entity = &bfqq->entity;

	/*
	 * Take an extra reference, so that bfqq cannot be freed by the
	 * expire/deactivate calls below while we are still using it.
	 */
	bfqq->ref++;

	/*
	 * If bfqq is the queue currently in service, expire it first, so
	 * that it can be safely detached from its old group.
	 */
	if (bfqq == bfqd->in_service_queue)
		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
				false, BFQQE_PREEMPTED);

	if (bfq_bfqq_busy(bfqq))
		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
	else if (entity->on_st_or_in_serv)
		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
	bfqg_and_blkg_put(bfqq_group(bfqq));

	entity->parent = bfqg->my_entity;
	entity->sched_data = &bfqg->sched_data;
	/* pin down the new group and its associated blkg */
	bfqg_and_blkg_get(bfqg);

	if (bfq_bfqq_busy(bfqq)) {
		if (unlikely(!bfqd->nonrot_with_queueing))
			bfq_pos_tree_add_move(bfqd, bfqq);
		bfq_activate_bfqq(bfqd, bfqq);
	}

	if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
		bfq_schedule_dispatch(bfqd);

	/* release the extra reference taken above */
	bfq_put_queue(bfqq);
}

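/**
 * __bfq_bic_change_cgroup - move @bic to @blkcg.
 * @bfqd: the queue descriptor.
 * @bic: the bic to move.
 * @blkcg: the blk-cgroup to move to.
 *
 * Release the bic's async queue and move its sync queue, so that the
 * queues end up in the bfq_group associated with @blkcg.
 */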
static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
						 struct bfq_io_cq *bic,
						 struct blkcg *blkcg)
{
	struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
	struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
	struct bfq_group *bfqg;
	struct bfq_entity *entity;

	bfqg = bfq_find_set_group(bfqd, blkcg);

	if (unlikely(!bfqg))
		bfqg = bfqd->root_group;

	if (async_bfqq) {
		entity = &async_bfqq->entity;

		if (entity->sched_data != &bfqg->sched_data) {
			bic_set_bfqq(bic, NULL, 0);
			bfq_release_process_ref(bfqd, async_bfqq);
		}
	}

	if (sync_bfqq) {
		entity = &sync_bfqq->entity;
		if (entity->sched_data != &bfqg->sched_data)
			bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
	}

	return bfqg;
}

void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
	struct bfq_data *bfqd = bic_to_bfqd(bic);
	struct bfq_group *bfqg = NULL;
	uint64_t serial_nr;

	rcu_read_lock();
	serial_nr = __bio_blkcg(bio)->css.serial_nr;

	/* nothing to do if the blkcg did not change */
	if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
		goto out;

	bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));

	/* refresh the cached blkg path of the new group */
	blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
	bic->blkcg_serial_nr = serial_nr;
out:
	rcu_read_unlock();
}

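/**
 * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
 * @st: the service tree being flushed.
 */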
static void bfq_flush_idle_tree(struct bfq_service_tree *st)
{
	struct bfq_entity *entity = st->first_idle;

	for (; entity ; entity = st->first_idle)
		__bfq_deactivate_entity(entity, false);
}

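/**
 * bfq_reparent_leaf_entity - move leaf entity to the root_group.
 * @bfqd: the device data structure with the root group.
 * @entity: the entity to start from.
 * @ioprio_class: index of the service tree to walk at each level.
 */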
static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
				     struct bfq_entity *entity,
				     int ioprio_class)
{
	struct bfq_queue *bfqq;
	struct bfq_entity *child_entity = entity;

	while (child_entity->my_sched_data) {
		struct bfq_sched_data *child_sd = child_entity->my_sched_data;
		struct bfq_service_tree *child_st = child_sd->service_tree +
			ioprio_class;
		struct rb_root *child_active = &child_st->active;

		child_entity = bfq_entity_of(rb_first(child_active));

		if (!child_entity)
			child_entity = child_sd->in_service_entity;
	}

	bfqq = bfq_entity_to_bfqq(child_entity);
	bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
}

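/**
 * bfq_reparent_active_queues - move to the root group all active queues.
 * @bfqd: the device data structure with the root group.
 * @bfqg: the group to move from.
 * @st: the service tree to start the search from.
 * @ioprio_class: index of the service tree being scanned.
 */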
static void bfq_reparent_active_queues(struct bfq_data *bfqd,
				       struct bfq_group *bfqg,
				       struct bfq_service_tree *st,
				       int ioprio_class)
{
	struct rb_root *active = &st->active;
	struct bfq_entity *entity;

	while ((entity = bfq_entity_of(rb_first(active))))
		bfq_reparent_leaf_entity(bfqd, entity, ioprio_class);

	if (bfqg->sched_data.in_service_entity)
		bfq_reparent_leaf_entity(bfqd,
					 bfqg->sched_data.in_service_entity,
					 ioprio_class);
}

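/**
 * bfq_pd_offline - deactivate the entity associated with @pd,
 *		    and reparent its children entities.
 * @pd: descriptor of the policy going offline.
 */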
static void bfq_pd_offline(struct blkg_policy_data *pd)
{
	struct bfq_service_tree *st;
	struct bfq_group *bfqg = pd_to_bfqg(pd);
	struct bfq_data *bfqd = bfqg->bfqd;
	struct bfq_entity *entity = bfqg->my_entity;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&bfqd->lock, flags);

	if (!entity)
		goto put_async_queues;

	/*
	 * Empty all service_trees belonging to this group before
	 * deactivating the group itself.
	 */
	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
		st = bfqg->sched_data.service_tree + i;

		/*
		 * Reparent to the root group any queue that is still
		 * active in this tree, including the in-service entity.
		 */
		bfq_reparent_active_queues(bfqd, bfqg, st, i);

		/*
		 * Deactivate whatever is left on the idle tree of this
		 * service tree.
		 */
		bfq_flush_idle_tree(st);
	}

	__bfq_deactivate_entity(entity, false);

put_async_queues:
	bfq_put_async_queues(bfqd, bfqg);

	spin_unlock_irqrestore(&bfqd->lock, flags);

	/*
	 * @blkg is going offline: transfer its stats to the parent so that
	 * they are not lost.
	 */
	bfqg_stats_xfer_dead(bfqg);
}

void bfq_end_wr_async(struct bfq_data *bfqd)
{
	struct blkcg_gq *blkg;

	list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
		struct bfq_group *bfqg = blkg_to_bfqg(blkg);

		bfq_end_wr_async_queues(bfqd, bfqg);
	}
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
	unsigned int val = 0;

	if (bfqgd)
		val = bfqgd->weight;

	seq_printf(sf, "%u\n", val);

	return 0;
}

static u64 bfqg_prfill_weight_device(struct seq_file *sf,
				     struct blkg_policy_data *pd, int off)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	if (!bfqg->entity.dev_weight)
		return 0;
	return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight);
}

static int bfq_io_show_weight(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);

	seq_printf(sf, "default %u\n", bfqgd->weight);
	blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device,
			  &blkcg_policy_bfq, 0, false);
	return 0;
}

static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_weight)
{
	weight = dev_weight ?: weight;

	bfqg->entity.dev_weight = dev_weight;
	/*
	 * Update new_weight, and set the prio_changed flag, only if the
	 * weight actually changes.
	 */
	if ((unsigned short)weight != bfqg->entity.new_weight) {
		bfqg->entity.new_weight = (unsigned short)weight;
		/*
		 * Make sure the new value of new_weight is stored before
		 * prio_changed is set below: the flag may be checked
		 * asynchronously, and a reader that sees it set must also
		 * see the updated weight.
		 */
		smp_wmb();
		bfqg->entity.prio_changed = 1;
	}
}

static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
				    struct cftype *cftype,
				    u64 val)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
	struct blkcg_gq *blkg;
	int ret = -ERANGE;

	if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
		return ret;

	ret = 0;
	spin_lock_irq(&blkcg->lock);
	bfqgd->weight = (unsigned short)val;
	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
		struct bfq_group *bfqg = blkg_to_bfqg(blkg);

		if (bfqg)
			bfq_group_set_weight(bfqg, val, 0);
	}
	spin_unlock_irq(&blkcg->lock);

	return ret;
}

static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
					char *buf, size_t nbytes,
					loff_t off)
{
	int ret;
	struct blkg_conf_ctx ctx;
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct bfq_group *bfqg;
	u64 v;

	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
	if (ret)
		return ret;

	if (sscanf(ctx.body, "%llu", &v) == 1) {
		/* a numeric value of zero is rejected; use "default" instead */
		ret = -ERANGE;
		if (!v)
			goto out;
	} else if (!strcmp(strim(ctx.body), "default")) {
		v = 0;
	} else {
		ret = -EINVAL;
		goto out;
	}

	bfqg = blkg_to_bfqg(ctx.blkg);

	ret = -ERANGE;
	if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) {
		bfq_group_set_weight(bfqg, bfqg->entity.weight, v);
		ret = 0;
	}
out:
	blkg_conf_finish(&ctx);
	return ret ?: nbytes;
}

static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
				 char *buf, size_t nbytes,
				 loff_t off)
{
	char *endp;
	int ret;
	u64 v;

	buf = strim(buf);

	/* "WEIGHT" or "default WEIGHT" sets the default weight */
	v = simple_strtoull(buf, &endp, 0);
	if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
		ret = bfq_io_set_weight_legacy(of_css(of), NULL, v);
		return ret ?: nbytes;
	}

	return bfq_io_set_device_weight(of, buf, nbytes, off);
}

static int bfqg_print_rwstat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
			  &blkcg_policy_bfq, seq_cft(sf)->private, true);
	return 0;
}

static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
					struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat_sample sum;

	blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
	return __blkg_prfill_rwstat(sf, pd, &sum);
}

static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
			  seq_cft(sf)->private, true);
	return 0;
}

#ifdef CONFIG_BFQ_CGROUP_DEBUG
static int bfqg_print_stat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
			  &blkcg_policy_bfq, seq_cft(sf)->private, false);
	return 0;
}

static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	struct blkcg_gq *blkg = pd_to_blkg(pd);
	struct blkcg_gq *pos_blkg;
	struct cgroup_subsys_state *pos_css;
	u64 sum = 0;

	lockdep_assert_held(&blkg->q->queue_lock);

	rcu_read_lock();
	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
		struct bfq_stat *stat;

		if (!pos_blkg->online)
			continue;

		stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off;
		sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt);
	}
	rcu_read_unlock();

	return __blkg_prfill_u64(sf, pd, sum);
}

static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
			  seq_cft(sf)->private, false);
	return 0;
}

static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
			       int off)
{
	struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg);
	u64 sum = blkg_rwstat_total(&bfqg->stats.bytes);

	return __blkg_prfill_u64(sf, pd, sum >> 9);
}

static int bfqg_print_stat_sectors(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false);
	return 0;
}

static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
					 struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat_sample tmp;

	blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq,
			offsetof(struct bfq_group, stats.bytes), &tmp);

	return __blkg_prfill_u64(sf, pd,
		(tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9);
}

static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0,
			  false);
	return 0;
}

static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);
	u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples);
	u64 v = 0;

	if (samples) {
		v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum);
		v = div64_u64(v, samples);
	}
	__blkg_prfill_u64(sf, pd, v);
	return 0;
}

static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
			  0, false);
	return 0;
}
#endif

struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
	int ret;

	ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
	if (ret)
		return NULL;

	return blkg_to_bfqg(bfqd->queue->root_blkg);
}

struct blkcg_policy blkcg_policy_bfq = {
	.dfl_cftypes		= bfq_blkg_files,
	.legacy_cftypes		= bfq_blkcg_legacy_files,

	.cpd_alloc_fn		= bfq_cpd_alloc,
	.cpd_init_fn		= bfq_cpd_init,
	.cpd_bind_fn		= bfq_cpd_init,
	.cpd_free_fn		= bfq_cpd_free,

	.pd_alloc_fn		= bfq_pd_alloc,
	.pd_init_fn		= bfq_pd_init,
	.pd_offline_fn		= bfq_pd_offline,
	.pd_free_fn		= bfq_pd_free,
	.pd_reset_stats_fn	= bfq_pd_reset_stats,
};

struct cftype bfq_blkcg_legacy_files[] = {
	{
		.name = "bfq.weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight_legacy,
		.write_u64 = bfq_io_set_weight_legacy,
	},
	{
		.name = "bfq.weight_device",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight,
		.write = bfq_io_set_weight,
	},

	/* statistics, covers only the tasks in the bfqg */
	{
		.name = "bfq.io_service_bytes",
		.private = offsetof(struct bfq_group, stats.bytes),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_serviced",
		.private = offsetof(struct bfq_group, stats.ios),
		.seq_show = bfqg_print_rwstat,
	},
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	{
		.name = "bfq.time",
		.private = offsetof(struct bfq_group, stats.time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.sectors",
		.seq_show = bfqg_print_stat_sectors,
	},
	{
		.name = "bfq.io_service_time",
		.private = offsetof(struct bfq_group, stats.service_time),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_wait_time",
		.private = offsetof(struct bfq_group, stats.wait_time),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_merged",
		.private = offsetof(struct bfq_group, stats.merged),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_queued",
		.private = offsetof(struct bfq_group, stats.queued),
		.seq_show = bfqg_print_rwstat,
	},
#endif

	/* the same statistics which cover the bfqg and its descendants */
	{
		.name = "bfq.io_service_bytes_recursive",
		.private = offsetof(struct bfq_group, stats.bytes),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_serviced_recursive",
		.private = offsetof(struct bfq_group, stats.ios),
		.seq_show = bfqg_print_rwstat_recursive,
	},
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	{
		.name = "bfq.time_recursive",
		.private = offsetof(struct bfq_group, stats.time),
		.seq_show = bfqg_print_stat_recursive,
	},
	{
		.name = "bfq.sectors_recursive",
		.seq_show = bfqg_print_stat_sectors_recursive,
	},
	{
		.name = "bfq.io_service_time_recursive",
		.private = offsetof(struct bfq_group, stats.service_time),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_wait_time_recursive",
		.private = offsetof(struct bfq_group, stats.wait_time),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_merged_recursive",
		.private = offsetof(struct bfq_group, stats.merged),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_queued_recursive",
		.private = offsetof(struct bfq_group, stats.queued),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.avg_queue_size",
		.seq_show = bfqg_print_avg_queue_size,
	},
	{
		.name = "bfq.group_wait_time",
		.private = offsetof(struct bfq_group, stats.group_wait_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.idle_time",
		.private = offsetof(struct bfq_group, stats.idle_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.empty_time",
		.private = offsetof(struct bfq_group, stats.empty_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.dequeue",
		.private = offsetof(struct bfq_group, stats.dequeue),
		.seq_show = bfqg_print_stat,
	},
#endif
	{ }	/* terminate */
};

struct cftype bfq_blkg_files[] = {
	{
		.name = "bfq.weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight,
		.write = bfq_io_set_weight,
	},
	{ }	/* terminate */
};

#else	/* CONFIG_BFQ_GROUP_IOSCHED */

void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		   struct bfq_group *bfqg) {}

void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->weight = entity->new_weight;
	entity->orig_weight = entity->new_weight;
	if (bfqq) {
		bfqq->ioprio = bfqq->new_ioprio;
		bfqq->ioprio_class = bfqq->new_ioprio_class;
	}
	entity->sched_data = &bfqg->sched_data;
}

void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {}

void bfq_end_wr_async(struct bfq_data *bfqd)
{
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
{
	return bfqd->root_group;
}

struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
{
	return bfqq->bfqd->root_group;
}

void bfqg_and_blkg_get(struct bfq_group *bfqg) {}

void bfqg_and_blkg_put(struct bfq_group *bfqg) {}

struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
	struct bfq_group *bfqg;
	int i;

	bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
	if (!bfqg)
		return NULL;

	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
		bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;

	return bfqg;
}
#endif	/* CONFIG_BFQ_GROUP_IOSCHED */