// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * cgroups support for the BFQ I/O scheduler.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/cgroup.h>
#include <linux/elevator.h>
#include <linux/ktime.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
#include <linux/sbitmap.h>
#include <linux/delay.h>

#include "bfq-iosched.h"

#ifdef CONFIG_BFQ_CGROUP_DEBUG
static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp)
{
	int ret;

	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
	if (ret)
		return ret;

	atomic64_set(&stat->aux_cnt, 0);
	return 0;
}

static void bfq_stat_exit(struct bfq_stat *stat)
{
	percpu_counter_destroy(&stat->cpu_cnt);
}

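/*
 * bfq_stat_add - add a value to a bfq_stat
 * @stat: target bfq_stat
 * @val: value to add
 *
 * Adds @val to the per-cpu counter, batching updates through
 * percpu_counter_add_batch().
 */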
static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val)
{
	percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
}

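/*
 * bfq_stat_read - read the current value of a bfq_stat
 * @stat: bfq_stat to read
 *
 * Returns the (non-negative) sum of the per-cpu counter.
 */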
static inline uint64_t bfq_stat_read(struct bfq_stat *stat)
{
	return percpu_counter_sum_positive(&stat->cpu_cnt);
}

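/*
 * bfq_stat_reset - reset a bfq_stat
 * @stat: bfq_stat to reset
 *
 * Clears both the per-cpu counter and the auxiliary count.
 */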
static inline void bfq_stat_reset(struct bfq_stat *stat)
{
	percpu_counter_set(&stat->cpu_cnt, 0);
	atomic64_set(&stat->aux_cnt, 0);
}

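/*
 * bfq_stat_add_aux - add a bfq_stat into another's aux count
 * @to: the destination bfq_stat
 * @from: the source
 *
 * Add @from's count (including its aux count) into @to's aux count.
 * Used, for instance, to transfer the statistics of a dying group to
 * its parent (see bfqg_stats_xfer_dead()).
 */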
static inline void bfq_stat_add_aux(struct bfq_stat *to,
				    struct bfq_stat *from)
{
	atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt),
		     &to->aux_cnt);
}

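/*
 * blkg_prfill_stat - prfill callback for printing a bfq_stat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @off: offset of the bfq_stat within @pd
 */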
static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
			    int off)
{
	return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off));
}

enum bfqg_stats_flags {
	BFQG_stats_waiting = 0,
	BFQG_stats_idling,
	BFQG_stats_empty,
};

#define BFQG_FLAG_FNS(name)						\
static void bfqg_stats_mark_##name(struct bfqg_stats *stats)		\
{									\
	stats->flags |= (1 << BFQG_stats_##name);			\
}									\
static void bfqg_stats_clear_##name(struct bfqg_stats *stats)		\
{									\
	stats->flags &= ~(1 << BFQG_stats_##name);			\
}									\
static int bfqg_stats_##name(struct bfqg_stats *stats)			\
{									\
	return (stats->flags & (1 << BFQG_stats_##name)) != 0;		\
}									\

BFQG_FLAG_FNS(waiting)
BFQG_FLAG_FNS(idling)
BFQG_FLAG_FNS(empty)
#undef BFQG_FLAG_FNS

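/*
 * Time-interval helpers for the debug statistics below.  A "waiting"
 * interval is opened by bfqg_stats_set_start_group_wait_time() and
 * closed here; "empty" and "idle" intervals are handled analogously
 * further down.
 */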
static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
{
	u64 now;

	if (!bfqg_stats_waiting(stats))
		return;

	now = ktime_get_ns();
	if (now > stats->start_group_wait_time)
		bfq_stat_add(&stats->group_wait_time,
			     now - stats->start_group_wait_time);
	bfqg_stats_clear_waiting(stats);
}

static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
						 struct bfq_group *curr_bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (bfqg_stats_waiting(stats))
		return;
	if (bfqg == curr_bfqg)
		return;
	stats->start_group_wait_time = ktime_get_ns();
	bfqg_stats_mark_waiting(stats);
}

static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
{
	u64 now;

	if (!bfqg_stats_empty(stats))
		return;

	now = ktime_get_ns();
	if (now > stats->start_empty_time)
		bfq_stat_add(&stats->empty_time,
			     now - stats->start_empty_time);
	bfqg_stats_clear_empty(stats);
}

void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
{
	bfq_stat_add(&bfqg->stats.dequeue, 1);
}

void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (blkg_rwstat_total(&stats->queued))
		return;

	/* an "empty" interval may already be open; do not restart it */
	if (bfqg_stats_empty(stats))
		return;

	stats->start_empty_time = ktime_get_ns();
	bfqg_stats_mark_empty(stats);
}

void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (bfqg_stats_idling(stats)) {
		u64 now = ktime_get_ns();

		if (now > stats->start_idle_time)
			bfq_stat_add(&stats->idle_time,
				     now - stats->start_idle_time);
		bfqg_stats_clear_idling(stats);
	}
}

void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	stats->start_idle_time = ktime_get_ns();
	bfqg_stats_mark_idling(stats);
}

void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	bfq_stat_add(&stats->avg_queue_size_sum,
		     blkg_rwstat_total(&stats->queued));
	bfq_stat_add(&stats->avg_queue_size_samples, 1);
	bfqg_stats_update_group_wait_time(stats);
}

void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
			      unsigned int op)
{
	blkg_rwstat_add(&bfqg->stats.queued, op, 1);
	bfqg_stats_end_empty_time(&bfqg->stats);
	if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
}

void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
{
	blkg_rwstat_add(&bfqg->stats.queued, op, -1);
}

void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
{
	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
}

void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
				  u64 io_start_time_ns, unsigned int op)
{
	struct bfqg_stats *stats = &bfqg->stats;
	u64 now = ktime_get_ns();

	if (now > io_start_time_ns)
		blkg_rwstat_add(&stats->service_time, op,
				now - io_start_time_ns);
	if (io_start_time_ns > start_time_ns)
		blkg_rwstat_add(&stats->wait_time, op,
				io_start_time_ns - start_time_ns);
}

#else	/* CONFIG_BFQ_CGROUP_DEBUG */

void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
			      unsigned int op) { }
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
				  u64 io_start_time_ns, unsigned int op) { }
void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }

#endif	/* CONFIG_BFQ_CGROUP_DEBUG */

#ifdef CONFIG_BFQ_GROUP_IOSCHED

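/*
 * bfq_group handling functions.
 *
 * Helpers to go back and forth between the blkcg core objects
 * (blkg_policy_data, blkcg_gq) and BFQ's per-group structure
 * (struct bfq_group).
 */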
static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct bfq_group, pd) : NULL;
}

struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
{
	return pd_to_blkg(&bfqg->pd);
}

static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
{
	return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq));
}

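/*
 * Return the bfq_group of the parent blkcg, or NULL if the blkg has no
 * parent (i.e., for the root group).
 */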
static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
{
	struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;

	return pblkg ? blkg_to_bfqg(pblkg) : NULL;
}

struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
{
	struct bfq_entity *group_entity = bfqq->entity.parent;

	return group_entity ? container_of(group_entity, struct bfq_group,
					   entity) :
			      bfqq->bfqd->root_group;
}

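/*
 * The following two functions handle get and put of a bfq_group by
 * wrapping BFQ's internal reference counter (bfqg->ref); the blkg
 * reference is handled separately by bfqg_and_blkg_get() and
 * bfqg_and_blkg_put() below.
 */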
static void bfqg_get(struct bfq_group *bfqg)
{
	bfqg->ref++;
}

static void bfqg_put(struct bfq_group *bfqg)
{
	bfqg->ref--;

	if (bfqg->ref == 0)
		kfree(bfqg);
}

void bfqg_and_blkg_get(struct bfq_group *bfqg)
{
	/* scheduler-internal reference, dropped by bfqg_and_blkg_put() */
	bfqg_get(bfqg);

	blkg_get(bfqg_to_blkg(bfqg));
}

void bfqg_and_blkg_put(struct bfq_group *bfqg)
{
	blkg_put(bfqg_to_blkg(bfqg));

	bfqg_put(bfqg);
}

void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
{
	struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);

	if (!bfqg)
		return;

	blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
	blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
}

/* @stats = 0 */
static void bfqg_stats_reset(struct bfqg_stats *stats)
{
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	/* queued stats shouldn't be cleared */
	blkg_rwstat_reset(&stats->merged);
	blkg_rwstat_reset(&stats->service_time);
	blkg_rwstat_reset(&stats->wait_time);
	bfq_stat_reset(&stats->time);
	bfq_stat_reset(&stats->avg_queue_size_sum);
	bfq_stat_reset(&stats->avg_queue_size_samples);
	bfq_stat_reset(&stats->dequeue);
	bfq_stat_reset(&stats->group_wait_time);
	bfq_stat_reset(&stats->idle_time);
	bfq_stat_reset(&stats->empty_time);
#endif
}

/* @to += @from */
static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
{
	if (!to || !from)
		return;

#ifdef CONFIG_BFQ_CGROUP_DEBUG
	/* queued stats shouldn't be added up */
	blkg_rwstat_add_aux(&to->merged, &from->merged);
	blkg_rwstat_add_aux(&to->service_time, &from->service_time);
	blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
	bfq_stat_add_aux(&to->time, &from->time);
	bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
	bfq_stat_add_aux(&to->avg_queue_size_samples,
			 &from->avg_queue_size_samples);
	bfq_stat_add_aux(&to->dequeue, &from->dequeue);
	bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
	bfq_stat_add_aux(&to->idle_time, &from->idle_time);
	bfq_stat_add_aux(&to->empty_time, &from->empty_time);
#endif
}

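/*
 * Transfer @bfqg's stats to its parent's aux counts, so that the
 * ancestors' recursive stats keep accounting for the offlined (and
 * soon to be deallocated) group.
 */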
static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
{
	struct bfq_group *parent;

	if (!bfqg)
		return;

	parent = bfqg_parent(bfqg);

	lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock);

	if (unlikely(!parent))
		return;

	bfqg_stats_add_aux(&parent->stats, &bfqg->stats);
	bfqg_stats_reset(&bfqg->stats);
}

void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->weight = entity->new_weight;
	entity->orig_weight = entity->new_weight;
	if (bfqq) {
		bfqq->ioprio = bfqq->new_ioprio;
		bfqq->ioprio_class = bfqq->new_ioprio_class;
		/*
		 * Make sure that bfqg and its associated blkg do not
		 * disappear before the entity.
		 */
		bfqg_and_blkg_get(bfqg);
	}
	entity->parent = bfqg->my_entity;
	entity->sched_data = &bfqg->sched_data;
}

static void bfqg_stats_exit(struct bfqg_stats *stats)
{
	blkg_rwstat_exit(&stats->bytes);
	blkg_rwstat_exit(&stats->ios);
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	blkg_rwstat_exit(&stats->merged);
	blkg_rwstat_exit(&stats->service_time);
	blkg_rwstat_exit(&stats->wait_time);
	blkg_rwstat_exit(&stats->queued);
	bfq_stat_exit(&stats->time);
	bfq_stat_exit(&stats->avg_queue_size_sum);
	bfq_stat_exit(&stats->avg_queue_size_samples);
	bfq_stat_exit(&stats->dequeue);
	bfq_stat_exit(&stats->group_wait_time);
	bfq_stat_exit(&stats->idle_time);
	bfq_stat_exit(&stats->empty_time);
#endif
}

static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
{
	if (blkg_rwstat_init(&stats->bytes, gfp) ||
	    blkg_rwstat_init(&stats->ios, gfp))
		return -ENOMEM;

#ifdef CONFIG_BFQ_CGROUP_DEBUG
	if (blkg_rwstat_init(&stats->merged, gfp) ||
	    blkg_rwstat_init(&stats->service_time, gfp) ||
	    blkg_rwstat_init(&stats->wait_time, gfp) ||
	    blkg_rwstat_init(&stats->queued, gfp) ||
	    bfq_stat_init(&stats->time, gfp) ||
	    bfq_stat_init(&stats->avg_queue_size_sum, gfp) ||
	    bfq_stat_init(&stats->avg_queue_size_samples, gfp) ||
	    bfq_stat_init(&stats->dequeue, gfp) ||
	    bfq_stat_init(&stats->group_wait_time, gfp) ||
	    bfq_stat_init(&stats->idle_time, gfp) ||
	    bfq_stat_init(&stats->empty_time, gfp)) {
		bfqg_stats_exit(stats);
		return -ENOMEM;
	}
#endif

	return 0;
}

static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
{
	return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
}

static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
{
	return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
}

static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
{
	struct bfq_group_data *bgd;

	bgd = kzalloc(sizeof(*bgd), gfp);
	if (!bgd)
		return NULL;
	return &bgd->pd;
}

static void bfq_cpd_init(struct blkcg_policy_data *cpd)
{
	struct bfq_group_data *d = cpd_to_bfqgd(cpd);

	d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
		CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
}

static void bfq_cpd_free(struct blkcg_policy_data *cpd)
{
	kfree(cpd_to_bfqgd(cpd));
}

static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
					     struct blkcg *blkcg)
{
	struct bfq_group *bfqg;

	bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node);
	if (!bfqg)
		return NULL;

	if (bfqg_stats_init(&bfqg->stats, gfp)) {
		kfree(bfqg);
		return NULL;
	}

	/* scheduler-internal reference, released in bfq_pd_free() */
	bfqg_get(bfqg);
	return &bfqg->pd;
}

static void bfq_pd_init(struct blkg_policy_data *pd)
{
	struct blkcg_gq *blkg = pd_to_blkg(pd);
	struct bfq_group *bfqg = blkg_to_bfqg(blkg);
	struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
	struct bfq_entity *entity = &bfqg->entity;
	struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);

	entity->orig_weight = entity->weight = entity->new_weight = d->weight;
	entity->my_sched_data = &bfqg->sched_data;
	bfqg->my_entity = entity;

	bfqg->bfqd = bfqd;
	bfqg->active_entities = 0;
	bfqg->rq_pos_tree = RB_ROOT;
}

static void bfq_pd_free(struct blkg_policy_data *pd)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	bfqg_stats_exit(&bfqg->stats);
	bfqg_put(bfqg);
}

static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	bfqg_stats_reset(&bfqg->stats);
}

static void bfq_group_set_parent(struct bfq_group *bfqg,
				 struct bfq_group *parent)
{
	struct bfq_entity *entity;

	entity = &bfqg->entity;
	entity->parent = parent->my_entity;
	entity->sched_data = &parent->sched_data;
}

static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
					 struct blkcg *blkcg)
{
	struct blkcg_gq *blkg;

	blkg = blkg_lookup(blkcg, bfqd->queue);
	if (likely(blkg))
		return blkg_to_bfqg(blkg);
	return NULL;
}

struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
				     struct blkcg *blkcg)
{
	struct bfq_group *bfqg, *parent;
	struct bfq_entity *entity;

	bfqg = bfq_lookup_bfqg(bfqd, blkcg);

	if (unlikely(!bfqg))
		return NULL;

	/*
	 * Update the chain of parent pointers from the returned group up
	 * to the root group, so that the whole hierarchy is consistent
	 * before the group is used by the scheduler.
	 */
	entity = &bfqg->entity;
	for_each_entity(entity) {
		struct bfq_group *curr_bfqg = container_of(entity,
						struct bfq_group, entity);
		if (curr_bfqg != bfqd->root_group) {
			parent = bfqg_parent(curr_bfqg);
			if (!parent)
				parent = bfqd->root_group;
			bfq_group_set_parent(curr_bfqg, parent);
		}
	}

	return bfqg;
}

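/*
 * bfq_bfqq_move - migrate @bfqq to @bfqg.
 * @bfqd: queue descriptor.
 * @bfqq: the queue to move.
 * @bfqg: the group to move to.
 *
 * Move @bfqq to @bfqg: detach it from its current group (expiring it
 * first if it is in service) and attach it to @bfqg, reactivating it
 * there if it was busy.
 */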
void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		   struct bfq_group *bfqg)
{
	struct bfq_entity *entity = &bfqq->entity;

	/*
	 * If @bfqq is in service, it must be expired before being moved,
	 * so that the scheduler state stays consistent.
	 */
	if (bfqq == bfqd->in_service_queue)
		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
				false, BFQQE_PREEMPTED);

	/*
	 * Take an extra reference so that @bfqq cannot be freed while it
	 * is detached from its old group.
	 */
	bfqq->ref++;

	if (bfq_bfqq_busy(bfqq))
		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
	else if (entity->on_st_or_in_serv)
		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
	bfqg_and_blkg_put(bfqq_group(bfqq));

	entity->parent = bfqg->my_entity;
	entity->sched_data = &bfqg->sched_data;
	/* pin down the new group and its blkg */
	bfqg_and_blkg_get(bfqg);

	if (bfq_bfqq_busy(bfqq)) {
		if (unlikely(!bfqd->nonrot_with_queueing))
			bfq_pos_tree_add_move(bfqd, bfqq);
		bfq_activate_bfqq(bfqd, bfqq);
	}

	if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
		bfq_schedule_dispatch(bfqd);

	/* release the extra reference taken above */
	bfq_put_queue(bfqq);
}

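/*
 * Move the queues associated with @bic to @blkcg's group: the async
 * queue is dropped (bic_set_bfqq(..., NULL, 0) plus bfq_put_queue()),
 * while the sync queue, if any, is migrated with bfq_bfqq_move().
 * Returns the destination group.
 */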
static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
						 struct bfq_io_cq *bic,
						 struct blkcg *blkcg)
{
	struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
	struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
	struct bfq_group *bfqg;
	struct bfq_entity *entity;

	bfqg = bfq_find_set_group(bfqd, blkcg);

	if (unlikely(!bfqg))
		bfqg = bfqd->root_group;

	if (async_bfqq) {
		entity = &async_bfqq->entity;

		if (entity->sched_data != &bfqg->sched_data) {
			bic_set_bfqq(bic, NULL, 0);
			bfq_log_bfqq(bfqd, async_bfqq,
				     "bic_change_group: %p %d",
				     async_bfqq, async_bfqq->ref);
			bfq_put_queue(async_bfqq);
		}
	}

	if (sync_bfqq) {
		entity = &sync_bfqq->entity;
		if (entity->sched_data != &bfqg->sched_data)
			bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
	}

	return bfqg;
}

void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
	struct bfq_data *bfqd = bic_to_bfqd(bic);
	struct bfq_group *bfqg = NULL;
	uint64_t serial_nr;

	rcu_read_lock();
	serial_nr = __bio_blkcg(bio)->css.serial_nr;

	/*
	 * Nothing to do if the blkcg did not change since the last time
	 * this bic was updated.
	 */
	if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
		goto out;

	bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));

	/* refresh the cached cgroup path of the (possibly new) group */
	blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
	bic->blkcg_serial_nr = serial_nr;
out:
	rcu_read_unlock();
}

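/*
 * Deactivate all the entities still parked on @st's idle tree.
 */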
static void bfq_flush_idle_tree(struct bfq_service_tree *st)
{
	struct bfq_entity *entity = st->first_idle;

	for (; entity ; entity = st->first_idle)
		__bfq_deactivate_entity(entity, false);
}

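/*
 * Move the leaf entity @entity (a bfq_queue) to the root group of @bfqd.
 */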
static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
				     struct bfq_entity *entity)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
}

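/*
 * Reparent all the active entities of service tree @st, as well as the
 * in-service entity of @bfqg, if any, to the root group.
 */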
static void bfq_reparent_active_entities(struct bfq_data *bfqd,
					 struct bfq_group *bfqg,
					 struct bfq_service_tree *st)
{
	struct rb_root *active = &st->active;
	struct bfq_entity *entity = NULL;

	if (!RB_EMPTY_ROOT(&st->active))
		entity = bfq_entity_of(rb_first(active));

	for (; entity ; entity = bfq_entity_of(rb_first(active)))
		bfq_reparent_leaf_entity(bfqd, entity);

	if (bfqg->sched_data.in_service_entity)
		bfq_reparent_leaf_entity(bfqd,
					 bfqg->sched_data.in_service_entity);
}

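/*
 * Offline callback for a bfq group: reparent every entity still
 * attached to the group to the root group, release the group's async
 * queues, and finally transfer the group's statistics to its parent.
 */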
static void bfq_pd_offline(struct blkg_policy_data *pd)
{
	struct bfq_service_tree *st;
	struct bfq_group *bfqg = pd_to_bfqg(pd);
	struct bfq_data *bfqd = bfqg->bfqd;
	struct bfq_entity *entity = bfqg->my_entity;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&bfqd->lock, flags);

	if (!entity)
		goto put_async_queues;

	/*
	 * Empty all service_trees belonging to this group before
	 * deactivating the group itself.
	 */
	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
		st = bfqg->sched_data.service_tree + i;

		/* deactivate any entity left parked on the idle tree */
		bfq_flush_idle_tree(st);

		/*
		 * Move all the group's active entities (and the
		 * in-service entity, if any) under the root group, so
		 * that no queue is left pointing to this soon-to-die
		 * group.
		 */
		bfq_reparent_active_entities(bfqd, bfqg, st);
	}

	__bfq_deactivate_entity(entity, false);

put_async_queues:
	bfq_put_async_queues(bfqd, bfqg);

	spin_unlock_irqrestore(&bfqd->lock, flags);
	/*
	 * The blkg is going offline and will be skipped by the recursive
	 * stat sums (see bfqg_prfill_stat_recursive()); transfer its
	 * stats to the parent so that they are not lost.
	 */
	bfqg_stats_xfer_dead(bfqg);
}

void bfq_end_wr_async(struct bfq_data *bfqd)
{
	struct blkcg_gq *blkg;

	list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
		struct bfq_group *bfqg = blkg_to_bfqg(blkg);

		bfq_end_wr_async_queues(bfqd, bfqg);
	}
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
	unsigned int val = 0;

	if (bfqgd)
		val = bfqgd->weight;

	seq_printf(sf, "%u\n", val);

	return 0;
}

static u64 bfqg_prfill_weight_device(struct seq_file *sf,
				     struct blkg_policy_data *pd, int off)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	if (!bfqg->entity.dev_weight)
		return 0;
	return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight);
}

static int bfq_io_show_weight(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);

	seq_printf(sf, "default %u\n", bfqgd->weight);
	blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device,
			  &blkcg_policy_bfq, 0, false);
	return 0;
}

static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_weight)
{
	weight = dev_weight ?: weight;

	bfqg->entity.dev_weight = dev_weight;
	/*
	 * Defer the actual weight change: record it in new_weight and
	 * flag the entity as having a pending priority/weight change,
	 * but only if the effective weight really changes.
	 */
	if ((unsigned short)weight != bfqg->entity.new_weight) {
		bfqg->entity.new_weight = (unsigned short)weight;
		/*
		 * Make the new weight visible before setting
		 * prio_changed, so that a reader that observes
		 * prio_changed set also observes the updated
		 * new_weight.
		 */
		smp_wmb();
		bfqg->entity.prio_changed = 1;
	}
}

static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
				    struct cftype *cftype,
				    u64 val)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
	struct blkcg_gq *blkg;
	int ret = -ERANGE;

	if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
		return ret;

	ret = 0;
	spin_lock_irq(&blkcg->lock);
	bfqgd->weight = (unsigned short)val;
	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
		struct bfq_group *bfqg = blkg_to_bfqg(blkg);

		if (bfqg)
			bfq_group_set_weight(bfqg, val, 0);
	}
	spin_unlock_irq(&blkcg->lock);

	return ret;
}

static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
					char *buf, size_t nbytes,
					loff_t off)
{
	int ret;
	struct blkg_conf_ctx ctx;
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct bfq_group *bfqg;
	u64 v;

	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
	if (ret)
		return ret;

	if (sscanf(ctx.body, "%llu", &v) == 1) {
		/* a numeric 0 is rejected: write "default" to clear */
		ret = -ERANGE;
		if (!v)
			goto out;
	} else if (!strcmp(strim(ctx.body), "default")) {
		v = 0;
	} else {
		ret = -EINVAL;
		goto out;
	}

	bfqg = blkg_to_bfqg(ctx.blkg);

	ret = -ERANGE;
	if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) {
		bfq_group_set_weight(bfqg, bfqg->entity.weight, v);
		ret = 0;
	}
out:
	blkg_conf_finish(&ctx);
	return ret ?: nbytes;
}

static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
				 char *buf, size_t nbytes,
				 loff_t off)
{
	char *endp;
	int ret;
	u64 v;

	buf = strim(buf);

	/* "WEIGHT" or "default WEIGHT" sets the default weight */
	v = simple_strtoull(buf, &endp, 0);
	if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
		ret = bfq_io_set_weight_legacy(of_css(of), NULL, v);
		return ret ?: nbytes;
	}

	/* everything else is interpreted as a per-device weight */
	return bfq_io_set_device_weight(of, buf, nbytes, off);
}

static int bfqg_print_rwstat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
			  &blkcg_policy_bfq, seq_cft(sf)->private, true);
	return 0;
}

static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
					struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat_sample sum;

	blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
	return __blkg_prfill_rwstat(sf, pd, &sum);
}

static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
			  seq_cft(sf)->private, true);
	return 0;
}

#ifdef CONFIG_BFQ_CGROUP_DEBUG
static int bfqg_print_stat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
			  &blkcg_policy_bfq, seq_cft(sf)->private, false);
	return 0;
}

static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	struct blkcg_gq *blkg = pd_to_blkg(pd);
	struct blkcg_gq *pos_blkg;
	struct cgroup_subsys_state *pos_css;
	u64 sum = 0;

	lockdep_assert_held(&blkg->q->queue_lock);

	rcu_read_lock();
	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
		struct bfq_stat *stat;

		if (!pos_blkg->online)
			continue;

		stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off;
		sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt);
	}
	rcu_read_unlock();

	return __blkg_prfill_u64(sf, pd, sum);
}

static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
			  seq_cft(sf)->private, false);
	return 0;
}

static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
			       int off)
{
	struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg);
	u64 sum = blkg_rwstat_total(&bfqg->stats.bytes);

	return __blkg_prfill_u64(sf, pd, sum >> 9);
}

static int bfqg_print_stat_sectors(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false);
	return 0;
}

static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
					 struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat_sample tmp;

	blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq,
			offsetof(struct bfq_group, stats.bytes), &tmp);

	return __blkg_prfill_u64(sf, pd,
		(tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9);
}

static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0,
			  false);
	return 0;
}

static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);
	u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples);
	u64 v = 0;

	if (samples) {
		v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum);
		v = div64_u64(v, samples);
	}
	__blkg_prfill_u64(sf, pd, v);
	return 0;
}

static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
			  0, false);
	return 0;
}
#endif /* CONFIG_BFQ_CGROUP_DEBUG */

struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
	int ret;

	ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
	if (ret)
		return NULL;

	return blkg_to_bfqg(bfqd->queue->root_blkg);
}

struct blkcg_policy blkcg_policy_bfq = {
	.dfl_cftypes		= bfq_blkg_files,
	.legacy_cftypes		= bfq_blkcg_legacy_files,

	.cpd_alloc_fn		= bfq_cpd_alloc,
	.cpd_init_fn		= bfq_cpd_init,
	.cpd_bind_fn		= bfq_cpd_init,
	.cpd_free_fn		= bfq_cpd_free,

	.pd_alloc_fn		= bfq_pd_alloc,
	.pd_init_fn		= bfq_pd_init,
	.pd_offline_fn		= bfq_pd_offline,
	.pd_free_fn		= bfq_pd_free,
	.pd_reset_stats_fn	= bfq_pd_reset_stats,
};

struct cftype bfq_blkcg_legacy_files[] = {
	{
		.name = "bfq.weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight_legacy,
		.write_u64 = bfq_io_set_weight_legacy,
	},
	{
		.name = "bfq.weight_device",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight,
		.write = bfq_io_set_weight,
	},

	/* statistics covering only the tasks in this bfqg */
	{
		.name = "bfq.io_service_bytes",
		.private = offsetof(struct bfq_group, stats.bytes),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_serviced",
		.private = offsetof(struct bfq_group, stats.ios),
		.seq_show = bfqg_print_rwstat,
	},
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	{
		.name = "bfq.time",
		.private = offsetof(struct bfq_group, stats.time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.sectors",
		.seq_show = bfqg_print_stat_sectors,
	},
	{
		.name = "bfq.io_service_time",
		.private = offsetof(struct bfq_group, stats.service_time),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_wait_time",
		.private = offsetof(struct bfq_group, stats.wait_time),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_merged",
		.private = offsetof(struct bfq_group, stats.merged),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_queued",
		.private = offsetof(struct bfq_group, stats.queued),
		.seq_show = bfqg_print_rwstat,
	},
#endif

	/* the same statistics, but covering the bfqg and its descendants */
	{
		.name = "bfq.io_service_bytes_recursive",
		.private = offsetof(struct bfq_group, stats.bytes),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_serviced_recursive",
		.private = offsetof(struct bfq_group, stats.ios),
		.seq_show = bfqg_print_rwstat_recursive,
	},
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	{
		.name = "bfq.time_recursive",
		.private = offsetof(struct bfq_group, stats.time),
		.seq_show = bfqg_print_stat_recursive,
	},
	{
		.name = "bfq.sectors_recursive",
		.seq_show = bfqg_print_stat_sectors_recursive,
	},
	{
		.name = "bfq.io_service_time_recursive",
		.private = offsetof(struct bfq_group, stats.service_time),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_wait_time_recursive",
		.private = offsetof(struct bfq_group, stats.wait_time),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_merged_recursive",
		.private = offsetof(struct bfq_group, stats.merged),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_queued_recursive",
		.private = offsetof(struct bfq_group, stats.queued),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.avg_queue_size",
		.seq_show = bfqg_print_avg_queue_size,
	},
	{
		.name = "bfq.group_wait_time",
		.private = offsetof(struct bfq_group, stats.group_wait_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.idle_time",
		.private = offsetof(struct bfq_group, stats.idle_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.empty_time",
		.private = offsetof(struct bfq_group, stats.empty_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.dequeue",
		.private = offsetof(struct bfq_group, stats.dequeue),
		.seq_show = bfqg_print_stat,
	},
#endif	/* CONFIG_BFQ_CGROUP_DEBUG */
	{ }	/* terminate */
};

struct cftype bfq_blkg_files[] = {
	{
		.name = "bfq.weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight,
		.write = bfq_io_set_weight,
	},
	{ }	/* terminate */
};

#else	/* CONFIG_BFQ_GROUP_IOSCHED */

void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		   struct bfq_group *bfqg) {}

void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->weight = entity->new_weight;
	entity->orig_weight = entity->new_weight;
	if (bfqq) {
		bfqq->ioprio = bfqq->new_ioprio;
		bfqq->ioprio_class = bfqq->new_ioprio_class;
	}
	entity->sched_data = &bfqg->sched_data;
}

void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {}

void bfq_end_wr_async(struct bfq_data *bfqd)
{
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
{
	return bfqd->root_group;
}

struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
{
	return bfqq->bfqd->root_group;
}

void bfqg_and_blkg_get(struct bfq_group *bfqg) {}

void bfqg_and_blkg_put(struct bfq_group *bfqg) {}

struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
	struct bfq_group *bfqg;
	int i;

	bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
	if (!bfqg)
		return NULL;

	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
		bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;

	return bfqg;
}
#endif	/* CONFIG_BFQ_GROUP_IOSCHED */