/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 *
 * For policy-specific per-blkcg data:
 * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
 *                    Arianna Avanzini <avanzini.arianna@gmail.com>
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/ctype.h>
#include <linux/blk-cgroup.h>
#include "blk.h"

#define MAX_KEY_LEN 100
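
/*
 * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
 * blkcg_pol_register_mutex nests outside of it and synchronizes entire
 * policy [un]registration.
 */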
static DEFINE_MUTEX(blkcg_pol_register_mutex);
static DEFINE_MUTEX(blkcg_pol_mutex);

struct blkcg blkcg_root;
EXPORT_SYMBOL_GPL(blkcg_root);

struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;

static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];

static LIST_HEAD(all_blkcgs);

static bool blkcg_policy_enabled(struct request_queue *q,
				 const struct blkcg_policy *pol)
{
	return pol && test_bit(pol->plid, q->blkcg_pols);
}
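
/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */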
static void blkg_free(struct blkcg_gq *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKCG_MAX_POLS; i++)
		if (blkg->pd[i])
			blkcg_policy[i]->pd_free_fn(blkg->pd[i]);

	if (blkg->blkcg != &blkcg_root)
		blk_exit_rl(blkg->q, &blkg->rl);

	blkg_rwstat_exit(&blkg->stat_ios);
	blkg_rwstat_exit(&blkg->stat_bytes);
	kfree(blkg);
}
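
/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 * @gfp_mask: allocation mask to use
 *
 * Allocate a new blkg associating @blkcg and @q.
 */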
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
				   gfp_t gfp_mask)
{
	struct blkcg_gq *blkg;
	int i;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
	if (!blkg)
		return NULL;

	if (blkg_rwstat_init(&blkg->stat_bytes, gfp_mask) ||
	    blkg_rwstat_init(&blkg->stat_ios, gfp_mask))
		goto err_free;

	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	blkg->blkcg = blkcg;
	atomic_set(&blkg->refcnt, 1);

	/* root blkg uses @q->root_rl, init rl only for !root blkgs */
	if (blkcg != &blkcg_root) {
		if (blk_init_rl(&blkg->rl, q, gfp_mask))
			goto err_free;
		blkg->rl.blkg = blkg;
	}

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkg_policy_data *pd;

		if (!blkcg_policy_enabled(q, pol))
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = pol->pd_alloc_fn(gfp_mask, q->node);
		if (!pd)
			goto err_free;

		blkg->pd[i] = pd;
		pd->blkg = blkg;
		pd->plid = i;
	}

	return blkg;

err_free:
	blkg_free(blkg);
	return NULL;
}
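
/**
 * blkg_lookup_slowpath - lookup and update hint on blkg lookup failure
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is internal version and shouldn't be used by policy
 * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and lookup hint is updated on success.
 */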
struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint)
{
	struct blkcg_gq *blkg;

	/*
	 * Hint didn't match.  Look up from the radix tree.  Note that the
	 * hint can only be updated under queue_lock as otherwise @blkg
	 * could have already been removed from blkg_tree.  The caller is
	 * responsible for grabbing queue_lock if @update_hint.
	 */
	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
	if (blkg && blkg->q == q) {
		if (update_hint) {
			lockdep_assert_held(q->queue_lock);
			rcu_assign_pointer(blkcg->blkg_hint, blkg);
		}
		return blkg;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup_slowpath);
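
/*
 * If @new_blkg is %NULL, this function tries to allocate a new one as
 * necessary using %GFP_NOWAIT.  @new_blkg is always consumed on return.
 */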
static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
				    struct request_queue *q,
				    struct blkcg_gq *new_blkg)
{
	struct blkcg_gq *blkg;
	struct bdi_writeback_congested *wb_congested;
	int i, ret;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/* blkg holds a reference to blkcg */
	if (!css_tryget_online(&blkcg->css)) {
		ret = -ENODEV;
		goto err_free_blkg;
	}

	wb_congested = wb_congested_get_create(q->backing_dev_info,
					       blkcg->css.id,
					       GFP_NOWAIT | __GFP_NOWARN);
	if (!wb_congested) {
		ret = -ENOMEM;
		goto err_put_css;
	}

	/* allocate */
	if (!new_blkg) {
		new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT | __GFP_NOWARN);
		if (unlikely(!new_blkg)) {
			ret = -ENOMEM;
			goto err_put_congested;
		}
	}
	blkg = new_blkg;
	blkg->wb_congested = wb_congested;

	/* link parent */
	if (blkcg_parent(blkcg)) {
		blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
		if (WARN_ON_ONCE(!blkg->parent)) {
			ret = -ENODEV;
			goto err_put_congested;
		}
		blkg_get(blkg->parent);
	}

	/* invoke per-policy init */
	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];

		if (blkg->pd[i] && pol->pd_init_fn)
			pol->pd_init_fn(blkg->pd[i]);
	}

	/* insert */
	spin_lock(&blkcg->lock);
	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
	if (likely(!ret)) {
		hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
		list_add(&blkg->q_node, &q->blkg_list);

		for (i = 0; i < BLKCG_MAX_POLS; i++) {
			struct blkcg_policy *pol = blkcg_policy[i];

			if (blkg->pd[i] && pol->pd_online_fn)
				pol->pd_online_fn(blkg->pd[i]);
		}
	}
	blkg->online = true;
	spin_unlock(&blkcg->lock);

	if (!ret)
		return blkg;

	/* @blkg failed to be fully initialized, use the usual release path */
	blkg_put(blkg);
	return ERR_PTR(ret);

err_put_congested:
	wb_congested_put(wb_congested);
err_put_css:
	css_put(&blkcg->css);
err_free_blkg:
	blkg_free(new_blkg);
	return ERR_PTR(ret);
}
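
/**
 * blkg_lookup_create - lookup blkg, try to create one if not there
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  If it doesn't exist, try to
 * create one.  blkg creation is performed recursively from blkcg_root such
 * that all non-root blkgs have access to the parent blkg.  This function
 * should be called under RCU read lock and @q->queue_lock.
 *
 * Returns the blkg on success and ERR_PTR() value on error.  If @q is
 * bypassing or dying, an ERR_PTR() value is returned without attempting
 * creation.
 */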
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
				    struct request_queue *q)
{
	struct blkcg_gq *blkg;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/*
	 * This could be the first entry point of blkcg implementation and
	 * we shouldn't allow anything to go through for a bypassing queue.
	 */
	if (unlikely(blk_queue_bypass(q)))
		return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);

	blkg = __blkg_lookup(blkcg, q, true);
	if (blkg)
		return blkg;

	/*
	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
	 * non-root blkgs have access to their parents.
	 */
	while (true) {
		struct blkcg *pos = blkcg;
		struct blkcg *parent = blkcg_parent(blkcg);

		while (parent && !__blkg_lookup(parent, q, false)) {
			pos = parent;
			parent = blkcg_parent(parent);
		}

		blkg = blkg_create(pos, q, NULL);
		if (pos == blkcg || IS_ERR(blkg))
			return blkg;
	}
}

static void blkg_destroy(struct blkcg_gq *blkg)
{
	struct blkcg *blkcg = blkg->blkcg;
	struct blkcg_gq *parent = blkg->parent;
	int i;

	lockdep_assert_held(blkg->q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something wrong if we are trying to remove same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];

		if (blkg->pd[i] && pol->pd_offline_fn)
			pol->pd_offline_fn(blkg->pd[i]);
	}

	if (parent) {
		blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
		blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
	}

	blkg->online = false;

	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	/*
	 * Both setting lookup hint to and clearing it from @blkg are done
	 * under queue_lock.  If it's not pointing to @blkg now, it never
	 * will.  Hint assignment itself can race safely.
	 */
	if (rcu_access_pointer(blkcg->blkg_hint) == blkg)
		rcu_assign_pointer(blkcg->blkg_hint, NULL);

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, group can be destroyed.
	 */
	blkg_put(blkg);
}
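
/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 *
 * Destroy all blkgs associated with @q.
 */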
static void blkg_destroy_all(struct request_queue *q)
{
	struct blkcg_gq *blkg, *n;

	lockdep_assert_held(q->queue_lock);

	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkcg *blkcg = blkg->blkcg;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}

	q->root_blkg = NULL;
	q->root_rl.blkg = NULL;
}
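
/*
 * A group is RCU protected, but having an rcu lock does not mean that one
 * can access all the fields of blkg and assume these are valid.  For
 * example, don't try to follow throtl_data and request queue links.
 *
 * Having a reference to blkg under an rcu allows accesses to only values
 * local to groups like group stats and group rate limits.
 */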
void __blkg_release_rcu(struct rcu_head *rcu_head)
{
	struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);

	/* release the blkcg and parent blkg refs this blkg has been holding */
	css_put(&blkg->blkcg->css);
	if (blkg->parent)
		blkg_put(blkg->parent);

	wb_congested_put(blkg->wb_congested);

	blkg_free(blkg);
}
EXPORT_SYMBOL_GPL(__blkg_release_rcu);
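
/*
 * The next function used by blk_queue_for_each_rl().  It's a bit tricky
 * because the root blkg uses @q->root_rl instead of its own rl.
 */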
struct request_list *__blk_queue_next_rl(struct request_list *rl,
					 struct request_queue *q)
{
	struct list_head *ent;
	struct blkcg_gq *blkg;

	/*
	 * Determine the current blkg list_head.  The first entry is
	 * root_rl which is off @q->blkg_list and mapped to the head.
	 */
	if (rl == &q->root_rl) {
		ent = &q->blkg_list;
		/* There are no more block groups, hence no request lists */
		if (list_empty(ent))
			return NULL;
	} else {
		blkg = container_of(rl, struct blkcg_gq, rl);
		ent = &blkg->q_node;
	}

	/* walk to the next list_head, skip root blkcg */
	ent = ent->next;
	if (ent == &q->root_blkg->q_node)
		ent = ent->next;
	if (ent == &q->blkg_list)
		return NULL;

	blkg = container_of(ent, struct blkcg_gq, q_node);
	return &blkg->rl;
}

static int blkcg_reset_stats(struct cgroup_subsys_state *css,
			     struct cftype *cftype, u64 val)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	struct blkcg_gq *blkg;
	int i;

	mutex_lock(&blkcg_pol_mutex);
	spin_lock_irq(&blkcg->lock);

	/*
	 * Note that stat reset is racy - it doesn't synchronize against
	 * stat updates.  This is a debug feature which shouldn't exist
	 * anyway.  If you get hit by a race, retry.
	 */
	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
		blkg_rwstat_reset(&blkg->stat_bytes);
		blkg_rwstat_reset(&blkg->stat_ios);

		for (i = 0; i < BLKCG_MAX_POLS; i++) {
			struct blkcg_policy *pol = blkcg_policy[i];

			if (blkg->pd[i] && pol->pd_reset_stats_fn)
				pol->pd_reset_stats_fn(blkg->pd[i]);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	mutex_unlock(&blkcg_pol_mutex);
	return 0;
}

const char *blkg_dev_name(struct blkcg_gq *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info->dev)
		return dev_name(blkg->q->backing_dev_info->dev);
	return NULL;
}
EXPORT_SYMBOL_GPL(blkg_dev_name);
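
/**
 * blkcg_print_blkgs - helper for printing per-blkg data
 * @sf: seq_file to print to
 * @blkcg: blkcg of interest
 * @prfill: fill function to print out a blkg
 * @pol: policy in question
 * @data: data to be passed to @prfill
 * @show_total: to print out sum of prfill return values or not
 *
 * This function invokes @prfill on each blkg of @blkcg if pd for the
 * policy specified by @pol exists.  @prfill is invoked with @sf, the
 * policy data and @data.  If @show_total is %true, the sum of the return
 * values from @prfill is printed with "Total" label at the end.
 *
 * This is to be used to construct any kind of basic nesting-free printout
 * based on the policy's private data.
 */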
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total)
{
	struct blkcg_gq *blkg;
	u64 total = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
		spin_lock_irq(blkg->q->queue_lock);
		if (blkcg_policy_enabled(blkg->q, pol))
			total += prfill(sf, blkg->pd[pol->plid], data);
		spin_unlock_irq(blkg->q->queue_lock);
	}
	rcu_read_unlock();

	if (show_total)
		seq_printf(sf, "Total %llu\n", (unsigned long long)total);
}
EXPORT_SYMBOL_GPL(blkcg_print_blkgs);
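
/**
 * __blkg_prfill_u64 - prfill helper for a single u64 value
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @v: value to print
 *
 * Print @v to @sf for the device associated with @pd.
 */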
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
{
	const char *dname = blkg_dev_name(pd->blkg);

	if (!dname)
		return 0;

	seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
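
/**
 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @rwstat: rwstat to print
 *
 * Print @rwstat to @sf for the device associated with @pd.
 */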
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat *rwstat)
{
	static const char *rwstr[] = {
		[BLKG_RWSTAT_READ]	= "Read",
		[BLKG_RWSTAT_WRITE]	= "Write",
		[BLKG_RWSTAT_SYNC]	= "Sync",
		[BLKG_RWSTAT_ASYNC]	= "Async",
	};
	const char *dname = blkg_dev_name(pd->blkg);
	u64 v;
	int i;

	if (!dname)
		return 0;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
			   (unsigned long long)atomic64_read(&rwstat->aux_cnt[i]));

	v = atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_READ]) +
		atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_WRITE]);
	seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
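
/**
 * blkg_prfill_stat - prfill callback for blkg_stat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @off: offset to the blkg_stat in @pd
 *
 * prfill callback for printing a blkg_stat.
 */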
u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off)
{
	return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off));
}
EXPORT_SYMBOL_GPL(blkg_prfill_stat);
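
/**
 * blkg_prfill_rwstat - prfill callback for blkg_rwstat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @off: offset to the blkg_rwstat in @pd
 *
 * prfill callback for printing a blkg_rwstat.
 */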
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
		       int off)
{
	struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off);

	return __blkg_prfill_rwstat(sf, pd, &rwstat);
}
EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);

static u64 blkg_prfill_rwstat_field(struct seq_file *sf,
				    struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd->blkg + off);

	return __blkg_prfill_rwstat(sf, pd, &rwstat);
}
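
/**
 * blkg_print_stat_bytes - seq_show callback for blkg->stat_bytes
 * @sf: seq_file to print to
 * @v: unused
 *
 * To be used as cftype->seq_show to print blkg->stat_bytes.
 * cftype->private must be set to the blkcg_policy.
 */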
int blkg_print_stat_bytes(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
			  offsetof(struct blkcg_gq, stat_bytes), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_print_stat_bytes);
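
/**
 * blkg_print_stat_ios - seq_show callback for blkg->stat_ios
 * @sf: seq_file to print to
 * @v: unused
 *
 * To be used as cftype->seq_show to print blkg->stat_ios.
 * cftype->private must be set to the blkcg_policy.
 */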
int blkg_print_stat_ios(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
			  offsetof(struct blkcg_gq, stat_ios), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_print_stat_ios);

static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf,
					      struct blkg_policy_data *pd,
					      int off)
{
	struct blkg_rwstat rwstat = blkg_rwstat_recursive_sum(pd->blkg,
							      NULL, off);
	return __blkg_prfill_rwstat(sf, pd, &rwstat);
}
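
/**
 * blkg_print_stat_bytes_recursive - recursive version of blkg_print_stat_bytes
 * @sf: seq_file to print to
 * @v: unused
 */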
int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  blkg_prfill_rwstat_field_recursive,
			  (void *)seq_cft(sf)->private,
			  offsetof(struct blkcg_gq, stat_bytes), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_print_stat_bytes_recursive);
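
/**
 * blkg_print_stat_ios_recursive - recursive version of blkg_print_stat_ios
 * @sf: seq_file to print to
 * @v: unused
 */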
int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  blkg_prfill_rwstat_field_recursive,
			  (void *)seq_cft(sf)->private,
			  offsetof(struct blkcg_gq, stat_ios), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_print_stat_ios_recursive);
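
/**
 * blkg_stat_recursive_sum - collect hierarchical blkg_stat
 * @blkg: blkg of interest
 * @pol: blkcg_policy which contains the blkg_stat
 * @off: offset to the blkg_stat in blkg_policy_data or @blkg
 *
 * Collect the blkg_stat specified by @blkg, @pol and @off and all its
 * online descendants and their aux counts.  The caller must be holding the
 * queue lock for online tests.
 *
 * If @pol is NULL, blkg_stat is at @off bytes into @blkg; otherwise, it is
 * at @off bytes into @blkg's blkg_policy_data of the policy.
 */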
u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
			    struct blkcg_policy *pol, int off)
{
	struct blkcg_gq *pos_blkg;
	struct cgroup_subsys_state *pos_css;
	u64 sum = 0;

	lockdep_assert_held(blkg->q->queue_lock);

	rcu_read_lock();
	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
		struct blkg_stat *stat;

		if (!pos_blkg->online)
			continue;

		if (pol)
			stat = (void *)blkg_to_pd(pos_blkg, pol) + off;
		else
			stat = (void *)pos_blkg + off;

		sum += blkg_stat_read(stat) + atomic64_read(&stat->aux_cnt);
	}
	rcu_read_unlock();

	return sum;
}
EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum);
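
/**
 * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
 * @blkg: blkg of interest
 * @pol: blkcg_policy which contains the blkg_rwstat
 * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
 *
 * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
 * online descendants and their aux counts.  The caller must be holding the
 * queue lock for online tests.
 *
 * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
 * is at @off bytes into @blkg's blkg_policy_data of the policy.
 */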
struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
					     struct blkcg_policy *pol, int off)
{
	struct blkcg_gq *pos_blkg;
	struct cgroup_subsys_state *pos_css;
	struct blkg_rwstat sum = { };
	int i;

	lockdep_assert_held(blkg->q->queue_lock);

	rcu_read_lock();
	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
		struct blkg_rwstat *rwstat;

		if (!pos_blkg->online)
			continue;

		if (pol)
			rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
		else
			rwstat = (void *)pos_blkg + off;

		for (i = 0; i < BLKG_RWSTAT_NR; i++)
			atomic64_add(atomic64_read(&rwstat->aux_cnt[i]) +
				percpu_counter_sum_positive(&rwstat->cpu_cnt[i]),
				&sum.aux_cnt[i]);
	}
	rcu_read_unlock();

	return sum;
}
EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);

/* Performs queue bypass and policy enabled checks then looks up blkg. */
static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
					  const struct blkcg_policy *pol,
					  struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	if (!blkcg_policy_enabled(q, pol))
		return ERR_PTR(-EOPNOTSUPP);

	/*
	 * This could be the first entry point of blkcg implementation and
	 * we shouldn't allow anything to go through for a bypassing queue.
	 */
	if (unlikely(blk_queue_bypass(q)))
		return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);

	return __blkg_lookup(blkcg, q, true);
}
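
/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @pol: target policy
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result.  @ctx->blkg points to the blkg to be updated and @ctx->body the
 * part of @input following MAJ:MIN.  This function returns with RCU read
 * lock and queue lock held and must be paired with blkg_conf_finish().
 */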
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx)
	__acquires(rcu) __acquires(disk->queue->queue_lock)
{
	struct gendisk *disk;
	struct request_queue *q;
	struct blkcg_gq *blkg;
	struct module *owner;
	unsigned int major, minor;
	int key_len, part, ret;
	char *body;

	if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
		return -EINVAL;

	body = input + key_len;
	if (!isspace(*body))
		return -EINVAL;
	body = skip_spaces(body);

	disk = get_gendisk(MKDEV(major, minor), &part);
	if (!disk)
		return -ENODEV;
	if (part) {
		ret = -ENODEV;
		goto fail;
	}

	q = disk->queue;

	rcu_read_lock();
	spin_lock_irq(q->queue_lock);

	blkg = blkg_lookup_check(blkcg, pol, q);
	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		goto fail_unlock;
	}

	if (blkg)
		goto success;

	/*
	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
	 * non-root blkgs have access to their parents.
	 */
	while (true) {
		struct blkcg *pos = blkcg;
		struct blkcg *parent;
		struct blkcg_gq *new_blkg;

		parent = blkcg_parent(blkcg);
		while (parent && !__blkg_lookup(parent, q, false)) {
			pos = parent;
			parent = blkcg_parent(parent);
		}

		/* Drop locks to do new blkg allocation with GFP_KERNEL. */
		spin_unlock_irq(q->queue_lock);
		rcu_read_unlock();

		new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
		if (unlikely(!new_blkg)) {
			ret = -ENOMEM;
			goto fail;
		}

		rcu_read_lock();
		spin_lock_irq(q->queue_lock);

		blkg = blkg_lookup_check(pos, pol, q);
		if (IS_ERR(blkg)) {
			ret = PTR_ERR(blkg);
			goto fail_unlock;
		}

		if (blkg) {
			blkg_free(new_blkg);
		} else {
			blkg = blkg_create(pos, q, new_blkg);
			if (unlikely(IS_ERR(blkg))) {
				ret = PTR_ERR(blkg);
				goto fail_unlock;
			}
		}

		if (pos == blkcg)
			goto success;
	}
success:
	ctx->disk = disk;
	ctx->blkg = blkg;
	ctx->body = body;
	return 0;

fail_unlock:
	spin_unlock_irq(q->queue_lock);
	rcu_read_unlock();
fail:
	owner = disk->fops->owner;
	put_disk(disk);
	module_put(owner);
	/*
	 * If queue was bypassing, we should retry.  Do so after a
	 * short msleep().  It isn't strictly necessary but queue
	 * can be bypassing for some time and it's always nice to
	 * avoid busy looping.
	 */
	if (ret == -EBUSY) {
		msleep(10);
		ret = restart_syscall();
	}
	return ret;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);
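
/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 *
 * Finish up after per-blkg config update.  This function must be paired
 * with blkg_conf_prep().
 */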
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
	__releases(ctx->disk->queue->queue_lock) __releases(rcu)
{
	struct module *owner;

	spin_unlock_irq(ctx->disk->queue->queue_lock);
	rcu_read_unlock();
	owner = ctx->disk->fops->owner;
	put_disk(ctx->disk);
	module_put(owner);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);

static int blkcg_print_stat(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct blkcg_gq *blkg;

	rcu_read_lock();

	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
		const char *dname;
		struct blkg_rwstat rwstat;
		u64 rbytes, wbytes, rios, wios;

		dname = blkg_dev_name(blkg);
		if (!dname)
			continue;

		spin_lock_irq(blkg->q->queue_lock);

		rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
					offsetof(struct blkcg_gq, stat_bytes));
		rbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
		wbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);

		rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
					offsetof(struct blkcg_gq, stat_ios));
		rios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
		wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);

		spin_unlock_irq(blkg->q->queue_lock);

		if (rbytes || wbytes || rios || wios)
			seq_printf(sf, "%s rbytes=%llu wbytes=%llu rios=%llu wios=%llu\n",
				   dname, rbytes, wbytes, rios, wios);
	}

	rcu_read_unlock();
	return 0;
}

static struct cftype blkcg_files[] = {
	{
		.name = "stat",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = blkcg_print_stat,
	},
	{ }
};

static struct cftype blkcg_legacy_files[] = {
	{
		.name = "reset_stats",
		.write_u64 = blkcg_reset_stats,
	},
	{ }
};
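
/**
 * blkcg_css_offline - cgroup css_offline callback
 * @css: css of interest
 *
 * This function is called when @css is about to go away and responsible
 * for shooting down all blkgs associated with @css.  blkgs should be
 * removed while holding both q and blkcg locks.  As blkcg lock is nested
 * inside q lock, this function performs reverse double lock dancing.
 */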
static void blkcg_css_offline(struct cgroup_subsys_state *css)
{
	struct blkcg *blkcg = css_to_blkcg(css);

	spin_lock_irq(&blkcg->lock);

	while (!hlist_empty(&blkcg->blkg_list)) {
		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
						    struct blkcg_gq, blkcg_node);
		struct request_queue *q = blkg->q;

		if (spin_trylock(q->queue_lock)) {
			blkg_destroy(blkg);
			spin_unlock(q->queue_lock);
		} else {
			spin_unlock_irq(&blkcg->lock);
			cpu_relax();
			spin_lock_irq(&blkcg->lock);
		}
	}

	spin_unlock_irq(&blkcg->lock);

	wb_blkcg_offline(blkcg);
}

static void blkcg_css_free(struct cgroup_subsys_state *css)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	int i;

	mutex_lock(&blkcg_pol_mutex);

	list_del(&blkcg->all_blkcgs_node);

	for (i = 0; i < BLKCG_MAX_POLS; i++)
		if (blkcg->cpd[i])
			blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);

	mutex_unlock(&blkcg_pol_mutex);

	kfree(blkcg);
}

static struct cgroup_subsys_state *
blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct blkcg *blkcg;
	struct cgroup_subsys_state *ret;
	int i;

	mutex_lock(&blkcg_pol_mutex);

	if (!parent_css) {
		blkcg = &blkcg_root;
	} else {
		blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
		if (!blkcg) {
			ret = ERR_PTR(-ENOMEM);
			goto unlock;
		}
	}

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkcg_policy_data *cpd;

		/*
		 * If the policy hasn't been attached yet, wait for it
		 * to be attached before doing anything else. Otherwise,
		 * check if the policy requires any specific per-cgroup
		 * data: if it does, allocate and initialize it.
		 */
		if (!pol || !pol->cpd_alloc_fn)
			continue;

		cpd = pol->cpd_alloc_fn(GFP_KERNEL);
		if (!cpd) {
			ret = ERR_PTR(-ENOMEM);
			goto free_pd_blkcg;
		}
		blkcg->cpd[i] = cpd;
		cpd->blkcg = blkcg;
		cpd->plid = i;
		if (pol->cpd_init_fn)
			pol->cpd_init_fn(cpd);
	}

	spin_lock_init(&blkcg->lock);
	INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN);
	INIT_HLIST_HEAD(&blkcg->blkg_list);
#ifdef CONFIG_CGROUP_WRITEBACK
	INIT_LIST_HEAD(&blkcg->cgwb_list);
#endif
	list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);

	mutex_unlock(&blkcg_pol_mutex);
	return &blkcg->css;

free_pd_blkcg:
	for (i--; i >= 0; i--)
		if (blkcg->cpd[i])
			blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);

	if (blkcg != &blkcg_root)
		kfree(blkcg);
unlock:
	mutex_unlock(&blkcg_pol_mutex);
	return ret;
}
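
/**
 * blkcg_init_queue - initialize blkcg part of request queue
 * @q: request_queue to initialize
 *
 * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
 * part of new request_queue @q.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */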
int blkcg_init_queue(struct request_queue *q)
{
	struct blkcg_gq *new_blkg, *blkg;
	bool preloaded;
	int ret;

	new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
	if (!new_blkg)
		return -ENOMEM;

	preloaded = !radix_tree_preload(GFP_KERNEL);

	/*
	 * Make sure the root blkg exists.  As @q is bypassing at this
	 * point, blkg_lookup_create() can't be used.  Open code insertion.
	 */
	rcu_read_lock();
	spin_lock_irq(q->queue_lock);
	blkg = blkg_create(&blkcg_root, q, new_blkg);
	spin_unlock_irq(q->queue_lock);
	rcu_read_unlock();

	if (preloaded)
		radix_tree_preload_end();

	if (IS_ERR(blkg))
		return PTR_ERR(blkg);

	q->root_blkg = blkg;
	q->root_rl.blkg = blkg;

	ret = blk_throtl_init(q);
	if (ret) {
		spin_lock_irq(q->queue_lock);
		blkg_destroy_all(q);
		spin_unlock_irq(q->queue_lock);
	}
	return ret;
}
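
/**
 * blkcg_drain_queue - drain blkcg part of request_queue
 * @q: request_queue to drain
 *
 * Called from blk_drain_queue().  Responsible for draining blkcg part.
 */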
void blkcg_drain_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	/*
	 * @q could be exiting and already have destroyed all blkgs as
	 * indicated by NULL root_blkg.  If so, don't confuse policies.
	 */
	if (!q->root_blkg)
		return;

	blk_throtl_drain(q);
}
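
/**
 * blkcg_exit_queue - exit and release blkcg part of request_queue
 * @q: request_queue being released
 *
 * Called from blk_release_queue().  Responsible for exiting blkcg part.
 */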
void blkcg_exit_queue(struct request_queue *q)
{
	spin_lock_irq(q->queue_lock);
	blkg_destroy_all(q);
	spin_unlock_irq(q->queue_lock);

	blk_throtl_exit(q);
}
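
/*
 * We cannot support shared io contexts, as we have no mean to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures.  For now we allow a task to change its
 * cgroup only if it's the only owner of its ioc.
 */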
static int blkcg_can_attach(struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct cgroup_subsys_state *dst_css;
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	cgroup_taskset_for_each(task, dst_css, tset) {
		task_lock(task);
		ioc = task->io_context;
		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
			ret = -EINVAL;
		task_unlock(task);
		if (ret)
			break;
	}
	return ret;
}

static void blkcg_bind(struct cgroup_subsys_state *root_css)
{
	int i;

	mutex_lock(&blkcg_pol_mutex);

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkcg *blkcg;

		if (!pol || !pol->cpd_bind_fn)
			continue;

		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node)
			if (blkcg->cpd[pol->plid])
				pol->cpd_bind_fn(blkcg->cpd[pol->plid]);
	}
	mutex_unlock(&blkcg_pol_mutex);
}

struct cgroup_subsys io_cgrp_subsys = {
	.css_alloc = blkcg_css_alloc,
	.css_offline = blkcg_css_offline,
	.css_free = blkcg_css_free,
	.can_attach = blkcg_can_attach,
	.bind = blkcg_bind,
	.dfl_cftypes = blkcg_files,
	.legacy_cftypes = blkcg_legacy_files,
	.legacy_name = "blkio",
#ifdef CONFIG_MEMCG
	/*
	 * This ensures that, if available, memcg is automatically enabled
	 * together on the default hierarchy so that the owner cgroup can
	 * be retrieved from writeback pages.
	 */
	.depends_on = 1 << memory_cgrp_id,
#endif
};
EXPORT_SYMBOL_GPL(io_cgrp_subsys);
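
/**
 * blkcg_activate_policy - activate a blkcg policy on a request_queue
 * @q: request_queue of interest
 * @pol: blkcg policy to activate
 *
 * Activate @pol on @q.  Requires %GFP_KERNEL context.  @q goes through
 * queue freezing (mq) or bypass mode (legacy) so that its existing blkgs
 * can be populated with policy data for @pol without concurrent access.
 */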
int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol)
{
	struct blkg_policy_data *pd_prealloc = NULL;
	struct blkcg_gq *blkg;
	int ret;

	if (blkcg_policy_enabled(q, pol))
		return 0;

	if (q->mq_ops)
		blk_mq_freeze_queue(q);
	else
		blk_queue_bypass_start(q);
pd_prealloc:
	if (!pd_prealloc) {
		pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
		if (!pd_prealloc) {
			ret = -ENOMEM;
			goto out_bypass_end;
		}
	}

	spin_lock_irq(q->queue_lock);

	list_for_each_entry(blkg, &q->blkg_list, q_node) {
		struct blkg_policy_data *pd;

		if (blkg->pd[pol->plid])
			continue;

		pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q->node);
		if (!pd)
			swap(pd, pd_prealloc);
		if (!pd) {
			spin_unlock_irq(q->queue_lock);
			goto pd_prealloc;
		}

		blkg->pd[pol->plid] = pd;
		pd->blkg = blkg;
		pd->plid = pol->plid;
		if (pol->pd_init_fn)
			pol->pd_init_fn(pd);
	}

	__set_bit(pol->plid, q->blkcg_pols);
	ret = 0;

	spin_unlock_irq(q->queue_lock);
out_bypass_end:
	if (q->mq_ops)
		blk_mq_unfreeze_queue(q);
	else
		blk_queue_bypass_end(q);
	if (pd_prealloc)
		pol->pd_free_fn(pd_prealloc);
	return ret;
}
EXPORT_SYMBOL_GPL(blkcg_activate_policy);
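
/**
 * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue
 * @q: request_queue of interest
 * @pol: blkcg policy to deactivate
 *
 * Deactivate @pol on @q.  Follows the same synchronization rules as
 * blkcg_activate_policy().
 */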
void blkcg_deactivate_policy(struct request_queue *q,
			     const struct blkcg_policy *pol)
{
	struct blkcg_gq *blkg;

	if (!blkcg_policy_enabled(q, pol))
		return;

	if (q->mq_ops)
		blk_mq_freeze_queue(q);
	else
		blk_queue_bypass_start(q);

	spin_lock_irq(q->queue_lock);

	__clear_bit(pol->plid, q->blkcg_pols);

	list_for_each_entry(blkg, &q->blkg_list, q_node) {
		/* grab blkcg lock too while removing @pd from @blkg */
		spin_lock(&blkg->blkcg->lock);

		if (blkg->pd[pol->plid]) {
			if (pol->pd_offline_fn)
				pol->pd_offline_fn(blkg->pd[pol->plid]);
			pol->pd_free_fn(blkg->pd[pol->plid]);
			blkg->pd[pol->plid] = NULL;
		}

		spin_unlock(&blkg->blkcg->lock);
	}

	spin_unlock_irq(q->queue_lock);

	if (q->mq_ops)
		blk_mq_unfreeze_queue(q);
	else
		blk_queue_bypass_end(q);
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
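
/**
 * blkcg_policy_register - register a blkcg policy
 * @pol: blkcg policy to register
 *
 * Register @pol with blkcg core.  Might sleep and @pol may be modified on
 * successful registration.  Returns 0 on success and -errno on failure.
 */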
int blkcg_policy_register(struct blkcg_policy *pol)
{
	struct blkcg *blkcg;
	int i, ret;

	mutex_lock(&blkcg_pol_register_mutex);
	mutex_lock(&blkcg_pol_mutex);

	/* find an empty slot */
	ret = -ENOSPC;
	for (i = 0; i < BLKCG_MAX_POLS; i++)
		if (!blkcg_policy[i])
			break;
	if (i >= BLKCG_MAX_POLS)
		goto err_unlock;

	/* register @pol */
	pol->plid = i;
	blkcg_policy[pol->plid] = pol;

	/* allocate and install cpd's */
	if (pol->cpd_alloc_fn) {
		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
			struct blkcg_policy_data *cpd;

			cpd = pol->cpd_alloc_fn(GFP_KERNEL);
			if (!cpd)
				goto err_free_cpds;

			blkcg->cpd[pol->plid] = cpd;
			cpd->blkcg = blkcg;
			cpd->plid = pol->plid;
			if (pol->cpd_init_fn)
				pol->cpd_init_fn(cpd);
		}
	}

	mutex_unlock(&blkcg_pol_mutex);

	/* everything is in place, add intf files for the new policy */
	if (pol->dfl_cftypes)
		WARN_ON(cgroup_add_dfl_cftypes(&io_cgrp_subsys,
					       pol->dfl_cftypes));
	if (pol->legacy_cftypes)
		WARN_ON(cgroup_add_legacy_cftypes(&io_cgrp_subsys,
						  pol->legacy_cftypes));
	mutex_unlock(&blkcg_pol_register_mutex);
	return 0;

err_free_cpds:
	if (pol->cpd_alloc_fn) {
		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
			if (blkcg->cpd[pol->plid]) {
				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
				blkcg->cpd[pol->plid] = NULL;
			}
		}
	}
	blkcg_policy[pol->plid] = NULL;
err_unlock:
	mutex_unlock(&blkcg_pol_mutex);
	mutex_unlock(&blkcg_pol_register_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(blkcg_policy_register);
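
/**
 * blkcg_policy_unregister - unregister a blkcg policy
 * @pol: blkcg policy to unregister
 *
 * Undo blkcg_policy_register(@pol).  Might sleep.
 */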
void blkcg_policy_unregister(struct blkcg_policy *pol)
{
	struct blkcg *blkcg;

	mutex_lock(&blkcg_pol_register_mutex);

	if (WARN_ON(blkcg_policy[pol->plid] != pol))
		goto out_unlock;

	/* kill the intf files first */
	if (pol->dfl_cftypes)
		cgroup_rm_cftypes(pol->dfl_cftypes);
	if (pol->legacy_cftypes)
		cgroup_rm_cftypes(pol->legacy_cftypes);

	/* remove cpds and unregister */
	mutex_lock(&blkcg_pol_mutex);

	if (pol->cpd_alloc_fn) {
		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
			if (blkcg->cpd[pol->plid]) {
				pol->cpd_free_fn(blkcg->cpd[pol->plid]);
				blkcg->cpd[pol->plid] = NULL;
			}
		}
	}
	blkcg_policy[pol->plid] = NULL;

	mutex_unlock(&blkcg_pol_mutex);
out_unlock:
	mutex_unlock(&blkcg_pol_register_mutex);
}
EXPORT_SYMBOL_GPL(blkcg_policy_unregister);