// SPDX-License-Identifier: GPL-2.0
/*
 * Block rq-qos base io controller
 *
 * This works similar to wbt with a few exceptions
 *
 * - It's bio based, so the latency covers the whole block layer in addition to
 *   the actual io.
 * - We will throttle all IO that comes in here if we need to.
 * - We use the mean latency over the 100ms window.  This is because writes can
 *   be particularly fast, which could give us a false sense of the impact of
 *   other workloads on our protected workload.
 * - By default there's no throttling, we set the queue_depth to UINT_MAX so
 *   that we can have as many outstanding bio's as we're allowed to.  Only at
 *   throttle time do we pay attention to the actual queue depth.
 *
 * The hierarchy works like the cpu controller does: we track the latency at
 * every configured node, and each configured node only throttles based on its
 * local latency.
 *
 * Throttling is driven by a per-parent "scale cookie"
 * (child_latency_info->scale_cookie).  When a child group misses its latency
 * target, the parent's cookie is lowered; when the child meets its target
 * again, the cookie is raised back toward DEFAULT_SCALE_COOKIE.  Every group
 * compares its cached copy of the cookie against its parent's on each IO
 * (check_scale_change()) and scales its own queue depth in the indicated
 * direction.  Once the cookie returns to DEFAULT_SCALE_COOKIE everybody is
 * unthrottled.  If a group is already at a queue depth of 1 and still misses
 * its target, we fall back to inflicting explicit delay on the offending
 * cgroup via blkcg_use_delay() and blkcg_add_delay().
 */
#include <linux/kernel.h>
#include <linux/blk_types.h>
#include <linux/backing-dev.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/memcontrol.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/signal.h>
#include <trace/events/block.h>
#include "blk-rq-qos.h"
#include "blk-stat.h"

#define DEFAULT_SCALE_COOKIE 1000000U

static struct blkcg_policy blkcg_policy_iolatency;
struct iolatency_grp;

struct blk_iolatency {
	struct rq_qos rqos;
	struct timer_list timer;
	atomic_t enabled;
};

static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
{
	return container_of(rqos, struct blk_iolatency, rqos);
}

static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
{
	return atomic_read(&blkiolat->enabled) > 0;
}

struct child_latency_info {
	spinlock_t lock;

	/* Last time we adjusted the scale of everybody. */
	u64 last_scale_event;

	/* The latency that we missed. */
	u64 scale_lat;

	/* Total io's from all of our children for the last summation. */
	u64 nr_samples;

	/* The guy who actually changed the latency numbers. */
	struct iolatency_grp *scale_grp;

	/* Cookie to tell if we need to scale up or down. */
	atomic_t scale_cookie;
};

struct iolatency_grp {
	struct blkg_policy_data pd;
	struct blk_rq_stat __percpu *stats;
	struct blk_iolatency *blkiolat;
	struct rq_depth rq_depth;
	struct rq_wait rq_wait;
	atomic64_t window_start;
	atomic_t scale_cookie;
	u64 min_lat_nsec;
	u64 cur_win_nsec;

	/* Total running average of our io latency. */
	u64 lat_avg;

	/* Our current number of IO's for the last summation. */
	u64 nr_samples;

	struct child_latency_info child_lat;
};

#define BLKIOLATENCY_MIN_WIN_SIZE (100 * NSEC_PER_MSEC)
#define BLKIOLATENCY_MAX_WIN_SIZE NSEC_PER_SEC

/*
 * These are the constants used to fake the fixed-point moving average
 * calculation just like load average.  The call to calc_load() folds
 * (FIXED_1 (2048) - exp_factor) * new_sample into lat_avg.  The sampling
 * window is bucketed to try to approximately calculate average latency such
 * that 1/exp (decay rate) is [1 min, 2.5 min, 5 min, 7.5 min, 10 min] elapsed
 * time.  Note, windows only elapse with IO activity.  Idle periods extend the
 * most recent window.
 */
#define BLKIOLATENCY_NR_EXP_FACTORS 5
#define BLKIOLATENCY_EXP_BUCKET_SIZE (BLKIOLATENCY_MAX_WIN_SIZE / \
				      (BLKIOLATENCY_NR_EXP_FACTORS - 1))
static const u64 iolatency_exp_factors[BLKIOLATENCY_NR_EXP_FACTORS] = {
	2045, // exp(1/600) - 600 samples
	2039, // exp(1/240) - 240 samples
	2031, // exp(1/120) - 120 samples
	2023, // exp(1/80)  - 80 samples
	2014, // exp(1/60)  - 60 samples
};

static inline struct iolatency_grp *pd_to_lat(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct iolatency_grp, pd) : NULL;
}

static inline struct iolatency_grp *blkg_to_lat(struct blkcg_gq *blkg)
{
	return pd_to_lat(blkg_to_pd(blkg, &blkcg_policy_iolatency));
}

static inline struct blkcg_gq *lat_to_blkg(struct iolatency_grp *iolat)
{
	return pd_to_blkg(&iolat->pd);
}

static void iolat_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
	atomic_dec(&rqw->inflight);
	wake_up(&rqw->wait);
}

static bool iolat_acquire_inflight(struct rq_wait *rqw, void *private_data)
{
	struct iolatency_grp *iolat = private_data;

	return rq_wait_inc_below(rqw, iolat->rq_depth.max_depth);
}

static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
				       struct iolatency_grp *iolat,
				       bool issue_as_root,
				       bool use_memdelay)
{
	struct rq_wait *rqw = &iolat->rq_wait;
	unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);

	if (use_delay)
		blkcg_schedule_throttle(rqos->q, use_memdelay);

	/*
	 * To avoid priority inversions we want to just take a slot if we are
	 * issuing as root.  If we're being killed off there's no point in
	 * delaying things, we may have been killed by OOM so throttling may
	 * make recovery take even longer, so just let the IO's through so the
	 * task can go away.
	 */
	if (issue_as_root || fatal_signal_pending(current)) {
		atomic_inc(&rqw->inflight);
		return;
	}

	rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb);
}

#define SCALE_DOWN_FACTOR 2
#define SCALE_UP_FACTOR 4

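/*
 * Note the asymmetry: a scale-down step is 1/4 of the queue depth while a
 * scale-up step is only 1/16th, so we back off quickly under pressure and
 * recover cautiously.  Never step by less than 1.
 */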
static inline unsigned long scale_amount(unsigned long qd, bool up)
{
	return max(up ? qd >> SCALE_UP_FACTOR : qd >> SCALE_DOWN_FACTOR, 1UL);
}

/*
 * We scale the qd down faster than we scale up, so we need to use this helper
 * to adjust the scale_cookie accordingly so we don't prematurely get
 * scale_cookie at DEFAULT_SCALE_COOKIE and unthrottle too much.
 *
 * Each group has their own local copy of the last scale cookie they saw, so
 * if the global scale cookie goes up or down they know which way they need to
 * go based on their last knowledge of it.
 */
static void scale_cookie_change(struct blk_iolatency *blkiolat,
				struct child_latency_info *lat_info,
				bool up)
{
	unsigned long qd = blk_queue_depth(blkiolat->rqos.q);
	unsigned long scale = scale_amount(qd, up);
	unsigned long old = atomic_read(&lat_info->scale_cookie);
	unsigned long max_scale = qd << 1;
	unsigned long diff = 0;

	if (old < DEFAULT_SCALE_COOKIE)
		diff = DEFAULT_SCALE_COOKIE - old;

	if (up) {
		if (scale + old > DEFAULT_SCALE_COOKIE)
			atomic_set(&lat_info->scale_cookie,
				   DEFAULT_SCALE_COOKIE);
		else if (diff > qd)
			atomic_inc(&lat_info->scale_cookie);
		else
			atomic_add(scale, &lat_info->scale_cookie);
	} else {
		/*
		 * We don't want to dig a hole so deep that it takes us hours
		 * to dig out of it.  Just enough that we don't throttle/
		 * unthrottle with jagged workloads but can still unthrottle
		 * once pressure has sufficiently dissipated.
		 */
		if (diff > qd) {
			if (diff < max_scale)
				atomic_dec(&lat_info->scale_cookie);
		} else {
			atomic_sub(scale, &lat_info->scale_cookie);
		}
	}
}

/*
 * Change the queue depth of the iolatency_grp.  We add 1/16th of the queue
 * depth at a time when scaling up so we don't get wild swings, and halve the
 * current depth when scaling down so we back off quickly under pressure.
 */
static void scale_change(struct iolatency_grp *iolat, bool up)
{
	unsigned long qd = blk_queue_depth(iolat->blkiolat->rqos.q);
	unsigned long scale = scale_amount(qd, up);
	unsigned long old = iolat->rq_depth.max_depth;

	if (old > qd)
		old = qd;

	if (up) {
		/* We were using the delay fallback at depth 1, stop that. */
		if (old == 1 && blkcg_unuse_delay(lat_to_blkg(iolat)))
			return;

		if (old < qd) {
			old += scale;
			old = min(old, qd);
			iolat->rq_depth.max_depth = old;
			wake_up_all(&iolat->rq_wait.wait);
		}
	} else if (old > 1) {
		old >>= 1;
		iolat->rq_depth.max_depth = max(old, 1UL);
	}
}

/* Check our parent and see if the scale cookie has changed. */
static void check_scale_change(struct iolatency_grp *iolat)
{
	struct iolatency_grp *parent;
	struct child_latency_info *lat_info;
	unsigned int cur_cookie;
	unsigned int our_cookie = atomic_read(&iolat->scale_cookie);
	u64 scale_lat;
	unsigned int old;
	int direction = 0;

	if (lat_to_blkg(iolat)->parent == NULL)
		return;

	parent = blkg_to_lat(lat_to_blkg(iolat)->parent);
	if (!parent)
		return;

	lat_info = &parent->child_lat;
	cur_cookie = atomic_read(&lat_info->scale_cookie);
	scale_lat = READ_ONCE(lat_info->scale_lat);

	if (cur_cookie < our_cookie)
		direction = -1;
	else if (cur_cookie > our_cookie)
		direction = 1;
	else
		return;

	old = atomic_cmpxchg(&iolat->scale_cookie, our_cookie, cur_cookie);

	/* Somebody beat us to the punch, just bail. */
	if (old != our_cookie)
		return;

	if (direction < 0 && iolat->min_lat_nsec) {
		u64 samples_thresh;

		if (!scale_lat || iolat->min_lat_nsec <= scale_lat)
			return;

		/*
		 * Sometimes high priority writes are really fast, which means
		 * we may not be the ones hurting the overall latencies.  Only
		 * scale down if we contributed a meaningful share (more than
		 * 5%) of the parent's samples, otherwise leave our depth
		 * alone.
		 */
		samples_thresh = lat_info->nr_samples * 5;
		samples_thresh = div64_u64(samples_thresh, 100);
		if (iolat->nr_samples <= samples_thresh)
			return;
	}

	/* We're as low as we can go. */
	if (iolat->rq_depth.max_depth == 1 && direction < 0) {
		blkcg_use_delay(lat_to_blkg(iolat));
		return;
	}

	/* We're back to the default cookie, unthrottle all the things. */
	if (cur_cookie == DEFAULT_SCALE_COOKIE) {
		blkcg_clear_delay(lat_to_blkg(iolat));
		iolat->rq_depth.max_depth = UINT_MAX;
		wake_up_all(&iolat->rq_wait.wait);
		return;
	}

	scale_change(iolat, direction > 0);
}

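/*
 * rq-qos throttle hook: walk from the bio's blkg up to the root, checking
 * each configured level for a scale change and charging/throttling the bio
 * against each group's queue depth along the way.  Arm the timer so scaling
 * decisions keep getting revisited while IO is flowing.
 */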
static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
{
	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
	struct blkcg_gq *blkg = bio->bi_blkg;
	bool issue_as_root = bio_issue_as_root_blkg(bio);

	if (!blk_iolatency_enabled(blkiolat))
		return;

	while (blkg && blkg->parent) {
		struct iolatency_grp *iolat = blkg_to_lat(blkg);
		if (!iolat) {
			blkg = blkg->parent;
			continue;
		}

		check_scale_change(iolat);
		__blkcg_iolatency_throttle(rqos, iolat, issue_as_root,
					   (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
		blkg = blkg->parent;
	}
	if (!timer_pending(&blkiolat->timer))
		mod_timer(&blkiolat->timer, jiffies + HZ);
}

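/*
 * Fold a completed bio's latency into this group's per-cpu stats.  For IO
 * issued as root on behalf of a throttled group, don't record the sample;
 * instead charge the latency the bio "skipped" (target minus actual) back to
 * the group as explicit delay.
 */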
static void iolatency_record_time(struct iolatency_grp *iolat,
				  struct bio_issue *issue, u64 now,
				  bool issue_as_root)
{
	struct blk_rq_stat *rq_stat;
	u64 start = bio_issue_time(issue);
	u64 req_time;

	/*
	 * Have to do this so we are truncated to the correct time that our
	 * issue is truncated to.
	 */
	now = __bio_issue_time(now);

	if (now <= start)
		return;

	req_time = now - start;

	/*
	 * We don't want to count issue_as_root bio's in the cgroups latency
	 * statistics as it could skew the numbers downwards.
	 */
	if (unlikely(issue_as_root && iolat->rq_depth.max_depth != UINT_MAX)) {
		u64 sub = iolat->min_lat_nsec;
		if (req_time < sub)
			blkcg_add_delay(lat_to_blkg(iolat), now, sub - req_time);
		return;
	}

	rq_stat = get_cpu_ptr(iolat->stats);
	blk_rq_stat_add(rq_stat, req_time);
	put_cpu_ptr(rq_stat);
}

#define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC)
#define BLKIOLATENCY_MIN_GOOD_SAMPLES 5

static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now)
{
	struct blkcg_gq *blkg = lat_to_blkg(iolat);
	struct iolatency_grp *parent;
	struct child_latency_info *lat_info;
	struct blk_rq_stat stat;
	unsigned long flags;
	int cpu, exp_idx;

	blk_rq_stat_init(&stat);
	preempt_disable();
	for_each_online_cpu(cpu) {
		struct blk_rq_stat *s;
		s = per_cpu_ptr(iolat->stats, cpu);
		blk_rq_stat_sum(&stat, s);
		blk_rq_stat_init(s);
	}
	preempt_enable();

	parent = blkg_to_lat(blkg->parent);
	if (!parent)
		return;

	lat_info = &parent->child_lat;

	/*
	 * calc_load() takes in a number stored in fixed point representation.
	 * Because we are using this for IO time in ns, the values stored
	 * are significantly larger than the FIXED_1 denominator (2048).
	 * Therefore, rounding errors in the calculation are negligible and
	 * can be ignored.
	 */
	exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
			div64_u64(iolat->cur_win_nsec,
				  BLKIOLATENCY_EXP_BUCKET_SIZE));
	iolat->lat_avg = calc_load(iolat->lat_avg,
				   iolatency_exp_factors[exp_idx],
				   stat.mean);

	/* Everything is ok and we don't need to adjust the scale. */
	if (stat.mean <= iolat->min_lat_nsec &&
	    atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE)
		return;

	/* Update our parent's sample totals under its lock. */
	spin_lock_irqsave(&lat_info->lock, flags);
	lat_info->nr_samples -= iolat->nr_samples;
	lat_info->nr_samples += stat.nr_samples;
	iolat->nr_samples = stat.nr_samples;

	if ((lat_info->last_scale_event >= now ||
	     now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME) &&
	    lat_info->scale_lat <= iolat->min_lat_nsec)
		goto out;

	if (stat.mean <= iolat->min_lat_nsec &&
	    stat.nr_samples >= BLKIOLATENCY_MIN_GOOD_SAMPLES) {
		if (lat_info->scale_grp == iolat) {
			lat_info->last_scale_event = now;
			scale_cookie_change(iolat->blkiolat, lat_info, true);
		}
	} else if (stat.mean > iolat->min_lat_nsec) {
		lat_info->last_scale_event = now;
		if (!lat_info->scale_grp ||
		    lat_info->scale_lat > iolat->min_lat_nsec) {
			WRITE_ONCE(lat_info->scale_lat, iolat->min_lat_nsec);
			lat_info->scale_grp = iolat;
		}
		scale_cookie_change(iolat->blkiolat, lat_info, false);
	}
out:
	spin_unlock_irqrestore(&lat_info->lock, flags);
}

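/*
 * rq-qos done_bio hook: release the inflight slots taken at throttle time at
 * every level, record the bio's latency, and close out the stats window if it
 * has elapsed (the cmpxchg on window_start makes sure only one completion
 * closes a given window).
 */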
static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
{
	struct blkcg_gq *blkg;
	struct rq_wait *rqw;
	struct iolatency_grp *iolat;
	u64 window_start;
	u64 now = ktime_to_ns(ktime_get());
	bool issue_as_root = bio_issue_as_root_blkg(bio);
	bool enabled = false;

	blkg = bio->bi_blkg;
	if (!blkg)
		return;

	iolat = blkg_to_lat(bio->bi_blkg);
	if (!iolat)
		return;

	enabled = blk_iolatency_enabled(iolat->blkiolat);
	while (blkg && blkg->parent) {
		iolat = blkg_to_lat(blkg);
		if (!iolat) {
			blkg = blkg->parent;
			continue;
		}
		rqw = &iolat->rq_wait;

		atomic_dec(&rqw->inflight);
		if (!enabled || iolat->min_lat_nsec == 0)
			goto next;
		iolatency_record_time(iolat, &bio->bi_issue, now,
				      issue_as_root);
		window_start = atomic64_read(&iolat->window_start);
		if (now > window_start &&
		    (now - window_start) >= iolat->cur_win_nsec) {
			if (atomic64_cmpxchg(&iolat->window_start,
					window_start, now) == window_start)
				iolatency_check_latencies(iolat, now);
		}
next:
		wake_up(&rqw->wait);
		blkg = blkg->parent;
	}
}

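/*
 * rq-qos cleanup hook: the bio took throttle slots but will never reach our
 * done_bio hook (e.g. it failed to get a request), so put back the inflight
 * counts at every level and wake anybody waiting on them.
 */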
static void blkcg_iolatency_cleanup(struct rq_qos *rqos, struct bio *bio)
{
	struct blkcg_gq *blkg;

	blkg = bio->bi_blkg;
	while (blkg && blkg->parent) {
		struct rq_wait *rqw;
		struct iolatency_grp *iolat;

		iolat = blkg_to_lat(blkg);
		if (!iolat)
			goto next;

		rqw = &iolat->rq_wait;
		atomic_dec(&rqw->inflight);
		wake_up(&rqw->wait);
next:
		blkg = blkg->parent;
	}
}

static void blkcg_iolatency_exit(struct rq_qos *rqos)
{
	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);

	del_timer_sync(&blkiolat->timer);
	blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
	kfree(blkiolat);
}

static struct rq_qos_ops blkcg_iolatency_ops = {
	.throttle = blkcg_iolatency_throttle,
	.cleanup = blkcg_iolatency_cleanup,
	.done_bio = blkcg_iolatency_done_bio,
	.exit = blkcg_iolatency_exit,
};

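/*
 * Runs roughly once a second while IO is flowing.  For any parent that is
 * still scaled down, scale back up if the group that caused the scale down
 * has gone away, and clear scale_grp after 5 seconds without a scale event so
 * an idle group can't keep everybody else throttled indefinitely.
 */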
static void blkiolatency_timer_fn(struct timer_list *t)
{
	struct blk_iolatency *blkiolat = from_timer(blkiolat, t, timer);
	struct blkcg_gq *blkg;
	struct cgroup_subsys_state *pos_css;
	u64 now = ktime_to_ns(ktime_get());

	rcu_read_lock();
	blkg_for_each_descendant_pre(blkg, pos_css,
				     blkiolat->rqos.q->root_blkg) {
		struct iolatency_grp *iolat;
		struct child_latency_info *lat_info;
		unsigned long flags;
		u64 cookie;

		/*
		 * We could be exiting, don't access the pd unless we have a
		 * ref on the blkg.
		 */
		if (!blkg_tryget(blkg))
			continue;

		iolat = blkg_to_lat(blkg);
		if (!iolat)
			goto next;

		lat_info = &iolat->child_lat;
		cookie = atomic_read(&lat_info->scale_cookie);

		if (cookie >= DEFAULT_SCALE_COOKIE)
			goto next;

		spin_lock_irqsave(&lat_info->lock, flags);
		if (lat_info->last_scale_event >= now)
			goto next_lock;

		/*
		 * We scaled down but don't have a scale_grp, scale up and
		 * carry on.
		 */
		if (lat_info->scale_grp == NULL) {
			scale_cookie_change(iolat->blkiolat, lat_info, true);
			goto next_lock;
		}

		/*
		 * It's been 5 seconds since our last scale event, clear the
		 * scale grp in case the group that needed the scale down
		 * isn't doing any IO currently.
		 */
		if (now - lat_info->last_scale_event >=
		    ((u64)NSEC_PER_SEC * 5))
			lat_info->scale_grp = NULL;
next_lock:
		spin_unlock_irqrestore(&lat_info->lock, flags);
next:
		blkg_put(blkg);
	}
	rcu_read_unlock();
}

int blk_iolatency_init(struct request_queue *q)
{
	struct blk_iolatency *blkiolat;
	struct rq_qos *rqos;
	int ret;

	blkiolat = kzalloc(sizeof(*blkiolat), GFP_KERNEL);
	if (!blkiolat)
		return -ENOMEM;

	rqos = &blkiolat->rqos;
	rqos->id = RQ_QOS_CGROUP;
	rqos->ops = &blkcg_iolatency_ops;
	rqos->q = q;

	rq_qos_add(q, rqos);

	ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
	if (ret) {
		rq_qos_del(q, rqos);
		kfree(blkiolat);
		return ret;
	}

	timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);

	return 0;
}

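/*
 * Set the target latency and derive the stats window from it (16 * target,
 * clamped to [100ms, 1s]).  The enabled count lets the rq-qos hooks be cheap
 * no-ops while no group on the queue has a target configured.
 */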
static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
{
	struct iolatency_grp *iolat = blkg_to_lat(blkg);
	struct blk_iolatency *blkiolat = iolat->blkiolat;
	u64 oldval = iolat->min_lat_nsec;

	iolat->min_lat_nsec = val;
	iolat->cur_win_nsec = max_t(u64, val << 4, BLKIOLATENCY_MIN_WIN_SIZE);
	iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec,
				    BLKIOLATENCY_MAX_WIN_SIZE);

	if (!oldval && val)
		atomic_inc(&blkiolat->enabled);
	if (oldval && !val)
		atomic_dec(&blkiolat->enabled);
}

static void iolatency_clear_scaling(struct blkcg_gq *blkg)
{
	if (blkg->parent) {
		struct iolatency_grp *iolat = blkg_to_lat(blkg->parent);
		struct child_latency_info *lat_info;
		if (!iolat)
			return;

		lat_info = &iolat->child_lat;
		spin_lock(&lat_info->lock);
		atomic_set(&lat_info->scale_cookie, DEFAULT_SCALE_COOKIE);
		lat_info->last_scale_event = 0;
		lat_info->scale_grp = NULL;
		lat_info->scale_lat = 0;
		spin_unlock(&lat_info->lock);
	}
}

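/*
 * Parse writes to the io.latency cgroup file.  The accepted format is
 * "MAJOR:MINOR target=<usec>", with "target=max" disabling throttling, e.g.:
 *
 *	echo "8:16 target=750" > io.latency
 */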
static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
				   size_t nbytes, loff_t off)
{
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct blkcg_gq *blkg;
	struct blkg_conf_ctx ctx;
	struct iolatency_grp *iolat;
	char *p, *tok;
	u64 lat_val = 0;
	u64 oldval;
	int ret;

	ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
	if (ret)
		return ret;

	iolat = blkg_to_lat(ctx.blkg);
	p = ctx.body;

	ret = -EINVAL;
	while ((tok = strsep(&p, " "))) {
		char key[16];
		char val[21];	/* 18446744073709551616 */

		if (sscanf(tok, "%15[^=]=%20s", key, val) != 2)
			goto out;

		if (!strcmp(key, "target")) {
			u64 v;

			if (!strcmp(val, "max"))
				lat_val = 0;
			else if (sscanf(val, "%llu", &v) == 1)
				lat_val = v * NSEC_PER_USEC;
			else
				goto out;
		} else {
			goto out;
		}
	}

	/*
	 * If the target changed, whatever scaling state the parent had built
	 * up against the old target is stale, so reset it.
	 */
	blkg = ctx.blkg;
	oldval = iolat->min_lat_nsec;

	iolatency_set_min_lat_nsec(blkg, lat_val);
	if (oldval != iolat->min_lat_nsec)
		iolatency_clear_scaling(blkg);
	ret = 0;
out:
	blkg_conf_finish(&ctx);
	return ret ?: nbytes;
}

static u64 iolatency_prfill_limit(struct seq_file *sf,
				  struct blkg_policy_data *pd, int off)
{
	struct iolatency_grp *iolat = pd_to_lat(pd);
	const char *dname = blkg_dev_name(pd->blkg);

	if (!dname || !iolat->min_lat_nsec)
		return 0;
	seq_printf(sf, "%s target=%llu\n",
		   dname, div_u64(iolat->min_lat_nsec, NSEC_PER_USEC));
	return 0;
}

static int iolatency_print_limit(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  iolatency_prfill_limit,
			  &blkcg_policy_iolatency, seq_cft(sf)->private, false);
	return 0;
}

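/*
 * Contribute " depth= avg_lat= win=" to this group's io.stat line: the
 * current queue depth cap ("max" when unthrottled), the decaying average
 * latency in usec and the sampling window in msec.
 */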
static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
				size_t size)
{
	struct iolatency_grp *iolat = pd_to_lat(pd);
	unsigned long long avg_lat = div64_u64(iolat->lat_avg, NSEC_PER_USEC);
	unsigned long long cur_win = div64_u64(iolat->cur_win_nsec,
					       NSEC_PER_MSEC);

	if (iolat->rq_depth.max_depth == UINT_MAX)
		return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu",
				 avg_lat, cur_win);

	return scnprintf(buf, size, " depth=%u avg_lat=%llu win=%llu",
			 iolat->rq_depth.max_depth, avg_lat, cur_win);
}

static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, int node)
{
	struct iolatency_grp *iolat;

	iolat = kzalloc_node(sizeof(*iolat), gfp, node);
	if (!iolat)
		return NULL;
	iolat->stats = __alloc_percpu_gfp(sizeof(struct blk_rq_stat),
					  __alignof__(struct blk_rq_stat), gfp);
	if (!iolat->stats) {
		kfree(iolat);
		return NULL;
	}
	return &iolat->pd;
}

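/*
 * Initialize a group's state when its policy data comes online.  We inherit
 * the current scale cookie from the parent so a freshly created group starts
 * at the same throttle level as its siblings.
 */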
static void iolatency_pd_init(struct blkg_policy_data *pd)
{
	struct iolatency_grp *iolat = pd_to_lat(pd);
	struct blkcg_gq *blkg = lat_to_blkg(iolat);
	struct rq_qos *rqos = blkcg_rq_qos(blkg->q);
	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
	u64 now = ktime_to_ns(ktime_get());
	int cpu;

	for_each_possible_cpu(cpu) {
		struct blk_rq_stat *stat;
		stat = per_cpu_ptr(iolat->stats, cpu);
		blk_rq_stat_init(stat);
	}

	rq_wait_init(&iolat->rq_wait);
	spin_lock_init(&iolat->child_lat.lock);
	iolat->rq_depth.queue_depth = blk_queue_depth(blkg->q);
	iolat->rq_depth.max_depth = UINT_MAX;
	iolat->rq_depth.default_depth = iolat->rq_depth.queue_depth;
	iolat->blkiolat = blkiolat;
	iolat->cur_win_nsec = 100 * NSEC_PER_MSEC;
	atomic64_set(&iolat->window_start, now);

	/*
	 * We init things in list order, so the pd for the parent may not be
	 * init'ed yet for whatever reason.
	 */
	if (blkg->parent && blkg_to_pd(blkg->parent, &blkcg_policy_iolatency)) {
		struct iolatency_grp *parent = blkg_to_lat(blkg->parent);
		atomic_set(&iolat->scale_cookie,
			   atomic_read(&parent->child_lat.scale_cookie));
	} else {
		atomic_set(&iolat->scale_cookie, DEFAULT_SCALE_COOKIE);
	}

	atomic_set(&iolat->child_lat.scale_cookie, DEFAULT_SCALE_COOKIE);
}

static void iolatency_pd_offline(struct blkg_policy_data *pd)
{
	struct iolatency_grp *iolat = pd_to_lat(pd);
	struct blkcg_gq *blkg = lat_to_blkg(iolat);

	iolatency_set_min_lat_nsec(blkg, 0);
	iolatency_clear_scaling(blkg);
}

static void iolatency_pd_free(struct blkg_policy_data *pd)
{
	struct iolatency_grp *iolat = pd_to_lat(pd);

	free_percpu(iolat->stats);
	kfree(iolat);
}

static struct cftype iolatency_files[] = {
	{
		.name = "latency",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = iolatency_print_limit,
		.write = iolatency_set_limit,
	},
	{}
};

static struct blkcg_policy blkcg_policy_iolatency = {
	.dfl_cftypes	= iolatency_files,
	.pd_alloc_fn	= iolatency_pd_alloc,
	.pd_init_fn	= iolatency_pd_init,
	.pd_offline_fn	= iolatency_pd_offline,
	.pd_free_fn	= iolatency_pd_free,
	.pd_stat_fn	= iolatency_pd_stat,
};

static int __init iolatency_init(void)
{
	return blkcg_policy_register(&blkcg_policy_iolatency);
}

static void __exit iolatency_exit(void)
{
	return blkcg_policy_unregister(&blkcg_policy_iolatency);
}

module_init(iolatency_init);
module_exit(iolatency_exit);