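// SPDX-License-Identifier: GPL-2.0
/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */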
#include "sched.h"

#include "pelt.h"

int sched_rr_timeslice = RR_TIMESLICE;
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;

static const u64 max_rt_runtime = MAX_BW;

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);

struct rt_bandwidth def_rt_bandwidth;

static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
	struct rt_bandwidth *rt_b =
		container_of(timer, struct rt_bandwidth, rt_period_timer);
	int idle = 0;
	int overrun;

	raw_spin_lock(&rt_b->rt_runtime_lock);
	for (;;) {
		overrun = hrtimer_forward_now(timer, rt_b->rt_period);
		if (!overrun)
			break;

		raw_spin_unlock(&rt_b->rt_runtime_lock);
		idle = do_sched_rt_period_timer(rt_b, overrun);
		raw_spin_lock(&rt_b->rt_runtime_lock);
	}
	if (idle)
		rt_b->rt_period_active = 0;
	raw_spin_unlock(&rt_b->rt_runtime_lock);

	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}

void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
{
	rt_b->rt_period = ns_to_ktime(period);
	rt_b->rt_runtime = runtime;

	raw_spin_lock_init(&rt_b->rt_runtime_lock);

	hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_REL_HARD);
	rt_b->rt_period_timer.function = sched_rt_period_timer;
}

static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
		return;

	raw_spin_lock(&rt_b->rt_runtime_lock);
	if (!rt_b->rt_period_active) {
		rt_b->rt_period_active = 1;
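		/*
		 * SCHED_DEADLINE updates the bandwidth, as a run away
		 * RT task with a DL task could hog a CPU. But DL does
		 * not reset the period. If a deadline task was running
		 * without an RT task running, it can cause RT tasks to
		 * throttle when they start up. Kick the timer right away
		 * to update the period.
		 */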
		hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
		hrtimer_start_expires(&rt_b->rt_period_timer,
				      HRTIMER_MODE_ABS_PINNED_HARD);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);
}

void init_rt_rq(struct rt_rq *rt_rq)
{
	struct rt_prio_array *array;
	int i;

	array = &rt_rq->active;
	for (i = 0; i < MAX_RT_PRIO; i++) {
		INIT_LIST_HEAD(array->queue + i);
		__clear_bit(i, array->bitmap);
	}
	/* delimiter for bitsearch: */
	__set_bit(MAX_RT_PRIO, array->bitmap);

#if defined CONFIG_SMP
	rt_rq->highest_prio.curr = MAX_RT_PRIO;
	rt_rq->highest_prio.next = MAX_RT_PRIO;
	rt_rq->rt_nr_migratory = 0;
	rt_rq->overloaded = 0;
	plist_head_init(&rt_rq->pushable_tasks);
#endif /* CONFIG_SMP */
	/* We start in dequeued state, because no RT tasks are queued */
	rt_rq->rt_queued = 0;

	rt_rq->rt_time = 0;
	rt_rq->rt_throttled = 0;
	rt_rq->rt_runtime = 0;
	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
}

#ifdef CONFIG_RT_GROUP_SCHED
static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	hrtimer_cancel(&rt_b->rt_period_timer);
}

#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_SCHED_DEBUG
	WARN_ON_ONCE(!rt_entity_is_task(rt_se));
#endif
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return rt_rq->rq;
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	return rt_se->rt_rq;
}

static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = rt_se->rt_rq;

	return rt_rq->rq;
}

void free_rt_sched_group(struct task_group *tg)
{
	int i;

	if (tg->rt_se)
		destroy_rt_bandwidth(&tg->rt_bandwidth);

	for_each_possible_cpu(i) {
		if (tg->rt_rq)
			kfree(tg->rt_rq[i]);
		if (tg->rt_se)
			kfree(tg->rt_se[i]);
	}

	kfree(tg->rt_rq);
	kfree(tg->rt_se);
}

void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
		      struct sched_rt_entity *rt_se, int cpu,
		      struct sched_rt_entity *parent)
{
	struct rq *rq = cpu_rq(cpu);

	rt_rq->highest_prio.curr = MAX_RT_PRIO;
	rt_rq->rt_nr_boosted = 0;
	rt_rq->rq = rq;
	rt_rq->tg = tg;

	tg->rt_rq[cpu] = rt_rq;
	tg->rt_se[cpu] = rt_se;

	if (!rt_se)
		return;

	if (!parent)
		rt_se->rt_rq = &rq->rt;
	else
		rt_se->rt_rq = parent->my_q;

	rt_se->my_q = rt_rq;
	rt_se->parent = parent;
	INIT_LIST_HEAD(&rt_se->run_list);
}

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	struct rt_rq *rt_rq;
	struct sched_rt_entity *rt_se;
	int i;

	tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
	if (!tg->rt_rq)
		goto err;
	tg->rt_se = kcalloc(nr_cpu_ids, sizeof(rt_se), GFP_KERNEL);
	if (!tg->rt_se)
		goto err;

	init_rt_bandwidth(&tg->rt_bandwidth,
			ktime_to_ns(def_rt_bandwidth.rt_period), 0);

	for_each_possible_cpu(i) {
		rt_rq = kzalloc_node(sizeof(struct rt_rq),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_rq)
			goto err;

		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_se)
			goto err_free_rq;

		init_rt_rq(rt_rq);
		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
	}

	return 1;

err_free_rq:
	kfree(rt_rq);
err:
	return 0;
}

#else /* CONFIG_RT_GROUP_SCHED */

#define rt_entity_is_task(rt_se) (1)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return container_of(rt_rq, struct rq, rt);
}

static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	struct task_struct *p = rt_task_of(rt_se);

	return task_rq(p);
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	return &rq->rt;
}

void free_rt_sched_group(struct task_group *tg) { }

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	return 1;
}
#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP

static void pull_rt_task(struct rq *this_rq);

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
	/* Try to pull RT tasks here if we lower this rq's prio */
	return rq->rt.highest_prio.curr > prev->prio;
}

static inline int rt_overloaded(struct rq *rq)
{
	return atomic_read(&rq->rd->rto_count);
}

static inline void rt_set_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
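	/*
	 * Make sure the mask is visible before we set
	 * the overload count. That is checked to determine
	 * if we should look at the mask. It would be a shame
	 * if we looked at the mask, but the mask was not
	 * updated yet.
	 *
	 * Matched by the barrier in pull_rt_task().
	 */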
	smp_wmb();
	atomic_inc(&rq->rd->rto_count);
}

static inline void rt_clear_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	/* the order here really doesn't matter */
	atomic_dec(&rq->rd->rto_count);
	cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}

static void update_rt_migration(struct rt_rq *rt_rq)
{
	if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
		if (!rt_rq->overloaded) {
			rt_set_overload(rq_of_rt_rq(rt_rq));
			rt_rq->overloaded = 1;
		}
	} else if (rt_rq->overloaded) {
		rt_clear_overload(rq_of_rt_rq(rt_rq));
		rt_rq->overloaded = 0;
	}
}

static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct task_struct *p;

	if (!rt_entity_is_task(rt_se))
		return;

	p = rt_task_of(rt_se);
	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total++;
	if (p->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory++;

	update_rt_migration(rt_rq);
}

static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct task_struct *p;

	if (!rt_entity_is_task(rt_se))
		return;

	p = rt_task_of(rt_se);
	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total--;
	if (p->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory--;

	update_rt_migration(rt_rq);
}

static inline int has_pushable_tasks(struct rq *rq)
{
	return !plist_head_empty(&rq->rt.pushable_tasks);
}

static DEFINE_PER_CPU(struct callback_head, rt_push_head);
static DEFINE_PER_CPU(struct callback_head, rt_pull_head);

static void push_rt_tasks(struct rq *);
static void pull_rt_task(struct rq *);

static inline void rt_queue_push_tasks(struct rq *rq)
{
	if (!has_pushable_tasks(rq))
		return;

	queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
}

static inline void rt_queue_pull_task(struct rq *rq)
{
	queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
}

static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
	plist_node_init(&p->pushable_tasks, p->prio);
	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);

	/* Update the highest prio pushable task */
	if (p->prio < rq->rt.highest_prio.next)
		rq->rt.highest_prio.next = p->prio;
}

static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);

	/* Update the new highest prio pushable task */
	if (has_pushable_tasks(rq)) {
		p = plist_first_entry(&rq->rt.pushable_tasks,
				      struct task_struct, pushable_tasks);
		rq->rt.highest_prio.next = p->prio;
	} else
		rq->rt.highest_prio.next = MAX_RT_PRIO;
}

#else

static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline
void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline
void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
	return false;
}

static inline void pull_rt_task(struct rq *this_rq)
{
}

static inline void rt_queue_push_tasks(struct rq *rq)
{
}
#endif /* CONFIG_SMP */

static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
static void dequeue_top_rt_rq(struct rt_rq *rt_rq);

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
	return rt_se->on_rq;
}

#ifdef CONFIG_UCLAMP_TASK
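/*
 * Verify the fitness of task @p to run on @cpu taking into account the uclamp
 * settings.
 *
 * This check is only important for heterogeneous systems where uclamp_min
 * value can be higher than the capacity of a @cpu. For non-heterogeneous
 * systems this trivially returns true.
 *
 * Returns true if the capacity of the @cpu is >= the uclamp_min value (and
 * false otherwise).
 *
 * Note that uclamp_min will be clamped to uclamp_max if uclamp_min
 * > uclamp_max.
 */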
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	unsigned int min_cap;
	unsigned int max_cap;
	unsigned int cpu_cap;

	/* Only heterogeneous systems can benefit from this check */
	if (!static_branch_unlikely(&sched_asym_cpucapacity))
		return true;

	min_cap = uclamp_eff_value(p, UCLAMP_MIN);
	max_cap = uclamp_eff_value(p, UCLAMP_MAX);

	cpu_cap = capacity_orig_of(cpu);

	return cpu_cap >= min(min_cap, max_cap);
}
#else
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	return true;
}
#endif

#ifdef CONFIG_RT_GROUP_SCHED

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	if (!rt_rq->tg)
		return RUNTIME_INF;

	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}

typedef struct task_group *rt_rq_iter_t;

static inline struct task_group *next_task_group(struct task_group *tg)
{
	do {
		tg = list_entry_rcu(tg->list.next,
			typeof(struct task_group), list);
	} while (&tg->list != &task_groups && task_group_is_autogroup(tg));

	if (&tg->list == &task_groups)
		tg = NULL;

	return tg;
}

#define for_each_rt_rq(rt_rq, iter, rq)					\
	for (iter = container_of(&task_groups, typeof(*iter), list);	\
		(iter = next_task_group(iter)) &&			\
		(rt_rq = iter->rt_rq[cpu_of(rq)]);)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = rt_se->parent)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return rt_se->my_q;
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);

static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
	struct rq *rq = rq_of_rt_rq(rt_rq);
	struct sched_rt_entity *rt_se;

	int cpu = cpu_of(rq);

	rt_se = rt_rq->tg->rt_se[cpu];

	if (rt_rq->rt_nr_running) {
		if (!rt_se)
			enqueue_top_rt_rq(rt_rq);
		else if (!on_rt_rq(rt_se))
			enqueue_rt_entity(rt_se, 0);

		if (rt_rq->highest_prio.curr < curr->prio)
			resched_curr(rq);
	}
}

static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	struct sched_rt_entity *rt_se;
	int cpu = cpu_of(rq_of_rt_rq(rt_rq));

	rt_se = rt_rq->tg->rt_se[cpu];

	if (!rt_se) {
		dequeue_top_rt_rq(rt_rq);
		/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
		cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
	}
	else if (on_rt_rq(rt_se))
		dequeue_rt_entity(rt_se, 0);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
}

static int rt_se_boosted(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = group_rt_rq(rt_se);
	struct task_struct *p;

	if (rt_rq)
		return !!rt_rq->rt_nr_boosted;

	p = rt_task_of(rt_se);
	return p->prio != p->normal_prio;
}

#ifdef CONFIG_SMP
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return this_rq()->rd->span;
}
#else
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}
#endif

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &rt_rq->tg->rt_bandwidth;
}

#else /* !CONFIG_RT_GROUP_SCHED */

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(def_rt_bandwidth.rt_period);
}

typedef struct rt_rq *rt_rq_iter_t;

#define for_each_rt_rq(rt_rq, iter, rq) \
	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = NULL)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return NULL;
}

static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	if (!rt_rq->rt_nr_running)
		return;

	enqueue_top_rt_rq(rt_rq);
	resched_curr(rq);
}

static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	dequeue_top_rt_rq(rt_rq);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled;
}

static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return &cpu_rq(cpu)->rt;
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &def_rt_bandwidth;
}

#endif /* CONFIG_RT_GROUP_SCHED */

bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
{
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

	return (hrtimer_active(&rt_b->rt_period_timer) ||
		rt_rq->rt_time < rt_b->rt_runtime);
}

#ifdef CONFIG_SMP
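/*
 * We ran out of runtime, see if we can borrow some from our neighbours.
 */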
static void do_balance_runtime(struct rt_rq *rt_rq)
{
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
	struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
	int i, weight;
	u64 rt_period;

	weight = cpumask_weight(rd->span);

	raw_spin_lock(&rt_b->rt_runtime_lock);
	rt_period = ktime_to_ns(rt_b->rt_period);
	for_each_cpu(i, rd->span) {
		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
		s64 diff;

		if (iter == rt_rq)
			continue;

		raw_spin_lock(&iter->rt_runtime_lock);
		/*
		 * Either all rqs have inf runtime and there's nothing to steal
		 * or __disable_runtime() below sets a specific rq to inf to
		 * indicate it's been disabled and disallow stealing.
		 */
		if (iter->rt_runtime == RUNTIME_INF)
			goto next;

		/*
		 * From runqueues with spare time, take 1/n part of their
		 * spare time, but no more than our period.
		 */
		diff = iter->rt_runtime - iter->rt_time;
		if (diff > 0) {
			diff = div_u64((u64)diff, weight);
			if (rt_rq->rt_runtime + diff > rt_period)
				diff = rt_period - rt_rq->rt_runtime;
			iter->rt_runtime -= diff;
			rt_rq->rt_runtime += diff;
			if (rt_rq->rt_runtime == rt_period) {
				raw_spin_unlock(&iter->rt_runtime_lock);
				break;
			}
		}
next:
		raw_spin_unlock(&iter->rt_runtime_lock);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);
}
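/*
 * Ensure this RQ takes back all the runtime it lent to its neighbours.
 */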
static void __disable_runtime(struct rq *rq)
{
	struct root_domain *rd = rq->rd;
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	for_each_rt_rq(rt_rq, iter, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
		s64 want;
		int i;

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * Either we're all inf and nobody needs to borrow, or we're
		 * already disabled and thus have nothing to do, or we have
		 * exactly the right amount of runtime to take out.
		 */
		if (rt_rq->rt_runtime == RUNTIME_INF ||
				rt_rq->rt_runtime == rt_b->rt_runtime)
			goto balanced;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);

		/*
		 * Calculate the difference between what we started out with
		 * and what we currently have, that's the amount of runtime
		 * we lent and now have to reclaim.
		 */
		want = rt_b->rt_runtime - rt_rq->rt_runtime;

		/*
		 * Greedy reclaim, take back as much as we can.
		 */
		for_each_cpu(i, rd->span) {
			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
			s64 diff;

			/*
			 * Can't reclaim from ourselves or disabled runqueues.
			 */
			if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
				continue;

			raw_spin_lock(&iter->rt_runtime_lock);
			if (want > 0) {
				diff = min_t(s64, iter->rt_runtime, want);
				iter->rt_runtime -= diff;
				want -= diff;
			} else {
				iter->rt_runtime -= want;
				want -= want;
			}
			raw_spin_unlock(&iter->rt_runtime_lock);

			if (!want)
				break;
		}

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * We cannot be left wanting - that would mean some runtime
		 * leaked out of the system.
		 */
		BUG_ON(want);
balanced:
		/*
		 * Disable all the borrow logic by pretending we have inf
		 * runtime - in which case borrowing doesn't make sense.
		 */
		rt_rq->rt_runtime = RUNTIME_INF;
		rt_rq->rt_throttled = 0;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);

		/* Make rt_rq available for pick_next_task() */
		sched_rt_rq_enqueue(rt_rq);
	}
}

static void __enable_runtime(struct rq *rq)
{
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	/*
	 * Reset each runqueue's bandwidth settings.
	 */
	for_each_rt_rq(rt_rq, iter, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = rt_b->rt_runtime;
		rt_rq->rt_time = 0;
		rt_rq->rt_throttled = 0;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);
	}
}

static void balance_runtime(struct rt_rq *rt_rq)
{
	if (!sched_feat(RT_RUNTIME_SHARE))
		return;

	if (rt_rq->rt_time > rt_rq->rt_runtime) {
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		do_balance_runtime(rt_rq);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
	}
}
#else /* !CONFIG_SMP */
static inline void balance_runtime(struct rt_rq *rt_rq) {}
#endif /* CONFIG_SMP */

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
	int i, idle = 1, throttled = 0;
	const struct cpumask *span;

	span = sched_rt_period_mask();
#ifdef CONFIG_RT_GROUP_SCHED
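	/*
	 * FIXME: isolated CPUs should really leave the root task group,
	 * whether they are isolcpus or were isolated via cpusets, lest
	 * the timer run on a CPU which does not service all runqueues,
	 * potentially leaving other CPUs indefinitely throttled.  If
	 * isolation is really required, the user will turn the throttle
	 * off to kill the perturbations it causes anyway.  Meanwhile,
	 * this maintains functionality for boot and/or troubleshooting.
	 */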
	if (rt_b == &root_task_group.rt_bandwidth)
		span = cpu_online_mask;
#endif
	for_each_cpu(i, span) {
		int enqueue = 0;
		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
		struct rq *rq = rq_of_rt_rq(rt_rq);
		int skip;

		/*
		 * When span == cpu_online_mask, taking each rq->lock
		 * can be time-consuming. Try to avoid it when possible.
		 */
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
			rt_rq->rt_runtime = rt_b->rt_runtime;
		skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		if (skip)
			continue;

		raw_spin_lock(&rq->lock);
		update_rq_clock(rq);

		if (rt_rq->rt_time) {
			u64 runtime;

			raw_spin_lock(&rt_rq->rt_runtime_lock);
			if (rt_rq->rt_throttled)
				balance_runtime(rt_rq);
			runtime = rt_rq->rt_runtime;
			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
				rt_rq->rt_throttled = 0;
				enqueue = 1;

				/*
				 * When we're idle and a woken (rt) task is
				 * throttled check_preempt_curr() will set
				 * skip_update and the time between the wakeup
				 * and this unthrottle will get accounted as
				 * idle time.
				 */
				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
					rq_clock_cancel_skipupdate(rq);
			}
			if (rt_rq->rt_time || rt_rq->rt_nr_running)
				idle = 0;
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		} else if (rt_rq->rt_nr_running) {
			idle = 0;
			if (!rt_rq_throttled(rt_rq))
				enqueue = 1;
		}
		if (rt_rq->rt_throttled)
			throttled = 1;

		if (enqueue)
			sched_rt_rq_enqueue(rt_rq);
		raw_spin_unlock(&rq->lock);
	}

	if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
		return 1;

	return idle;
}

static inline int rt_se_prio(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_RT_GROUP_SCHED
	struct rt_rq *rt_rq = group_rt_rq(rt_se);

	if (rt_rq)
		return rt_rq->highest_prio.curr;
#endif

	return rt_task_of(rt_se)->prio;
}

static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
	u64 runtime = sched_rt_runtime(rt_rq);

	if (rt_rq->rt_throttled)
		return rt_rq_throttled(rt_rq);

	if (runtime >= sched_rt_period(rt_rq))
		return 0;

	balance_runtime(rt_rq);
	runtime = sched_rt_runtime(rt_rq);
	if (runtime == RUNTIME_INF)
		return 0;

	if (rt_rq->rt_time > runtime) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

		/*
		 * Don't actually throttle groups that have no runtime assigned
		 * but they can still be the source of time for other groups.
		 */
		if (likely(rt_b->rt_runtime)) {
			rt_rq->rt_throttled = 1;
			printk_deferred_once("sched: RT throttling activated\n");
		} else {
			/*
			 * In case we did anyway, make it go away,
			 * replenishment is a joke, since it will replenish us
			 * with exactly 0 ns.
			 */
			rt_rq->rt_time = 0;
		}

		if (rt_rq_throttled(rt_rq)) {
			sched_rt_rq_dequeue(rt_rq);
			return 1;
		}
	}

	return 0;
}
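/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */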
static void update_curr_rt(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	struct sched_rt_entity *rt_se = &curr->rt;
	u64 delta_exec;
	u64 now;

	if (curr->sched_class != &rt_sched_class)
		return;

	now = rq_clock_task(rq);
	delta_exec = now - curr->se.exec_start;
	if (unlikely((s64)delta_exec <= 0))
		return;

	schedstat_set(curr->se.statistics.exec_max,
		      max(curr->se.statistics.exec_max, delta_exec));

	curr->se.sum_exec_runtime += delta_exec;
	account_group_exec_runtime(curr, delta_exec);

	curr->se.exec_start = now;
	cgroup_account_cputime(curr, delta_exec);

	if (!rt_bandwidth_enabled())
		return;

	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = rt_rq_of_se(rt_se);

		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
			raw_spin_lock(&rt_rq->rt_runtime_lock);
			rt_rq->rt_time += delta_exec;
			if (sched_rt_runtime_exceeded(rt_rq))
				resched_curr(rq);
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		}
	}
}

static void
dequeue_top_rt_rq(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	BUG_ON(&rq->rt != rt_rq);

	if (!rt_rq->rt_queued)
		return;

	BUG_ON(!rq->nr_running);

	sub_nr_running(rq, rt_rq->rt_nr_running);
	rt_rq->rt_queued = 0;
}

static void
enqueue_top_rt_rq(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	BUG_ON(&rq->rt != rt_rq);

	if (rt_rq->rt_queued)
		return;

	if (rt_rq_throttled(rt_rq))
		return;

	if (rt_rq->rt_nr_running) {
		add_nr_running(rq, rt_rq->rt_nr_running);
		rt_rq->rt_queued = 1;
	}

	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
	cpufreq_update_util(rq, 0);
}

#if defined CONFIG_SMP

static void
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * Change rq's cpupri only if rt_rq is the top queue.
	 */
	if (&rq->rt != rt_rq)
		return;
#endif
	if (rq->online && prio < prev_prio)
		cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
}

static void
dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * Change rq's cpupri only if rt_rq is the top queue.
	 */
	if (&rq->rt != rt_rq)
		return;
#endif
	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
}

#else /* CONFIG_SMP */

static inline
void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
static inline
void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}

#endif /* CONFIG_SMP */

#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
static void
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int prev_prio = rt_rq->highest_prio.curr;

	if (prio < prev_prio)
		rt_rq->highest_prio.curr = prio;

	inc_rt_prio_smp(rt_rq, prio, prev_prio);
}

static void
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int prev_prio = rt_rq->highest_prio.curr;

	if (rt_rq->rt_nr_running) {

		WARN_ON(prio < prev_prio);

		/*
		 * This may have been our highest task, and therefore
		 * we may have some recomputation to do.
		 */
		if (prio == prev_prio) {
			struct rt_prio_array *array = &rt_rq->active;

			rt_rq->highest_prio.curr =
				sched_find_first_bit(array->bitmap);
		}

	} else
		rt_rq->highest_prio.curr = MAX_RT_PRIO;

	dec_rt_prio_smp(rt_rq, prio, prev_prio);
}

#else

static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}

#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_RT_GROUP_SCHED

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted++;

	if (rt_rq->tg)
		start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
}

static void
dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted--;

	WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
}

#else /* CONFIG_RT_GROUP_SCHED */

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	start_rt_bandwidth(&def_rt_bandwidth);
}

static inline
void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}

#endif /* CONFIG_RT_GROUP_SCHED */

static inline
unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
{
	struct rt_rq *group_rq = group_rt_rq(rt_se);

	if (group_rq)
		return group_rq->rt_nr_running;
	else
		return 1;
}

static inline
unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
{
	struct rt_rq *group_rq = group_rt_rq(rt_se);
	struct task_struct *tsk;

	if (group_rq)
		return group_rq->rr_nr_running;

	tsk = rt_task_of(rt_se);

	return (tsk->policy == SCHED_RR) ? 1 : 0;
}

static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	int prio = rt_se_prio(rt_se);

	WARN_ON(!rt_prio(prio));
	rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
	rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);

	inc_rt_prio(rt_rq, prio);
	inc_rt_migration(rt_se, rt_rq);
	inc_rt_group(rt_se, rt_rq);
}

static inline
void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
	WARN_ON(!rt_rq->rt_nr_running);
	rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
	rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);

	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
	dec_rt_migration(rt_se, rt_rq);
	dec_rt_group(rt_se, rt_rq);
}
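/*
 * Change rt_se->run_list location unless SAVE && !MOVE,
 *
 * assumes ENQUEUE/DEQUEUE flags match.
 */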
static inline bool move_entity(unsigned int flags)
{
	if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
		return false;

	return true;
}

static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
{
	list_del_init(&rt_se->run_list);

	if (list_empty(array->queue + rt_se_prio(rt_se)))
		__clear_bit(rt_se_prio(rt_se), array->bitmap);

	rt_se->on_list = 0;
}

static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;
	struct rt_rq *group_rq = group_rt_rq(rt_se);
	struct list_head *queue = array->queue + rt_se_prio(rt_se);

	/*
	 * Don't enqueue the group if its throttled, or when empty.
	 * The latter is a consequence of the former when a child group
	 * gets throttled: its parent may then see an empty group.
	 */
	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
		if (rt_se->on_list)
			__delist_rt_entity(rt_se, array);
		return;
	}

	if (move_entity(flags)) {
		WARN_ON_ONCE(rt_se->on_list);
		if (flags & ENQUEUE_HEAD)
			list_add(&rt_se->run_list, queue);
		else
			list_add_tail(&rt_se->run_list, queue);

		__set_bit(rt_se_prio(rt_se), array->bitmap);
		rt_se->on_list = 1;
	}
	rt_se->on_rq = 1;

	inc_rt_tasks(rt_se, rt_rq);
}

static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;

	if (move_entity(flags)) {
		WARN_ON_ONCE(!rt_se->on_list);
		__delist_rt_entity(rt_se, array);
	}
	rt_se->on_rq = 0;

	dec_rt_tasks(rt_se, rt_rq);
}
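/*
 * Because the prio of an upper entry depends on the lower
 * entries, we must remove entries top - down.
 */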
static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct sched_rt_entity *back = NULL;

	for_each_sched_rt_entity(rt_se) {
		rt_se->back = back;
		back = rt_se;
	}

	dequeue_top_rt_rq(rt_rq_of_se(back));

	for (rt_se = back; rt_se; rt_se = rt_se->back) {
		if (on_rt_rq(rt_se))
			__dequeue_rt_entity(rt_se, flags);
	}
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	dequeue_rt_stack(rt_se, flags);
	for_each_sched_rt_entity(rt_se)
		__enqueue_rt_entity(rt_se, flags);
	enqueue_top_rt_rq(&rq->rt);
}

static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	dequeue_rt_stack(rt_se, flags);

	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = group_rt_rq(rt_se);

		if (rt_rq && rt_rq->rt_nr_running)
			__enqueue_rt_entity(rt_se, flags);
	}
	enqueue_top_rt_rq(&rq->rt);
}

/*
 * Adding/removing a task to/from a priority array:
 */
static void
enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	if (flags & ENQUEUE_WAKEUP)
		rt_se->timeout = 0;

	enqueue_rt_entity(rt_se, flags);

	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);
	dequeue_rt_entity(rt_se, flags);

	dequeue_pushable_task(rq, p);
}

/*
 * Put task to the head or the end of the run list without the overhead of
 * dequeue followed by enqueue.
 */
static void
requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
{
	if (on_rt_rq(rt_se)) {
		struct rt_prio_array *array = &rt_rq->active;
		struct list_head *queue = array->queue + rt_se_prio(rt_se);

		if (head)
			list_move(&rt_se->run_list, queue);
		else
			list_move_tail(&rt_se->run_list, queue);
	}
}

static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
{
	struct sched_rt_entity *rt_se = &p->rt;
	struct rt_rq *rt_rq;

	for_each_sched_rt_entity(rt_se) {
		rt_rq = rt_rq_of_se(rt_se);
		requeue_rt_entity(rt_rq, rt_se, head);
	}
}

static void yield_task_rt(struct rq *rq)
{
	requeue_task_rt(rq, rq->curr, 0);
}

#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);

static int
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
{
	struct task_struct *curr;
	struct rq *rq;
	bool test;

	/* For anything but wake ups, just return the task_cpu */
	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
		goto out;

	rq = cpu_rq(cpu);

	rcu_read_lock();
	curr = READ_ONCE(rq->curr); /* unlocked access */
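	/*
	 * If the current task on @p's runqueue is an RT task, then
	 * try to see if we can wake this RT task up on another
	 * runqueue. Otherwise simply start this RT task
	 * on its current runqueue.
	 *
	 * We want to avoid overloading runqueues. If the woken
	 * task is a higher priority, then it will stay on this CPU
	 * and the lower prio task should be moved to another CPU.
	 * Even though this will probably make the lower prio task
	 * lose its cache, we do not want to bounce a higher task
	 * around just because it gave up its CPU, perhaps
	 * the next time it wakes up, it will be on a CPU that the
	 * priority task is running on.
	 *
	 * Otherwise, just let it ride on the affined RQ and the
	 * post-schedule router will push the preempted task away.
	 *
	 * This test is optimistic, if we get it wrong the load-balancer
	 * will have to sort it out.
	 *
	 * We take into account the capacity of the CPU to ensure it fits the
	 * requirement of the task - which is only important on heterogeneous
	 * systems like big.LITTLE.
	 */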
	test = curr &&
	       unlikely(rt_task(curr)) &&
	       (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);

	if (test || !rt_task_fits_capacity(p, cpu)) {
		int target = find_lowest_rq(p);

		/*
		 * Bail out if we were forcing a migration to find a better
		 * fitting CPU but our search failed.
		 */
		if (!test && target != -1 && !rt_task_fits_capacity(p, target))
			goto out_unlock;

		/*
		 * Don't bother moving it if the destination CPU is
		 * not running a lower priority task.
		 */
		if (target != -1 &&
		    p->prio < cpu_rq(target)->rt.highest_prio.curr)
			cpu = target;
	}

out_unlock:
	rcu_read_unlock();

out:
	return cpu;
}

static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
	/*
	 * Current can't be migrated, useless to reschedule,
	 * let's hope p can move out.
	 */
	if (rq->curr->nr_cpus_allowed == 1 ||
	    !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
		return;

	/*
	 * p is migratable, so let's not schedule it and
	 * see if it is pushed or pulled somewhere else.
	 */
	if (p->nr_cpus_allowed != 1 &&
	    cpupri_find(&rq->rd->cpupri, p, NULL))
		return;

	/*
	 * There appear to be other CPUs that can accept
	 * current and none to run 'p', so lets reschedule
	 * to try and push current away.
	 */
	requeue_task_rt(rq, p, 1);
	resched_curr(rq);
}

static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
{
	if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
		/*
		 * This is OK, because current is on_cpu, which avoids it being
		 * picked for load-balance and preemption/IRQs are still
		 * disabled avoiding further scheduler activity on it and we've
		 * not yet started the picking loop.
		 */
		rq_unpin_lock(rq, rf);
		pull_rt_task(rq);
		rq_repin_lock(rq, rf);
	}

	return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
}
#endif /* CONFIG_SMP */

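/*
 * Preempt the current task with a newly woken task if needed:
 */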
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
{
	if (p->prio < rq->curr->prio) {
		resched_curr(rq);
		return;
	}

#ifdef CONFIG_SMP
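	/*
	 * If:
	 *
	 * - the newly woken task is of equal priority to the current task
	 * - the newly woken task is non-migratable while current is migratable
	 * - current will be preempted on the next reschedule
	 *
	 * we should check to see if current can readily move to a different
	 * cpu.  If so, we will reschedule to allow the push logic to try
	 * to move current somewhere else, making room for our non-migratable
	 * task.
	 */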
	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
		check_preempt_equal_prio(rq, p);
#endif
}

static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
{
	p->se.exec_start = rq_clock_task(rq);

	/* The running task is never eligible for pushing */
	dequeue_pushable_task(rq, p);

	if (!first)
		return;

	/*
	 * If prev task was rt, put_prev_task() has already updated the
	 * utilization. We only care of the case where we start to schedule a
	 * rt task.
	 */
	if (rq->curr->sched_class != &rt_sched_class)
		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);

	rt_queue_push_tasks(rq);
}

static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
						   struct rt_rq *rt_rq)
{
	struct rt_prio_array *array = &rt_rq->active;
	struct sched_rt_entity *next = NULL;
	struct list_head *queue;
	int idx;

	idx = sched_find_first_bit(array->bitmap);
	BUG_ON(idx >= MAX_RT_PRIO);

	queue = array->queue + idx;
	next = list_entry(queue->next, struct sched_rt_entity, run_list);

	return next;
}

static struct task_struct *_pick_next_task_rt(struct rq *rq)
{
	struct sched_rt_entity *rt_se;
	struct rt_rq *rt_rq = &rq->rt;

	do {
		rt_se = pick_next_rt_entity(rq, rt_rq);
		BUG_ON(!rt_se);
		rt_rq = group_rt_rq(rt_se);
	} while (rt_rq);

	return rt_task_of(rt_se);
}

static struct task_struct *pick_next_task_rt(struct rq *rq)
{
	struct task_struct *p;

	if (!sched_rt_runnable(rq))
		return NULL;

	p = _pick_next_task_rt(rq);
	set_next_task_rt(rq, p, true);
	return p;
}

static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
	update_curr_rt(rq);

	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);

	/*
	 * The previous task needs to be made eligible for pushing
	 * if it is still active.
	 */
	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

#ifdef CONFIG_SMP

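/* Only try algorithms three times */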
#define RT_MAX_TRIES 3

static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
	if (!task_running(rq, p) &&
	    cpumask_test_cpu(cpu, p->cpus_ptr))
		return 1;

	return 0;
}

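/*
 * Return the highest pushable rq's task, which is suitable to be executed
 * on the CPU, NULL otherwise
 */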
static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
{
	struct plist_head *head = &rq->rt.pushable_tasks;
	struct task_struct *p;

	if (!has_pushable_tasks(rq))
		return NULL;

	plist_for_each_entry(p, head, pushable_tasks) {
		if (pick_rt_task(rq, p, cpu))
			return p;
	}

	return NULL;
}

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

static int find_lowest_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu = task_cpu(task);
	int ret;

	/* Make sure the mask is initialized first */
	if (unlikely(!lowest_mask))
		return -1;

	if (task->nr_cpus_allowed == 1)
		return -1; /* No other targets possible */

	/*
	 * If we're on asym system ensure we consider the different capacities
	 * of the CPUs when searching for the lowest_mask.
	 */
	if (static_branch_unlikely(&sched_asym_cpucapacity)) {

		ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
					  task, lowest_mask,
					  rt_task_fits_capacity);
	} else {

		ret = cpupri_find(&task_rq(task)->rd->cpupri,
				  task, lowest_mask);
	}

	if (!ret)
		return -1; /* No targets found */

	/*
	 * At this point we have built a mask of CPUs representing the
	 * lowest priority tasks in the system.  Now we want to elect
	 * the best one based on our affinity and topology.
	 *
	 * We prioritize the last CPU that the task executed on since
	 * it is most likely cache-hot in that location.
	 */
	if (cpumask_test_cpu(cpu, lowest_mask))
		return cpu;

	/*
	 * Otherwise, we consult the sched_domains span maps to figure
	 * out which CPU is logically closest to our hot cache data.
	 */
	if (!cpumask_test_cpu(this_cpu, lowest_mask))
		this_cpu = -1; /* Skip this_cpu opt if not among lowest */

	rcu_read_lock();
	for_each_domain(cpu, sd) {
		if (sd->flags & SD_WAKE_AFFINE) {
			int best_cpu;

			/*
			 * "this_cpu" is cheaper to preempt than a
			 * remote processor.
			 */
			if (this_cpu != -1 &&
			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				rcu_read_unlock();
				return this_cpu;
			}

			best_cpu = cpumask_first_and(lowest_mask,
						     sched_domain_span(sd));
			if (best_cpu < nr_cpu_ids) {
				rcu_read_unlock();
				return best_cpu;
			}
		}
	}
	rcu_read_unlock();

	/*
	 * And finally, if there were no matches within the domains
	 * just give the caller *something* to work with from the compatible
	 * locations.
	 */
	if (this_cpu != -1)
		return this_cpu;

	cpu = cpumask_any(lowest_mask);
	if (cpu < nr_cpu_ids)
		return cpu;

	return -1;
}

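/* Will lock the rq it finds */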
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
	struct rq *lowest_rq = NULL;
	int tries;
	int cpu;

	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
		cpu = find_lowest_rq(task);

		if ((cpu == -1) || (cpu == rq->cpu))
			break;

		lowest_rq = cpu_rq(cpu);

		if (lowest_rq->rt.highest_prio.curr <= task->prio) {
			/*
			 * Target rq has tasks of equal or higher priority,
			 * retrying does not release any lock and is unlikely
			 * to yield a different result.
			 */
			lowest_rq = NULL;
			break;
		}

		/* if the prio of this runqueue changed, try again */
		if (double_lock_balance(rq, lowest_rq)) {
			/*
			 * We had to unlock the run queue. In
			 * the mean time, task could have
			 * migrated already or had its affinity changed.
			 * Also make sure that it wasn't scheduled on its rq.
			 */
			if (unlikely(task_rq(task) != rq ||
				     !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
				     task_running(rq, task) ||
				     !rt_task(task) ||
				     !task_on_rq_queued(task))) {

				double_unlock_balance(rq, lowest_rq);
				lowest_rq = NULL;
				break;
			}
		}

		/* If this rq is still suitable use it. */
		if (lowest_rq->rt.highest_prio.curr > task->prio)
			break;

		/* try again */
		double_unlock_balance(rq, lowest_rq);
		lowest_rq = NULL;
	}

	return lowest_rq;
}

static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
	struct task_struct *p;

	if (!has_pushable_tasks(rq))
		return NULL;

	p = plist_first_entry(&rq->rt.pushable_tasks,
			      struct task_struct, pushable_tasks);

	BUG_ON(rq->cpu != task_cpu(p));
	BUG_ON(task_current(rq, p));
	BUG_ON(p->nr_cpus_allowed <= 1);

	BUG_ON(!task_on_rq_queued(p));
	BUG_ON(!rt_task(p));

	return p;
}

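/*
 * If the current CPU has more than one RT task, see if the non
 * running ones which are pushable can migrate over to a CPU that
 * is running a task of lesser priority.
 */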
static int push_rt_task(struct rq *rq)
{
	struct task_struct *next_task;
	struct rq *lowest_rq;
	int ret = 0;

	if (!rq->rt.overloaded)
		return 0;

	next_task = pick_next_pushable_task(rq);
	if (!next_task)
		return 0;

retry:
	if (WARN_ON(next_task == rq->curr))
		return 0;

	/*
	 * It's possible that the next_task slipped in of
	 * higher priority than current. If that's the case
	 * just reschedule current.
	 */
	if (unlikely(next_task->prio < rq->curr->prio)) {
		resched_curr(rq);
		return 0;
	}

	/* We might release rq lock */
	get_task_struct(next_task);

	/* find_lock_lowest_rq locks the rq if found */
	lowest_rq = find_lock_lowest_rq(next_task, rq);
	if (!lowest_rq) {
		struct task_struct *task;
		/*
		 * find_lock_lowest_rq releases rq->lock
		 * so it is possible that next_task has migrated.
		 *
		 * We need to make sure that the task is still on the same
		 * run-queue and is also still the next task eligible for
		 * pushing.
		 */
		task = pick_next_pushable_task(rq);
		if (task == next_task) {
			/*
			 * The task hasn't migrated, and is still the next
			 * eligible task, but we failed to find a run-queue
			 * to push it to.  Do not retry in this case, since
			 * other CPUs will pull from us when ready.
			 */
			goto out;
		}

		if (!task)
			/* No more tasks, just exit */
			goto out;

		/*
		 * Something has shifted, try again.
		 */
		put_task_struct(next_task);
		next_task = task;
		goto retry;
	}

	deactivate_task(rq, next_task, 0);
	set_task_cpu(next_task, lowest_rq->cpu);
	activate_task(lowest_rq, next_task, 0);
	ret = 1;

	resched_curr(lowest_rq);

	double_unlock_balance(rq, lowest_rq);

out:
	put_task_struct(next_task);

	return ret;
}

static void push_rt_tasks(struct rq *rq)
{
	/* push_rt_task() will return true if it moved an RT task */
	while (push_rt_task(rq))
		;
}

#ifdef HAVE_RT_PUSH_IPI

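/*
 * When a high priority task schedules out from a CPU and a lower priority
 * task is scheduled in, a check is made to see if there's any RT tasks
 * on other CPUs that are waiting to run because a higher priority RT task
 * is currently running on its CPU. In this case, the CPU with multiple RT
 * tasks queued on it (overloaded) needs to be notified that a CPU has opened
 * up that may be able to run one of its non-running queued RT tasks.
 *
 * All CPUs with overloaded RT tasks need to be notified as there is currently
 * no way to know which of these CPUs have the highest priority task waiting
 * to run. Instead of trying to take a spinlock on each of these CPUs,
 * which has shown to cause large latency when done on machines with many
 * CPUs, an IPI is sent to the first CPU in the rto_mask and the irq work
 * it triggers is then passed from one overloaded CPU to the next.
 *
 * This avoids IPI storms on large machines: only one IPI loop is in flight
 * per root domain at a time (rto_loop_start), and CPUs that lower their
 * priority while a loop is in progress simply cause the loop to run once
 * more (rto_loop_next) rather than start a new one.
 */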
static int rto_next_cpu(struct root_domain *rd)
{
	int next;
	int cpu;

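	/*
	 * When starting the IPI RT pushing, the rto_cpu is set to -1,
	 * rto_next_cpu() will simply return the first CPU found in
	 * the rto_mask.
	 *
	 * If rto_next_cpu() is called with rto_cpu as a valid CPU, it
	 * will return the next CPU found in the rto_mask.
	 *
	 * If there are no more CPUs left in the rto_mask, then a check is made
	 * against rto_loop and rto_loop_next. rto_loop is only updated with
	 * the rto_lock held, but any CPU may increment the rto_loop_next
	 * without any locking.
	 */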
	for (;;) {

		/* When rto_cpu is -1 this acts like cpumask_first() */
		cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);

		rd->rto_cpu = cpu;

		if (cpu < nr_cpu_ids)
			return cpu;

		rd->rto_cpu = -1;

		/*
		 * ACQUIRE ensures we see the @rto_mask changes
		 * made prior to the @next value observed.
		 *
		 * Matches WMB in rt_set_overload().
		 */
		next = atomic_read_acquire(&rd->rto_loop_next);

		if (rd->rto_loop == next)
			break;

		rd->rto_loop = next;
	}

	return -1;
}

static inline bool rto_start_trylock(atomic_t *v)
{
	return !atomic_cmpxchg_acquire(v, 0, 1);
}

static inline void rto_start_unlock(atomic_t *v)
{
	atomic_set_release(v, 0);
}

static void tell_cpu_to_push(struct rq *rq)
{
	int cpu = -1;

	/* Keep the loop going if the IPI is currently active */
	atomic_inc(&rq->rd->rto_loop_next);

	/* Only one CPU can initiate a loop at a time */
	if (!rto_start_trylock(&rq->rd->rto_loop_start))
		return;

	raw_spin_lock(&rq->rd->rto_lock);

	/*
	 * The rto_cpu is updated under the lock, if it has a valid CPU
	 * then the IPI is still running and will continue due to the
	 * update to loop_next, and nothing needs to be done here.
	 * Otherwise it is finishing up and an ipi needs to be sent.
	 */
	if (rq->rd->rto_cpu < 0)
		cpu = rto_next_cpu(rq->rd);

	raw_spin_unlock(&rq->rd->rto_lock);

	rto_start_unlock(&rq->rd->rto_loop_start);

	if (cpu >= 0) {
		/* Make sure the rd does not get freed while pushing */
		sched_get_rd(rq->rd);
		irq_work_queue_on(&rq->rd->rto_push_work, cpu);
	}
}

/* Called from hardirq context */
void rto_push_irq_work_func(struct irq_work *work)
{
	struct root_domain *rd =
		container_of(work, struct root_domain, rto_push_work);
	struct rq *rq;
	int cpu;

	rq = this_rq();

	/*
	 * We do not need to grab the lock to check for has_pushable_tasks.
	 * When it gets updated, a check is made if a push is possible.
	 */
	if (has_pushable_tasks(rq)) {
		raw_spin_lock(&rq->lock);
		push_rt_tasks(rq);
		raw_spin_unlock(&rq->lock);
	}

	raw_spin_lock(&rd->rto_lock);

	/* Pass the IPI to the next rt overloaded queue */
	cpu = rto_next_cpu(rd);

	raw_spin_unlock(&rd->rto_lock);

	if (cpu < 0) {
		sched_put_rd(rd);
		return;
	}

	/* Try the next RT overloaded CPU */
	irq_work_queue_on(&rd->rto_push_work, cpu);
}
#endif /* HAVE_RT_PUSH_IPI */

static void pull_rt_task(struct rq *this_rq)
{
	int this_cpu = this_rq->cpu, cpu;
	bool resched = false;
	struct task_struct *p;
	struct rq *src_rq;
	int rt_overload_count = rt_overloaded(this_rq);

	if (likely(!rt_overload_count))
		return;

	/*
	 * Match the barrier from rt_set_overload; this guarantees that if we
	 * see overloaded we must also see the rto_mask bit.
	 */
	smp_rmb();

	/* If we are the only overloaded CPU do nothing */
	if (rt_overload_count == 1 &&
	    cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
		return;

#ifdef HAVE_RT_PUSH_IPI
	if (sched_feat(RT_PUSH_IPI)) {
		tell_cpu_to_push(this_rq);
		return;
	}
#endif

	for_each_cpu(cpu, this_rq->rd->rto_mask) {
		if (this_cpu == cpu)
			continue;

		src_rq = cpu_rq(cpu);

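		/*
		 * Don't bother taking the src_rq->lock if the next
		 * highest task is known to be lower-priority than
		 * our current task on this runqueue. This may look
		 * racy, but if this value is about to go
		 * logically higher, the src_rq will push this task away.
		 * And if its going logically lower, we do not care.
		 */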
		if (src_rq->rt.highest_prio.next >=
		    this_rq->rt.highest_prio.curr)
			continue;

		/*
		 * We can potentially drop this_rq's lock in
		 * double_lock_balance, and another CPU could
		 * alter this_rq.
		 */
		double_lock_balance(this_rq, src_rq);

		/*
		 * We can pull only a task, which is pushable
		 * on its rq, and no others.
		 */
		p = pick_highest_pushable_task(src_rq, this_cpu);

		/*
		 * Do we have an RT task that preempts
		 * the to-be-scheduled task?
		 */
		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
			WARN_ON(p == src_rq->curr);
			WARN_ON(!task_on_rq_queued(p));

			/*
			 * There's a chance that p is higher in priority
			 * than what's currently running on its CPU.
			 * This is just that p is waking up and hasn't
			 * had a chance to schedule. We only pull
			 * p if it is lower in priority than the
			 * current task on the run queue.
			 */
			if (p->prio < src_rq->curr->prio)
				goto skip;

			resched = true;

			deactivate_task(src_rq, p, 0);
			set_task_cpu(p, this_cpu);
			activate_task(this_rq, p, 0);
			/*
			 * We continue with the search, just in
			 * case there's an even higher prio task
			 * in another runqueue. (low likelihood
			 * but possible)
			 */
		}
skip:
		double_unlock_balance(this_rq, src_rq);
	}

	if (resched)
		resched_curr(this_rq);
}

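/*
 * If we are not running and we are not going to reschedule soon, we should
 * try to push tasks away now.
 */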
static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
	bool need_to_push = !task_running(rq, p) &&
			    !test_tsk_need_resched(rq->curr) &&
			    p->nr_cpus_allowed > 1 &&
			    (dl_task(rq->curr) || rt_task(rq->curr)) &&
			    (rq->curr->nr_cpus_allowed < 2 ||
			     rq->curr->prio <= p->prio);

	if (need_to_push)
		push_rt_tasks(rq);
}

/* Assumes rq->lock is held */
static void rq_online_rt(struct rq *rq)
{
	if (rq->rt.overloaded)
		rt_set_overload(rq);

	__enable_runtime(rq);

	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
}

/* Assumes rq->lock is held */
static void rq_offline_rt(struct rq *rq)
{
	if (rq->rt.overloaded)
		rt_clear_overload(rq);

	__disable_runtime(rq);

	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
}

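/*
 * When switching from the rt queue, we bring ourselves to a position
 * that we might want to pull RT tasks from other runqueues.
 */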
static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If there are other RT tasks then we will reschedule
	 * and the scheduling of the other RT tasks will handle
	 * the balancing. But if we are the last RT task
	 * we may need to handle the pulling of RT tasks
	 * now.
	 */
	if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
		return;

	rt_queue_pull_task(rq);
}

void __init init_sched_rt_class(void)
{
	unsigned int i;

	for_each_possible_cpu(i) {
		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
					GFP_KERNEL, cpu_to_node(i));
	}
}
#endif /* CONFIG_SMP */

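/*
 * When switching a task to RT, we may overload the runqueue
 * with RT tasks. In this case we try to push them off to
 * other runqueues.
 */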
static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If we are already running, then there's nothing
	 * that needs to be done. But if we are not running
	 * we may need to preempt the current task.
	 * If that current task is also an RT task
	 * then see if we can move to another run queue.
	 */
	if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
			rt_queue_push_tasks(rq);
#endif /* CONFIG_SMP */
		if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
			resched_curr(rq);
	}
}

/*
 * Priority of the task has changed. This may cause
 * us to initiate a push or pull.
 */
static void
prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
	if (!task_on_rq_queued(p))
		return;

	if (rq->curr == p) {
#ifdef CONFIG_SMP
		/*
		 * If our priority decreases while running, we
		 * may need to pull tasks to this runqueue.
		 */
		if (oldprio < p->prio)
			rt_queue_pull_task(rq);

		/*
		 * If there's a higher priority task waiting to run
		 * then reschedule.
		 */
		if (p->prio > rq->rt.highest_prio.curr)
			resched_curr(rq);
#else
		/* For UP simply resched on drop of prio */
		if (oldprio < p->prio)
			resched_curr(rq);
#endif /* CONFIG_SMP */
	} else {
		/*
		 * This task is not running, but if it is
		 * greater than the current running task
		 * then reschedule.
		 */
		if (p->prio < rq->curr->prio)
			resched_curr(rq);
	}
}

#ifdef CONFIG_POSIX_TIMERS
static void watchdog(struct rq *rq, struct task_struct *p)
{
	unsigned long soft, hard;

	/* max may change after cur was read, this will be fixed next tick */
	soft = task_rlimit(p, RLIMIT_RTTIME);
	hard = task_rlimit_max(p, RLIMIT_RTTIME);

	if (soft != RLIM_INFINITY) {
		unsigned long next;

		if (p->rt.watchdog_stamp != jiffies) {
			p->rt.timeout++;
			p->rt.watchdog_stamp = jiffies;
		}

		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
		if (p->rt.timeout > next) {
			posix_cputimers_rt_watchdog(&p->posix_cputimers,
						    p->se.sum_exec_runtime);
		}
	}
}
#else
static inline void watchdog(struct rq *rq, struct task_struct *p) { }
#endif

/*
 * scheduler tick hitting a task of our scheduling class.
 *
 * NOTE: This function can be called remotely by the tick offload that
 * goes along full dynticks. Therefore no local assumption can be made
 * and everything must be accessed through the @rq and @p parameters.
 */
static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);
	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);

	watchdog(rq, p);

	/*
	 * RR tasks need a special form of timeslice management.
	 * FIFO tasks have no timeslices.
	 */
	if (p->policy != SCHED_RR)
		return;

	if (--p->rt.time_slice)
		return;

	p->rt.time_slice = sched_rr_timeslice;

	/*
	 * Requeue to the end of queue if we (and all of our ancestors) are not
	 * the only element on the queue.
	 */
	for_each_sched_rt_entity(rt_se) {
		if (rt_se->run_list.prev != rt_se->run_list.next) {
			requeue_task_rt(rq, p, 0);
			resched_curr(rq);
			return;
		}
	}
}

static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
{
	/*
	 * Time slice is 0 for SCHED_FIFO tasks
	 */
	if (task->policy == SCHED_RR)
		return sched_rr_timeslice;
	else
		return 0;
}

const struct sched_class rt_sched_class
	__attribute__((section("__rt_sched_class"))) = {
	.enqueue_task		= enqueue_task_rt,
	.dequeue_task		= dequeue_task_rt,
	.yield_task		= yield_task_rt,

	.check_preempt_curr	= check_preempt_curr_rt,

	.pick_next_task		= pick_next_task_rt,
	.put_prev_task		= put_prev_task_rt,
	.set_next_task		= set_next_task_rt,

#ifdef CONFIG_SMP
	.balance		= balance_rt,
	.select_task_rq		= select_task_rq_rt,
	.set_cpus_allowed	= set_cpus_allowed_common,
	.rq_online		= rq_online_rt,
	.rq_offline		= rq_offline_rt,
	.task_woken		= task_woken_rt,
	.switched_from		= switched_from_rt,
#endif

	.task_tick		= task_tick_rt,

	.get_rr_interval	= get_rr_interval_rt,

	.prio_changed		= prio_changed_rt,
	.switched_to		= switched_to_rt,

	.update_curr		= update_curr_rt,

#ifdef CONFIG_UCLAMP_TASK
	.uclamp_enabled		= 1,
#endif
};

#ifdef CONFIG_RT_GROUP_SCHED
/*
 * Ensure that the real time constraints are schedulable.
 */
static DEFINE_MUTEX(rt_constraints_mutex);

static inline int tg_has_rt_tasks(struct task_group *tg)
{
	struct task_struct *task;
	struct css_task_iter it;
	int ret = 0;

	/*
	 * Autogroups do not have RT tasks; see autogroup_create().
	 */
	if (task_group_is_autogroup(tg))
		return 0;

	css_task_iter_start(&tg->css, 0, &it);
	while (!ret && (task = css_task_iter_next(&it)))
		ret |= rt_task(task);
	css_task_iter_end(&it);

	return ret;
}

struct rt_schedulable_data {
	struct task_group *tg;
	u64 rt_period;
	u64 rt_runtime;
};

static int tg_rt_schedulable(struct task_group *tg, void *data)
{
	struct rt_schedulable_data *d = data;
	struct task_group *child;
	unsigned long total, sum = 0;
	u64 period, runtime;

	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
	runtime = tg->rt_bandwidth.rt_runtime;

	if (tg == d->tg) {
		period = d->rt_period;
		runtime = d->rt_runtime;
	}

	/*
	 * Cannot have more runtime than the period.
	 */
	if (runtime > period && runtime != RUNTIME_INF)
		return -EINVAL;

	/*
	 * Ensure we don't starve existing RT tasks if runtime turns zero.
	 */
	if (rt_bandwidth_enabled() && !runtime &&
	    tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
		return -EBUSY;

	total = to_ratio(period, runtime);

	/*
	 * Nobody can have more than the global setting allows.
	 */
	if (total > to_ratio(global_rt_period(), global_rt_runtime()))
		return -EINVAL;

	/*
	 * The sum of our children's runtime should not exceed our own.
	 */
	list_for_each_entry_rcu(child, &tg->children, siblings) {
		period = ktime_to_ns(child->rt_bandwidth.rt_period);
		runtime = child->rt_bandwidth.rt_runtime;

		if (child == d->tg) {
			period = d->rt_period;
			runtime = d->rt_runtime;
		}

		sum += to_ratio(period, runtime);
	}

	if (sum > total)
		return -EINVAL;

	return 0;
}

static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
{
	int ret;

	struct rt_schedulable_data data = {
		.tg = tg,
		.rt_period = period,
		.rt_runtime = runtime,
	};

	rcu_read_lock();
	ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
	rcu_read_unlock();

	return ret;
}

static int tg_set_rt_bandwidth(struct task_group *tg,
		u64 rt_period, u64 rt_runtime)
{
	int i, err = 0;

	/*
	 * Disallowing the root group RT runtime is BAD, it would disallow the
	 * kernel creating (and or operating) RT threads.
	 */
	if (tg == &root_task_group && rt_runtime == 0)
		return -EINVAL;

	/* No period doesn't make any sense. */
	if (rt_period == 0)
		return -EINVAL;

	/*
	 * Bound quota to defend quota against overflow during bandwidth shift.
	 */
	if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
		return -EINVAL;

	mutex_lock(&rt_constraints_mutex);
	err = __rt_schedulable(tg, rt_period, rt_runtime);
	if (err)
		goto unlock;

	raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
	tg->rt_bandwidth.rt_runtime = rt_runtime;

	for_each_possible_cpu(i) {
		struct rt_rq *rt_rq = tg->rt_rq[i];

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = rt_runtime;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
	}
	raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
unlock:
	mutex_unlock(&rt_constraints_mutex);

	return err;
}

int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
{
	u64 rt_runtime, rt_period;

	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
	if (rt_runtime_us < 0)
		rt_runtime = RUNTIME_INF;
	else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC)
		return -EINVAL;

	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
}

long sched_group_rt_runtime(struct task_group *tg)
{
	u64 rt_runtime_us;

	if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
		return -1;

	rt_runtime_us = tg->rt_bandwidth.rt_runtime;
	do_div(rt_runtime_us, NSEC_PER_USEC);
	return rt_runtime_us;
}

int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
{
	u64 rt_runtime, rt_period;

	if (rt_period_us > U64_MAX / NSEC_PER_USEC)
		return -EINVAL;

	rt_period = rt_period_us * NSEC_PER_USEC;
	rt_runtime = tg->rt_bandwidth.rt_runtime;

	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
}

long sched_group_rt_period(struct task_group *tg)
{
	u64 rt_period_us;

	rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
	do_div(rt_period_us, NSEC_PER_USEC);
	return rt_period_us;
}

static int sched_rt_global_constraints(void)
{
	int ret = 0;

	mutex_lock(&rt_constraints_mutex);
	ret = __rt_schedulable(NULL, 0, 0);
	mutex_unlock(&rt_constraints_mutex);

	return ret;
}

int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
{
	/* Don't accept realtime tasks when there is no way for them to run */
	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
		return 0;

	return 1;
}

#else /* !CONFIG_RT_GROUP_SCHED */
static int sched_rt_global_constraints(void)
{
	unsigned long flags;
	int i;

	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
	for_each_possible_cpu(i) {
		struct rt_rq *rt_rq = &cpu_rq(i)->rt;

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = global_rt_runtime();
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
	}
	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);

	return 0;
}
#endif /* CONFIG_RT_GROUP_SCHED */

static int sched_rt_global_validate(void)
{
	if (sysctl_sched_rt_period <= 0)
		return -EINVAL;

	if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
		((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
		 ((u64)sysctl_sched_rt_runtime *
			NSEC_PER_USEC > max_rt_runtime)))
		return -EINVAL;

	return 0;
}

static void sched_rt_do_global(void)
{
	def_rt_bandwidth.rt_runtime = global_rt_runtime();
	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
}

int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	int old_period, old_runtime;
	static DEFINE_MUTEX(mutex);
	int ret;

	mutex_lock(&mutex);
	old_period = sysctl_sched_rt_period;
	old_runtime = sysctl_sched_rt_runtime;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (!ret && write) {
		ret = sched_rt_global_validate();
		if (ret)
			goto undo;

		ret = sched_dl_global_validate();
		if (ret)
			goto undo;

		ret = sched_rt_global_constraints();
		if (ret)
			goto undo;

		sched_rt_do_global();
		sched_dl_do_global();
	}
	if (0) {
undo:
		sysctl_sched_rt_period = old_period;
		sysctl_sched_rt_runtime = old_runtime;
	}
	mutex_unlock(&mutex);

	return ret;
}

int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	int ret;
	static DEFINE_MUTEX(mutex);

	mutex_lock(&mutex);
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	/*
	 * Make sure that internally we keep jiffies.
	 * Also, writing zero resets the timeslice to default:
	 */
	if (!ret && write) {
		sched_rr_timeslice =
			sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
			msecs_to_jiffies(sysctl_sched_rr_timeslice);
	}
	mutex_unlock(&mutex);

	return ret;
}

#ifdef CONFIG_SCHED_DEBUG
void print_rt_stats(struct seq_file *m, int cpu)
{
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	rcu_read_lock();
	for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
		print_rt_rq(m, cpu, rt_rq);
	rcu_read_unlock();
}
#endif /* CONFIG_SCHED_DEBUG */