linux/kernel/sched/rt.c
   1/*
   2 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
   3 * policies)
   4 */
   5
   6#include "sched.h"
   7
   8#include <linux/slab.h>
   9
  10int sched_rr_timeslice = RR_TIMESLICE;
  11
  12static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
  13
  14struct rt_bandwidth def_rt_bandwidth;
  15
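/*
 * Replenishment timer callback: forward the hrtimer past any missed
 * periods and let do_sched_rt_period_timer() refill runtime for each
 * overrun. Restart the timer unless everything was reported idle.
 */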
  16static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
  17{
  18        struct rt_bandwidth *rt_b =
  19                container_of(timer, struct rt_bandwidth, rt_period_timer);
  20        ktime_t now;
  21        int overrun;
  22        int idle = 0;
  23
  24        for (;;) {
  25                now = hrtimer_cb_get_time(timer);
  26                overrun = hrtimer_forward(timer, now, rt_b->rt_period);
  27
  28                if (!overrun)
  29                        break;
  30
  31                idle = do_sched_rt_period_timer(rt_b, overrun);
  32        }
  33
  34        return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
  35}
  36
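/*
 * Initialize an rt_bandwidth control structure with its enforcement
 * period and runtime budget; the replenishment hrtimer is set up here
 * but only armed later by start_rt_bandwidth().
 */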
  37void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
  38{
  39        rt_b->rt_period = ns_to_ktime(period);
  40        rt_b->rt_runtime = runtime;
  41
  42        raw_spin_lock_init(&rt_b->rt_runtime_lock);
  43
  44        hrtimer_init(&rt_b->rt_period_timer,
  45                        CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  46        rt_b->rt_period_timer.function = sched_rt_period_timer;
  47}
  48
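/*
 * Arm the replenishment timer for this bandwidth group, unless
 * throttling is disabled (RUNTIME_INF) or the timer is already active.
 */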
  49static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
  50{
  51        if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
  52                return;
  53
  54        if (hrtimer_active(&rt_b->rt_period_timer))
  55                return;
  56
  57        raw_spin_lock(&rt_b->rt_runtime_lock);
  58        start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
  59        raw_spin_unlock(&rt_b->rt_runtime_lock);
  60}
  61
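/*
 * Initialize a per-CPU RT runqueue: empty the priority array (plus the
 * delimiter bit for the bitmap search), reset the SMP push/pull state
 * and clear the throttling accounting.
 */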
  62void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
  63{
  64        struct rt_prio_array *array;
  65        int i;
  66
  67        array = &rt_rq->active;
  68        for (i = 0; i < MAX_RT_PRIO; i++) {
  69                INIT_LIST_HEAD(array->queue + i);
  70                __clear_bit(i, array->bitmap);
  71        }
  72        /* delimiter for bitsearch: */
  73        __set_bit(MAX_RT_PRIO, array->bitmap);
  74
  75#if defined CONFIG_SMP
  76        rt_rq->highest_prio.curr = MAX_RT_PRIO;
  77        rt_rq->highest_prio.next = MAX_RT_PRIO;
  78        rt_rq->rt_nr_migratory = 0;
  79        rt_rq->overloaded = 0;
  80        plist_head_init(&rt_rq->pushable_tasks);
  81#endif
   82        /* We start in dequeued state, because no RT tasks are queued */
  83        rt_rq->rt_queued = 0;
  84
  85        rt_rq->rt_time = 0;
  86        rt_rq->rt_throttled = 0;
  87        rt_rq->rt_runtime = 0;
  88        raw_spin_lock_init(&rt_rq->rt_runtime_lock);
  89}
  90
  91#ifdef CONFIG_RT_GROUP_SCHED
  92static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
  93{
  94        hrtimer_cancel(&rt_b->rt_period_timer);
  95}
  96
  97#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
  98
  99static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
 100{
 101#ifdef CONFIG_SCHED_DEBUG
 102        WARN_ON_ONCE(!rt_entity_is_task(rt_se));
 103#endif
 104        return container_of(rt_se, struct task_struct, rt);
 105}
 106
 107static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
 108{
 109        return rt_rq->rq;
 110}
 111
 112static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 113{
 114        return rt_se->rt_rq;
 115}
 116
 117static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
 118{
 119        struct rt_rq *rt_rq = rt_se->rt_rq;
 120
 121        return rt_rq->rq;
 122}
 123
 124void free_rt_sched_group(struct task_group *tg)
 125{
 126        int i;
 127
 128        if (tg->rt_se)
 129                destroy_rt_bandwidth(&tg->rt_bandwidth);
 130
 131        for_each_possible_cpu(i) {
 132                if (tg->rt_rq)
 133                        kfree(tg->rt_rq[i]);
 134                if (tg->rt_se)
 135                        kfree(tg->rt_se[i]);
 136        }
 137
 138        kfree(tg->rt_rq);
 139        kfree(tg->rt_se);
 140}
 141
 142void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
 143                struct sched_rt_entity *rt_se, int cpu,
 144                struct sched_rt_entity *parent)
 145{
 146        struct rq *rq = cpu_rq(cpu);
 147
 148        rt_rq->highest_prio.curr = MAX_RT_PRIO;
 149        rt_rq->rt_nr_boosted = 0;
 150        rt_rq->rq = rq;
 151        rt_rq->tg = tg;
 152
 153        tg->rt_rq[cpu] = rt_rq;
 154        tg->rt_se[cpu] = rt_se;
 155
 156        if (!rt_se)
 157                return;
 158
 159        if (!parent)
 160                rt_se->rt_rq = &rq->rt;
 161        else
 162                rt_se->rt_rq = parent->my_q;
 163
 164        rt_se->my_q = rt_rq;
 165        rt_se->parent = parent;
 166        INIT_LIST_HEAD(&rt_se->run_list);
 167}
 168
 169int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 170{
 171        struct rt_rq *rt_rq;
 172        struct sched_rt_entity *rt_se;
 173        int i;
 174
 175        tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
 176        if (!tg->rt_rq)
 177                goto err;
 178        tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
 179        if (!tg->rt_se)
 180                goto err;
 181
 182        init_rt_bandwidth(&tg->rt_bandwidth,
 183                        ktime_to_ns(def_rt_bandwidth.rt_period), 0);
 184
 185        for_each_possible_cpu(i) {
 186                rt_rq = kzalloc_node(sizeof(struct rt_rq),
 187                                     GFP_KERNEL, cpu_to_node(i));
 188                if (!rt_rq)
 189                        goto err;
 190
 191                rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
 192                                     GFP_KERNEL, cpu_to_node(i));
 193                if (!rt_se)
 194                        goto err_free_rq;
 195
 196                init_rt_rq(rt_rq, cpu_rq(i));
 197                rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
 198                init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
 199        }
 200
 201        return 1;
 202
 203err_free_rq:
 204        kfree(rt_rq);
 205err:
 206        return 0;
 207}
 208
 209#else /* CONFIG_RT_GROUP_SCHED */
 210
 211#define rt_entity_is_task(rt_se) (1)
 212
 213static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
 214{
 215        return container_of(rt_se, struct task_struct, rt);
 216}
 217
 218static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
 219{
 220        return container_of(rt_rq, struct rq, rt);
 221}
 222
 223static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
 224{
 225        struct task_struct *p = rt_task_of(rt_se);
 226
 227        return task_rq(p);
 228}
 229
 230static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
 231{
 232        struct rq *rq = rq_of_rt_se(rt_se);
 233
 234        return &rq->rt;
 235}
 236
 237void free_rt_sched_group(struct task_group *tg) { }
 238
 239int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 240{
 241        return 1;
 242}
 243#endif /* CONFIG_RT_GROUP_SCHED */
 244
 245#ifdef CONFIG_SMP
 246
 247static int pull_rt_task(struct rq *this_rq);
 248
 249static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
 250{
 251        /* Try to pull RT tasks here if we lower this rq's prio */
 252        return rq->rt.highest_prio.curr > prev->prio;
 253}
 254
 255static inline int rt_overloaded(struct rq *rq)
 256{
 257        return atomic_read(&rq->rd->rto_count);
 258}
 259
 260static inline void rt_set_overload(struct rq *rq)
 261{
 262        if (!rq->online)
 263                return;
 264
 265        cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
 266        /*
 267         * Make sure the mask is visible before we set
 268         * the overload count. That is checked to determine
 269         * if we should look at the mask. It would be a shame
 270         * if we looked at the mask, but the mask was not
 271         * updated yet.
 272         *
 273         * Matched by the barrier in pull_rt_task().
 274         */
 275        smp_wmb();
 276        atomic_inc(&rq->rd->rto_count);
 277}
 278
 279static inline void rt_clear_overload(struct rq *rq)
 280{
 281        if (!rq->online)
 282                return;
 283
 284        /* the order here really doesn't matter */
 285        atomic_dec(&rq->rd->rto_count);
 286        cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
 287}
 288
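/*
 * Recompute this runqueue's overload state: it is overloaded when it
 * holds more than one RT task and at least one of them may migrate.
 */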
 289static void update_rt_migration(struct rt_rq *rt_rq)
 290{
 291        if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
 292                if (!rt_rq->overloaded) {
 293                        rt_set_overload(rq_of_rt_rq(rt_rq));
 294                        rt_rq->overloaded = 1;
 295                }
 296        } else if (rt_rq->overloaded) {
 297                rt_clear_overload(rq_of_rt_rq(rt_rq));
 298                rt_rq->overloaded = 0;
 299        }
 300}
 301
 302static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 303{
 304        struct task_struct *p;
 305
 306        if (!rt_entity_is_task(rt_se))
 307                return;
 308
 309        p = rt_task_of(rt_se);
 310        rt_rq = &rq_of_rt_rq(rt_rq)->rt;
 311
 312        rt_rq->rt_nr_total++;
 313        if (p->nr_cpus_allowed > 1)
 314                rt_rq->rt_nr_migratory++;
 315
 316        update_rt_migration(rt_rq);
 317}
 318
 319static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 320{
 321        struct task_struct *p;
 322
 323        if (!rt_entity_is_task(rt_se))
 324                return;
 325
 326        p = rt_task_of(rt_se);
 327        rt_rq = &rq_of_rt_rq(rt_rq)->rt;
 328
 329        rt_rq->rt_nr_total--;
 330        if (p->nr_cpus_allowed > 1)
 331                rt_rq->rt_nr_migratory--;
 332
 333        update_rt_migration(rt_rq);
 334}
 335
 336static inline int has_pushable_tasks(struct rq *rq)
 337{
 338        return !plist_head_empty(&rq->rt.pushable_tasks);
 339}
 340
 341static inline void set_post_schedule(struct rq *rq)
 342{
 343        /*
 344         * We detect this state here so that we can avoid taking the RQ
 345         * lock again later if there is no need to push
 346         */
 347        rq->post_schedule = has_pushable_tasks(rq);
 348}
 349
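/*
 * (Re)insert @p into the priority-ordered list of pushable tasks and
 * keep highest_prio.next in sync with the best candidate.
 */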
 350static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
 351{
 352        plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
 353        plist_node_init(&p->pushable_tasks, p->prio);
 354        plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
 355
 356        /* Update the highest prio pushable task */
 357        if (p->prio < rq->rt.highest_prio.next)
 358                rq->rt.highest_prio.next = p->prio;
 359}
 360
 361static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
 362{
 363        plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
 364
 365        /* Update the new highest prio pushable task */
 366        if (has_pushable_tasks(rq)) {
 367                p = plist_first_entry(&rq->rt.pushable_tasks,
 368                                      struct task_struct, pushable_tasks);
 369                rq->rt.highest_prio.next = p->prio;
 370        } else
 371                rq->rt.highest_prio.next = MAX_RT_PRIO;
 372}
 373
 374#else
 375
 376static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
 377{
 378}
 379
 380static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
 381{
 382}
 383
 384static inline
 385void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 386{
 387}
 388
 389static inline
 390void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 391{
 392}
 393
 394static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
 395{
 396        return false;
 397}
 398
 399static inline int pull_rt_task(struct rq *this_rq)
 400{
 401        return 0;
 402}
 403
 404static inline void set_post_schedule(struct rq *rq)
 405{
 406}
 407#endif /* CONFIG_SMP */
 408
 409static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
 410static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
 411
 412static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 413{
 414        return !list_empty(&rt_se->run_list);
 415}
 416
 417#ifdef CONFIG_RT_GROUP_SCHED
 418
 419static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 420{
 421        if (!rt_rq->tg)
 422                return RUNTIME_INF;
 423
 424        return rt_rq->rt_runtime;
 425}
 426
 427static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 428{
 429        return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 430}
 431
 432typedef struct task_group *rt_rq_iter_t;
 433
 434static inline struct task_group *next_task_group(struct task_group *tg)
 435{
 436        do {
 437                tg = list_entry_rcu(tg->list.next,
 438                        typeof(struct task_group), list);
 439        } while (&tg->list != &task_groups && task_group_is_autogroup(tg));
 440
 441        if (&tg->list == &task_groups)
 442                tg = NULL;
 443
 444        return tg;
 445}
 446
 447#define for_each_rt_rq(rt_rq, iter, rq)                                 \
 448        for (iter = container_of(&task_groups, typeof(*iter), list);    \
 449                (iter = next_task_group(iter)) &&                       \
 450                (rt_rq = iter->rt_rq[cpu_of(rq)]);)
 451
 452#define for_each_sched_rt_entity(rt_se) \
 453        for (; rt_se; rt_se = rt_se->parent)
 454
 455static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
 456{
 457        return rt_se->my_q;
 458}
 459
 460static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
 461static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 462
 463static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 464{
 465        struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
 466        struct rq *rq = rq_of_rt_rq(rt_rq);
 467        struct sched_rt_entity *rt_se;
 468
 469        int cpu = cpu_of(rq);
 470
 471        rt_se = rt_rq->tg->rt_se[cpu];
 472
 473        if (rt_rq->rt_nr_running) {
 474                if (!rt_se)
 475                        enqueue_top_rt_rq(rt_rq);
 476                else if (!on_rt_rq(rt_se))
 477                        enqueue_rt_entity(rt_se, false);
 478
 479                if (rt_rq->highest_prio.curr < curr->prio)
 480                        resched_curr(rq);
 481        }
 482}
 483
 484static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 485{
 486        struct sched_rt_entity *rt_se;
 487        int cpu = cpu_of(rq_of_rt_rq(rt_rq));
 488
 489        rt_se = rt_rq->tg->rt_se[cpu];
 490
 491        if (!rt_se)
 492                dequeue_top_rt_rq(rt_rq);
 493        else if (on_rt_rq(rt_se))
 494                dequeue_rt_entity(rt_se);
 495}
 496
 497static inline int rt_rq_throttled(struct rt_rq *rt_rq)
 498{
 499        return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
 500}
 501
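/*
 * An entity is "boosted" when it runs above its normal priority, e.g.
 * due to priority inheritance; a group entity is boosted when any of
 * its children are.
 */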
 502static int rt_se_boosted(struct sched_rt_entity *rt_se)
 503{
 504        struct rt_rq *rt_rq = group_rt_rq(rt_se);
 505        struct task_struct *p;
 506
 507        if (rt_rq)
 508                return !!rt_rq->rt_nr_boosted;
 509
 510        p = rt_task_of(rt_se);
 511        return p->prio != p->normal_prio;
 512}
 513
 514#ifdef CONFIG_SMP
 515static inline const struct cpumask *sched_rt_period_mask(void)
 516{
 517        return this_rq()->rd->span;
 518}
 519#else
 520static inline const struct cpumask *sched_rt_period_mask(void)
 521{
 522        return cpu_online_mask;
 523}
 524#endif
 525
 526static inline
 527struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
 528{
 529        return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
 530}
 531
 532static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 533{
 534        return &rt_rq->tg->rt_bandwidth;
 535}
 536
 537#else /* !CONFIG_RT_GROUP_SCHED */
 538
 539static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 540{
 541        return rt_rq->rt_runtime;
 542}
 543
 544static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 545{
 546        return ktime_to_ns(def_rt_bandwidth.rt_period);
 547}
 548
 549typedef struct rt_rq *rt_rq_iter_t;
 550
 551#define for_each_rt_rq(rt_rq, iter, rq) \
 552        for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
 553
 554#define for_each_sched_rt_entity(rt_se) \
 555        for (; rt_se; rt_se = NULL)
 556
 557static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
 558{
 559        return NULL;
 560}
 561
 562static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 563{
 564        struct rq *rq = rq_of_rt_rq(rt_rq);
 565
 566        if (!rt_rq->rt_nr_running)
 567                return;
 568
 569        enqueue_top_rt_rq(rt_rq);
 570        resched_curr(rq);
 571}
 572
 573static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 574{
 575        dequeue_top_rt_rq(rt_rq);
 576}
 577
 578static inline int rt_rq_throttled(struct rt_rq *rt_rq)
 579{
 580        return rt_rq->rt_throttled;
 581}
 582
 583static inline const struct cpumask *sched_rt_period_mask(void)
 584{
 585        return cpu_online_mask;
 586}
 587
 588static inline
 589struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
 590{
 591        return &cpu_rq(cpu)->rt;
 592}
 593
 594static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 595{
 596        return &def_rt_bandwidth;
 597}
 598
 599#endif /* CONFIG_RT_GROUP_SCHED */
 600
 601bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
 602{
 603        struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 604
 605        return (hrtimer_active(&rt_b->rt_period_timer) ||
 606                rt_rq->rt_time < rt_b->rt_runtime);
 607}
 608
 609#ifdef CONFIG_SMP
 610/*
 611 * We ran out of runtime, see if we can borrow some from our neighbours.
 612 */
 613static int do_balance_runtime(struct rt_rq *rt_rq)
 614{
 615        struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 616        struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
 617        int i, weight, more = 0;
 618        u64 rt_period;
 619
 620        weight = cpumask_weight(rd->span);
 621
 622        raw_spin_lock(&rt_b->rt_runtime_lock);
 623        rt_period = ktime_to_ns(rt_b->rt_period);
 624        for_each_cpu(i, rd->span) {
 625                struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
 626                s64 diff;
 627
 628                if (iter == rt_rq)
 629                        continue;
 630
 631                raw_spin_lock(&iter->rt_runtime_lock);
 632                /*
 633                 * Either all rqs have inf runtime and there's nothing to steal
 634                 * or __disable_runtime() below sets a specific rq to inf to
  635                 * indicate it's been disabled and disallow stealing.
 636                 */
 637                if (iter->rt_runtime == RUNTIME_INF)
 638                        goto next;
 639
 640                /*
 641                 * From runqueues with spare time, take 1/n part of their
 642                 * spare time, but no more than our period.
 643                 */
 644                diff = iter->rt_runtime - iter->rt_time;
 645                if (diff > 0) {
 646                        diff = div_u64((u64)diff, weight);
 647                        if (rt_rq->rt_runtime + diff > rt_period)
 648                                diff = rt_period - rt_rq->rt_runtime;
 649                        iter->rt_runtime -= diff;
 650                        rt_rq->rt_runtime += diff;
 651                        more = 1;
 652                        if (rt_rq->rt_runtime == rt_period) {
 653                                raw_spin_unlock(&iter->rt_runtime_lock);
 654                                break;
 655                        }
 656                }
 657next:
 658                raw_spin_unlock(&iter->rt_runtime_lock);
 659        }
 660        raw_spin_unlock(&rt_b->rt_runtime_lock);
 661
 662        return more;
 663}
 664
 665/*
  666 * Ensure this RQ takes back all the runtime it lent to its neighbours.
 667 */
 668static void __disable_runtime(struct rq *rq)
 669{
 670        struct root_domain *rd = rq->rd;
 671        rt_rq_iter_t iter;
 672        struct rt_rq *rt_rq;
 673
 674        if (unlikely(!scheduler_running))
 675                return;
 676
 677        for_each_rt_rq(rt_rq, iter, rq) {
 678                struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 679                s64 want;
 680                int i;
 681
 682                raw_spin_lock(&rt_b->rt_runtime_lock);
 683                raw_spin_lock(&rt_rq->rt_runtime_lock);
 684                /*
 685                 * Either we're all inf and nobody needs to borrow, or we're
 686                 * already disabled and thus have nothing to do, or we have
 687                 * exactly the right amount of runtime to take out.
 688                 */
 689                if (rt_rq->rt_runtime == RUNTIME_INF ||
 690                                rt_rq->rt_runtime == rt_b->rt_runtime)
 691                        goto balanced;
 692                raw_spin_unlock(&rt_rq->rt_runtime_lock);
 693
 694                /*
 695                 * Calculate the difference between what we started out with
  696                 * and what we currently have; that's the amount of runtime
  697                 * we lent and now have to reclaim.
 698                 */
 699                want = rt_b->rt_runtime - rt_rq->rt_runtime;
 700
 701                /*
 702                 * Greedy reclaim, take back as much as we can.
 703                 */
 704                for_each_cpu(i, rd->span) {
 705                        struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
 706                        s64 diff;
 707
 708                        /*
 709                         * Can't reclaim from ourselves or disabled runqueues.
 710                         */
 711                        if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
 712                                continue;
 713
 714                        raw_spin_lock(&iter->rt_runtime_lock);
 715                        if (want > 0) {
 716                                diff = min_t(s64, iter->rt_runtime, want);
 717                                iter->rt_runtime -= diff;
 718                                want -= diff;
 719                        } else {
 720                                iter->rt_runtime -= want;
 721                                want -= want;
 722                        }
 723                        raw_spin_unlock(&iter->rt_runtime_lock);
 724
 725                        if (!want)
 726                                break;
 727                }
 728
 729                raw_spin_lock(&rt_rq->rt_runtime_lock);
 730                /*
 731                 * We cannot be left wanting - that would mean some runtime
 732                 * leaked out of the system.
 733                 */
 734                BUG_ON(want);
 735balanced:
 736                /*
 737                 * Disable all the borrow logic by pretending we have inf
 738                 * runtime - in which case borrowing doesn't make sense.
 739                 */
 740                rt_rq->rt_runtime = RUNTIME_INF;
 741                rt_rq->rt_throttled = 0;
 742                raw_spin_unlock(&rt_rq->rt_runtime_lock);
 743                raw_spin_unlock(&rt_b->rt_runtime_lock);
 744
 745                /* Make rt_rq available for pick_next_task() */
 746                sched_rt_rq_enqueue(rt_rq);
 747        }
 748}
 749
 750static void __enable_runtime(struct rq *rq)
 751{
 752        rt_rq_iter_t iter;
 753        struct rt_rq *rt_rq;
 754
 755        if (unlikely(!scheduler_running))
 756                return;
 757
 758        /*
 759         * Reset each runqueue's bandwidth settings
 760         */
 761        for_each_rt_rq(rt_rq, iter, rq) {
 762                struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 763
 764                raw_spin_lock(&rt_b->rt_runtime_lock);
 765                raw_spin_lock(&rt_rq->rt_runtime_lock);
 766                rt_rq->rt_runtime = rt_b->rt_runtime;
 767                rt_rq->rt_time = 0;
 768                rt_rq->rt_throttled = 0;
 769                raw_spin_unlock(&rt_rq->rt_runtime_lock);
 770                raw_spin_unlock(&rt_b->rt_runtime_lock);
 771        }
 772}
 773
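/*
 * Called with rt_rq->rt_runtime_lock held; drops and re-takes it around
 * do_balance_runtime() when this rt_rq has overrun its local budget.
 */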
 774static int balance_runtime(struct rt_rq *rt_rq)
 775{
 776        int more = 0;
 777
 778        if (!sched_feat(RT_RUNTIME_SHARE))
 779                return more;
 780
 781        if (rt_rq->rt_time > rt_rq->rt_runtime) {
 782                raw_spin_unlock(&rt_rq->rt_runtime_lock);
 783                more = do_balance_runtime(rt_rq);
 784                raw_spin_lock(&rt_rq->rt_runtime_lock);
 785        }
 786
 787        return more;
 788}
 789#else /* !CONFIG_SMP */
 790static inline int balance_runtime(struct rt_rq *rt_rq)
 791{
 792        return 0;
 793}
 794#endif /* CONFIG_SMP */
 795
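/*
 * Periodic replenishment: for each rt_rq in the period mask, subtract
 * the refilled budget (overrun * runtime) from the consumed rt_time and
 * unthrottle/re-enqueue runqueues that dropped below their limit again.
 * Returns 1 when there is nothing left to do and the timer may stop.
 */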
 796static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 797{
 798        int i, idle = 1, throttled = 0;
 799        const struct cpumask *span;
 800
 801        span = sched_rt_period_mask();
 802#ifdef CONFIG_RT_GROUP_SCHED
 803        /*
 804         * FIXME: isolated CPUs should really leave the root task group,
 805         * whether they are isolcpus or were isolated via cpusets, lest
 806         * the timer run on a CPU which does not service all runqueues,
 807         * potentially leaving other CPUs indefinitely throttled.  If
 808         * isolation is really required, the user will turn the throttle
 809         * off to kill the perturbations it causes anyway.  Meanwhile,
 810         * this maintains functionality for boot and/or troubleshooting.
 811         */
 812        if (rt_b == &root_task_group.rt_bandwidth)
 813                span = cpu_online_mask;
 814#endif
 815        for_each_cpu(i, span) {
 816                int enqueue = 0;
 817                struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
 818                struct rq *rq = rq_of_rt_rq(rt_rq);
 819
 820                raw_spin_lock(&rq->lock);
 821                if (rt_rq->rt_time) {
 822                        u64 runtime;
 823
 824                        raw_spin_lock(&rt_rq->rt_runtime_lock);
 825                        if (rt_rq->rt_throttled)
 826                                balance_runtime(rt_rq);
 827                        runtime = rt_rq->rt_runtime;
 828                        rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
 829                        if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
 830                                rt_rq->rt_throttled = 0;
 831                                enqueue = 1;
 832
 833                                /*
 834                                 * When we're idle and a woken (rt) task is
 835                                 * throttled check_preempt_curr() will set
 836                                 * skip_update and the time between the wakeup
 837                                 * and this unthrottle will get accounted as
 838                                 * 'runtime'.
 839                                 */
 840                                if (rt_rq->rt_nr_running && rq->curr == rq->idle)
 841                                        rq_clock_skip_update(rq, false);
 842                        }
 843                        if (rt_rq->rt_time || rt_rq->rt_nr_running)
 844                                idle = 0;
 845                        raw_spin_unlock(&rt_rq->rt_runtime_lock);
 846                } else if (rt_rq->rt_nr_running) {
 847                        idle = 0;
 848                        if (!rt_rq_throttled(rt_rq))
 849                                enqueue = 1;
 850                }
 851                if (rt_rq->rt_throttled)
 852                        throttled = 1;
 853
 854                if (enqueue)
 855                        sched_rt_rq_enqueue(rt_rq);
 856                raw_spin_unlock(&rq->lock);
 857        }
 858
 859        if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
 860                return 1;
 861
 862        return idle;
 863}
 864
 865static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 866{
 867#ifdef CONFIG_RT_GROUP_SCHED
 868        struct rt_rq *rt_rq = group_rt_rq(rt_se);
 869
 870        if (rt_rq)
 871                return rt_rq->highest_prio.curr;
 872#endif
 873
 874        return rt_task_of(rt_se)->prio;
 875}
 876
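/*
 * Check whether this rt_rq has consumed more than its runtime for the
 * current period, after trying to borrow from other CPUs. If so, mark
 * it throttled and dequeue it; a nonzero return tells the caller to
 * reschedule.
 */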
 877static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 878{
 879        u64 runtime = sched_rt_runtime(rt_rq);
 880
 881        if (rt_rq->rt_throttled)
 882                return rt_rq_throttled(rt_rq);
 883
 884        if (runtime >= sched_rt_period(rt_rq))
 885                return 0;
 886
 887        balance_runtime(rt_rq);
 888        runtime = sched_rt_runtime(rt_rq);
 889        if (runtime == RUNTIME_INF)
 890                return 0;
 891
 892        if (rt_rq->rt_time > runtime) {
 893                struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 894
 895                /*
 896                 * Don't actually throttle groups that have no runtime assigned
 897                 * but accrue some time due to boosting.
 898                 */
 899                if (likely(rt_b->rt_runtime)) {
 900                        rt_rq->rt_throttled = 1;
 901                        printk_deferred_once("sched: RT throttling activated\n");
 902                } else {
 903                        /*
  904                         * In case we did anyway, make it go away;
 905                         * replenishment is a joke, since it will replenish us
 906                         * with exactly 0 ns.
 907                         */
 908                        rt_rq->rt_time = 0;
 909                }
 910
 911                if (rt_rq_throttled(rt_rq)) {
 912                        sched_rt_rq_dequeue(rt_rq);
 913                        return 1;
 914                }
 915        }
 916
 917        return 0;
 918}
 919
 920/*
 921 * Update the current task's runtime statistics. Skip current tasks that
 922 * are not in our scheduling class.
 923 */
 924static void update_curr_rt(struct rq *rq)
 925{
 926        struct task_struct *curr = rq->curr;
 927        struct sched_rt_entity *rt_se = &curr->rt;
 928        u64 delta_exec;
 929
 930        if (curr->sched_class != &rt_sched_class)
 931                return;
 932
 933        delta_exec = rq_clock_task(rq) - curr->se.exec_start;
 934        if (unlikely((s64)delta_exec <= 0))
 935                return;
 936
 937        schedstat_set(curr->se.statistics.exec_max,
 938                      max(curr->se.statistics.exec_max, delta_exec));
 939
 940        curr->se.sum_exec_runtime += delta_exec;
 941        account_group_exec_runtime(curr, delta_exec);
 942
 943        curr->se.exec_start = rq_clock_task(rq);
 944        cpuacct_charge(curr, delta_exec);
 945
 946        sched_rt_avg_update(rq, delta_exec);
 947
 948        if (!rt_bandwidth_enabled())
 949                return;
 950
 951        for_each_sched_rt_entity(rt_se) {
 952                struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 953
 954                if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
 955                        raw_spin_lock(&rt_rq->rt_runtime_lock);
 956                        rt_rq->rt_time += delta_exec;
 957                        if (sched_rt_runtime_exceeded(rt_rq))
 958                                resched_curr(rq);
 959                        raw_spin_unlock(&rt_rq->rt_runtime_lock);
 960                }
 961        }
 962}
 963
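/*
 * The root rt_rq contributes its rt_nr_running to the rq's global
 * nr_running only while rt_queued is set; these two helpers remove and
 * re-add that contribution when the RT class is throttled/unthrottled
 * or becomes empty/populated.
 */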
 964static void
 965dequeue_top_rt_rq(struct rt_rq *rt_rq)
 966{
 967        struct rq *rq = rq_of_rt_rq(rt_rq);
 968
 969        BUG_ON(&rq->rt != rt_rq);
 970
 971        if (!rt_rq->rt_queued)
 972                return;
 973
 974        BUG_ON(!rq->nr_running);
 975
 976        sub_nr_running(rq, rt_rq->rt_nr_running);
 977        rt_rq->rt_queued = 0;
 978}
 979
 980static void
 981enqueue_top_rt_rq(struct rt_rq *rt_rq)
 982{
 983        struct rq *rq = rq_of_rt_rq(rt_rq);
 984
 985        BUG_ON(&rq->rt != rt_rq);
 986
 987        if (rt_rq->rt_queued)
 988                return;
 989        if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running)
 990                return;
 991
 992        add_nr_running(rq, rt_rq->rt_nr_running);
 993        rt_rq->rt_queued = 1;
 994}
 995
 996#if defined CONFIG_SMP
 997
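/*
 * Propagate changes of the top-level rt_rq's highest priority into the
 * root domain's cpupri structure, which other CPUs consult for push
 * and pull decisions.
 */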
 998static void
 999inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
1000{
1001        struct rq *rq = rq_of_rt_rq(rt_rq);
1002
1003#ifdef CONFIG_RT_GROUP_SCHED
1004        /*
1005         * Change rq's cpupri only if rt_rq is the top queue.
1006         */
1007        if (&rq->rt != rt_rq)
1008                return;
1009#endif
1010        if (rq->online && prio < prev_prio)
1011                cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
1012}
1013
1014static void
1015dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
1016{
1017        struct rq *rq = rq_of_rt_rq(rt_rq);
1018
1019#ifdef CONFIG_RT_GROUP_SCHED
1020        /*
1021         * Change rq's cpupri only if rt_rq is the top queue.
1022         */
1023        if (&rq->rt != rt_rq)
1024                return;
1025#endif
1026        if (rq->online && rt_rq->highest_prio.curr != prev_prio)
1027                cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
1028}
1029
1030#else /* CONFIG_SMP */
1031
1032static inline
1033void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
1034static inline
1035void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
1036
1037#endif /* CONFIG_SMP */
1038
1039#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
1040static void
1041inc_rt_prio(struct rt_rq *rt_rq, int prio)
1042{
1043        int prev_prio = rt_rq->highest_prio.curr;
1044
1045        if (prio < prev_prio)
1046                rt_rq->highest_prio.curr = prio;
1047
1048        inc_rt_prio_smp(rt_rq, prio, prev_prio);
1049}
1050
1051static void
1052dec_rt_prio(struct rt_rq *rt_rq, int prio)
1053{
1054        int prev_prio = rt_rq->highest_prio.curr;
1055
1056        if (rt_rq->rt_nr_running) {
1057
1058                WARN_ON(prio < prev_prio);
1059
1060                /*
1061                 * This may have been our highest task, and therefore
1062                 * we may have some recomputation to do
1063                 */
1064                if (prio == prev_prio) {
1065                        struct rt_prio_array *array = &rt_rq->active;
1066
1067                        rt_rq->highest_prio.curr =
1068                                sched_find_first_bit(array->bitmap);
1069                }
1070
1071        } else
1072                rt_rq->highest_prio.curr = MAX_RT_PRIO;
1073
1074        dec_rt_prio_smp(rt_rq, prio, prev_prio);
1075}
1076
1077#else
1078
1079static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
1080static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
1081
1082#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
1083
1084#ifdef CONFIG_RT_GROUP_SCHED
1085
1086static void
1087inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1088{
1089        if (rt_se_boosted(rt_se))
1090                rt_rq->rt_nr_boosted++;
1091
1092        if (rt_rq->tg)
1093                start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
1094}
1095
1096static void
1097dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1098{
1099        if (rt_se_boosted(rt_se))
1100                rt_rq->rt_nr_boosted--;
1101
1102        WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
1103}
1104
1105#else /* CONFIG_RT_GROUP_SCHED */
1106
1107static void
1108inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1109{
1110        start_rt_bandwidth(&def_rt_bandwidth);
1111}
1112
1113static inline
1114void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
1115
1116#endif /* CONFIG_RT_GROUP_SCHED */
1117
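/*
 * A group entity counts for all tasks queued below it; a task entity
 * counts just for itself.
 */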
1118static inline
1119unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
1120{
1121        struct rt_rq *group_rq = group_rt_rq(rt_se);
1122
1123        if (group_rq)
1124                return group_rq->rt_nr_running;
1125        else
1126                return 1;
1127}
1128
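/*
 * Account an entity added to @rt_rq: bump rt_nr_running by the number
 * of tasks it represents and update the priority, migration and group
 * bookkeeping; dec_rt_tasks() is the inverse.
 */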
1129static inline
1130void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1131{
1132        int prio = rt_se_prio(rt_se);
1133
1134        WARN_ON(!rt_prio(prio));
1135        rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
1136
1137        inc_rt_prio(rt_rq, prio);
1138        inc_rt_migration(rt_se, rt_rq);
1139        inc_rt_group(rt_se, rt_rq);
1140}
1141
1142static inline
1143void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1144{
1145        WARN_ON(!rt_prio(rt_se_prio(rt_se)));
1146        WARN_ON(!rt_rq->rt_nr_running);
1147        rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
1148
1149        dec_rt_prio(rt_rq, rt_se_prio(rt_se));
1150        dec_rt_migration(rt_se, rt_rq);
1151        dec_rt_group(rt_se, rt_rq);
1152}
1153
1154static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
1155{
1156        struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1157        struct rt_prio_array *array = &rt_rq->active;
1158        struct rt_rq *group_rq = group_rt_rq(rt_se);
1159        struct list_head *queue = array->queue + rt_se_prio(rt_se);
1160
1161        /*
 1162         * Don't enqueue the group if it's throttled, or when empty.
 1163         * The latter is a consequence of the former when a child group
 1164         * gets throttled and the current group doesn't have any other
1165         * active members.
1166         */
1167        if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
1168                return;
1169
1170        if (head)
1171                list_add(&rt_se->run_list, queue);
1172        else
1173                list_add_tail(&rt_se->run_list, queue);
1174        __set_bit(rt_se_prio(rt_se), array->bitmap);
1175
1176        inc_rt_tasks(rt_se, rt_rq);
1177}
1178
1179static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
1180{
1181        struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1182        struct rt_prio_array *array = &rt_rq->active;
1183
1184        list_del_init(&rt_se->run_list);
1185        if (list_empty(array->queue + rt_se_prio(rt_se)))
1186                __clear_bit(rt_se_prio(rt_se), array->bitmap);
1187
1188        dec_rt_tasks(rt_se, rt_rq);
1189}
1190
1191/*
1192 * Because the prio of an upper entry depends on the lower
1193 * entries, we must remove entries top - down.
1194 */
1195static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
1196{
1197        struct sched_rt_entity *back = NULL;
1198
1199        for_each_sched_rt_entity(rt_se) {
1200                rt_se->back = back;
1201                back = rt_se;
1202        }
1203
1204        dequeue_top_rt_rq(rt_rq_of_se(back));
1205
1206        for (rt_se = back; rt_se; rt_se = rt_se->back) {
1207                if (on_rt_rq(rt_se))
1208                        __dequeue_rt_entity(rt_se);
1209        }
1210}
1211
1212static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
1213{
1214        struct rq *rq = rq_of_rt_se(rt_se);
1215
1216        dequeue_rt_stack(rt_se);
1217        for_each_sched_rt_entity(rt_se)
1218                __enqueue_rt_entity(rt_se, head);
1219        enqueue_top_rt_rq(&rq->rt);
1220}
1221
1222static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
1223{
1224        struct rq *rq = rq_of_rt_se(rt_se);
1225
1226        dequeue_rt_stack(rt_se);
1227
1228        for_each_sched_rt_entity(rt_se) {
1229                struct rt_rq *rt_rq = group_rt_rq(rt_se);
1230
1231                if (rt_rq && rt_rq->rt_nr_running)
1232                        __enqueue_rt_entity(rt_se, false);
1233        }
1234        enqueue_top_rt_rq(&rq->rt);
1235}
1236
1237/*
1238 * Adding/removing a task to/from a priority array:
1239 */
1240static void
1241enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1242{
1243        struct sched_rt_entity *rt_se = &p->rt;
1244
1245        if (flags & ENQUEUE_WAKEUP)
1246                rt_se->timeout = 0;
1247
1248        enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
1249
1250        if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1251                enqueue_pushable_task(rq, p);
1252}
1253
1254static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
1255{
1256        struct sched_rt_entity *rt_se = &p->rt;
1257
1258        update_curr_rt(rq);
1259        dequeue_rt_entity(rt_se);
1260
1261        dequeue_pushable_task(rq, p);
1262}
1263
1264/*
1265 * Put task to the head or the end of the run list without the overhead of
1266 * dequeue followed by enqueue.
1267 */
1268static void
1269requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
1270{
1271        if (on_rt_rq(rt_se)) {
1272                struct rt_prio_array *array = &rt_rq->active;
1273                struct list_head *queue = array->queue + rt_se_prio(rt_se);
1274
1275                if (head)
1276                        list_move(&rt_se->run_list, queue);
1277                else
1278                        list_move_tail(&rt_se->run_list, queue);
1279        }
1280}
1281
1282static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
1283{
1284        struct sched_rt_entity *rt_se = &p->rt;
1285        struct rt_rq *rt_rq;
1286
1287        for_each_sched_rt_entity(rt_se) {
1288                rt_rq = rt_rq_of_se(rt_se);
1289                requeue_rt_entity(rt_rq, rt_se, head);
1290        }
1291}
1292
1293static void yield_task_rt(struct rq *rq)
1294{
1295        requeue_task_rt(rq, rq->curr, 0);
1296}
1297
1298#ifdef CONFIG_SMP
1299static int find_lowest_rq(struct task_struct *task);
1300
1301static int
1302select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
1303{
1304        struct task_struct *curr;
1305        struct rq *rq;
1306
1307        /* For anything but wake ups, just return the task_cpu */
1308        if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
1309                goto out;
1310
1311        rq = cpu_rq(cpu);
1312
1313        rcu_read_lock();
1314        curr = ACCESS_ONCE(rq->curr); /* unlocked access */
1315
1316        /*
1317         * If the current task on @p's runqueue is an RT task, then
1318         * try to see if we can wake this RT task up on another
1319         * runqueue. Otherwise simply start this RT task
1320         * on its current runqueue.
1321         *
1322         * We want to avoid overloading runqueues. If the woken
 1323         * task is of higher priority, then it will stay on this CPU
 1324         * and the lower prio task should be moved to another CPU.
 1325         * Even though this will probably make the lower prio task
 1326         * lose its cache, we do not want to bounce a higher prio task
1327         * around just because it gave up its CPU, perhaps for a
1328         * lock?
1329         *
1330         * For equal prio tasks, we just let the scheduler sort it out.
1331         *
1332         * Otherwise, just let it ride on the affined RQ and the
1333         * post-schedule router will push the preempted task away
1334         *
1335         * This test is optimistic, if we get it wrong the load-balancer
1336         * will have to sort it out.
1337         */
1338        if (curr && unlikely(rt_task(curr)) &&
1339            (curr->nr_cpus_allowed < 2 ||
1340             curr->prio <= p->prio)) {
1341                int target = find_lowest_rq(p);
1342
1343                /*
1344                 * Don't bother moving it if the destination CPU is
1345                 * not running a lower priority task.
1346                 */
1347                if (target != -1 &&
1348                    p->prio < cpu_rq(target)->rt.highest_prio.curr)
1349                        cpu = target;
1350        }
1351        rcu_read_unlock();
1352
1353out:
1354        return cpu;
1355}
1356
1357static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1358{
1359        /*
1360         * Current can't be migrated, useless to reschedule,
1361         * let's hope p can move out.
1362         */
1363        if (rq->curr->nr_cpus_allowed == 1 ||
1364            !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
1365                return;
1366
1367        /*
1368         * p is migratable, so let's not schedule it and
1369         * see if it is pushed or pulled somewhere else.
1370         */
1371        if (p->nr_cpus_allowed != 1
1372            && cpupri_find(&rq->rd->cpupri, p, NULL))
1373                return;
1374
1375        /*
 1376         * There appear to be other cpus that can accept
 1377         * current and none to run 'p', so let's reschedule
1378         * to try and push current away:
1379         */
1380        requeue_task_rt(rq, p, 1);
1381        resched_curr(rq);
1382}
1383
1384#endif /* CONFIG_SMP */
1385
1386/*
1387 * Preempt the current task with a newly woken task if needed:
1388 */
1389static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
1390{
1391        if (p->prio < rq->curr->prio) {
1392                resched_curr(rq);
1393                return;
1394        }
1395
1396#ifdef CONFIG_SMP
1397        /*
1398         * If:
1399         *
1400         * - the newly woken task is of equal priority to the current task
1401         * - the newly woken task is non-migratable while current is migratable
1402         * - current will be preempted on the next reschedule
1403         *
1404         * we should check to see if current can readily move to a different
1405         * cpu.  If so, we will reschedule to allow the push logic to try
1406         * to move current somewhere else, making room for our non-migratable
1407         * task.
1408         */
1409        if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
1410                check_preempt_equal_prio(rq, p);
1411#endif
1412}
1413
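/*
 * Return the first entity on the highest-priority non-empty list of
 * this rt_rq's priority array.
 */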
1414static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
1415                                                   struct rt_rq *rt_rq)
1416{
1417        struct rt_prio_array *array = &rt_rq->active;
1418        struct sched_rt_entity *next = NULL;
1419        struct list_head *queue;
1420        int idx;
1421
1422        idx = sched_find_first_bit(array->bitmap);
1423        BUG_ON(idx >= MAX_RT_PRIO);
1424
1425        queue = array->queue + idx;
1426        next = list_entry(queue->next, struct sched_rt_entity, run_list);
1427
1428        return next;
1429}
1430
1431static struct task_struct *_pick_next_task_rt(struct rq *rq)
1432{
1433        struct sched_rt_entity *rt_se;
1434        struct task_struct *p;
1435        struct rt_rq *rt_rq  = &rq->rt;
1436
1437        do {
1438                rt_se = pick_next_rt_entity(rq, rt_rq);
1439                BUG_ON(!rt_se);
1440                rt_rq = group_rt_rq(rt_se);
1441        } while (rt_rq);
1442
1443        p = rt_task_of(rt_se);
1444        p->se.exec_start = rq_clock_task(rq);
1445
1446        return p;
1447}
1448
1449static struct task_struct *
1450pick_next_task_rt(struct rq *rq, struct task_struct *prev)
1451{
1452        struct task_struct *p;
1453        struct rt_rq *rt_rq = &rq->rt;
1454
1455        if (need_pull_rt_task(rq, prev)) {
1456                pull_rt_task(rq);
1457                /*
1458                 * pull_rt_task() can drop (and re-acquire) rq->lock; this
1459                 * means a dl or stop task can slip in, in which case we need
1460                 * to re-start task selection.
1461                 */
1462                if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) ||
1463                             rq->dl.dl_nr_running))
1464                        return RETRY_TASK;
1465        }
1466
1467        /*
1468         * We may dequeue prev's rt_rq in put_prev_task().
1469         * So, we update time before rt_nr_running check.
1470         */
1471        if (prev->sched_class == &rt_sched_class)
1472                update_curr_rt(rq);
1473
1474        if (!rt_rq->rt_queued)
1475                return NULL;
1476
1477        put_prev_task(rq, prev);
1478
1479        p = _pick_next_task_rt(rq);
1480
1481        /* The running task is never eligible for pushing */
1482        dequeue_pushable_task(rq, p);
1483
1484        set_post_schedule(rq);
1485
1486        return p;
1487}
1488
1489static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1490{
1491        update_curr_rt(rq);
1492
1493        /*
1494         * The previous task needs to be made eligible for pushing
1495         * if it is still active
1496         */
1497        if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
1498                enqueue_pushable_task(rq, p);
1499}
1500
1501#ifdef CONFIG_SMP
1502
1503/* Only try algorithms three times */
1504#define RT_MAX_TRIES 3
1505
1506static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1507{
1508        if (!task_running(rq, p) &&
1509            cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
1510                return 1;
1511        return 0;
1512}
1513
1514/*
 1515 * Return the highest-priority pushable task on this rq that can run on
 1516 * the given cpu, or NULL otherwise
1517 */
1518static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
1519{
1520        struct plist_head *head = &rq->rt.pushable_tasks;
1521        struct task_struct *p;
1522
1523        if (!has_pushable_tasks(rq))
1524                return NULL;
1525
1526        plist_for_each_entry(p, head, pushable_tasks) {
1527                if (pick_rt_task(rq, p, cpu))
1528                        return p;
1529        }
1530
1531        return NULL;
1532}
1533
1534static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
1535
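/*
 * Ask cpupri for the CPUs currently running something of lower priority
 * than @task, then prefer, in order: the task's own CPU, this CPU if it
 * shares a WAKE_AFFINE domain, the closest CPU in that domain, and
 * finally any CPU from the mask. Returns -1 if no candidate exists.
 */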
1536static int find_lowest_rq(struct task_struct *task)
1537{
1538        struct sched_domain *sd;
1539        struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
1540        int this_cpu = smp_processor_id();
1541        int cpu      = task_cpu(task);
1542
1543        /* Make sure the mask is initialized first */
1544        if (unlikely(!lowest_mask))
1545                return -1;
1546
1547        if (task->nr_cpus_allowed == 1)
1548                return -1; /* No other targets possible */
1549
1550        if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
1551                return -1; /* No targets found */
1552
1553        /*
1554         * At this point we have built a mask of cpus representing the
1555         * lowest priority tasks in the system.  Now we want to elect
1556         * the best one based on our affinity and topology.
1557         *
1558         * We prioritize the last cpu that the task executed on since
1559         * it is most likely cache-hot in that location.
1560         */
1561        if (cpumask_test_cpu(cpu, lowest_mask))
1562                return cpu;
1563
1564        /*
1565         * Otherwise, we consult the sched_domains span maps to figure
1566         * out which cpu is logically closest to our hot cache data.
1567         */
1568        if (!cpumask_test_cpu(this_cpu, lowest_mask))
1569                this_cpu = -1; /* Skip this_cpu opt if not among lowest */
1570
1571        rcu_read_lock();
1572        for_each_domain(cpu, sd) {
1573                if (sd->flags & SD_WAKE_AFFINE) {
1574                        int best_cpu;
1575
1576                        /*
1577                         * "this_cpu" is cheaper to preempt than a
1578                         * remote processor.
1579                         */
1580                        if (this_cpu != -1 &&
1581                            cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
1582                                rcu_read_unlock();
1583                                return this_cpu;
1584                        }
1585
1586                        best_cpu = cpumask_first_and(lowest_mask,
1587                                                     sched_domain_span(sd));
1588                        if (best_cpu < nr_cpu_ids) {
1589                                rcu_read_unlock();
1590                                return best_cpu;
1591                        }
1592                }
1593        }
1594        rcu_read_unlock();
1595
1596        /*
1597         * And finally, if there were no matches within the domains
1598         * just give the caller *something* to work with from the compatible
1599         * locations.
1600         */
1601        if (this_cpu != -1)
1602                return this_cpu;
1603
1604        cpu = cpumask_any(lowest_mask);
1605        if (cpu < nr_cpu_ids)
1606                return cpu;
1607        return -1;
1608}
1609
1610/* Will lock the rq it finds */
1611static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1612{
1613        struct rq *lowest_rq = NULL;
1614        int tries;
1615        int cpu;
1616
1617        for (tries = 0; tries < RT_MAX_TRIES; tries++) {
1618                cpu = find_lowest_rq(task);
1619
1620                if ((cpu == -1) || (cpu == rq->cpu))
1621                        break;
1622
1623                lowest_rq = cpu_rq(cpu);
1624
1625                if (lowest_rq->rt.highest_prio.curr <= task->prio) {
1626                        /*
1627                         * Target rq has tasks of equal or higher priority,
1628                         * retrying does not release any lock and is unlikely
1629                         * to yield a different result.
1630                         */
1631                        lowest_rq = NULL;
1632                        break;
1633                }
1634
1635                /* if the prio of this runqueue changed, try again */
1636                if (double_lock_balance(rq, lowest_rq)) {
1637                        /*
1638                         * We had to unlock the run queue. In
 1639                         * the meantime, the task could have
1640                         * migrated already or had its affinity changed.
1641                         * Also make sure that it wasn't scheduled on its rq.
1642                         */
1643                        if (unlikely(task_rq(task) != rq ||
1644                                     !cpumask_test_cpu(lowest_rq->cpu,
1645                                                       tsk_cpus_allowed(task)) ||
1646                                     task_running(rq, task) ||
1647                                     !task_on_rq_queued(task))) {
1648
1649                                double_unlock_balance(rq, lowest_rq);
1650                                lowest_rq = NULL;
1651                                break;
1652                        }
1653                }
1654
1655                /* If this rq is still suitable use it. */
1656                if (lowest_rq->rt.highest_prio.curr > task->prio)
1657                        break;
1658
1659                /* try again */
1660                double_unlock_balance(rq, lowest_rq);
1661                lowest_rq = NULL;
1662        }
1663
1664        return lowest_rq;
1665}
1666
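/* Return the highest-priority task queued for pushing on this rq, if any. */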
1667static struct task_struct *pick_next_pushable_task(struct rq *rq)
1668{
1669        struct task_struct *p;
1670
1671        if (!has_pushable_tasks(rq))
1672                return NULL;
1673
1674        p = plist_first_entry(&rq->rt.pushable_tasks,
1675                              struct task_struct, pushable_tasks);
1676
1677        BUG_ON(rq->cpu != task_cpu(p));
1678        BUG_ON(task_current(rq, p));
1679        BUG_ON(p->nr_cpus_allowed <= 1);
1680
1681        BUG_ON(!task_on_rq_queued(p));
1682        BUG_ON(!rt_task(p));
1683
1684        return p;
1685}
1686
1687/*
1688 * If the current CPU has more than one RT task, see if the non
1689 * running task can migrate over to a CPU that is running a task
1690 * of lesser priority.
1691 */
1692static int push_rt_task(struct rq *rq)
1693{
1694        struct task_struct *next_task;
1695        struct rq *lowest_rq;
1696        int ret = 0;
1697
1698        if (!rq->rt.overloaded)
1699                return 0;
1700
1701        next_task = pick_next_pushable_task(rq);
1702        if (!next_task)
1703                return 0;
1704
1705retry:
1706        if (unlikely(next_task == rq->curr)) {
1707                WARN_ON(1);
1708                return 0;
1709        }
1710
1711        /*
1712         * It's possible that next_task slipped in with a
1713         * higher priority than current. If that's the case,
1714         * just reschedule current.
1715         */
1716        if (unlikely(next_task->prio < rq->curr->prio)) {
1717                resched_curr(rq);
1718                return 0;
1719        }
1720
1721        /* We might release rq lock */
1722        get_task_struct(next_task);
1723
1724        /* find_lock_lowest_rq locks the rq if found */
1725        lowest_rq = find_lock_lowest_rq(next_task, rq);
1726        if (!lowest_rq) {
1727                struct task_struct *task;
1728                /*
1729                 * find_lock_lowest_rq releases rq->lock
1730                 * so it is possible that next_task has migrated.
1731                 *
1732                 * We need to make sure that the task is still on the same
1733                 * run-queue and is also still the next task eligible for
1734                 * pushing.
1735                 */
1736                task = pick_next_pushable_task(rq);
1737                if (task_cpu(next_task) == rq->cpu && task == next_task) {
1738                        /*
1739                         * The task hasn't migrated, and is still the next
1740                         * eligible task, but we failed to find a run-queue
1741                         * to push it to.  Do not retry in this case, since
1742                         * other cpus will pull from us when ready.
1743                         */
1744                        goto out;
1745                }
1746
1747                if (!task)
1748                        /* No more tasks, just exit */
1749                        goto out;
1750
1751                /*
1752                 * Something has shifted, try again.
1753                 */
1754                put_task_struct(next_task);
1755                next_task = task;
1756                goto retry;
1757        }
1758
1759        deactivate_task(rq, next_task, 0);
1760        set_task_cpu(next_task, lowest_rq->cpu);
1761        activate_task(lowest_rq, next_task, 0);
1762        ret = 1;
1763
1764        resched_curr(lowest_rq);
1765
1766        double_unlock_balance(rq, lowest_rq);
1767
1768out:
1769        put_task_struct(next_task);
1770
1771        return ret;
1772}
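
/*
 * Illustrative sketch (not part of rt.c): the decision made by
 * push_rt_task() above reduces to three priority comparisons.  The helper
 * below (push_decision() and enum push_action, both names invented for this
 * example) condenses that flow into a standalone, testable form.
 */
#if 0	/* standalone userspace example, for illustration only */
#include <stdio.h>

enum push_action { PUSH_NONE, PUSH_RESCHED_CURR, PUSH_MIGRATE };

/* Lower numeric prio == higher scheduling priority, as in the kernel. */
static enum push_action push_decision(int curr_prio, int next_prio,
				      int target_highest_prio)
{
	if (next_prio < curr_prio)
		return PUSH_RESCHED_CURR;	/* next preempts curr locally */
	if (target_highest_prio <= next_prio)
		return PUSH_NONE;		/* target is no better off */
	return PUSH_MIGRATE;			/* push next over to the target */
}

int main(void)
{
	printf("%d\n", push_decision(20, 10, 99));	/* 1: resched curr instead */
	printf("%d\n", push_decision(20, 30, 25));	/* 0: nowhere better to go */
	printf("%d\n", push_decision(20, 30, 99));	/* 2: migrate next_task */
	return 0;
}
#endif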
1773
1774static void push_rt_tasks(struct rq *rq)
1775{
1776        /* push_rt_task() will return true if it moved an RT task */
1777        while (push_rt_task(rq))
1778                ;
1779}
1780
1781static int pull_rt_task(struct rq *this_rq)
1782{
1783        int this_cpu = this_rq->cpu, ret = 0, cpu;
1784        struct task_struct *p;
1785        struct rq *src_rq;
1786
1787        if (likely(!rt_overloaded(this_rq)))
1788                return 0;
1789
1790        /*
1791         * Match the barrier from rt_set_overload(); this guarantees that if we
1792         * see overloaded we must also see the rto_mask bit.
1793         */
1794        smp_rmb();
1795
1796        for_each_cpu(cpu, this_rq->rd->rto_mask) {
1797                if (this_cpu == cpu)
1798                        continue;
1799
1800                src_rq = cpu_rq(cpu);
1801
1802                /*
1803                 * Don't bother taking the src_rq->lock if the next highest
1804                 * task is known to be lower-priority than our current task.
1805                 * This may look racy, but if this value is about to go
1806                 * logically higher, the src_rq will push this task away.
1807                 * And if it's going logically lower, we do not care.
1808                 */
1809                if (src_rq->rt.highest_prio.next >=
1810                    this_rq->rt.highest_prio.curr)
1811                        continue;
1812
1813                /*
1814                 * We can potentially drop this_rq's lock in
1815                 * double_lock_balance, and another CPU could
1816                 * alter this_rq
1817                 */
1818                double_lock_balance(this_rq, src_rq);
1819
1820                /*
1821                 * We can only pull a task that is pushable
1822                 * on its rq, and no others.
1823                 */
1824                p = pick_highest_pushable_task(src_rq, this_cpu);
1825
1826                /*
1827                 * Do we have an RT task that preempts
1828                 * the to-be-scheduled task?
1829                 */
1830                if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
1831                        WARN_ON(p == src_rq->curr);
1832                        WARN_ON(!task_on_rq_queued(p));
1833
1834                        /*
1835                         * There's a chance that p is higher in priority
1836                         * than what's currently running on its cpu.
1837                         * This can happen when p is just waking up and hasn't
1838                         * had a chance to schedule yet. We only pull
1839                         * p if it is no higher in priority than the
1840                         * task currently running on that run queue.
1841                         */
1842                        if (p->prio < src_rq->curr->prio)
1843                                goto skip;
1844
1845                        ret = 1;
1846
1847                        deactivate_task(src_rq, p, 0);
1848                        set_task_cpu(p, this_cpu);
1849                        activate_task(this_rq, p, 0);
1850                        /*
1851                         * We continue with the search, just in
1852                         * case there's an even higher prio task
1853                         * in another runqueue. (low likelihood
1854                         * but possible)
1855                         */
1856                }
1857skip:
1858                double_unlock_balance(this_rq, src_rq);
1859        }
1860
1861        return ret;
1862}
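
/*
 * Illustrative sketch (not part of rt.c): pull_rt_task() above pulls a task
 * p only when p would preempt what this CPU is about to run, and only when
 * p is not itself about to preempt the task running on its source CPU.  The
 * predicate below (should_pull(), an invented name) captures just those two
 * comparisons.
 */
#if 0	/* standalone userspace example, for illustration only */
#include <stdbool.h>
#include <stdio.h>

/* Lower numeric prio == higher scheduling priority. */
static bool should_pull(int p_prio, int src_curr_prio, int this_highest_prio)
{
	if (p_prio >= this_highest_prio)
		return false;	/* would not beat anything queued here */
	if (p_prio < src_curr_prio)
		return false;	/* p is about to run on its own CPU anyway */
	return true;
}

int main(void)
{
	printf("%d\n", should_pull(5, 10, 50));		/* 0: src will run p itself */
	printf("%d\n", should_pull(60, 10, 50));	/* 0: nothing to gain here */
	printf("%d\n", should_pull(20, 10, 50));	/* 1: pull p over */
	return 0;
}
#endif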
1863
1864static void post_schedule_rt(struct rq *rq)
1865{
1866        push_rt_tasks(rq);
1867}
1868
1869/*
1870 * If we are not running and we are not going to reschedule soon, we should
1871 * try to push tasks away now
1872 */
1873static void task_woken_rt(struct rq *rq, struct task_struct *p)
1874{
1875        if (!task_running(rq, p) &&
1876            !test_tsk_need_resched(rq->curr) &&
1877            has_pushable_tasks(rq) &&
1878            p->nr_cpus_allowed > 1 &&
1879            (dl_task(rq->curr) || rt_task(rq->curr)) &&
1880            (rq->curr->nr_cpus_allowed < 2 ||
1881             rq->curr->prio <= p->prio))
1882                push_rt_tasks(rq);
1883}
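
/*
 * Illustrative sketch (not part of rt.c): task_woken_rt() above is a chain
 * of conditions gating a push on wakeup.  Folding them into one predicate
 * (should_push_on_wakeup() and struct wake_ctx are invented names) makes
 * the individual requirements easier to see and test in isolation.
 */
#if 0	/* standalone userspace example, for illustration only */
#include <stdbool.h>
#include <stdio.h>

struct wake_ctx {
	bool p_running;		/* p already runs on this CPU */
	bool curr_needs_resched;
	bool have_pushable;
	int  p_cpus_allowed;
	bool curr_is_rt_or_dl;
	int  curr_cpus_allowed;
	int  curr_prio, p_prio;	/* lower value == higher priority */
};

static bool should_push_on_wakeup(const struct wake_ctx *c)
{
	return !c->p_running &&
	       !c->curr_needs_resched &&
	       c->have_pushable &&
	       c->p_cpus_allowed > 1 &&
	       c->curr_is_rt_or_dl &&
	       (c->curr_cpus_allowed < 2 || c->curr_prio <= c->p_prio);
}

int main(void)
{
	struct wake_ctx c = {
		.p_running = false, .curr_needs_resched = false,
		.have_pushable = true, .p_cpus_allowed = 4,
		.curr_is_rt_or_dl = true, .curr_cpus_allowed = 1,
		.curr_prio = 10, .p_prio = 50,
	};

	printf("%d\n", should_push_on_wakeup(&c));	/* 1: worth trying a push */
	return 0;
}
#endif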
1884
1885static void set_cpus_allowed_rt(struct task_struct *p,
1886                                const struct cpumask *new_mask)
1887{
1888        struct rq *rq;
1889        int weight;
1890
1891        BUG_ON(!rt_task(p));
1892
1893        if (!task_on_rq_queued(p))
1894                return;
1895
1896        weight = cpumask_weight(new_mask);
1897
1898        /*
1899         * Only update if the process changes whether it can migrate
1900         * at all, i.e. nr_cpus_allowed crosses the single-CPU boundary.
1901         */
1902        if ((p->nr_cpus_allowed > 1) == (weight > 1))
1903                return;
1904
1905        rq = task_rq(p);
1906
1907        /*
1908         * The process used to be able to migrate OR it can now migrate
1909         */
1910        if (weight <= 1) {
1911                if (!task_current(rq, p))
1912                        dequeue_pushable_task(rq, p);
1913                BUG_ON(!rq->rt.rt_nr_migratory);
1914                rq->rt.rt_nr_migratory--;
1915        } else {
1916                if (!task_current(rq, p))
1917                        enqueue_pushable_task(rq, p);
1918                rq->rt.rt_nr_migratory++;
1919        }
1920
1921        update_rt_migration(&rq->rt);
1922}
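
/*
 * Illustrative sketch (not part of rt.c): set_cpus_allowed_rt() above only
 * reacts when the affinity change crosses the single-CPU boundary, i.e.
 * when the task gains or loses the ability to migrate at all.  The helper
 * below (migratability_changed(), an invented name) shows that boundary
 * test on plain bitmask affinities.
 */
#if 0	/* standalone userspace example, for illustration only */
#include <stdbool.h>
#include <stdio.h>

/* True only when the new mask changes migratability (one CPU <-> many). */
static bool migratability_changed(unsigned long old_mask, unsigned long new_mask)
{
	bool could_migrate = __builtin_popcountl(old_mask) > 1;
	bool can_migrate = __builtin_popcountl(new_mask) > 1;

	return could_migrate != can_migrate;
}

int main(void)
{
	printf("%d\n", migratability_changed(0x3, 0xf));	/* 0: still migratable */
	printf("%d\n", migratability_changed(0x3, 0x1));	/* 1: pinned to one CPU */
	return 0;
}
#endif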
1923
1924/* Assumes rq->lock is held */
1925static void rq_online_rt(struct rq *rq)
1926{
1927        if (rq->rt.overloaded)
1928                rt_set_overload(rq);
1929
1930        __enable_runtime(rq);
1931
1932        cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
1933}
1934
1935/* Assumes rq->lock is held */
1936static void rq_offline_rt(struct rq *rq)
1937{
1938        if (rq->rt.overloaded)
1939                rt_clear_overload(rq);
1940
1941        __disable_runtime(rq);
1942
1943        cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
1944}
1945
1946/*
1947 * When switching away from the RT queue, we bring ourselves to a position
1948 * where we might want to pull RT tasks from other runqueues.
1949 */
1950static void switched_from_rt(struct rq *rq, struct task_struct *p)
1951{
1952        /*
1953         * If there are other RT tasks then we will reschedule
1954         * and the scheduling of the other RT tasks will handle
1955         * the balancing. But if we are the last RT task
1956         * we may need to handle the pulling of RT tasks
1957         * now.
1958         */
1959        if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
1960                return;
1961
1962        if (pull_rt_task(rq))
1963                resched_curr(rq);
1964}
1965
1966void __init init_sched_rt_class(void)
1967{
1968        unsigned int i;
1969
1970        for_each_possible_cpu(i) {
1971                zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
1972                                        GFP_KERNEL, cpu_to_node(i));
1973        }
1974}
1975#endif /* CONFIG_SMP */
1976
1977/*
1978 * When switching a task to RT, we may overload the runqueue
1979 * with RT tasks. In this case we try to push them off to
1980 * other runqueues.
1981 */
1982static void switched_to_rt(struct rq *rq, struct task_struct *p)
1983{
1984        int check_resched = 1;
1985
1986        /*
1987         * If we are already running, then there's nothing
1988         * that needs to be done. But if we are not running
1989         * we may need to preempt the current running task.
1990         * If that current running task is also an RT task
1991         * then see if we can move to another run queue.
1992         */
1993        if (task_on_rq_queued(p) && rq->curr != p) {
1994#ifdef CONFIG_SMP
1995                if (p->nr_cpus_allowed > 1 && rq->rt.overloaded &&
1996                    /* Don't resched if we changed runqueues */
1997                    push_rt_task(rq) && rq != task_rq(p))
1998                        check_resched = 0;
1999#endif /* CONFIG_SMP */
2000                if (check_resched && p->prio < rq->curr->prio)
2001                        resched_curr(rq);
2002        }
2003}
2004
2005/*
2006 * Priority of the task has changed. This may cause
2007 * us to initiate a push or pull.
2008 */
2009static void
2010prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
2011{
2012        if (!task_on_rq_queued(p))
2013                return;
2014
2015        if (rq->curr == p) {
2016#ifdef CONFIG_SMP
2017                /*
2018                 * If our priority decreases while running, we
2019                 * may need to pull tasks to this runqueue.
2020                 */
2021                if (oldprio < p->prio)
2022                        pull_rt_task(rq);
2023                /*
2024                 * If there's a higher priority task waiting to run
2025                 * then reschedule. Note, the above pull_rt_task
2026                 * can release the rq lock and p could migrate.
2027                 * Only reschedule if p is still on the same runqueue.
2028                 */
2029                if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
2030                        resched_curr(rq);
2031#else
2032                /* For UP simply resched on drop of prio */
2033                if (oldprio < p->prio)
2034                        resched_curr(rq);
2035#endif /* CONFIG_SMP */
2036        } else {
2037                /*
2038                 * This task is not running, but if its priority
2039                 * is higher than that of the currently running
2040                 * task, then reschedule.
2041                 */
2042                if (p->prio < rq->curr->prio)
2043                        resched_curr(rq);
2044        }
2045}
2046
2047static void watchdog(struct rq *rq, struct task_struct *p)
2048{
2049        unsigned long soft, hard;
2050
2051        /* max may change after cur was read; this will be fixed next tick */
2052        soft = task_rlimit(p, RLIMIT_RTTIME);
2053        hard = task_rlimit_max(p, RLIMIT_RTTIME);
2054
2055        if (soft != RLIM_INFINITY) {
2056                unsigned long next;
2057
2058                if (p->rt.watchdog_stamp != jiffies) {
2059                        p->rt.timeout++;
2060                        p->rt.watchdog_stamp = jiffies;
2061                }
2062
2063                next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
2064                if (p->rt.timeout > next)
2065                        p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
2066        }
2067}
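
/*
 * Illustrative sketch (not part of rt.c): watchdog() above compares a
 * per-task tick count against the RLIMIT_RTTIME budget, which userspace
 * expresses in microseconds.  The helper below (rttime_us_to_ticks(), an
 * invented name; the HZ value is just an example) reproduces that round-up
 * conversion.
 */
#if 0	/* standalone userspace example, for illustration only */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* Convert an RLIMIT_RTTIME budget in microseconds into scheduler ticks. */
static unsigned long rttime_us_to_ticks(unsigned long limit_us, unsigned long hz)
{
	unsigned long usec_per_tick = 1000000UL / hz;

	return DIV_ROUND_UP(limit_us, usec_per_tick);
}

int main(void)
{
	/* A 950ms soft limit with HZ=250 (4ms ticks) allows 238 ticks. */
	printf("%lu\n", rttime_us_to_ticks(950000UL, 250));
	return 0;
}
#endif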
2068
2069static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
2070{
2071        struct sched_rt_entity *rt_se = &p->rt;
2072
2073        update_curr_rt(rq);
2074
2075        watchdog(rq, p);
2076
2077        /*
2078         * RR tasks need a special form of timeslice management.
2079         * FIFO tasks have no timeslices.
2080         */
2081        if (p->policy != SCHED_RR)
2082                return;
2083
2084        if (--p->rt.time_slice)
2085                return;
2086
2087        p->rt.time_slice = sched_rr_timeslice;
2088
2089        /*
2090         * Requeue to the end of the queue if we (and all of our ancestors) are
2091         * not the only element on the queue.
2092         */
2093        for_each_sched_rt_entity(rt_se) {
2094                if (rt_se->run_list.prev != rt_se->run_list.next) {
2095                        requeue_task_rt(rq, p, 0);
2096                        resched_curr(rq);
2097                        return;
2098                }
2099        }
2100}
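
/*
 * Illustrative sketch (not part of rt.c): the SCHED_RR handling in
 * task_tick_rt() above decrements a per-task slice every tick and, once it
 * hits zero, refills it and requeues the task behind its peers (unless it
 * is alone on its queue).  The miniature model below uses invented names
 * (struct fake_task, rr_tick(), RR_SLICE_TICKS) to show that cycle.
 */
#if 0	/* standalone userspace example, for illustration only */
#include <stdbool.h>
#include <stdio.h>

#define RR_SLICE_TICKS	25	/* example quantum: 100ms at HZ=250 */

struct fake_task {
	int time_slice;
	bool alone_on_queue;
};

/* Returns true when the task should be moved to the tail of its queue. */
static bool rr_tick(struct fake_task *p)
{
	if (--p->time_slice > 0)
		return false;		/* quantum not exhausted yet */

	p->time_slice = RR_SLICE_TICKS;	/* refill the quantum */
	return !p->alone_on_queue;	/* requeue only if peers are waiting */
}

int main(void)
{
	struct fake_task t = { .time_slice = 2, .alone_on_queue = false };

	printf("%d\n", rr_tick(&t));	/* 0: slice not yet used up */
	printf("%d\n", rr_tick(&t));	/* 1: exhausted, round-robin to tail */
	return 0;
}
#endif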
2101
2102static void set_curr_task_rt(struct rq *rq)
2103{
2104        struct task_struct *p = rq->curr;
2105
2106        p->se.exec_start = rq_clock_task(rq);
2107
2108        /* The running task is never eligible for pushing */
2109        dequeue_pushable_task(rq, p);
2110}
2111
2112static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
2113{
2114        /*
2115         * Time slice is 0 for SCHED_FIFO tasks
2116         */
2117        if (task->policy == SCHED_RR)
2118                return sched_rr_timeslice;
2119        else
2120                return 0;
2121}
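
/*
 * Illustrative sketch (not part of rt.c): the value produced by
 * get_rr_interval_rt() above is what userspace ultimately reads through the
 * sched_rr_get_interval(2) system call.  The standalone program below
 * queries it for the caller; SCHED_FIFO callers simply read back 0.
 */
#if 0	/* standalone userspace example, for illustration only */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	if (sched_rr_get_interval(0, &ts) == 0)	/* pid 0 == the caller */
		printf("RR timeslice: %ld.%09ld s\n",
		       (long)ts.tv_sec, ts.tv_nsec);
	else
		perror("sched_rr_get_interval");
	return 0;
}
#endif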
2122
2123const struct sched_class rt_sched_class = {
2124        .next                   = &fair_sched_class,
2125        .enqueue_task           = enqueue_task_rt,
2126        .dequeue_task           = dequeue_task_rt,
2127        .yield_task             = yield_task_rt,
2128
2129        .check_preempt_curr     = check_preempt_curr_rt,
2130
2131        .pick_next_task         = pick_next_task_rt,
2132        .put_prev_task          = put_prev_task_rt,
2133
2134#ifdef CONFIG_SMP
2135        .select_task_rq         = select_task_rq_rt,
2136
2137        .set_cpus_allowed       = set_cpus_allowed_rt,
2138        .rq_online              = rq_online_rt,
2139        .rq_offline             = rq_offline_rt,
2140        .post_schedule          = post_schedule_rt,
2141        .task_woken             = task_woken_rt,
2142        .switched_from          = switched_from_rt,
2143#endif
2144
2145        .set_curr_task          = set_curr_task_rt,
2146        .task_tick              = task_tick_rt,
2147
2148        .get_rr_interval        = get_rr_interval_rt,
2149
2150        .prio_changed           = prio_changed_rt,
2151        .switched_to            = switched_to_rt,
2152
2153        .update_curr            = update_curr_rt,
2154};
2155
2156#ifdef CONFIG_SCHED_DEBUG
2157extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
2158
2159void print_rt_stats(struct seq_file *m, int cpu)
2160{
2161        rt_rq_iter_t iter;
2162        struct rt_rq *rt_rq;
2163
2164        rcu_read_lock();
2165        for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
2166                print_rt_rq(m, cpu, rt_rq);
2167        rcu_read_unlock();
2168}
2169#endif /* CONFIG_SCHED_DEBUG */
2170