1
2#include <linux/sched.h>
3#include <linux/sched/sysctl.h>
4#include <linux/sched/rt.h>
5#include <linux/sched/deadline.h>
6#include <linux/mutex.h>
7#include <linux/spinlock.h>
8#include <linux/stop_machine.h>
9#include <linux/irq_work.h>
10#include <linux/tick.h>
11#include <linux/slab.h>
12
13#include "cpupri.h"
14#include "cpudeadline.h"
15#include "cpuacct.h"
16
17struct rq;
18struct cpuidle_state;
19
20
21#define TASK_ON_RQ_QUEUED 1
22#define TASK_ON_RQ_MIGRATING 2
23
24extern __read_mostly int scheduler_running;
25
26extern unsigned long calc_load_update;
27extern atomic_long_t calc_load_tasks;
28
29extern void calc_global_load_tick(struct rq *this_rq);
30extern long calc_load_fold_active(struct rq *this_rq);
31
32#ifdef CONFIG_SMP
33extern void update_cpu_load_active(struct rq *this_rq);
34#else
35static inline void update_cpu_load_active(struct rq *this_rq) { }
36#endif
37
38
39
40
41#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
42
43
44
45
46
47
48
49
50
51
52
53
54
55#if 0
56# define SCHED_LOAD_RESOLUTION 10
57# define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION)
58# define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION)
59#else
60# define SCHED_LOAD_RESOLUTION 0
61# define scale_load(w) (w)
62# define scale_load_down(w) (w)
63#endif
64
65#define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION)
66#define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT)
67
68#define NICE_0_LOAD SCHED_LOAD_SCALE
69#define NICE_0_SHIFT SCHED_LOAD_SHIFT
70
71
72
73
74
75
76#define DL_SCALE (10)
77
78
79
80
81
82
83
84
85#define RUNTIME_INF ((u64)~0ULL)
86
87static inline int idle_policy(int policy)
88{
89 return policy == SCHED_IDLE;
90}
91static inline int fair_policy(int policy)
92{
93 return policy == SCHED_NORMAL || policy == SCHED_BATCH;
94}
95
96static inline int rt_policy(int policy)
97{
98 return policy == SCHED_FIFO || policy == SCHED_RR;
99}
100
101static inline int dl_policy(int policy)
102{
103 return policy == SCHED_DEADLINE;
104}
105static inline bool valid_policy(int policy)
106{
107 return idle_policy(policy) || fair_policy(policy) ||
108 rt_policy(policy) || dl_policy(policy);
109}
110
/* Does @p currently run under an RT (SCHED_FIFO/SCHED_RR) policy? */
static inline int task_has_rt_policy(struct task_struct *p)
{
	return rt_policy(p->policy);
}

/* Does @p currently run under SCHED_DEADLINE? */
static inline int task_has_dl_policy(struct task_struct *p)
{
	return dl_policy(p->policy);
}
120
121
122
123
/*
 * Tells if entity @a should preempt entity @b: true when @a's absolute
 * deadline is earlier than @b's (EDF ordering; dl_time_before() is
 * presumably wrap-safe — defined elsewhere).
 */
static inline bool
dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
{
	return dl_time_before(a->deadline, b->deadline);
}
129
130
131
132
/*
 * Priority-indexed runqueue for the RT class: one FIFO list per RT
 * priority level, plus a bitmap of non-empty levels (one extra bit
 * beyond MAX_RT_PRIO, presumably a search delimiter — see users).
 */
struct rt_prio_array {
	DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1);
	struct list_head queue[MAX_RT_PRIO];
};

/* Bandwidth (runtime/period) throttling state for the RT class. */
struct rt_bandwidth {
	/* serializes runtime/period updates */
	raw_spinlock_t rt_runtime_lock;
	ktime_t rt_period;
	u64 rt_runtime;
	struct hrtimer rt_period_timer;
	unsigned int rt_period_active;
};
146
147void __dl_clear_params(struct task_struct *p);
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/* System-wide default bandwidth (runtime/period) for SCHED_DEADLINE. */
struct dl_bandwidth {
	raw_spinlock_t dl_runtime_lock;
	u64 dl_runtime;
	u64 dl_period;
};

/*
 * Deadline bandwidth control reuses the RT throttling sysctl: a
 * negative sysctl_sched_rt_runtime disables admission control.
 */
static inline int dl_bandwidth_enabled(void)
{
	return sysctl_sched_rt_runtime >= 0;
}
183
184extern struct dl_bw *dl_bw_of(int i);
185
/*
 * Deadline bandwidth accounting: @bw is the admissible bandwidth per
 * CPU, @total_bw the sum of the bandwidth of all admitted deadline
 * tasks (both presumably DL_SCALE-scaled ratios — confirm in to_ratio()).
 */
struct dl_bw {
	raw_spinlock_t lock;
	u64 bw, total_bw;
};

/* Return @tsk_bw of bandwidth to @dl_b; caller holds dl_b->lock. */
static inline
void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw)
{
	dl_b->total_bw -= tsk_bw;
}

/* Reserve @tsk_bw of bandwidth from @dl_b; caller holds dl_b->lock. */
static inline
void __dl_add(struct dl_bw *dl_b, u64 tsk_bw)
{
	dl_b->total_bw += tsk_bw;
}

/*
 * Would swapping a task of bandwidth @old_bw for one of @new_bw
 * overcommit the @cpus-wide capacity of @dl_b?  A @bw of (u64)-1
 * means "no admission control" and never overflows.
 */
static inline
bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
{
	return dl_b->bw != -1 &&
	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
}
209
210extern struct mutex sched_domains_mutex;
211
212#ifdef CONFIG_CGROUP_SCHED
213
214#include <linux/cgroup.h>
215
216struct cfs_rq;
217struct rt_rq;
218
219extern struct list_head task_groups;
220
/*
 * CFS group bandwidth limiting (quota per period); an empty struct
 * when CONFIG_CFS_BANDWIDTH is off.
 */
struct cfs_bandwidth {
#ifdef CONFIG_CFS_BANDWIDTH
	raw_spinlock_t lock;
	ktime_t period;
	u64 quota, runtime;	/* quota: per-period budget; runtime: what remains */
	s64 hierarchical_quota;
	u64 runtime_expires;

	int idle, period_active;
	struct hrtimer period_timer, slack_timer;
	struct list_head throttled_cfs_rq;

	/* statistics */
	int nr_periods, nr_throttled;
	u64 throttled_time;
#endif
};
238
239
/* Task group related information (cgroup CPU controller). */
struct task_group {
	struct cgroup_subsys_state css;

#ifdef CONFIG_FAIR_GROUP_SCHED
	/* schedulable entities of this group on each cpu */
	struct sched_entity **se;
	/* runqueue "owned" by this group on each cpu */
	struct cfs_rq **cfs_rq;
	unsigned long shares;

#ifdef CONFIG_SMP
	atomic_long_t load_avg;
#endif
#endif

#ifdef CONFIG_RT_GROUP_SCHED
	struct sched_rt_entity **rt_se;
	struct rt_rq **rt_rq;

	struct rt_bandwidth rt_bandwidth;
#endif

	struct rcu_head rcu;
	struct list_head list;

	/* group hierarchy */
	struct task_group *parent;
	struct list_head siblings;
	struct list_head children;

#ifdef CONFIG_SCHED_AUTOGROUP
	struct autogroup *autogroup;
#endif

	struct cfs_bandwidth cfs_bandwidth;
};
275
276#ifdef CONFIG_FAIR_GROUP_SCHED
277#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
278
279
280
281
282
283
284
285
286
287#define MIN_SHARES (1UL << 1)
288#define MAX_SHARES (1UL << 18)
289#endif
290
291typedef int (*tg_visitor)(struct task_group *, void *);
292
293extern int walk_tg_tree_from(struct task_group *from,
294 tg_visitor down, tg_visitor up, void *data);
295
296
297
298
299
300
301
/*
 * Walk the whole task-group tree from the root, invoking @down on the
 * way down and @up on the way back up, passing @data to both.
 */
static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
{
	return walk_tg_tree_from(&root_task_group, down, up, data);
}
306
307extern int tg_nop(struct task_group *tg, void *data);
308
309extern void free_fair_sched_group(struct task_group *tg);
310extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
311extern void unregister_fair_sched_group(struct task_group *tg, int cpu);
312extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
313 struct sched_entity *se, int cpu,
314 struct sched_entity *parent);
315extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
316extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
317
318extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
319extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
320extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
321
322extern void free_rt_sched_group(struct task_group *tg);
323extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
324extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
325 struct sched_rt_entity *rt_se, int cpu,
326 struct sched_rt_entity *parent);
327
328extern struct task_group *sched_create_group(struct task_group *parent);
329extern void sched_online_group(struct task_group *tg,
330 struct task_group *parent);
331extern void sched_destroy_group(struct task_group *tg);
332extern void sched_offline_group(struct task_group *tg);
333
334extern void sched_move_task(struct task_struct *tsk);
335
336#ifdef CONFIG_FAIR_GROUP_SCHED
337extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
338#endif
339
340#else
341
342struct cfs_bandwidth { };
343
344#endif
345
346
/* CFS-related fields of a runqueue (per-CPU, or per group per CPU). */
struct cfs_rq {
	struct load_weight load;
	unsigned int nr_running, h_nr_running;	/* h_: hierarchical count */

	u64 exec_clock;
	u64 min_vruntime;
#ifndef CONFIG_64BIT
	/* shadow copy for lockless 64-bit reads on 32-bit — see users */
	u64 min_vruntime_copy;
#endif

	/* rbtree of runnable entities, ordered by vruntime */
	struct rb_root tasks_timeline;
	struct rb_node *rb_leftmost;	/* cached leftmost (next to run) */

	/*
	 * 'curr' points to currently running entity on this cfs_rq;
	 * the others are scheduling hints maintained by pick/put code.
	 */
	struct sched_entity *curr, *next, *last, *skip;

#ifdef CONFIG_SCHED_DEBUG
	unsigned int nr_spread_over;
#endif

#ifdef CONFIG_SMP
	/* per-entity load-tracking aggregates */
	struct sched_avg avg;
	u64 runnable_load_sum;
	unsigned long runnable_load_avg;
#ifdef CONFIG_FAIR_GROUP_SCHED
	unsigned long tg_load_avg_contrib;
#endif
	/* load/util removed by tasks migrating away, folded in later */
	atomic_long_t removed_load_avg, removed_util_avg;
#ifndef CONFIG_64BIT
	u64 load_last_update_time_copy;
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	/* hierarchical load of this cfs_rq, lazily refreshed */
	unsigned long h_load;
	u64 last_h_load_update;
	struct sched_entity *h_load_next;
#endif
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	struct rq *rq;	/* cpu runqueue this cfs_rq is attached to */

	int on_list;
	struct list_head leaf_cfs_rq_list;
	struct task_group *tg;	/* group that "owns" this runqueue */

#ifdef CONFIG_CFS_BANDWIDTH
	int runtime_enabled;
	u64 runtime_expires;
	s64 runtime_remaining;

	u64 throttled_clock, throttled_clock_task;
	u64 throttled_clock_task_time;
	int throttled, throttle_count;
	struct list_head throttled_list;
#endif
#endif
};
425
/* RT throttling is disabled when the runtime sysctl is negative. */
static inline int rt_bandwidth_enabled(void)
{
	return sysctl_sched_rt_runtime >= 0;
}
430
431
432#ifdef CONFIG_IRQ_WORK
433# define HAVE_RT_PUSH_IPI
434#endif
435
436
/* Real-Time class' related fields of a runqueue. */
struct rt_rq {
	struct rt_prio_array active;
	unsigned int rt_nr_running;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
	struct {
		int curr;	/* highest queued rt task prio */
#ifdef CONFIG_SMP
		int next;	/* next highest */
#endif
	} highest_prio;
#endif
#ifdef CONFIG_SMP
	unsigned long rt_nr_migratory;
	unsigned long rt_nr_total;
	int overloaded;
	struct plist_head pushable_tasks;
#ifdef HAVE_RT_PUSH_IPI
	/* IPI-based RT push machinery state */
	int push_flags;
	int push_cpu;
	struct irq_work push_work;
	raw_spinlock_t push_lock;
#endif
#endif
	int rt_queued;

	int rt_throttled;
	u64 rt_time;
	u64 rt_runtime;
	/* nests inside the rq lock */
	raw_spinlock_t rt_runtime_lock;

#ifdef CONFIG_RT_GROUP_SCHED
	unsigned long rt_nr_boosted;

	struct rq *rq;
	struct task_group *tg;
#endif
};
475
476
/* Deadline class' related fields of a runqueue. */
struct dl_rq {
	/* runqueue is an rbtree, ordered by deadline */
	struct rb_root rb_root;
	struct rb_node *rb_leftmost;

	unsigned long dl_nr_running;

#ifdef CONFIG_SMP
	/*
	 * Deadlines of the earliest (curr) and next-earliest (next)
	 * queued deadline tasks; used for migration decisions.
	 */
	struct {
		u64 curr;
		u64 next;
	} earliest_dl;

	unsigned long dl_nr_migratory;
	int overloaded;

	/* tasks that can be pushed to other CPUs, ordered by deadline */
	struct rb_root pushable_dl_tasks_root;
	struct rb_node *pushable_dl_tasks_leftmost;
#else
	struct dl_bw dl_bw;	/* on UP, bandwidth accounting lives here */
#endif
};
510
511#ifdef CONFIG_SMP
512
513
514
515
516
517
518
519
520
/*
 * Root domain: the set of CPUs among which RT/DL tasks may be
 * balanced; one per exclusive cpuset partition.
 */
struct root_domain {
	atomic_t refcount;
	atomic_t rto_count;	/* RT-overloaded CPU count */
	struct rcu_head rcu;
	cpumask_var_t span;
	cpumask_var_t online;

	/* at least one CPU in the domain has more than one runnable task */
	bool overload;

	/* CPUs with deadline-overload, plus DL bandwidth/heap state */
	cpumask_var_t dlo_mask;
	atomic_t dlo_count;
	struct dl_bw dl_bw;
	struct cpudl cpudl;

	/* CPUs with RT-overload, plus the cpupri search structure */
	cpumask_var_t rto_mask;
	struct cpupri cpupri;
};
547
548extern struct root_domain def_root_domain;
549
550#endif
551
552
553
554
555
556
557
558
/*
 * This is the main, per-CPU runqueue data structure.
 */
struct rq {
	/* runqueue lock — protects nearly everything below */
	raw_spinlock_t lock;

	unsigned int nr_running;
#ifdef CONFIG_NUMA_BALANCING
	unsigned int nr_numa_running;
	unsigned int nr_preferred_running;
#endif
	#define CPU_LOAD_IDX_MAX 5
	/* decaying cpu-load history at several time scales */
	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
	unsigned long last_load_update_tick;
#ifdef CONFIG_NO_HZ_COMMON
	u64 nohz_stamp;
	unsigned long nohz_flags;
#endif
#ifdef CONFIG_NO_HZ_FULL
	unsigned long last_sched_tick;
#endif
	/* capture load from *all* tasks on this cpu */
	struct load_weight load;
	unsigned long nr_load_updates;
	u64 nr_switches;

	/* per-class sub-runqueues */
	struct cfs_rq cfs;
	struct rt_rq rt;
	struct dl_rq dl;

#ifdef CONFIG_FAIR_GROUP_SCHED
	/* list of leaf cfs_rq on this cpu */
	struct list_head leaf_cfs_rq_list;
#endif

	unsigned long nr_uninterruptible;

	struct task_struct *curr, *idle, *stop;
	unsigned long next_balance;
	struct mm_struct *prev_mm;

	/* RQCF_* flags controlling update_rq_clock() skipping */
	unsigned int clock_skip_update;
	u64 clock;
	u64 clock_task;

	atomic_t nr_iowait;

#ifdef CONFIG_SMP
	struct root_domain *rd;
	struct sched_domain *sd;

	unsigned long cpu_capacity;
	unsigned long cpu_capacity_orig;

	/* callbacks to run when rq->lock is about to be released */
	struct callback_head *balance_callback;

	unsigned char idle_balance;

	/* for active balancing */
	int active_balance;
	int push_cpu;
	struct cpu_stop_work active_balance_work;

	/* cpu of this runqueue */
	int cpu;
	int online;

	struct list_head cfs_tasks;

	u64 rt_avg;
	u64 age_stamp;
	u64 idle_stamp;
	u64 avg_idle;

	/* cap on how long newidle balancing may take */
	u64 max_idle_balance_cost;
#endif

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	u64 prev_irq_time;
#endif
#ifdef CONFIG_PARAVIRT
	u64 prev_steal_time;
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
	u64 prev_steal_time_rq;
#endif

	/* calc_load related fields */
	unsigned long calc_load_update;
	long calc_load_active;

#ifdef CONFIG_SCHED_HRTICK
#ifdef CONFIG_SMP
	int hrtick_csd_pending;
	struct call_single_data hrtick_csd;
#endif
	struct hrtimer hrtick_timer;
#endif

#ifdef CONFIG_SCHEDSTATS
	/* latency stats */
	struct sched_info rq_sched_info;
	unsigned long long rq_cpu_time;

	/* sys_sched_yield() stats */
	unsigned int yld_count;

	/* schedule() stats */
	unsigned int sched_count;
	unsigned int sched_goidle;

	/* try_to_wake_up() stats */
	unsigned int ttwu_count;
	unsigned int ttwu_local;
#endif

#ifdef CONFIG_SMP
	struct llist_head wake_list;
#endif

#ifdef CONFIG_CPU_IDLE
	/* must be accessed with rcu_read_lock() held — see idle_get_state() */
	struct cpuidle_state *idle_state;
#endif
};
692
/* CPU number this runqueue belongs to; always 0 on UP. */
static inline int cpu_of(struct rq *rq)
{
#ifdef CONFIG_SMP
	return rq->cpu;
#else
	return 0;
#endif
}
701
702DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
703
704#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
705#define this_rq() this_cpu_ptr(&runqueues)
706#define task_rq(p) cpu_rq(task_cpu(p))
707#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
708#define raw_rq() raw_cpu_ptr(&runqueues)
709
/*
 * Lockless snapshot of the rq clock; may be stale since it skips the
 * rq->lock assertion ("broken" — use only where staleness is fine).
 */
static inline u64 __rq_clock_broken(struct rq *rq)
{
	return READ_ONCE(rq->clock);
}

/* rq clock in ns; caller must hold rq->lock. */
static inline u64 rq_clock(struct rq *rq)
{
	lockdep_assert_held(&rq->lock);
	return rq->clock;
}

/*
 * Task clock in ns; caller must hold rq->lock.  Presumably rq->clock
 * minus time not spent running tasks (irq/steal) — maintained in
 * update_rq_clock(), not visible here.
 */
static inline u64 rq_clock_task(struct rq *rq)
{
	lockdep_assert_held(&rq->lock);
	return rq->clock_task;
}
726
727#define RQCF_REQ_SKIP 0x01
728#define RQCF_ACT_SKIP 0x02
729
730static inline void rq_clock_skip_update(struct rq *rq, bool skip)
731{
732 lockdep_assert_held(&rq->lock);
733 if (skip)
734 rq->clock_skip_update |= RQCF_REQ_SKIP;
735 else
736 rq->clock_skip_update &= ~RQCF_REQ_SKIP;
737}
738
#ifdef CONFIG_NUMA
/* Classification of the machine's NUMA interconnect topology. */
enum numa_topology_type {
	NUMA_DIRECT,
	NUMA_GLUELESS_MESH,
	NUMA_BACKPLANE,
};
extern enum numa_topology_type sched_numa_topology_type;
extern int sched_max_numa_distance;
extern bool find_numa_distance(int distance);
#endif

#ifdef CONFIG_NUMA_BALANCING
/* Indices into the NUMA fault statistics arrays. */
enum numa_faults_stats {
	NUMA_MEM = 0,
	NUMA_CPU,
	NUMA_MEMBUF,
	NUMA_CPUBUF
};
extern void sched_setnuma(struct task_struct *p, int node);
extern int migrate_task_to(struct task_struct *p, int cpu);
extern int migrate_swap(struct task_struct *, struct task_struct *);
#endif
762
763#ifdef CONFIG_SMP
764
/*
 * Queue @head on @rq's balance-callback list so @func runs on it when
 * the rq lock is about to be dropped.  A head whose ->next is already
 * set is assumed queued and left alone.  Caller must hold rq->lock.
 */
static inline void
queue_balance_callback(struct rq *rq,
		       struct callback_head *head,
		       void (*func)(struct rq *rq))
{
	lockdep_assert_held(&rq->lock);

	if (unlikely(head->next))
		return;

	head->func = (void (*)(struct callback_head *))func;
	head->next = rq->balance_callback;
	rq->balance_callback = head;
}
779
780extern void sched_ttwu_pending(void);
781
782#define rcu_dereference_check_sched_domain(p) \
783 rcu_dereference_check((p), \
784 lockdep_is_held(&sched_domains_mutex))
785
786
787
788
789
790
791
792
793#define for_each_domain(cpu, __sd) \
794 for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
795 __sd; __sd = __sd->parent)
796
797#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
798
799
800
801
802
803
804
805
806
807
808static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
809{
810 struct sched_domain *sd, *hsd = NULL;
811
812 for_each_domain(cpu, sd) {
813 if (!(sd->flags & flag))
814 break;
815 hsd = sd;
816 }
817
818 return hsd;
819}
820
/*
 * Return the lowest sched_domain of @cpu that has @flag set, or NULL
 * if no domain has it (the loop then runs off the top, leaving sd NULL).
 */
static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
{
	struct sched_domain *sd;

	for_each_domain(cpu, sd) {
		if (sd->flags & flag)
			break;
	}

	return sd;
}
832
833DECLARE_PER_CPU(struct sched_domain *, sd_llc);
834DECLARE_PER_CPU(int, sd_llc_size);
835DECLARE_PER_CPU(int, sd_llc_id);
836DECLARE_PER_CPU(struct sched_domain *, sd_numa);
837DECLARE_PER_CPU(struct sched_domain *, sd_busy);
838DECLARE_PER_CPU(struct sched_domain *, sd_asym);
839
/* CPU-capacity information shared by the sched_groups that span it. */
struct sched_group_capacity {
	atomic_t ref;
	/* capacity of this group (total compute capacity of its CPUs) */
	unsigned int capacity;
	unsigned long next_update;
	int imbalance;	/* XXX unrelated to capacity but shared group state */

	atomic_t nr_busy_cpus;

	/* flexible array: the balance mask — must stay last */
	unsigned long cpumask[0];
};
856
/* A group of CPUs inside a sched_domain; groups form a circular list. */
struct sched_group {
	struct sched_group *next;	/* next group, same domain (circular) */
	atomic_t ref;

	unsigned int group_weight;
	struct sched_group_capacity *sgc;

	/*
	 * Flexible array holding the CPUs of this group — must stay
	 * last; allocation sizes it to the real nr_cpu_ids.
	 */
	unsigned long cpumask[0];
};
873
/* cpumask spanned by @sg. */
static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
{
	return to_cpumask(sg->cpumask);
}

/* The balance mask stored in @sg's shared capacity structure. */
static inline struct cpumask *sched_group_mask(struct sched_group *sg)
{
	return to_cpumask(sg->sgc->cpumask);
}

/* First (lowest-numbered) CPU of @group. */
static inline unsigned int group_first_cpu(struct sched_group *group)
{
	return cpumask_first(sched_group_cpus(group));
}
896
897extern int group_balance_cpu(struct sched_group *sg);
898
899#else
900
901static inline void sched_ttwu_pending(void) { }
902
903#endif
904
905#include "stats.h"
906#include "auto_group.h"
907
908#ifdef CONFIG_CGROUP_SCHED
909
910
911
912
913
914
915
916
917
918
919
920
921
922
/* Task group of @p; callers must hold an appropriate reference/lock. */
static inline struct task_group *task_group(struct task_struct *p)
{
	return p->sched_task_group;
}

/* Point @p's per-class group runqueue/entity pointers at CPU @cpu. */
static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
{
#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
	struct task_group *tg = task_group(p);
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	p->se.cfs_rq = tg->cfs_rq[cpu];
	p->se.parent = tg->se[cpu];
#endif

#ifdef CONFIG_RT_GROUP_SCHED
	p->rt.rt_rq = tg->rt_rq[cpu];
	p->rt.parent = tg->rt_se[cpu];
#endif
}
945
946#else
947
/* !CONFIG_CGROUP_SCHED: tasks belong to no group. */
static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
static inline struct task_group *task_group(struct task_struct *p)
{
	return NULL;
}
953
954#endif
955
/*
 * Record that @p now runs on @cpu: rewire its group runqueues and
 * update its thread_info cpu.  The wmb orders the set_task_rq() stores
 * before publishing the new cpu — presumably paired with a read
 * barrier in task_rq()/task_cpu() users; confirm at call sites.
 */
static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
{
	set_task_rq(p, cpu);
#ifdef CONFIG_SMP
	smp_wmb();
	task_thread_info(p)->cpu = cpu;
	p->wake_cpu = cpu;
#endif
}
970
971
972
973
974#ifdef CONFIG_SCHED_DEBUG
975# include <linux/static_key.h>
976# define const_debug __read_mostly
977#else
978# define const_debug const
979#endif
980
981extern const_debug unsigned int sysctl_sched_features;
982
983#define SCHED_FEAT(name, enabled) \
984 __SCHED_FEAT_##name ,
985
986enum {
987#include "features.h"
988 __SCHED_FEAT_NR,
989};
990
991#undef SCHED_FEAT
992
993#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
994#define SCHED_FEAT(name, enabled) \
995static __always_inline bool static_branch_##name(struct static_key *key) \
996{ \
997 return static_key_##enabled(key); \
998}
999
1000#include "features.h"
1001
1002#undef SCHED_FEAT
1003
1004extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
1005#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
1006#else
1007#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
1008#endif
1009
1010extern struct static_key_false sched_numa_balancing;
1011
/* System-wide RT period in nanoseconds (sysctl value is in usec). */
static inline u64 global_rt_period(void)
{
	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
}
1016
1017static inline u64 global_rt_runtime(void)
1018{
1019 if (sysctl_sched_rt_runtime < 0)
1020 return RUNTIME_INF;
1021
1022 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
1023}
1024
/* Is @p the task currently scheduled on @rq? */
static inline int task_current(struct rq *rq, struct task_struct *p)
{
	return rq->curr == p;
}

/*
 * Is @p still executing on a CPU?  On SMP this uses ->on_cpu, which
 * can remain set briefly after a context switch begins.
 */
static inline int task_running(struct rq *rq, struct task_struct *p)
{
#ifdef CONFIG_SMP
	return p->on_cpu;
#else
	return task_current(rq, p);
#endif
}

/* Is @p enqueued on a runqueue? */
static inline int task_on_rq_queued(struct task_struct *p)
{
	return p->on_rq == TASK_ON_RQ_QUEUED;
}

/* Is @p mid-migration between runqueues? */
static inline int task_on_rq_migrating(struct task_struct *p)
{
	return p->on_rq == TASK_ON_RQ_MIGRATING;
}
1048
1049#ifndef prepare_arch_switch
1050# define prepare_arch_switch(next) do { } while (0)
1051#endif
1052#ifndef finish_arch_post_lock_switch
1053# define finish_arch_post_lock_switch() do { } while (0)
1054#endif
1055
/* Mark @next as running on this CPU before the context switch. */
static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
{
#ifdef CONFIG_SMP
	next->on_cpu = 1;
#endif
}

/*
 * Finish the rq-lock half of a context switch: clear @prev->on_cpu
 * and release rq->lock.
 */
static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
{
#ifdef CONFIG_SMP
	/*
	 * Release ordering: all of @prev's state updates during the
	 * switch must be visible before ->on_cpu reads 0 — presumably
	 * paired with an acquire in code that spins on ->on_cpu
	 * (e.g. wakeup paths); confirm at those sites.
	 */
	smp_store_release(&prev->on_cpu, 0);
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
	/* the switching task now owns the rq lock */
	rq->lock.owner = current;
#endif
	/*
	 * Re-acquire the lock's dep_map: prepare_lock_switch ran in a
	 * different (the previous) task's context.
	 */
	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);

	raw_spin_unlock_irq(&rq->lock);
}
1096
1097
1098
1099
1100#define WF_SYNC 0x01
1101#define WF_FORK 0x02
1102#define WF_MIGRATED 0x4
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113#define WEIGHT_IDLEPRIO 3
1114#define WMULT_IDLEPRIO 1431655765
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
/*
 * Nice-level -> load-weight mapping, index 0 == nice -20.  Nice 0 maps
 * to 1024 (NICE_0_LOAD >> SCHED_LOAD_RESOLUTION); each step differs by
 * roughly 1.25x, i.e. ~10% CPU share per nice level.
 */
static const int prio_to_weight[40] = {
 /* -20 */     88761,     71755,     56483,     46273,     36291,
 /* -15 */     29154,     23254,     18705,     14949,     11916,
 /* -10 */      9548,      7620,      6100,      4904,      3906,
 /*  -5 */      3121,      2501,      1991,      1586,      1277,
 /*   0 */      1024,       820,       655,       526,       423,
 /*   5 */       335,       272,       215,       172,       137,
 /*  10 */       110,        87,        70,        56,        45,
 /*  15 */        36,        29,        23,        18,        15,
};
1138
1139
1140
1141
1142
1143
1144
1145
/*
 * Precomputed inverse weights: 2^32 / prio_to_weight[i], so divisions
 * by a weight can be done as a multiply plus shift.
 */
static const u32 prio_to_wmult[40] = {
 /* -20 */     48388,     59856,     76040,     92818,    118348,
 /* -15 */    147320,    184698,    229616,    287308,    360437,
 /* -10 */    449829,    563644,    704093,    875809,   1099582,
 /*  -5 */   1376151,   1717300,   2157191,   2708050,   3363326,
 /*   0 */   4194304,   5237765,   6557202,   8165337,  10153587,
 /*   5 */  12820798,  15790321,  19976592,  24970740,  31350126,
 /*  10 */  39045157,  49367440,  61356676,  76695844,  95443717,
 /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
1156
1157#define ENQUEUE_WAKEUP 0x01
1158#define ENQUEUE_HEAD 0x02
1159#ifdef CONFIG_SMP
1160#define ENQUEUE_WAKING 0x04
1161#else
1162#define ENQUEUE_WAKING 0x00
1163#endif
1164#define ENQUEUE_REPLENISH 0x08
1165#define ENQUEUE_RESTORE 0x10
1166
1167#define DEQUEUE_SLEEP 0x01
1168#define DEQUEUE_SAVE 0x02
1169
1170#define RETRY_TASK ((void *)-1UL)
1171
/*
 * Scheduling-class hooks.  Classes are linked by ->next in priority
 * order (stop > dl > rt > fair > idle — see for_each_class()).
 */
struct sched_class {
	const struct sched_class *next;

	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
	void (*yield_task) (struct rq *rq);
	bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);

	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);

	/*
	 * Pick the next task to run; @prev is the outgoing task and
	 * may be requeued/kept by the class.
	 */
	struct task_struct * (*pick_next_task) (struct rq *rq,
						struct task_struct *prev);
	void (*put_prev_task) (struct rq *rq, struct task_struct *p);

#ifdef CONFIG_SMP
	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
	void (*migrate_task_rq)(struct task_struct *p);

	void (*task_waking) (struct task_struct *task);
	void (*task_woken) (struct rq *this_rq, struct task_struct *task);

	void (*set_cpus_allowed)(struct task_struct *p,
				 const struct cpumask *newmask);

	void (*rq_online)(struct rq *rq);
	void (*rq_offline)(struct rq *rq);
#endif

	void (*set_curr_task) (struct rq *rq);
	void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
	void (*task_fork) (struct task_struct *p);
	void (*task_dead) (struct task_struct *p);

	/* notifications for class changes and priority changes of a task */
	void (*switched_from) (struct rq *this_rq, struct task_struct *task);
	void (*switched_to) (struct rq *this_rq, struct task_struct *task);
	void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
			     int oldprio);

	unsigned int (*get_rr_interval) (struct rq *rq,
					 struct task_struct *task);

	void (*update_curr) (struct rq *rq);

#ifdef CONFIG_FAIR_GROUP_SCHED
	void (*task_move_group) (struct task_struct *p);
#endif
};
1232
/* Dispatch to @prev's class hook to put it back on its runqueue. */
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
{
	prev->sched_class->put_prev_task(rq, prev);
}
1237
1238#define sched_class_highest (&stop_sched_class)
1239#define for_each_class(class) \
1240 for (class = sched_class_highest; class; class = class->next)
1241
1242extern const struct sched_class stop_sched_class;
1243extern const struct sched_class dl_sched_class;
1244extern const struct sched_class rt_sched_class;
1245extern const struct sched_class fair_sched_class;
1246extern const struct sched_class idle_sched_class;
1247
1248
1249#ifdef CONFIG_SMP
1250
1251extern void update_group_capacity(struct sched_domain *sd, int cpu);
1252
1253extern void trigger_load_balance(struct rq *rq);
1254
1255extern void idle_enter_fair(struct rq *this_rq);
1256extern void idle_exit_fair(struct rq *this_rq);
1257
1258extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
1259
1260#else
1261
1262static inline void idle_enter_fair(struct rq *rq) { }
1263static inline void idle_exit_fair(struct rq *rq) { }
1264
1265#endif
1266
1267#ifdef CONFIG_CPU_IDLE
/* Record the cpuidle state this rq's CPU is entering. */
static inline void idle_set_state(struct rq *rq,
				  struct cpuidle_state *idle_state)
{
	rq->idle_state = idle_state;
}

/* Current cpuidle state of @rq's CPU; caller must be in an RCU read section. */
static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
	WARN_ON(!rcu_read_lock_held());
	return rq->idle_state;
}
#else
/* !CONFIG_CPU_IDLE stubs */
static inline void idle_set_state(struct rq *rq,
				  struct cpuidle_state *idle_state)
{
}

static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{
	return NULL;
}
1289#endif
1290
1291extern void sysrq_sched_debug_show(void);
1292extern void sched_init_granularity(void);
1293extern void update_max_interval(void);
1294
1295extern void init_sched_dl_class(void);
1296extern void init_sched_rt_class(void);
1297extern void init_sched_fair_class(void);
1298
1299extern void resched_curr(struct rq *rq);
1300extern void resched_cpu(int cpu);
1301
1302extern struct rt_bandwidth def_rt_bandwidth;
1303extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
1304
1305extern struct dl_bandwidth def_dl_bandwidth;
1306extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
1307extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
1308
1309unsigned long to_ratio(u64 period, u64 runtime);
1310
1311extern void init_entity_runnable_average(struct sched_entity *se);
1312
/*
 * Bump @rq's runnable count by @count.  Crossing the 1 -> 2 boundary
 * marks the root domain overloaded (SMP) and kicks a nohz-full CPU so
 * it resumes taking scheduler ticks.
 */
static inline void add_nr_running(struct rq *rq, unsigned count)
{
	unsigned prev_nr = rq->nr_running;

	rq->nr_running = prev_nr + count;

	if (prev_nr < 2 && rq->nr_running >= 2) {
#ifdef CONFIG_SMP
		if (!rq->rd->overload)
			rq->rd->overload = true;
#endif

#ifdef CONFIG_NO_HZ_FULL
		if (tick_nohz_full_cpu(rq->cpu)) {
			/* the CPU was tickless with one task; re-enable ticks */
			tick_nohz_full_kick_cpu(rq->cpu);
		}
#endif
	}
}

/* Drop @rq's runnable count by @count. */
static inline void sub_nr_running(struct rq *rq, unsigned count)
{
	rq->nr_running -= count;
}
1345
/* Note (nohz-full only) that @rq just took a scheduler tick. */
static inline void rq_last_tick_reset(struct rq *rq)
{
#ifdef CONFIG_NO_HZ_FULL
	rq->last_sched_tick = jiffies;
#endif
}
1352
1353extern void update_rq_clock(struct rq *rq);
1354
1355extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
1356extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
1357
1358extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
1359
1360extern const_debug unsigned int sysctl_sched_time_avg;
1361extern const_debug unsigned int sysctl_sched_nr_migrate;
1362extern const_debug unsigned int sysctl_sched_migration_cost;
1363
/* Half the sysctl averaging window, in nanoseconds (sysctl is in ms). */
static inline u64 sched_avg_period(void)
{
	return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
}
1368
1369#ifdef CONFIG_SCHED_HRTICK
1370
1371
1372
1373
1374
1375
1376static inline int hrtick_enabled(struct rq *rq)
1377{
1378 if (!sched_feat(HRTICK))
1379 return 0;
1380 if (!cpu_active(cpu_of(rq)))
1381 return 0;
1382 return hrtimer_is_hres_active(&rq->hrtick_timer);
1383}
1384
1385void hrtick_start(struct rq *rq, u64 delay);
1386
1387#else
1388
/* !CONFIG_SCHED_HRTICK: hrtick never available. */
static inline int hrtick_enabled(struct rq *rq)
{
	return 0;
}
1393
1394#endif
1395
1396#ifdef CONFIG_SMP
1397extern void sched_avg_update(struct rq *rq);
1398
#ifndef arch_scale_freq_capacity
/* Default: no frequency-invariant scaling; full capacity. */
static __always_inline
unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
{
	return SCHED_CAPACITY_SCALE;
}
#endif

#ifndef arch_scale_cpu_capacity
/*
 * Default CPU capacity: SMT siblings split sd->smt_gain evenly;
 * everything else gets full scale.
 */
static __always_inline
unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
{
	if (sd && (sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
		return sd->smt_gain / sd->span_weight;

	return SCHED_CAPACITY_SCALE;
}
#endif

/* Accumulate @rt_delta (freq-scaled) into @rq's RT time average. */
static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
{
	rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
	sched_avg_update(rq);
}
1423#else
1424static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
1425static inline void sched_avg_update(struct rq *rq) { }
1426#endif
1427
1428
1429
1430
/*
 * Lock the runqueue @p is on; caller already holds p->pi_lock.
 * Loops until the rq seen before locking is still @p's rq and @p is
 * not mid-migration, so the returned rq is stable under its lock.
 */
static inline struct rq *__task_rq_lock(struct task_struct *p)
	__acquires(rq->lock)
{
	struct rq *rq;

	lockdep_assert_held(&p->pi_lock);

	for (;;) {
		rq = task_rq(p);
		raw_spin_lock(&rq->lock);
		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
			lockdep_pin_lock(&rq->lock);
			return rq;
		}
		raw_spin_unlock(&rq->lock);

		/* wait out an in-flight migration before retrying */
		while (unlikely(task_on_rq_migrating(p)))
			cpu_relax();
	}
}
1451
1452
1453
1454
/*
 * Lock p->pi_lock (irqs off, saved in *@flags) and then @p's runqueue
 * lock.  Retries until the rq is confirmed to still be @p's rq and no
 * migration is in flight; pi_lock-first ordering means set_task_cpu()
 * style movements are excluded while both locks are held.
 */
static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
	__acquires(p->pi_lock)
	__acquires(rq->lock)
{
	struct rq *rq;

	for (;;) {
		raw_spin_lock_irqsave(&p->pi_lock, *flags);
		rq = task_rq(p);
		raw_spin_lock(&rq->lock);
		/*
		 * Re-check under rq->lock: @p may have been moved to
		 * another rq between the task_rq() read and locking.
		 */
		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
			lockdep_pin_lock(&rq->lock);
			return rq;
		}
		raw_spin_unlock(&rq->lock);
		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);

		/* wait out an in-flight migration before retrying */
		while (unlikely(task_on_rq_migrating(p)))
			cpu_relax();
	}
}
1492
/* Drop the rq lock taken by __task_rq_lock(). */
static inline void __task_rq_unlock(struct rq *rq)
	__releases(rq->lock)
{
	lockdep_unpin_lock(&rq->lock);
	raw_spin_unlock(&rq->lock);
}

/* Drop both locks taken by task_rq_lock(), restoring irq state. */
static inline void
task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
	__releases(rq->lock)
	__releases(p->pi_lock)
{
	lockdep_unpin_lock(&rq->lock);
	raw_spin_unlock(&rq->lock);
	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
}
1509
1510#ifdef CONFIG_SMP
1511#ifdef CONFIG_PREEMPT
1512
1513static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
1514
1515
1516
1517
1518
1519
1520
1521
1522
/*
 * PREEMPT version: unconditionally release this_rq and take both locks
 * in address order via double_rq_lock(), bounding lock hold time.
 * Always returns 1: this_rq's lock was dropped, so callers must
 * revalidate any state read under it.
 */
static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
	__releases(this_rq->lock)
	__acquires(busiest->lock)
	__acquires(this_rq->lock)
{
	raw_spin_unlock(&this_rq->lock);
	double_rq_lock(this_rq, busiest);

	return 1;
}
1533
1534#else
1535
1536
1537
1538
1539
1540
1541
/*
 * Non-PREEMPT version: opportunistically trylock @busiest; only if
 * that fails fall back to address-ordered locking (dropping this_rq's
 * lock when @busiest orders first).  Returns 1 iff this_rq's lock was
 * dropped and re-taken, in which case callers must revalidate state.
 */
static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
	__releases(this_rq->lock)
	__acquires(busiest->lock)
	__acquires(this_rq->lock)
{
	int ret = 0;

	if (unlikely(!raw_spin_trylock(&busiest->lock))) {
		if (busiest < this_rq) {
			/* busiest orders first: drop, lock in order */
			raw_spin_unlock(&this_rq->lock);
			raw_spin_lock(&busiest->lock);
			raw_spin_lock_nested(&this_rq->lock,
					      SINGLE_DEPTH_NESTING);
			ret = 1;
		} else
			raw_spin_lock_nested(&busiest->lock,
					      SINGLE_DEPTH_NESTING);
	}
	return ret;
}
1562
1563#endif
1564
1565
1566
1567
/*
 * double_lock_balance - lock busiest's rq; this_rq->lock is already held.
 *
 * Must be called with IRQs disabled.  Returns 1 if this_rq->lock was
 * dropped (and retaken) in the process, 0 otherwise; when 1 is
 * returned the caller must revalidate any state read under the lock.
 */
static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
{
	if (unlikely(!irqs_disabled())) {
		/* printk() doesn't work well under rq->lock */
		raw_spin_unlock(&this_rq->lock);
		BUG_ON(1);
	}

	return _double_lock_balance(this_rq, busiest);
}
1578
/*
 * Undo double_lock_balance(): release busiest->lock and restore
 * this_rq->lock's lockdep subclass to 0 (it may have been re-acquired
 * with SINGLE_DEPTH_NESTING inside _double_lock_balance()).
 */
static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
	__releases(busiest->lock)
{
	raw_spin_unlock(&busiest->lock);
	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
}
1585
1586static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
1587{
1588 if (l1 > l2)
1589 swap(l1, l2);
1590
1591 spin_lock(l1);
1592 spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
1593}
1594
1595static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
1596{
1597 if (l1 > l2)
1598 swap(l1, l2);
1599
1600 spin_lock_irq(l1);
1601 spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
1602}
1603
1604static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
1605{
1606 if (l1 > l2)
1607 swap(l1, l2);
1608
1609 raw_spin_lock(l1);
1610 raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
1611}
1612
1613
1614
1615
1616
1617
1618
/*
 * double_rq_lock - safely acquire two runqueue locks
 *
 * Note this does not disable interrupts like task_rq_lock,
 * you need to do so manually before calling.
 */
static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
	__acquires(rq1->lock)
	__acquires(rq2->lock)
{
	BUG_ON(!irqs_disabled());
	if (rq1 == rq2) {
		raw_spin_lock(&rq1->lock);
		__acquire(rq2->lock);	/* Fake it out for sparse ;) */
	} else {
		/* Take the locks in address order to avoid ABBA deadlock. */
		if (rq1 < rq2) {
			raw_spin_lock(&rq1->lock);
			raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
		} else {
			raw_spin_lock(&rq2->lock);
			raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
		}
	}
}
1637
1638
1639
1640
1641
1642
1643
/*
 * double_rq_unlock - safely release two runqueue locks
 *
 * Note this does not restore interrupts like task_rq_unlock,
 * you need to do so manually after calling.
 */
static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
	__releases(rq1->lock)
	__releases(rq2->lock)
{
	raw_spin_unlock(&rq1->lock);
	if (rq1 != rq2)
		raw_spin_unlock(&rq2->lock);
	else
		__release(rq2->lock);	/* balance the __acquire() in double_rq_lock() */
}
1654
1655#else
1656
1657
1658
1659
1660
1661
1662
/*
 * double_rq_lock - safely acquire two runqueue locks (UP variant)
 *
 * On !SMP there is only one runqueue, so both arguments must be it.
 * Interrupts must already be disabled by the caller.
 */
static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
	__acquires(rq1->lock)
	__acquires(rq2->lock)
{
	BUG_ON(!irqs_disabled());
	BUG_ON(rq1 != rq2);
	raw_spin_lock(&rq1->lock);
	__acquire(rq2->lock);	/* Fake it out for sparse ;) */
}
1672
1673
1674
1675
1676
1677
1678
/*
 * double_rq_unlock - safely release two runqueue locks (UP variant)
 *
 * Both arguments must be the single runqueue; interrupts are not
 * restored here (mirror of the UP double_rq_lock()).
 */
static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
	__releases(rq1->lock)
	__releases(rq2->lock)
{
	BUG_ON(rq1 != rq2);
	raw_spin_unlock(&rq1->lock);
	__release(rq2->lock);	/* balance the __acquire() in double_rq_lock() */
}
1687
1688#endif
1689
1690extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
1691extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
1692
1693#ifdef CONFIG_SCHED_DEBUG
1694extern void print_cfs_stats(struct seq_file *m, int cpu);
1695extern void print_rt_stats(struct seq_file *m, int cpu);
1696extern void print_dl_stats(struct seq_file *m, int cpu);
1697extern void
1698print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
1699
1700#ifdef CONFIG_NUMA_BALANCING
1701extern void
1702show_numa_stats(struct task_struct *p, struct seq_file *m);
1703extern void
1704print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
1705 unsigned long tpf, unsigned long gsf, unsigned long gpf);
1706#endif
1707#endif
1708
1709extern void init_cfs_rq(struct cfs_rq *cfs_rq);
1710extern void init_rt_rq(struct rt_rq *rt_rq);
1711extern void init_dl_rq(struct dl_rq *dl_rq);
1712
1713extern void cfs_bandwidth_usage_inc(void);
1714extern void cfs_bandwidth_usage_dec(void);
1715
1716#ifdef CONFIG_NO_HZ_COMMON
/* Bit numbers for rq->nohz_flags (accessed via the nohz_flags() macro below). */
enum rq_nohz_flag_bits {
	NOHZ_TICK_STOPPED,
	NOHZ_BALANCE_KICK,
};
1721
1722#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
1723#endif
1724
1725#ifdef CONFIG_IRQ_TIME_ACCOUNTING
1726
1727DECLARE_PER_CPU(u64, cpu_hardirq_time);
1728DECLARE_PER_CPU(u64, cpu_softirq_time);
1729
1730#ifndef CONFIG_64BIT
1731DECLARE_PER_CPU(seqcount_t, irq_time_seq);
1732
/*
 * Enter the write side of the per-CPU irq-time seqcount: bump the
 * sequence (odd while a write is in progress, per seqcount convention)
 * and order the increment before the time-value stores that follow.
 */
static inline void irq_time_write_begin(void)
{
	__this_cpu_inc(irq_time_seq.sequence);
	smp_wmb();
}
1738
/*
 * Leave the write side: order the preceding time-value stores before
 * the sequence increment that makes the count even again, allowing
 * readers to complete.
 */
static inline void irq_time_write_end(void)
{
	smp_wmb();
	__this_cpu_inc(irq_time_seq.sequence);
}
1744
1745static inline u64 irq_time_read(int cpu)
1746{
1747 u64 irq_time;
1748 unsigned seq;
1749
1750 do {
1751 seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
1752 irq_time = per_cpu(cpu_softirq_time, cpu) +
1753 per_cpu(cpu_hardirq_time, cpu);
1754 } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
1755
1756 return irq_time;
1757}
1758#else
/* On 64-bit, u64 per-CPU loads don't tear, so no write-side seqcount is needed. */
static inline void irq_time_write_begin(void)
{
}
1762
/* No-op on 64-bit: see irq_time_write_begin(). */
static inline void irq_time_write_end(void)
{
}
1766
1767static inline u64 irq_time_read(int cpu)
1768{
1769 return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
1770}
1771#endif
1772#endif
1773