/*
 * Scheduler topology setup/handling methods
 */
#include "sched.h"

DEFINE_MUTEX(sched_domains_mutex);

/* Protected by sched_domains_mutex: */
cpumask_var_t sched_domains_tmpmask;
cpumask_var_t sched_domains_tmpmask2;
12
13#ifdef CONFIG_SCHED_DEBUG
14
15static int __init sched_debug_setup(char *str)
16{
17 sched_debug_enabled = true;
18
19 return 0;
20}
21early_param("sched_debug", sched_debug_setup);
22
23static inline bool sched_debug(void)
24{
25 return sched_debug_enabled;
26}
27
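/*
 * Dump one level of @cpu's sched_domain hierarchy and sanity-check it:
 * every group must be non-empty, groups must not repeat CPUs (unless the
 * domain is SD_OVERLAP), and together they must cover exactly the domain's
 * span.  Returns -1 (stop walking the hierarchy) if the domain does not
 * load-balance at all, 0 otherwise.
 */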
28static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
29 struct cpumask *groupmask)
30{
31 struct sched_group *group = sd->groups;
32
33 cpumask_clear(groupmask);
34
35 printk(KERN_DEBUG "%*s domain-%d: ", level, "", level);
36
37 if (!(sd->flags & SD_LOAD_BALANCE)) {
38 printk("does not load-balance\n");
39 if (sd->parent)
40 printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent");
41 return -1;
42 }
43
44 printk(KERN_CONT "span=%*pbl level=%s\n",
45 cpumask_pr_args(sched_domain_span(sd)), sd->name);
46
47 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
48 printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu);
49 }
50 if (group && !cpumask_test_cpu(cpu, sched_group_span(group))) {
51 printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);
52 }
53
54 printk(KERN_DEBUG "%*s groups:", level + 1, "");
55 do {
56 if (!group) {
57 printk("\n");
58 printk(KERN_ERR "ERROR: group is NULL\n");
59 break;
60 }
61
62 if (!cpumask_weight(sched_group_span(group))) {
63 printk(KERN_CONT "\n");
64 printk(KERN_ERR "ERROR: empty group\n");
65 break;
66 }
67
68 if (!(sd->flags & SD_OVERLAP) &&
69 cpumask_intersects(groupmask, sched_group_span(group))) {
70 printk(KERN_CONT "\n");
71 printk(KERN_ERR "ERROR: repeated CPUs\n");
72 break;
73 }
74
75 cpumask_or(groupmask, groupmask, sched_group_span(group));
76
77 printk(KERN_CONT " %d:{ span=%*pbl",
78 group->sgc->id,
79 cpumask_pr_args(sched_group_span(group)));
80
81 if ((sd->flags & SD_OVERLAP) &&
82 !cpumask_equal(group_balance_mask(group), sched_group_span(group))) {
83 printk(KERN_CONT " mask=%*pbl",
84 cpumask_pr_args(group_balance_mask(group)));
85 }
86
87 if (group->sgc->capacity != SCHED_CAPACITY_SCALE)
88 printk(KERN_CONT " cap=%lu", group->sgc->capacity);
89
90 if (group == sd->groups && sd->child &&
91 !cpumask_equal(sched_domain_span(sd->child),
92 sched_group_span(group))) {
93 printk(KERN_ERR "ERROR: domain->groups does not match domain->child\n");
94 }
95
96 printk(KERN_CONT " }");
97
98 group = group->next;
99
100 if (group != sd->groups)
101 printk(KERN_CONT ",");
102
103 } while (group != sd->groups);
104 printk(KERN_CONT "\n");
105
106 if (!cpumask_equal(sched_domain_span(sd), groupmask))
107 printk(KERN_ERR "ERROR: groups don't span domain->span\n");
108
109 if (sd->parent &&
110 !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
111 printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n");
112 return 0;
113}
114
115static void sched_domain_debug(struct sched_domain *sd, int cpu)
116{
117 int level = 0;
118
119 if (!sched_debug_enabled)
120 return;
121
122 if (!sd) {
123 printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
124 return;
125 }
126
127 printk(KERN_DEBUG "CPU%d attaching sched-domain(s):\n", cpu);
128
129 for (;;) {
130 if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))
131 break;
132 level++;
133 sd = sd->parent;
134 if (!sd)
135 break;
136 }
137}
138#else
139
140# define sched_debug_enabled 0
141# define sched_domain_debug(sd, cpu) do { } while (0)
142static inline bool sched_debug(void)
143{
144 return false;
145}
146#endif
147
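/*
 * A sched_domain is "degenerate" when removing it loses nothing: it spans a
 * single CPU, or none of its remaining flags require more than one group or
 * affine wakeups.  Degenerate domains (and parents made redundant by their
 * child, see sd_parent_degenerate()) are collapsed in cpu_attach_domain().
 */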
148static int sd_degenerate(struct sched_domain *sd)
149{
150 if (cpumask_weight(sched_domain_span(sd)) == 1)
151 return 1;
152
153
154 if (sd->flags & (SD_LOAD_BALANCE |
155 SD_BALANCE_NEWIDLE |
156 SD_BALANCE_FORK |
157 SD_BALANCE_EXEC |
158 SD_SHARE_CPUCAPACITY |
159 SD_ASYM_CPUCAPACITY |
160 SD_SHARE_PKG_RESOURCES |
161 SD_SHARE_POWERDOMAIN)) {
162 if (sd->groups != sd->groups->next)
163 return 0;
164 }
165
166
167 if (sd->flags & (SD_WAKE_AFFINE))
168 return 0;
169
170 return 1;
171}
172
173static int
174sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
175{
176 unsigned long cflags = sd->flags, pflags = parent->flags;
177
178 if (sd_degenerate(parent))
179 return 1;
180
181 if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
182 return 0;
183
184
185 if (parent->groups == parent->groups->next) {
186 pflags &= ~(SD_LOAD_BALANCE |
187 SD_BALANCE_NEWIDLE |
188 SD_BALANCE_FORK |
189 SD_BALANCE_EXEC |
190 SD_ASYM_CPUCAPACITY |
191 SD_SHARE_CPUCAPACITY |
192 SD_SHARE_PKG_RESOURCES |
193 SD_PREFER_SIBLING |
194 SD_SHARE_POWERDOMAIN);
195 if (nr_node_ids == 1)
196 pflags &= ~SD_SERIALIZE;
197 }
198 if (~cflags & pflags)
199 return 0;
200
201 return 1;
202}
203
204DEFINE_STATIC_KEY_FALSE(sched_energy_present);
205#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
206unsigned int sysctl_sched_energy_aware = 1;
207DEFINE_MUTEX(sched_energy_mutex);
208bool sched_energy_update;
209
210#ifdef CONFIG_PROC_SYSCTL
211int sched_energy_aware_handler(struct ctl_table *table, int write,
212 void __user *buffer, size_t *lenp, loff_t *ppos)
213{
214 int ret, state;
215
216 if (write && !capable(CAP_SYS_ADMIN))
217 return -EPERM;
218
219 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
220 if (!ret && write) {
221 state = static_branch_unlikely(&sched_energy_present);
222 if (state != sysctl_sched_energy_aware) {
223 mutex_lock(&sched_energy_mutex);
224 sched_energy_update = 1;
225 rebuild_sched_domains();
226 sched_energy_update = 0;
227 mutex_unlock(&sched_energy_mutex);
228 }
229 }
230
231 return ret;
232}
233#endif
234
235static void free_pd(struct perf_domain *pd)
236{
237 struct perf_domain *tmp;
238
239 while (pd) {
240 tmp = pd->next;
241 kfree(pd);
242 pd = tmp;
243 }
244}
245
246static struct perf_domain *find_pd(struct perf_domain *pd, int cpu)
247{
248 while (pd) {
249 if (cpumask_test_cpu(cpu, perf_domain_span(pd)))
250 return pd;
251 pd = pd->next;
252 }
253
254 return NULL;
255}
256
257static struct perf_domain *pd_init(int cpu)
258{
259 struct em_perf_domain *obj = em_cpu_get(cpu);
260 struct perf_domain *pd;
261
262 if (!obj) {
263 if (sched_debug())
264 pr_info("%s: no EM found for CPU%d\n", __func__, cpu);
265 return NULL;
266 }
267
268 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
269 if (!pd)
270 return NULL;
271 pd->em_pd = obj;
272
273 return pd;
274}
275
276static void perf_domain_debug(const struct cpumask *cpu_map,
277 struct perf_domain *pd)
278{
279 if (!sched_debug() || !pd)
280 return;
281
282 printk(KERN_DEBUG "root_domain %*pbl:", cpumask_pr_args(cpu_map));
283
284 while (pd) {
285 printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_cstate=%d }",
286 cpumask_first(perf_domain_span(pd)),
287 cpumask_pr_args(perf_domain_span(pd)),
288 em_pd_nr_cap_states(pd->em_pd));
289 pd = pd->next;
290 }
291
292 printk(KERN_CONT "\n");
293}
294
295static void destroy_perf_domain_rcu(struct rcu_head *rp)
296{
297 struct perf_domain *pd;
298
299 pd = container_of(rp, struct perf_domain, rcu);
300 free_pd(pd);
301}
302
303static void sched_energy_set(bool has_eas)
304{
305 if (!has_eas && static_branch_unlikely(&sched_energy_present)) {
306 if (sched_debug())
307 pr_info("%s: stopping EAS\n", __func__);
308 static_branch_disable_cpuslocked(&sched_energy_present);
309 } else if (has_eas && !static_branch_unlikely(&sched_energy_present)) {
310 if (sched_debug())
311 pr_info("%s: starting EAS\n", __func__);
312 static_branch_enable_cpuslocked(&sched_energy_present);
313 }
314}
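
/*
 * EAS (energy-aware scheduling) is only enabled for a root domain when all
 * of the following hold, as checked by build_perf_domains() below:
 *
 *  1. the sched_energy_aware sysctl is set;
 *  2. the topology is asymmetric in CPU capacity (sd_asym_cpucapacity);
 *  3. every CPU of the root domain is driven by the schedutil governor;
 *  4. an Energy Model is registered for every CPU (em_cpu_get());
 *  5. the Energy Model complexity stays low enough, where the complexity is
 *     estimated as:
 *
 *         C = nr_pd * (nr_cs + nr_cpus)
 *
 *     with nr_pd the number of performance domains, nr_cs the total number
 *     of capacity states and nr_cpus the number of CPUs in the root domain.
 *     For example, 4 performance domains of 8 CPUs with 16 capacity states
 *     each give C = 4 * (64 + 32) = 384, well below EM_MAX_COMPLEXITY.
 */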
339#define EM_MAX_COMPLEXITY 2048
340
341extern struct cpufreq_governor schedutil_gov;
342static bool build_perf_domains(const struct cpumask *cpu_map)
343{
344 int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map);
345 struct perf_domain *pd = NULL, *tmp;
346 int cpu = cpumask_first(cpu_map);
347 struct root_domain *rd = cpu_rq(cpu)->rd;
348 struct cpufreq_policy *policy;
349 struct cpufreq_governor *gov;
350
351 if (!sysctl_sched_energy_aware)
352 goto free;
353
354
355 if (!per_cpu(sd_asym_cpucapacity, cpu)) {
356 if (sched_debug()) {
357 pr_info("rd %*pbl: CPUs do not have asymmetric capacities\n",
358 cpumask_pr_args(cpu_map));
359 }
360 goto free;
361 }
362
363 for_each_cpu(i, cpu_map) {
364
365 if (find_pd(pd, i))
366 continue;
367
368
369 policy = cpufreq_cpu_get(i);
370 if (!policy)
371 goto free;
372 gov = policy->governor;
373 cpufreq_cpu_put(policy);
374 if (gov != &schedutil_gov) {
375 if (rd->pd)
376 pr_warn("rd %*pbl: Disabling EAS, schedutil is mandatory\n",
377 cpumask_pr_args(cpu_map));
378 goto free;
379 }
380
381
382 tmp = pd_init(i);
383 if (!tmp)
384 goto free;
385 tmp->next = pd;
386 pd = tmp;
387
388
389
390
391
392 nr_pd++;
393 nr_cs += em_pd_nr_cap_states(pd->em_pd);
394 }
395
396
397 if (nr_pd * (nr_cs + nr_cpus) > EM_MAX_COMPLEXITY) {
398 WARN(1, "rd %*pbl: Failed to start EAS, EM complexity is too high\n",
399 cpumask_pr_args(cpu_map));
400 goto free;
401 }
402
403 perf_domain_debug(cpu_map, pd);
404
405
406 tmp = rd->pd;
407 rcu_assign_pointer(rd->pd, pd);
408 if (tmp)
409 call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
410
411 return !!pd;
412
413free:
414 free_pd(pd);
415 tmp = rd->pd;
416 rcu_assign_pointer(rd->pd, NULL);
417 if (tmp)
418 call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
419
420 return false;
421}
422#else
423static void free_pd(struct perf_domain *pd) { }
424#endif
425
426static void free_rootdomain(struct rcu_head *rcu)
427{
428 struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
429
430 cpupri_cleanup(&rd->cpupri);
431 cpudl_cleanup(&rd->cpudl);
432 free_cpumask_var(rd->dlo_mask);
433 free_cpumask_var(rd->rto_mask);
434 free_cpumask_var(rd->online);
435 free_cpumask_var(rd->span);
436 free_pd(rd->pd);
437 kfree(rd);
438}
439
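/*
 * Attach @rq to the root domain @rd: take the rq off its old root domain
 * (marking it offline there and dropping a reference), then add it to the
 * new root domain's span and mark it online if the CPU is active.  The old
 * root domain is freed via RCU once its last reference is dropped.
 */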
440void rq_attach_root(struct rq *rq, struct root_domain *rd)
441{
442 struct root_domain *old_rd = NULL;
443 unsigned long flags;
444
445 raw_spin_lock_irqsave(&rq->lock, flags);
446
447 if (rq->rd) {
448 old_rd = rq->rd;
449
450 if (cpumask_test_cpu(rq->cpu, old_rd->online))
451 set_rq_offline(rq);
452
453 cpumask_clear_cpu(rq->cpu, old_rd->span);
454
455
456
457
458
459
460 if (!atomic_dec_and_test(&old_rd->refcount))
461 old_rd = NULL;
462 }
463
464 atomic_inc(&rd->refcount);
465 rq->rd = rd;
466
467 cpumask_set_cpu(rq->cpu, rd->span);
468 if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
469 set_rq_online(rq);
470
471 raw_spin_unlock_irqrestore(&rq->lock, flags);
472
473 if (old_rd)
474 call_rcu(&old_rd->rcu, free_rootdomain);
475}
476
477void sched_get_rd(struct root_domain *rd)
478{
479 atomic_inc(&rd->refcount);
480}
481
482void sched_put_rd(struct root_domain *rd)
483{
484 if (!atomic_dec_and_test(&rd->refcount))
485 return;
486
487 call_rcu(&rd->rcu, free_rootdomain);
488}
489
490static int init_rootdomain(struct root_domain *rd)
491{
492 if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))
493 goto out;
494 if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL))
495 goto free_span;
496 if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
497 goto free_online;
498 if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
499 goto free_dlo_mask;
500
501#ifdef HAVE_RT_PUSH_IPI
502 rd->rto_cpu = -1;
503 raw_spin_lock_init(&rd->rto_lock);
504 init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
505#endif
506
507 init_dl_bw(&rd->dl_bw);
508 if (cpudl_init(&rd->cpudl) != 0)
509 goto free_rto_mask;
510
511 if (cpupri_init(&rd->cpupri) != 0)
512 goto free_cpudl;
513 return 0;
514
515free_cpudl:
516 cpudl_cleanup(&rd->cpudl);
517free_rto_mask:
518 free_cpumask_var(rd->rto_mask);
519free_dlo_mask:
520 free_cpumask_var(rd->dlo_mask);
521free_online:
522 free_cpumask_var(rd->online);
523free_span:
524 free_cpumask_var(rd->span);
525out:
526 return -ENOMEM;
527}

/*
 * By default the system creates a single root-domain with all CPUs as
 * members (mimicking the global state we have today).
 */
533struct root_domain def_root_domain;
534
535void init_defrootdomain(void)
536{
537 init_rootdomain(&def_root_domain);
538
539 atomic_set(&def_root_domain.refcount, 1);
540}
541
542static struct root_domain *alloc_rootdomain(void)
543{
544 struct root_domain *rd;
545
546 rd = kzalloc(sizeof(*rd), GFP_KERNEL);
547 if (!rd)
548 return NULL;
549
550 if (init_rootdomain(rd) != 0) {
551 kfree(rd);
552 return NULL;
553 }
554
555 return rd;
556}
557
558static void free_sched_groups(struct sched_group *sg, int free_sgc)
559{
560 struct sched_group *tmp, *first;
561
562 if (!sg)
563 return;
564
565 first = sg;
566 do {
567 tmp = sg->next;
568
569 if (free_sgc && atomic_dec_and_test(&sg->sgc->ref))
570 kfree(sg->sgc);
571
572 if (atomic_dec_and_test(&sg->ref))
573 kfree(sg);
574 sg = tmp;
575 } while (sg != first);
576}
577
578static void destroy_sched_domain(struct sched_domain *sd)
579{
580
581
582
583
584
585 free_sched_groups(sd->groups, 1);
586
587 if (sd->shared && atomic_dec_and_test(&sd->shared->ref))
588 kfree(sd->shared);
589 kfree(sd);
590}
591
592static void destroy_sched_domains_rcu(struct rcu_head *rcu)
593{
594 struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
595
596 while (sd) {
597 struct sched_domain *parent = sd->parent;
598 destroy_sched_domain(sd);
599 sd = parent;
600 }
601}
602
603static void destroy_sched_domains(struct sched_domain *sd)
604{
605 if (sd)
606 call_rcu(&sd->rcu, destroy_sched_domains_rcu);
607}
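
/*
 * Per-CPU shortcut pointers, updated by update_top_cache_domain():
 *
 *   sd_llc / sd_llc_size / sd_llc_id / sd_llc_shared
 *                         - highest domain sharing the last-level cache
 *   sd_numa               - lowest NUMA domain
 *   sd_asym_packing       - highest domain with SD_ASYM_PACKING set
 *   sd_asym_cpucapacity   - lowest domain spanning CPUs of different capacity
 *
 * Readers dereference these under RCU.
 */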
618DEFINE_PER_CPU(struct sched_domain *, sd_llc);
619DEFINE_PER_CPU(int, sd_llc_size);
620DEFINE_PER_CPU(int, sd_llc_id);
621DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
622DEFINE_PER_CPU(struct sched_domain *, sd_numa);
623DEFINE_PER_CPU(struct sched_domain *, sd_asym_packing);
624DEFINE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity);
625DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
626
627static void update_top_cache_domain(int cpu)
628{
629 struct sched_domain_shared *sds = NULL;
630 struct sched_domain *sd;
631 int id = cpu;
632 int size = 1;
633
634 sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
635 if (sd) {
636 id = cpumask_first(sched_domain_span(sd));
637 size = cpumask_weight(sched_domain_span(sd));
638 sds = sd->shared;
639 }
640
641 rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
642 per_cpu(sd_llc_size, cpu) = size;
643 per_cpu(sd_llc_id, cpu) = id;
644 rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
645
646 sd = lowest_flag_domain(cpu, SD_NUMA);
647 rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
648
649 sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
650 rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);
651
652 sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY);
653 rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
654}

/*
 * Attach the domain 'sd' to 'cpu' as its base domain.  Callers must hold
 * the hotplug lock; degenerate domains are collapsed out first.
 */
660static void
661cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
662{
663 struct rq *rq = cpu_rq(cpu);
664 struct sched_domain *tmp;
665
666
667 for (tmp = sd; tmp; ) {
668 struct sched_domain *parent = tmp->parent;
669 if (!parent)
670 break;
671
672 if (sd_parent_degenerate(tmp, parent)) {
673 tmp->parent = parent->parent;
674 if (parent->parent)
675 parent->parent->child = tmp;
676
677
678
679
680
681 if (parent->flags & SD_PREFER_SIBLING)
682 tmp->flags |= SD_PREFER_SIBLING;
683 destroy_sched_domain(parent);
684 } else
685 tmp = tmp->parent;
686 }
687
688 if (sd && sd_degenerate(sd)) {
689 tmp = sd;
690 sd = sd->parent;
691 destroy_sched_domain(tmp);
692 if (sd)
693 sd->child = NULL;
694 }
695
696 sched_domain_debug(sd, cpu);
697
698 rq_attach_root(rq, rd);
699 tmp = rq->sd;
700 rcu_assign_pointer(rq->sd, sd);
701 dirty_sched_domain_sysctl(cpu);
702 destroy_sched_domains(tmp);
703
704 update_top_cache_domain(cpu);
705}
706
707struct s_data {
708 struct sched_domain * __percpu *sd;
709 struct root_domain *rd;
710};
711
712enum s_alloc {
713 sa_rootdomain,
714 sa_sd,
715 sa_sd_storage,
716 sa_none,
717};
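
/*
 * Return the canonical balance CPU for this group: the first CPU of the
 * group that is also set in its balance mask.  Only that CPU runs load
 * balancing on behalf of the group; see build_balance_mask().
 */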
int group_balance_cpu(struct sched_group *sg)
{
	return cpumask_first(group_balance_mask(sg));
}
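
/*
 * For SD_OVERLAP (NUMA) domains, each CPU builds its groups from its own
 * view of the topology, so groups owned by different CPUs can span the same
 * set of CPUs.  To keep several CPUs from balancing the same group, every
 * group carries a balance mask: the subset of its span whose local view of
 * the topology (their child domain's span) matches the group exactly.
 * build_balance_mask() computes that subset and group_balance_cpu() picks
 * its first CPU as the one responsible for balancing the group.
 */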
839static void
840build_balance_mask(struct sched_domain *sd, struct sched_group *sg, struct cpumask *mask)
841{
842 const struct cpumask *sg_span = sched_group_span(sg);
843 struct sd_data *sdd = sd->private;
844 struct sched_domain *sibling;
845 int i;
846
847 cpumask_clear(mask);
848
849 for_each_cpu(i, sg_span) {
850 sibling = *per_cpu_ptr(sdd->sd, i);
851
852
853
854
855
856
857 if (!sibling->child)
858 continue;
859
860
861 if (!cpumask_equal(sg_span, sched_domain_span(sibling->child)))
862 continue;
863
864 cpumask_set_cpu(i, mask);
865 }
866
867
868 WARN_ON_ONCE(cpumask_empty(mask));
869}
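
/*
 * Allocate a sched_group for @sd spanning @sd's child domain if it has one
 * (the usual case for overlapping NUMA levels), or @sd itself otherwise.
 * The group is allocated on @cpu's node and starts with one reference.
 */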
876static struct sched_group *
877build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
878{
879 struct sched_group *sg;
880 struct cpumask *sg_span;
881
882 sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
883 GFP_KERNEL, cpu_to_node(cpu));
884
885 if (!sg)
886 return NULL;
887
888 sg_span = sched_group_span(sg);
889 if (sd->child)
890 cpumask_copy(sg_span, sched_domain_span(sd->child));
891 else
892 cpumask_copy(sg_span, sched_domain_span(sd));
893
894 atomic_inc(&sg->ref);
895 return sg;
896}
897
898static void init_overlap_sched_group(struct sched_domain *sd,
899 struct sched_group *sg)
900{
901 struct cpumask *mask = sched_domains_tmpmask2;
902 struct sd_data *sdd = sd->private;
903 struct cpumask *sg_span;
904 int cpu;
905
906 build_balance_mask(sd, sg, mask);
907 cpu = cpumask_first_and(sched_group_span(sg), mask);
908
909 sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
910 if (atomic_inc_return(&sg->sgc->ref) == 1)
911 cpumask_copy(group_balance_mask(sg), mask);
912 else
913 WARN_ON_ONCE(!cpumask_equal(group_balance_mask(sg), mask));
914
	/*
	 * Start from a nominal capacity of SCHED_CAPACITY_SCALE per CPU;
	 * update_group_capacity() recomputes the real values later.
	 */
920 sg_span = sched_group_span(sg);
921 sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
922 sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
923 sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
924}
925
926static int
927build_overlap_sched_groups(struct sched_domain *sd, int cpu)
928{
929 struct sched_group *first = NULL, *last = NULL, *sg;
930 const struct cpumask *span = sched_domain_span(sd);
931 struct cpumask *covered = sched_domains_tmpmask;
932 struct sd_data *sdd = sd->private;
933 struct sched_domain *sibling;
934 int i;
935
936 cpumask_clear(covered);
937
938 for_each_cpu_wrap(i, span, cpu) {
939 struct cpumask *sg_span;
940
941 if (cpumask_test_cpu(i, covered))
942 continue;
943
944 sibling = *per_cpu_ptr(sdd->sd, i);
945
946
947
948
949
950
951
952
953
954
955
956 if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
957 continue;
958
959 sg = build_group_from_child_sched_domain(sibling, cpu);
960 if (!sg)
961 goto fail;
962
963 sg_span = sched_group_span(sg);
964 cpumask_or(covered, covered, sg_span);
965
966 init_overlap_sched_group(sd, sg);
967
968 if (!first)
969 first = sg;
970 if (last)
971 last->next = sg;
972 last = sg;
973 last->next = first;
974 }
975 sd->groups = first;
976
977 return 0;
978
979fail:
980 free_sched_groups(first, 0);
981
982 return -ENOMEM;
983}
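
/*
 * Return the pre-allocated, per-CPU sched_group for the non-overlapping
 * case.  The group representing a child domain is shared by all CPUs of
 * that child, so always take the one belonging to the first CPU of the
 * child's span, bump its reference counts and (re)compute its span,
 * balance mask and nominal capacity.
 */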
1057static struct sched_group *get_group(int cpu, struct sd_data *sdd)
1058{
1059 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
1060 struct sched_domain *child = sd->child;
1061 struct sched_group *sg;
1062
1063 if (child)
1064 cpu = cpumask_first(sched_domain_span(child));
1065
1066 sg = *per_cpu_ptr(sdd->sg, cpu);
1067 sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
1068
1069
1070 atomic_inc(&sg->ref);
1071 atomic_inc(&sg->sgc->ref);
1072
1073 if (child) {
1074 cpumask_copy(sched_group_span(sg), sched_domain_span(child));
1075 cpumask_copy(group_balance_mask(sg), sched_group_span(sg));
1076 } else {
1077 cpumask_set_cpu(cpu, sched_group_span(sg));
1078 cpumask_set_cpu(cpu, group_balance_mask(sg));
1079 }
1080
1081 sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
1082 sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
1083 sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
1084
1085 return sg;
1086}
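
/*
 * Build the circular, non-overlapping group list for @sd, starting the walk
 * at @cpu so that the local group comes first.  Each group covers the span
 * of one child domain, or a single CPU at the lowest level.
 */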
1095static int
1096build_sched_groups(struct sched_domain *sd, int cpu)
1097{
1098 struct sched_group *first = NULL, *last = NULL;
1099 struct sd_data *sdd = sd->private;
1100 const struct cpumask *span = sched_domain_span(sd);
1101 struct cpumask *covered;
1102 int i;
1103
1104 lockdep_assert_held(&sched_domains_mutex);
1105 covered = sched_domains_tmpmask;
1106
1107 cpumask_clear(covered);
1108
1109 for_each_cpu_wrap(i, span, cpu) {
1110 struct sched_group *sg;
1111
1112 if (cpumask_test_cpu(i, covered))
1113 continue;
1114
1115 sg = get_group(i, sdd);
1116
1117 cpumask_or(covered, covered, sched_group_span(sg));
1118
1119 if (!first)
1120 first = sg;
1121 if (last)
1122 last->next = sg;
1123 last = sg;
1124 }
1125 last->next = first;
1126 sd->groups = first;
1127
1128 return 0;
1129}
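
/*
 * Initialize each group's weight and, for SD_ASYM_PACKING domains, its
 * preferred (highest-priority) CPU.  Only the domain's balance CPU then
 * goes on to compute the real group capacities via update_group_capacity().
 */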
1141static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
1142{
1143 struct sched_group *sg = sd->groups;
1144
1145 WARN_ON(!sg);
1146
1147 do {
1148 int cpu, max_cpu = -1;
1149
1150 sg->group_weight = cpumask_weight(sched_group_span(sg));
1151
1152 if (!(sd->flags & SD_ASYM_PACKING))
1153 goto next;
1154
1155 for_each_cpu(cpu, sched_group_span(sg)) {
1156 if (max_cpu < 0)
1157 max_cpu = cpu;
1158 else if (sched_asym_prefer(cpu, max_cpu))
1159 max_cpu = cpu;
1160 }
1161 sg->asym_prefer_cpu = max_cpu;
1162
1163next:
1164 sg = sg->next;
1165 } while (sg != sd->groups);
1166
1167 if (cpu != group_balance_cpu(sg))
1168 return;
1169
1170 update_group_capacity(sd, cpu);
1171}
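
/*
 * The "relax_domain_level=" boot parameter caps how high up the hierarchy
 * newidle and wake balancing are attempted: set_domain_attribute() clears
 * SD_BALANCE_NEWIDLE and SD_BALANCE_WAKE on every level above the request.
 */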
1178static int default_relax_domain_level = -1;
1179int sched_domain_level_max;
1180
1181static int __init setup_relax_domain_level(char *str)
1182{
1183 if (kstrtoint(str, 0, &default_relax_domain_level))
1184 pr_warn("Unable to set relax_domain_level\n");
1185
1186 return 1;
1187}
1188__setup("relax_domain_level=", setup_relax_domain_level);
1189
1190static void set_domain_attribute(struct sched_domain *sd,
1191 struct sched_domain_attr *attr)
1192{
1193 int request;
1194
1195 if (!attr || attr->relax_domain_level < 0) {
1196 if (default_relax_domain_level < 0)
1197 return;
1198 request = default_relax_domain_level;
1199 } else
1200 request = attr->relax_domain_level;
1201
1202 if (sd->level > request) {
1203
1204 sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
1205 }
1206}
1207
1208static void __sdt_free(const struct cpumask *cpu_map);
1209static int __sdt_alloc(const struct cpumask *cpu_map);
1210
1211static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
1212 const struct cpumask *cpu_map)
1213{
1214 switch (what) {
1215 case sa_rootdomain:
1216 if (!atomic_read(&d->rd->refcount))
1217 free_rootdomain(&d->rd->rcu);
1218
1219 case sa_sd:
1220 free_percpu(d->sd);
1221
1222 case sa_sd_storage:
1223 __sdt_free(cpu_map);
1224
1225 case sa_none:
1226 break;
1227 }
1228}
1229
1230static enum s_alloc
1231__visit_domain_allocation_hell(struct s_data *d, const struct cpumask *cpu_map)
1232{
1233 memset(d, 0, sizeof(*d));
1234
1235 if (__sdt_alloc(cpu_map))
1236 return sa_sd_storage;
1237 d->sd = alloc_percpu(struct sched_domain *);
1238 if (!d->sd)
1239 return sa_sd_storage;
1240 d->rd = alloc_rootdomain();
1241 if (!d->rd)
1242 return sa_sd;
1243
1244 return sa_rootdomain;
1245}

/*
 * NULL the sd_data elements we've used to build the sched_domain and
 * sched_group structures, so that the later __sdt_free() does not free
 * memory that is still referenced by the new domains.
 */
1252static void claim_allocations(int cpu, struct sched_domain *sd)
1253{
1254 struct sd_data *sdd = sd->private;
1255
1256 WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
1257 *per_cpu_ptr(sdd->sd, cpu) = NULL;
1258
1259 if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))
1260 *per_cpu_ptr(sdd->sds, cpu) = NULL;
1261
1262 if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
1263 *per_cpu_ptr(sdd->sg, cpu) = NULL;
1264
1265 if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref))
1266 *per_cpu_ptr(sdd->sgc, cpu) = NULL;
1267}
1268
1269#ifdef CONFIG_NUMA
1270enum numa_topology_type sched_numa_topology_type;
1271
1272static int sched_domains_numa_levels;
1273static int sched_domains_curr_level;
1274
1275int sched_max_numa_distance;
1276static int *sched_domains_numa_distance;
1277static struct cpumask ***sched_domains_numa_masks;
1278int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
1279#endif
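
/*
 * A topology level's ->sd_flags() callback may only return flags from
 * TOPOLOGY_SD_FLAGS below; sd_init() warns if anything else is passed in.
 * The behavioural flags (balancing, serialization, ...) are derived from
 * these purely descriptive topology flags inside sd_init().
 */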
1298#define TOPOLOGY_SD_FLAGS \
1299 (SD_SHARE_CPUCAPACITY | \
1300 SD_SHARE_PKG_RESOURCES | \
1301 SD_NUMA | \
1302 SD_ASYM_PACKING | \
1303 SD_SHARE_POWERDOMAIN)
1304
1305static struct sched_domain *
1306sd_init(struct sched_domain_topology_level *tl,
1307 const struct cpumask *cpu_map,
1308 struct sched_domain *child, int dflags, int cpu)
1309{
1310 struct sd_data *sdd = &tl->data;
1311 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
1312 int sd_id, sd_weight, sd_flags = 0;
1313
1314#ifdef CONFIG_NUMA
1315
1316
1317
1318 sched_domains_curr_level = tl->numa_level;
1319#endif
1320
1321 sd_weight = cpumask_weight(tl->mask(cpu));
1322
1323 if (tl->sd_flags)
1324 sd_flags = (*tl->sd_flags)();
1325 if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
1326 "wrong sd_flags in topology description\n"))
1327 sd_flags &= ~TOPOLOGY_SD_FLAGS;
1328
1329
1330 sd_flags |= dflags;
1331
1332 *sd = (struct sched_domain){
1333 .min_interval = sd_weight,
1334 .max_interval = 2*sd_weight,
1335 .busy_factor = 32,
1336 .imbalance_pct = 125,
1337
1338 .cache_nice_tries = 0,
1339
1340 .flags = 1*SD_LOAD_BALANCE
1341 | 1*SD_BALANCE_NEWIDLE
1342 | 1*SD_BALANCE_EXEC
1343 | 1*SD_BALANCE_FORK
1344 | 0*SD_BALANCE_WAKE
1345 | 1*SD_WAKE_AFFINE
1346 | 0*SD_SHARE_CPUCAPACITY
1347 | 0*SD_SHARE_PKG_RESOURCES
1348 | 0*SD_SERIALIZE
1349 | 1*SD_PREFER_SIBLING
1350 | 0*SD_NUMA
1351 | sd_flags
1352 ,
1353
1354 .last_balance = jiffies,
1355 .balance_interval = sd_weight,
1356 .max_newidle_lb_cost = 0,
1357 .next_decay_max_lb_cost = jiffies,
1358 .child = child,
1359#ifdef CONFIG_SCHED_DEBUG
1360 .name = tl->name,
1361#endif
1362 };
1363
1364 cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
1365 sd_id = cpumask_first(sched_domain_span(sd));
1366
1367
1368
1369
1370
1371 if (sd->flags & SD_ASYM_CPUCAPACITY) {
1372 struct sched_domain *t = sd;
1373
1374
1375
1376
1377 if (sd->child)
1378 sd->child->flags &= ~SD_PREFER_SIBLING;
1379
1380 for_each_lower_domain(t)
1381 t->flags |= SD_BALANCE_WAKE;
1382 }
1383
1384 if (sd->flags & SD_SHARE_CPUCAPACITY) {
1385 sd->imbalance_pct = 110;
1386
1387 } else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
1388 sd->imbalance_pct = 117;
1389 sd->cache_nice_tries = 1;
1390
1391#ifdef CONFIG_NUMA
1392 } else if (sd->flags & SD_NUMA) {
1393 sd->cache_nice_tries = 2;
1394
1395 sd->flags &= ~SD_PREFER_SIBLING;
1396 sd->flags |= SD_SERIALIZE;
1397 if (sched_domains_numa_distance[tl->numa_level] > node_reclaim_distance) {
1398 sd->flags &= ~(SD_BALANCE_EXEC |
1399 SD_BALANCE_FORK |
1400 SD_WAKE_AFFINE);
1401 }
1402
1403#endif
1404 } else {
1405 sd->cache_nice_tries = 1;
1406 }
1407
1408
1409
1410
1411
1412 if (sd->flags & SD_SHARE_PKG_RESOURCES) {
1413 sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
1414 atomic_inc(&sd->shared->ref);
1415 atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
1416 }
1417
1418 sd->private = sdd;
1419
1420 return sd;
1421}
1422
1423
1424
1425
1426static struct sched_domain_topology_level default_topology[] = {
1427#ifdef CONFIG_SCHED_SMT
1428 { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
1429#endif
1430#ifdef CONFIG_SCHED_MC
1431 { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
1432#endif
1433 { cpu_cpu_mask, SD_INIT_NAME(DIE) },
1434 { NULL, },
1435};
1436
1437static struct sched_domain_topology_level *sched_domain_topology =
1438 default_topology;
1439
1440#define for_each_sd_topology(tl) \
1441 for (tl = sched_domain_topology; tl->mask; tl++)
1442
1443void set_sched_topology(struct sched_domain_topology_level *tl)
1444{
1445 if (WARN_ON_ONCE(sched_smp_initialized))
1446 return;
1447
1448 sched_domain_topology = tl;
1449}
1450
1451#ifdef CONFIG_NUMA
1452
1453static const struct cpumask *sd_numa_mask(int cpu)
1454{
1455 return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
1456}
1457
1458static void sched_numa_warn(const char *str)
1459{
1460 static int done = false;
1461 int i,j;
1462
1463 if (done)
1464 return;
1465
1466 done = true;
1467
1468 printk(KERN_WARNING "ERROR: %s\n\n", str);
1469
1470 for (i = 0; i < nr_node_ids; i++) {
1471 printk(KERN_WARNING " ");
1472 for (j = 0; j < nr_node_ids; j++)
1473 printk(KERN_CONT "%02d ", node_distance(i,j));
1474 printk(KERN_CONT "\n");
1475 }
1476 printk(KERN_WARNING "\n");
1477}
1478
1479bool find_numa_distance(int distance)
1480{
1481 int i;
1482
1483 if (distance == node_distance(0, 0))
1484 return true;
1485
1486 for (i = 0; i < sched_domains_numa_levels; i++) {
1487 if (sched_domains_numa_distance[i] == distance)
1488 return true;
1489 }
1490
1491 return false;
1492}
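
/*
 * Classify the machine's NUMA topology, based on node_distance() relative
 * to the maximum distance (sched_max_numa_distance):
 *
 *   NUMA_DIRECT        - all nodes are directly connected (at most two
 *                        distance levels);
 *   NUMA_GLUELESS_MESH - the most distant nodes are still reachable through
 *                        an intermediate node;
 *   NUMA_BACKPLANE     - the largest distances can only be covered through
 *                        a backplane-style interconnect.
 */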
1513static void init_numa_topology_type(void)
1514{
1515 int a, b, c, n;
1516
1517 n = sched_max_numa_distance;
1518
1519 if (sched_domains_numa_levels <= 2) {
1520 sched_numa_topology_type = NUMA_DIRECT;
1521 return;
1522 }
1523
1524 for_each_online_node(a) {
1525 for_each_online_node(b) {
1526
1527 if (node_distance(a, b) < n)
1528 continue;
1529
1530
1531 for_each_online_node(c) {
1532 if (node_distance(a, c) < n &&
1533 node_distance(b, c) < n) {
1534 sched_numa_topology_type =
1535 NUMA_GLUELESS_MESH;
1536 return;
1537 }
1538 }
1539
1540 sched_numa_topology_type = NUMA_BACKPLANE;
1541 return;
1542 }
1543 }
1544}
1545
1546void sched_init_numa(void)
1547{
1548 int next_distance, curr_distance = node_distance(0, 0);
1549 struct sched_domain_topology_level *tl;
1550 int level = 0;
1551 int i, j, k;
1552
1553 sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL);
1554 if (!sched_domains_numa_distance)
1555 return;
1556
1557
1558 sched_domains_numa_distance[level++] = curr_distance;
1559 sched_domains_numa_levels = level;
1560
1561
1562
1563
1564
1565
1566
1567
1568 next_distance = curr_distance;
1569 for (i = 0; i < nr_node_ids; i++) {
1570 for (j = 0; j < nr_node_ids; j++) {
1571 for (k = 0; k < nr_node_ids; k++) {
1572 int distance = node_distance(i, k);
1573
1574 if (distance > curr_distance &&
1575 (distance < next_distance ||
1576 next_distance == curr_distance))
1577 next_distance = distance;
1578
1579
1580
1581
1582
1583
1584 if (sched_debug() && node_distance(k, i) != distance)
1585 sched_numa_warn("Node-distance not symmetric");
1586
1587 if (sched_debug() && i && !find_numa_distance(distance))
1588 sched_numa_warn("Node-0 not representative");
1589 }
1590 if (next_distance != curr_distance) {
1591 sched_domains_numa_distance[level++] = next_distance;
1592 sched_domains_numa_levels = level;
1593 curr_distance = next_distance;
1594 } else break;
1595 }
1596
1597
1598
1599
1600 if (!sched_debug())
1601 break;
1602 }
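
	/*
	 * 'level' now holds the number of unique node distances found above,
	 * with the distances themselves in sched_domains_numa_distance[].
	 * Keep sched_domains_numa_levels at 0 until the masks below are fully
	 * allocated, so nothing iterates a partially initialized array; it is
	 * set back to 'level' at the end of this function.
	 *
	 * For each level i and node j, build the mask of CPUs belonging to
	 * nodes within sched_domains_numa_distance[i] of node j.  These masks
	 * back the NUMA levels appended to the topology table further down.
	 */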
1620 sched_domains_numa_levels = 0;
1621
1622 sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
1623 if (!sched_domains_numa_masks)
1624 return;
1625
1626
1627
1628
1629
1630 for (i = 0; i < level; i++) {
1631 sched_domains_numa_masks[i] =
1632 kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
1633 if (!sched_domains_numa_masks[i])
1634 return;
1635
1636 for (j = 0; j < nr_node_ids; j++) {
1637 struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
1638 if (!mask)
1639 return;
1640
1641 sched_domains_numa_masks[i][j] = mask;
1642
1643 for_each_node(k) {
1644 if (node_distance(j, k) > sched_domains_numa_distance[i])
1645 continue;
1646
1647 cpumask_or(mask, mask, cpumask_of_node(k));
1648 }
1649 }
1650 }
1651
1652
1653 for (i = 0; sched_domain_topology[i].mask; i++);
1654
1655 tl = kzalloc((i + level + 1) *
1656 sizeof(struct sched_domain_topology_level), GFP_KERNEL);
1657 if (!tl)
1658 return;
1659
1660
1661
1662
1663 for (i = 0; sched_domain_topology[i].mask; i++)
1664 tl[i] = sched_domain_topology[i];
1665
1666
1667
1668
1669 tl[i++] = (struct sched_domain_topology_level){
1670 .mask = sd_numa_mask,
1671 .numa_level = 0,
1672 SD_INIT_NAME(NODE)
1673 };
1674
1675
1676
1677
1678 for (j = 1; j < level; i++, j++) {
1679 tl[i] = (struct sched_domain_topology_level){
1680 .mask = sd_numa_mask,
1681 .sd_flags = cpu_numa_flags,
1682 .flags = SDTL_OVERLAP,
1683 .numa_level = j,
1684 SD_INIT_NAME(NUMA)
1685 };
1686 }
1687
1688 sched_domain_topology = tl;
1689
1690 sched_domains_numa_levels = level;
1691 sched_max_numa_distance = sched_domains_numa_distance[level - 1];
1692
1693 init_numa_topology_type();
1694}
1695
1696void sched_domains_numa_masks_set(unsigned int cpu)
1697{
1698 int node = cpu_to_node(cpu);
1699 int i, j;
1700
1701 for (i = 0; i < sched_domains_numa_levels; i++) {
1702 for (j = 0; j < nr_node_ids; j++) {
1703 if (node_distance(j, node) <= sched_domains_numa_distance[i])
1704 cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
1705 }
1706 }
1707}
1708
1709void sched_domains_numa_masks_clear(unsigned int cpu)
1710{
1711 int i, j;
1712
1713 for (i = 0; i < sched_domains_numa_levels; i++) {
1714 for (j = 0; j < nr_node_ids; j++)
1715 cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
1716 }
1717}
1718
1719#endif
1720
1721static int __sdt_alloc(const struct cpumask *cpu_map)
1722{
1723 struct sched_domain_topology_level *tl;
1724 int j;
1725
1726 for_each_sd_topology(tl) {
1727 struct sd_data *sdd = &tl->data;
1728
1729 sdd->sd = alloc_percpu(struct sched_domain *);
1730 if (!sdd->sd)
1731 return -ENOMEM;
1732
1733 sdd->sds = alloc_percpu(struct sched_domain_shared *);
1734 if (!sdd->sds)
1735 return -ENOMEM;
1736
1737 sdd->sg = alloc_percpu(struct sched_group *);
1738 if (!sdd->sg)
1739 return -ENOMEM;
1740
1741 sdd->sgc = alloc_percpu(struct sched_group_capacity *);
1742 if (!sdd->sgc)
1743 return -ENOMEM;
1744
1745 for_each_cpu(j, cpu_map) {
1746 struct sched_domain *sd;
1747 struct sched_domain_shared *sds;
1748 struct sched_group *sg;
1749 struct sched_group_capacity *sgc;
1750
1751 sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
1752 GFP_KERNEL, cpu_to_node(j));
1753 if (!sd)
1754 return -ENOMEM;
1755
1756 *per_cpu_ptr(sdd->sd, j) = sd;
1757
1758 sds = kzalloc_node(sizeof(struct sched_domain_shared),
1759 GFP_KERNEL, cpu_to_node(j));
1760 if (!sds)
1761 return -ENOMEM;
1762
1763 *per_cpu_ptr(sdd->sds, j) = sds;
1764
1765 sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
1766 GFP_KERNEL, cpu_to_node(j));
1767 if (!sg)
1768 return -ENOMEM;
1769
1770 sg->next = sg;
1771
1772 *per_cpu_ptr(sdd->sg, j) = sg;
1773
1774 sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(),
1775 GFP_KERNEL, cpu_to_node(j));
1776 if (!sgc)
1777 return -ENOMEM;
1778
1779#ifdef CONFIG_SCHED_DEBUG
1780 sgc->id = j;
1781#endif
1782
1783 *per_cpu_ptr(sdd->sgc, j) = sgc;
1784 }
1785 }
1786
1787 return 0;
1788}
1789
1790static void __sdt_free(const struct cpumask *cpu_map)
1791{
1792 struct sched_domain_topology_level *tl;
1793 int j;
1794
1795 for_each_sd_topology(tl) {
1796 struct sd_data *sdd = &tl->data;
1797
1798 for_each_cpu(j, cpu_map) {
1799 struct sched_domain *sd;
1800
1801 if (sdd->sd) {
1802 sd = *per_cpu_ptr(sdd->sd, j);
1803 if (sd && (sd->flags & SD_OVERLAP))
1804 free_sched_groups(sd->groups, 0);
1805 kfree(*per_cpu_ptr(sdd->sd, j));
1806 }
1807
1808 if (sdd->sds)
1809 kfree(*per_cpu_ptr(sdd->sds, j));
1810 if (sdd->sg)
1811 kfree(*per_cpu_ptr(sdd->sg, j));
1812 if (sdd->sgc)
1813 kfree(*per_cpu_ptr(sdd->sgc, j));
1814 }
1815 free_percpu(sdd->sd);
1816 sdd->sd = NULL;
1817 free_percpu(sdd->sds);
1818 sdd->sds = NULL;
1819 free_percpu(sdd->sg);
1820 sdd->sg = NULL;
1821 free_percpu(sdd->sgc);
1822 sdd->sgc = NULL;
1823 }
1824}
1825
1826static struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
1827 const struct cpumask *cpu_map, struct sched_domain_attr *attr,
1828 struct sched_domain *child, int dflags, int cpu)
1829{
1830 struct sched_domain *sd = sd_init(tl, cpu_map, child, dflags, cpu);
1831
1832 if (child) {
1833 sd->level = child->level + 1;
1834 sched_domain_level_max = max(sched_domain_level_max, sd->level);
1835 child->parent = sd;
1836
1837 if (!cpumask_subset(sched_domain_span(child),
1838 sched_domain_span(sd))) {
1839 pr_err("BUG: arch topology borken\n");
1840#ifdef CONFIG_SCHED_DEBUG
1841 pr_err(" the %s domain not a subset of the %s domain\n",
1842 child->name, sd->name);
1843#endif
1844
1845 cpumask_or(sched_domain_span(sd),
1846 sched_domain_span(sd),
1847 sched_domain_span(child));
1848 }
1849
1850 }
1851 set_domain_attribute(sd, attr);
1852
1853 return sd;
1854}
1855
1856
1857
1858
1859
1860static bool topology_span_sane(struct sched_domain_topology_level *tl,
1861 const struct cpumask *cpu_map, int cpu)
1862{
1863 int i;
1864
1865
1866 if (tl->flags & SDTL_OVERLAP)
1867 return true;
1868
1869
1870
1871
1872
1873
1874
1875 for_each_cpu(i, cpu_map) {
1876 if (i == cpu)
1877 continue;
1878
1879
1880
1881
1882
1883
1884 if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
1885 cpumask_intersects(tl->mask(cpu), tl->mask(i)))
1886 return false;
1887 }
1888
1889 return true;
1890}
1891
1892
1893
1894
1895
1896static struct sched_domain_topology_level
1897*asym_cpu_capacity_level(const struct cpumask *cpu_map)
1898{
1899 int i, j, asym_level = 0;
1900 bool asym = false;
1901 struct sched_domain_topology_level *tl, *asym_tl = NULL;
1902 unsigned long cap;
1903
1904
1905 cap = arch_scale_cpu_capacity(cpumask_first(cpu_map));
1906
1907 for_each_cpu(i, cpu_map) {
1908 if (arch_scale_cpu_capacity(i) != cap) {
1909 asym = true;
1910 break;
1911 }
1912 }
1913
1914 if (!asym)
1915 return NULL;
1916
1917
1918
1919
1920
1921
1922 for_each_cpu(i, cpu_map) {
1923 unsigned long max_capacity = arch_scale_cpu_capacity(i);
1924 int tl_id = 0;
1925
1926 for_each_sd_topology(tl) {
1927 if (tl_id < asym_level)
1928 goto next_level;
1929
1930 for_each_cpu_and(j, tl->mask(i), cpu_map) {
1931 unsigned long capacity;
1932
1933 capacity = arch_scale_cpu_capacity(j);
1934
1935 if (capacity <= max_capacity)
1936 continue;
1937
1938 max_capacity = capacity;
1939 asym_level = tl_id;
1940 asym_tl = tl;
1941 }
1942next_level:
1943 tl_id++;
1944 }
1945 }
1946
1947 return asym_tl;
1948}
1949
1950
1951
1952
1953
1954
1955static int
1956build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
1957{
1958 enum s_alloc alloc_state = sa_none;
1959 struct sched_domain *sd;
1960 struct s_data d;
1961 struct rq *rq = NULL;
1962 int i, ret = -ENOMEM;
1963 struct sched_domain_topology_level *tl_asym;
1964 bool has_asym = false;
1965
1966 if (WARN_ON(cpumask_empty(cpu_map)))
1967 goto error;
1968
1969 alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
1970 if (alloc_state != sa_rootdomain)
1971 goto error;
1972
1973 tl_asym = asym_cpu_capacity_level(cpu_map);
1974
1975
1976 for_each_cpu(i, cpu_map) {
1977 struct sched_domain_topology_level *tl;
1978
1979 sd = NULL;
1980 for_each_sd_topology(tl) {
1981 int dflags = 0;
1982
1983 if (tl == tl_asym) {
1984 dflags |= SD_ASYM_CPUCAPACITY;
1985 has_asym = true;
1986 }
1987
1988 if (WARN_ON(!topology_span_sane(tl, cpu_map, i)))
1989 goto error;
1990
1991 sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);
1992
1993 if (tl == sched_domain_topology)
1994 *per_cpu_ptr(d.sd, i) = sd;
1995 if (tl->flags & SDTL_OVERLAP)
1996 sd->flags |= SD_OVERLAP;
1997 if (cpumask_equal(cpu_map, sched_domain_span(sd)))
1998 break;
1999 }
2000 }
2001
2002
2003 for_each_cpu(i, cpu_map) {
2004 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
2005 sd->span_weight = cpumask_weight(sched_domain_span(sd));
2006 if (sd->flags & SD_OVERLAP) {
2007 if (build_overlap_sched_groups(sd, i))
2008 goto error;
2009 } else {
2010 if (build_sched_groups(sd, i))
2011 goto error;
2012 }
2013 }
2014 }
2015
2016
2017 for (i = nr_cpumask_bits-1; i >= 0; i--) {
2018 if (!cpumask_test_cpu(i, cpu_map))
2019 continue;
2020
2021 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
2022 claim_allocations(i, sd);
2023 init_sched_groups_capacity(i, sd);
2024 }
2025 }
2026
2027
2028 rcu_read_lock();
2029 for_each_cpu(i, cpu_map) {
2030 rq = cpu_rq(i);
2031 sd = *per_cpu_ptr(d.sd, i);
2032
2033
2034 if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
2035 WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
2036
2037 cpu_attach_domain(sd, d.rd, i);
2038 }
2039 rcu_read_unlock();
2040
2041 if (has_asym)
2042 static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
2043
2044 if (rq && sched_debug_enabled) {
2045 pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
2046 cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
2047 }
2048
2049 ret = 0;
2050error:
2051 __free_domain_allocs(&d, alloc_state, cpu_map);
2052
2053 return ret;
2054}

/* Current sched domains: */
static cpumask_var_t *doms_cur;

/* Number of sched domains in 'doms_cur': */
static int ndoms_cur;

/* Attributes of custom domains in 'doms_cur': */
static struct sched_domain_attr *dattr_cur;

/*
 * Special case: if a kmalloc() of a doms_cur partition (array of
 * cpumask) fails, then fall back to a single sched domain, as
 * determined by the single cpumask fallback_doms.
 */
static cpumask_var_t fallback_doms;

/*
 * arch_update_cpu_topology() lets virtualized architectures update the
 * CPU core maps.  It is supposed to return 1 if the topology changed
 * or 0 if it stayed the same.
 */
2077int __weak arch_update_cpu_topology(void)
2078{
2079 return 0;
2080}
2081
2082cpumask_var_t *alloc_sched_domains(unsigned int ndoms)
2083{
2084 int i;
2085 cpumask_var_t *doms;
2086
2087 doms = kmalloc_array(ndoms, sizeof(*doms), GFP_KERNEL);
2088 if (!doms)
2089 return NULL;
2090 for (i = 0; i < ndoms; i++) {
2091 if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
2092 free_sched_domains(doms, i);
2093 return NULL;
2094 }
2095 }
2096 return doms;
2097}
2098
2099void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
2100{
2101 unsigned int i;
2102 for (i = 0; i < ndoms; i++)
2103 free_cpumask_var(doms[i]);
2104 kfree(doms);
2105}

/*
 * Set up scheduler domains and groups.  For now this just excludes isolated
 * CPUs, but could be used to exclude other special cases in the future.
 */
2112int sched_init_domains(const struct cpumask *cpu_map)
2113{
2114 int err;
2115
2116 zalloc_cpumask_var(&sched_domains_tmpmask, GFP_KERNEL);
2117 zalloc_cpumask_var(&sched_domains_tmpmask2, GFP_KERNEL);
2118 zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);
2119
2120 arch_update_cpu_topology();
2121 ndoms_cur = 1;
2122 doms_cur = alloc_sched_domains(ndoms_cur);
2123 if (!doms_cur)
2124 doms_cur = &fallback_doms;
2125 cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
2126 err = build_sched_domains(doms_cur[0], NULL);
2127 register_sched_domain_sysctl();
2128
2129 return err;
2130}

/*
 * Detach sched domains from a group of CPUs specified in cpu_map.
 * These CPUs will now be attached to the NULL domain.
 */
2136static void detach_destroy_domains(const struct cpumask *cpu_map)
2137{
2138 unsigned int cpu = cpumask_any(cpu_map);
2139 int i;
2140
2141 if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu)))
2142 static_branch_dec_cpuslocked(&sched_asym_cpucapacity);
2143
2144 rcu_read_lock();
2145 for_each_cpu(i, cpu_map)
2146 cpu_attach_domain(NULL, &def_root_domain, i);
2147 rcu_read_unlock();
2148}
2149
2150
2151static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
2152 struct sched_domain_attr *new, int idx_new)
2153{
2154 struct sched_domain_attr tmp;
2155
2156
2157 if (!new && !cur)
2158 return 1;
2159
2160 tmp = SD_ATTR_INIT;
2161
2162 return !memcmp(cur ? (cur + idx_cur) : &tmp,
2163 new ? (new + idx_new) : &tmp,
2164 sizeof(struct sched_domain_attr));
2165}
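
/*
 * Partition the system's sched domains into @ndoms_new non-overlapping sets
 * described by @doms_new[], with per-set attributes in @dattr_new[].  Sets
 * that are unchanged with respect to the current partition (same mask and
 * attributes, and no topology change) are kept; the rest are destroyed and
 * rebuilt.  Passing a NULL @doms_new falls back to a single partition
 * spanning all active housekeeping CPUs.  Callers (typically the cpusets
 * code) must hold sched_domains_mutex; see partition_sched_domains() for
 * the locking wrapper.
 */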
2193void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
2194 struct sched_domain_attr *dattr_new)
2195{
2196 bool __maybe_unused has_eas = false;
2197 int i, j, n;
2198 int new_topology;
2199
2200 lockdep_assert_held(&sched_domains_mutex);
2201
2202
2203 unregister_sched_domain_sysctl();
2204
2205
2206 new_topology = arch_update_cpu_topology();
2207
2208 if (!doms_new) {
2209 WARN_ON_ONCE(dattr_new);
2210 n = 0;
2211 doms_new = alloc_sched_domains(1);
2212 if (doms_new) {
2213 n = 1;
2214 cpumask_and(doms_new[0], cpu_active_mask,
2215 housekeeping_cpumask(HK_FLAG_DOMAIN));
2216 }
2217 } else {
2218 n = ndoms_new;
2219 }
2220
2221
2222 for (i = 0; i < ndoms_cur; i++) {
2223 for (j = 0; j < n && !new_topology; j++) {
2224 if (cpumask_equal(doms_cur[i], doms_new[j]) &&
2225 dattrs_equal(dattr_cur, i, dattr_new, j)) {
2226 struct root_domain *rd;
2227
2228
2229
2230
2231
2232
2233
2234 rd = cpu_rq(cpumask_any(doms_cur[i]))->rd;
2235 dl_clear_root_domain(rd);
2236 goto match1;
2237 }
2238 }
2239
2240 detach_destroy_domains(doms_cur[i]);
2241match1:
2242 ;
2243 }
2244
2245 n = ndoms_cur;
2246 if (!doms_new) {
2247 n = 0;
2248 doms_new = &fallback_doms;
2249 cpumask_and(doms_new[0], cpu_active_mask,
2250 housekeeping_cpumask(HK_FLAG_DOMAIN));
2251 }
2252
2253
2254 for (i = 0; i < ndoms_new; i++) {
2255 for (j = 0; j < n && !new_topology; j++) {
2256 if (cpumask_equal(doms_new[i], doms_cur[j]) &&
2257 dattrs_equal(dattr_new, i, dattr_cur, j))
2258 goto match2;
2259 }
2260
2261 build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
2262match2:
2263 ;
2264 }
2265
2266#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
2267
2268 for (i = 0; i < ndoms_new; i++) {
2269 for (j = 0; j < n && !sched_energy_update; j++) {
2270 if (cpumask_equal(doms_new[i], doms_cur[j]) &&
2271 cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) {
2272 has_eas = true;
2273 goto match3;
2274 }
2275 }
2276
2277 has_eas |= build_perf_domains(doms_new[i]);
2278match3:
2279 ;
2280 }
2281 sched_energy_set(has_eas);
2282#endif
2283
2284
2285 if (doms_cur != &fallback_doms)
2286 free_sched_domains(doms_cur, ndoms_cur);
2287
2288 kfree(dattr_cur);
2289 doms_cur = doms_new;
2290 dattr_cur = dattr_new;
2291 ndoms_cur = ndoms_new;
2292
2293 register_sched_domain_sysctl();
2294}

/*
 * Call with hotplug lock held.
 */
2299void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
2300 struct sched_domain_attr *dattr_new)
2301{
2302 mutex_lock(&sched_domains_mutex);
2303 partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
2304 mutex_unlock(&sched_domains_mutex);
2305}
2306