// SPDX-License-Identifier: GPL-2.0
/*
 * Scheduler topology setup/handling methods
 */
#include "sched.h"

DEFINE_MUTEX(sched_domains_mutex);

/* Protected by sched_domains_mutex: */
static cpumask_var_t sched_domains_tmpmask;
static cpumask_var_t sched_domains_tmpmask2;

#ifdef CONFIG_SCHED_DEBUG

static int __init sched_debug_setup(char *str)
{
	sched_debug_enabled = true;

	return 0;
}
early_param("sched_debug", sched_debug_setup);

static inline bool sched_debug(void)
{
	return sched_debug_enabled;
}

static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
				  struct cpumask *groupmask)
{
	struct sched_group *group = sd->groups;

	cpumask_clear(groupmask);

	printk(KERN_DEBUG "%*s domain-%d: ", level, "", level);

	if (!(sd->flags & SD_LOAD_BALANCE)) {
		printk("does not load-balance\n");
		if (sd->parent)
			printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent");
		return -1;
	}

	printk(KERN_CONT "span=%*pbl level=%s\n",
	       cpumask_pr_args(sched_domain_span(sd)), sd->name);

	if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
		printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu);
	}
	if (group && !cpumask_test_cpu(cpu, sched_group_span(group))) {
		printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);
	}

	printk(KERN_DEBUG "%*s groups:", level + 1, "");
	do {
		if (!group) {
			printk("\n");
			printk(KERN_ERR "ERROR: group is NULL\n");
			break;
		}

		if (!cpumask_weight(sched_group_span(group))) {
			printk(KERN_CONT "\n");
			printk(KERN_ERR "ERROR: empty group\n");
			break;
		}

		if (!(sd->flags & SD_OVERLAP) &&
		    cpumask_intersects(groupmask, sched_group_span(group))) {
			printk(KERN_CONT "\n");
			printk(KERN_ERR "ERROR: repeated CPUs\n");
			break;
		}

		cpumask_or(groupmask, groupmask, sched_group_span(group));

		printk(KERN_CONT " %d:{ span=%*pbl",
		       group->sgc->id,
		       cpumask_pr_args(sched_group_span(group)));

		if ((sd->flags & SD_OVERLAP) &&
		    !cpumask_equal(group_balance_mask(group), sched_group_span(group))) {
			printk(KERN_CONT " mask=%*pbl",
			       cpumask_pr_args(group_balance_mask(group)));
		}

		if (group->sgc->capacity != SCHED_CAPACITY_SCALE)
			printk(KERN_CONT " cap=%lu", group->sgc->capacity);

		if (group == sd->groups && sd->child &&
		    !cpumask_equal(sched_domain_span(sd->child),
				   sched_group_span(group))) {
			printk(KERN_ERR "ERROR: domain->groups does not match domain->child\n");
		}

		printk(KERN_CONT " }");

		group = group->next;

		if (group != sd->groups)
			printk(KERN_CONT ",");

	} while (group != sd->groups);
	printk(KERN_CONT "\n");

	if (!cpumask_equal(sched_domain_span(sd), groupmask))
		printk(KERN_ERR "ERROR: groups don't span domain->span\n");

	if (sd->parent &&
	    !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
		printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n");
	return 0;
}

static void sched_domain_debug(struct sched_domain *sd, int cpu)
{
	int level = 0;

	if (!sched_debug_enabled)
		return;

	if (!sd) {
		printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
		return;
	}

	printk(KERN_DEBUG "CPU%d attaching sched-domain(s):\n", cpu);

	for (;;) {
		if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))
			break;
		level++;
		sd = sd->parent;
		if (!sd)
			break;
	}
}
#else /* !CONFIG_SCHED_DEBUG */

# define sched_debug_enabled 0
# define sched_domain_debug(sd, cpu) do { } while (0)
static inline bool sched_debug(void)
{
	return false;
}
#endif /* CONFIG_SCHED_DEBUG */

static int sd_degenerate(struct sched_domain *sd)
{
	if (cpumask_weight(sched_domain_span(sd)) == 1)
		return 1;

	/* Following flags need at least 2 groups */
	if (sd->flags & (SD_LOAD_BALANCE |
			 SD_BALANCE_NEWIDLE |
			 SD_BALANCE_FORK |
			 SD_BALANCE_EXEC |
			 SD_SHARE_CPUCAPACITY |
			 SD_ASYM_CPUCAPACITY |
			 SD_SHARE_PKG_RESOURCES |
			 SD_SHARE_POWERDOMAIN)) {
		if (sd->groups != sd->groups->next)
			return 0;
	}

	/* Following flags don't use groups */
	if (sd->flags & (SD_WAKE_AFFINE))
		return 0;

	return 1;
}

static int
sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
{
	unsigned long cflags = sd->flags, pflags = parent->flags;

	if (sd_degenerate(parent))
		return 1;

	if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
		return 0;

	/* Flags needing groups don't count if only 1 group in parent */
	if (parent->groups == parent->groups->next) {
		pflags &= ~(SD_LOAD_BALANCE |
			    SD_BALANCE_NEWIDLE |
			    SD_BALANCE_FORK |
			    SD_BALANCE_EXEC |
			    SD_ASYM_CPUCAPACITY |
			    SD_SHARE_CPUCAPACITY |
			    SD_SHARE_PKG_RESOURCES |
			    SD_PREFER_SIBLING |
			    SD_SHARE_POWERDOMAIN);
		if (nr_node_ids == 1)
			pflags &= ~SD_SERIALIZE;
	}
	if (~cflags & pflags)
		return 0;

	return 1;
}

#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
DEFINE_STATIC_KEY_FALSE(sched_energy_present);
unsigned int sysctl_sched_energy_aware = 1;
DEFINE_MUTEX(sched_energy_mutex);
bool sched_energy_update;

#ifdef CONFIG_PROC_SYSCTL
int sched_energy_aware_handler(struct ctl_table *table, int write,
			       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, state;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		state = static_branch_unlikely(&sched_energy_present);
		if (state != sysctl_sched_energy_aware) {
			mutex_lock(&sched_energy_mutex);
			sched_energy_update = 1;
			rebuild_sched_domains();
			sched_energy_update = 0;
			mutex_unlock(&sched_energy_mutex);
		}
	}

	return ret;
}
#endif

static void free_pd(struct perf_domain *pd)
{
	struct perf_domain *tmp;

	while (pd) {
		tmp = pd->next;
		kfree(pd);
		pd = tmp;
	}
}

static struct perf_domain *find_pd(struct perf_domain *pd, int cpu)
{
	while (pd) {
		if (cpumask_test_cpu(cpu, perf_domain_span(pd)))
			return pd;
		pd = pd->next;
	}

	return NULL;
}

static struct perf_domain *pd_init(int cpu)
{
	struct em_perf_domain *obj = em_cpu_get(cpu);
	struct perf_domain *pd;

	if (!obj) {
		if (sched_debug())
			pr_info("%s: no EM found for CPU%d\n", __func__, cpu);
		return NULL;
	}

	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return NULL;
	pd->em_pd = obj;

	return pd;
}

static void perf_domain_debug(const struct cpumask *cpu_map,
			      struct perf_domain *pd)
{
	if (!sched_debug() || !pd)
		return;

	printk(KERN_DEBUG "root_domain %*pbl:", cpumask_pr_args(cpu_map));

	while (pd) {
		printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_cstate=%d }",
		       cpumask_first(perf_domain_span(pd)),
		       cpumask_pr_args(perf_domain_span(pd)),
		       em_pd_nr_cap_states(pd->em_pd));
		pd = pd->next;
	}

	printk(KERN_CONT "\n");
}

static void destroy_perf_domain_rcu(struct rcu_head *rp)
{
	struct perf_domain *pd;

	pd = container_of(rp, struct perf_domain, rcu);
	free_pd(pd);
}

static void sched_energy_set(bool has_eas)
{
	if (!has_eas && static_branch_unlikely(&sched_energy_present)) {
		if (sched_debug())
			pr_info("%s: stopping EAS\n", __func__);
		static_branch_disable_cpuslocked(&sched_energy_present);
	} else if (has_eas && !static_branch_unlikely(&sched_energy_present)) {
		if (sched_debug())
			pr_info("%s: starting EAS\n", __func__);
		static_branch_enable_cpuslocked(&sched_energy_present);
	}
}
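
/*
 * Summary of the complexity check performed below: EAS consults the Energy
 * Model (EM) tables on the task wake-up path, so its overhead grows with the
 * number of performance domains (nr_pd), CPUs (nr_cpus) and EM capacity
 * states (nr_cs) in the root domain. build_perf_domains() estimates that
 * cost as
 *
 *	C = nr_pd * (nr_cpus + nr_cs)
 *
 * and refuses to enable EAS when the estimate exceeds EM_MAX_COMPLEXITY.
 */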
#define EM_MAX_COMPLEXITY 2048

extern struct cpufreq_governor schedutil_gov;
static bool build_perf_domains(const struct cpumask *cpu_map)
{
	int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map);
	struct perf_domain *pd = NULL, *tmp;
	int cpu = cpumask_first(cpu_map);
	struct root_domain *rd = cpu_rq(cpu)->rd;
	struct cpufreq_policy *policy;
	struct cpufreq_governor *gov;

	if (!sysctl_sched_energy_aware)
		goto free;

	/* EAS is enabled for asymmetric CPU capacity topologies. */
	if (!per_cpu(sd_asym_cpucapacity, cpu)) {
		if (sched_debug()) {
			pr_info("rd %*pbl: CPUs do not have asymmetric capacities\n",
				cpumask_pr_args(cpu_map));
		}
		goto free;
	}

	for_each_cpu(i, cpu_map) {
		/* Skip already covered CPUs. */
		if (find_pd(pd, i))
			continue;

		/* Do not attempt EAS if schedutil is not being used. */
		policy = cpufreq_cpu_get(i);
		if (!policy)
			goto free;
		gov = policy->governor;
		cpufreq_cpu_put(policy);
		if (gov != &schedutil_gov) {
			if (rd->pd)
				pr_warn("rd %*pbl: Disabling EAS, schedutil is mandatory\n",
					cpumask_pr_args(cpu_map));
			goto free;
		}

		/* Create the new pd and add it to the local list. */
		tmp = pd_init(i);
		if (!tmp)
			goto free;
		tmp->next = pd;
		pd = tmp;

		/*
		 * Count performance domains and capacity states for the
		 * complexity check.
		 */
		nr_pd++;
		nr_cs += em_pd_nr_cap_states(pd->em_pd);
	}

	/* Bail out if the Energy Model complexity is too high. */
	if (nr_pd * (nr_cs + nr_cpus) > EM_MAX_COMPLEXITY) {
		WARN(1, "rd %*pbl: Failed to start EAS, EM complexity is too high\n",
		     cpumask_pr_args(cpu_map));
		goto free;
	}

	perf_domain_debug(cpu_map, pd);

	/* Attach the new list of performance domains to the root domain. */
	tmp = rd->pd;
	rcu_assign_pointer(rd->pd, pd);
	if (tmp)
		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);

	return !!pd;

free:
	free_pd(pd);
	tmp = rd->pd;
	rcu_assign_pointer(rd->pd, NULL);
	if (tmp)
		call_rcu(&tmp->rcu, destroy_perf_domain_rcu);

	return false;
}
#else
static void free_pd(struct perf_domain *pd) { }
#endif

static void free_rootdomain(struct rcu_head *rcu)
{
	struct root_domain *rd = container_of(rcu, struct root_domain, rcu);

	cpupri_cleanup(&rd->cpupri);
	cpudl_cleanup(&rd->cpudl);
	free_cpumask_var(rd->dlo_mask);
	free_cpumask_var(rd->rto_mask);
	free_cpumask_var(rd->online);
	free_cpumask_var(rd->span);
	free_pd(rd->pd);
	kfree(rd);
}

void rq_attach_root(struct rq *rq, struct root_domain *rd)
{
	struct root_domain *old_rd = NULL;
	unsigned long flags;

	raw_spin_lock_irqsave(&rq->lock, flags);

	if (rq->rd) {
		old_rd = rq->rd;

		if (cpumask_test_cpu(rq->cpu, old_rd->online))
			set_rq_offline(rq);

		cpumask_clear_cpu(rq->cpu, old_rd->span);

		/*
		 * If we don't want to free the old_rd yet then
		 * set old_rd to NULL to skip the freeing later
		 * in this function:
		 */
		if (!atomic_dec_and_test(&old_rd->refcount))
			old_rd = NULL;
	}

	atomic_inc(&rd->refcount);
	rq->rd = rd;

	cpumask_set_cpu(rq->cpu, rd->span);
	if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
		set_rq_online(rq);

	raw_spin_unlock_irqrestore(&rq->lock, flags);

	if (old_rd)
		call_rcu(&old_rd->rcu, free_rootdomain);
}

void sched_get_rd(struct root_domain *rd)
{
	atomic_inc(&rd->refcount);
}

void sched_put_rd(struct root_domain *rd)
{
	if (!atomic_dec_and_test(&rd->refcount))
		return;

	call_rcu(&rd->rcu, free_rootdomain);
}

static int init_rootdomain(struct root_domain *rd)
{
	if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))
		goto out;
	if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL))
		goto free_span;
	if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
		goto free_online;
	if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
		goto free_dlo_mask;

#ifdef HAVE_RT_PUSH_IPI
	rd->rto_cpu = -1;
	raw_spin_lock_init(&rd->rto_lock);
	init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
#endif

	init_dl_bw(&rd->dl_bw);
	if (cpudl_init(&rd->cpudl) != 0)
		goto free_rto_mask;

	if (cpupri_init(&rd->cpupri) != 0)
		goto free_cpudl;
	return 0;

free_cpudl:
	cpudl_cleanup(&rd->cpudl);
free_rto_mask:
	free_cpumask_var(rd->rto_mask);
free_dlo_mask:
	free_cpumask_var(rd->dlo_mask);
free_online:
	free_cpumask_var(rd->online);
free_span:
	free_cpumask_var(rd->span);
out:
	return -ENOMEM;
}

/*
 * By default the system creates a single root-domain with all CPUs as
 * members (mimicking the global state we have today).
 */
struct root_domain def_root_domain;

void init_defrootdomain(void)
{
	init_rootdomain(&def_root_domain);

	atomic_set(&def_root_domain.refcount, 1);
}

static struct root_domain *alloc_rootdomain(void)
{
	struct root_domain *rd;

	rd = kzalloc(sizeof(*rd), GFP_KERNEL);
	if (!rd)
		return NULL;

	if (init_rootdomain(rd) != 0) {
		kfree(rd);
		return NULL;
	}

	return rd;
}

static void free_sched_groups(struct sched_group *sg, int free_sgc)
{
	struct sched_group *tmp, *first;

	if (!sg)
		return;

	first = sg;
	do {
		tmp = sg->next;

		if (free_sgc && atomic_dec_and_test(&sg->sgc->ref))
			kfree(sg->sgc);

		if (atomic_dec_and_test(&sg->ref))
			kfree(sg);
		sg = tmp;
	} while (sg != first);
}

static void destroy_sched_domain(struct sched_domain *sd)
{
	/*
	 * Drop the group list (and the sched_group_capacity objects) this
	 * domain holds references to, then the shared state and the domain
	 * itself.
	 */
	free_sched_groups(sd->groups, 1);

	if (sd->shared && atomic_dec_and_test(&sd->shared->ref))
		kfree(sd->shared);
	kfree(sd);
}

static void destroy_sched_domains_rcu(struct rcu_head *rcu)
{
	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);

	while (sd) {
		struct sched_domain *parent = sd->parent;
		destroy_sched_domain(sd);
		sd = parent;
	}
}

static void destroy_sched_domains(struct sched_domain *sd)
{
	if (sd)
		call_rcu(&sd->rcu, destroy_sched_domains_rcu);
}

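/*
 * Per-CPU shortcuts into the sched_domain hierarchy, kept up to date by
 * update_top_cache_domain() below:
 *
 *  - sd_llc / sd_llc_size / sd_llc_id / sd_llc_shared: the highest domain
 *    that shares the last-level cache (SD_SHARE_PKG_RESOURCES) with this
 *    CPU, its weight, its first CPU and its shared state;
 *  - sd_numa: the lowest domain with SD_NUMA set;
 *  - sd_asym_packing: the highest domain with SD_ASYM_PACKING set;
 *  - sd_asym_cpucapacity: the lowest domain spanning CPUs of different
 *    compute capacity (SD_ASYM_CPUCAPACITY).
 */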
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
DEFINE_PER_CPU(int, sd_llc_size);
DEFINE_PER_CPU(int, sd_llc_id);
DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);

static void update_top_cache_domain(int cpu)
{
	struct sched_domain_shared *sds = NULL;
	struct sched_domain *sd;
	int id = cpu;
	int size = 1;

	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
	if (sd) {
		id = cpumask_first(sched_domain_span(sd));
		size = cpumask_weight(sched_domain_span(sd));
		sds = sd->shared;
	}

	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
	per_cpu(sd_llc_size, cpu) = size;
	per_cpu(sd_llc_id, cpu) = id;
	rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);

	sd = lowest_flag_domain(cpu, SD_NUMA);
	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);

	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
	rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);

	sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY);
	rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
}

/*
 * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
 * hold the hotplug lock.
 */
static void
cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	struct sched_domain *tmp;

	/* Remove the sched domains which do not contribute to scheduling. */
	for (tmp = sd; tmp; ) {
		struct sched_domain *parent = tmp->parent;
		if (!parent)
			break;

		if (sd_parent_degenerate(tmp, parent)) {
			tmp->parent = parent->parent;
			if (parent->parent)
				parent->parent->child = tmp;
			/*
			 * Transfer SD_PREFER_SIBLING down in case of a
			 * degenerate parent; the spans match for this
			 * so the property transfers.
			 */
			if (parent->flags & SD_PREFER_SIBLING)
				tmp->flags |= SD_PREFER_SIBLING;
			destroy_sched_domain(parent);
		} else
			tmp = tmp->parent;
	}

	if (sd && sd_degenerate(sd)) {
		tmp = sd;
		sd = sd->parent;
		destroy_sched_domain(tmp);
		if (sd)
			sd->child = NULL;
	}

	sched_domain_debug(sd, cpu);

	rq_attach_root(rq, rd);
	tmp = rq->sd;
	rcu_assign_pointer(rq->sd, sd);
	dirty_sched_domain_sysctl(cpu);
	destroy_sched_domains(tmp);

	update_top_cache_domain(cpu);
}

struct s_data {
	struct sched_domain * __percpu *sd;
	struct root_domain *rd;
};

enum s_alloc {
	sa_rootdomain,
	sa_sd,
	sa_sd_storage,
	sa_none,
};

/*
 * Return the canonical balance CPU for this group, this is the first CPU
 * of this group that's also in the balance mask.
 *
 * The balance mask are all those CPUs that could actually end up at this
 * group. See build_balance_mask().
 *
 * Also see should_we_balance().
 */
int group_balance_cpu(struct sched_group *sg)
{
	return cpumask_first(group_balance_mask(sg));
}
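
/*
 * Overlapping (SD_OVERLAP) domains, as used for the NUMA levels, build their
 * groups from the sibling domains' child spans, so the same CPU can end up
 * in groups owned by several domains. Only one CPU per group may act as the
 * load-balancing owner for it: build_balance_mask() computes, for a given
 * group, the set of CPUs whose own child domain exactly spans that group,
 * and the first CPU in that mask (see group_balance_cpu()) is the one
 * allowed to balance on behalf of the group.
 */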
static void
build_balance_mask(struct sched_domain *sd, struct sched_group *sg, struct cpumask *mask)
{
	const struct cpumask *sg_span = sched_group_span(sg);
	struct sd_data *sdd = sd->private;
	struct sched_domain *sibling;
	int i;

	cpumask_clear(mask);

	for_each_cpu(i, sg_span) {
		sibling = *per_cpu_ptr(sdd->sd, i);

		/*
		 * Can happen in the asymmetric case, where these siblings are
		 * unused. The mask will not end up empty because the CPUs
		 * that do have the top domain should span the group.
		 */
		if (!sibling->child)
			continue;

		/* Only CPUs whose child domain exactly spans the group qualify: */
		if (!cpumask_equal(sg_span, sched_domain_span(sibling->child)))
			continue;

		cpumask_set_cpu(i, mask);
	}

	/* We must not have empty masks here */
	WARN_ON_ONCE(cpumask_empty(mask));
}

/*
 * XXX: This creates per-node group entries; since the load-balancer will
 * immediately access remote memory to construct this group's load-balance
 * statistics, it may be desirable to allocate them on their local node.
 */
static struct sched_group *
build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
{
	struct sched_group *sg;
	struct cpumask *sg_span;

	sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
			  GFP_KERNEL, cpu_to_node(cpu));

	if (!sg)
		return NULL;

	sg_span = sched_group_span(sg);
	if (sd->child)
		cpumask_copy(sg_span, sched_domain_span(sd->child));
	else
		cpumask_copy(sg_span, sched_domain_span(sd));

	atomic_inc(&sg->ref);
	return sg;
}

static void init_overlap_sched_group(struct sched_domain *sd,
				     struct sched_group *sg)
{
	struct cpumask *mask = sched_domains_tmpmask2;
	struct sd_data *sdd = sd->private;
	struct cpumask *sg_span;
	int cpu;

	build_balance_mask(sd, sg, mask);
	cpu = cpumask_first_and(sched_group_span(sg), mask);

	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
	if (atomic_inc_return(&sg->sgc->ref) == 1)
		cpumask_copy(group_balance_mask(sg), mask);
	else
		WARN_ON_ONCE(!cpumask_equal(group_balance_mask(sg), mask));

	/*
	 * Initialize sgc->capacity to a sane default (full capacity per CPU)
	 * so the group is usable even before update_group_capacity() runs.
	 */
	sg_span = sched_group_span(sg);
	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
	sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
}

static int
build_overlap_sched_groups(struct sched_domain *sd, int cpu)
{
	struct sched_group *first = NULL, *last = NULL, *sg;
	const struct cpumask *span = sched_domain_span(sd);
	struct cpumask *covered = sched_domains_tmpmask;
	struct sd_data *sdd = sd->private;
	struct sched_domain *sibling;
	int i;

	cpumask_clear(covered);

	for_each_cpu_wrap(i, span, cpu) {
		struct cpumask *sg_span;

		if (cpumask_test_cpu(i, covered))
			continue;

		sibling = *per_cpu_ptr(sdd->sd, i);

		/*
		 * Asymmetric node setups can result in domain trees of
		 * unequal depth, in which case some siblings end up with an
		 * empty span. Domains should always include the CPU they are
		 * built on, so skip siblings for which that does not hold.
		 */
		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
			continue;

		sg = build_group_from_child_sched_domain(sibling, cpu);
		if (!sg)
			goto fail;

		sg_span = sched_group_span(sg);
		cpumask_or(covered, covered, sg_span);

		init_overlap_sched_group(sd, sg);

		if (!first)
			first = sg;
		if (last)
			last->next = sg;
		last = sg;
		last->next = first;
	}
	sd->groups = first;

	return 0;

fail:
	free_sched_groups(first, 0);

	return -ENOMEM;
}
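
/*
 * For non-overlapping domains, each group corresponds directly to the span
 * of a child domain (or to a single CPU at the lowest level). get_group()
 * returns the canonical group for @cpu at this level, initializing its span,
 * balance mask and default capacity the first time it is visited;
 * build_sched_groups() then links these groups into the circular list the
 * load balancer iterates over.
 */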
static struct sched_group *get_group(int cpu, struct sd_data *sdd)
{
	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
	struct sched_domain *child = sd->child;
	struct sched_group *sg;
	bool already_visited;

	if (child)
		cpu = cpumask_first(sched_domain_span(child));

	sg = *per_cpu_ptr(sdd->sg, cpu);
	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);

	/* Increase refcounts for claim_allocations: */
	already_visited = atomic_inc_return(&sg->ref) > 1;
	/* sgc visits should follow a similar trend as sg: */
	WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1));

	/* If we have already visited that group, it's already initialized. */
	if (already_visited)
		return sg;

	if (child) {
		cpumask_copy(sched_group_span(sg), sched_domain_span(child));
		cpumask_copy(group_balance_mask(sg), sched_group_span(sg));
	} else {
		cpumask_set_cpu(cpu, sched_group_span(sg));
		cpumask_set_cpu(cpu, group_balance_mask(sg));
	}

	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
	sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;

	return sg;
}

/*
 * build_sched_groups() builds a circular linked list of the groups covered
 * by the given span, sets each group's ->cpumask correctly, and initializes
 * their ->sgc.
 *
 * Assumes the sched_domain tree is fully constructed.
 */
static int
build_sched_groups(struct sched_domain *sd, int cpu)
{
	struct sched_group *first = NULL, *last = NULL;
	struct sd_data *sdd = sd->private;
	const struct cpumask *span = sched_domain_span(sd);
	struct cpumask *covered;
	int i;

	lockdep_assert_held(&sched_domains_mutex);
	covered = sched_domains_tmpmask;

	cpumask_clear(covered);

	for_each_cpu_wrap(i, span, cpu) {
		struct sched_group *sg;

		if (cpumask_test_cpu(i, covered))
			continue;

		sg = get_group(i, sdd);

		cpumask_or(covered, covered, sched_group_span(sg));

		if (!first)
			first = sg;
		if (last)
			last->next = sg;
		last = sg;
	}
	last->next = first;
	sd->groups = first;

	return 0;
}

/*
 * Initialize sched groups cpu_capacity.
 *
 * cpu_capacity indicates the capacity of a sched group, which is used while
 * distributing load between the groups of a sched domain. Typically all
 * groups in a domain have the same capacity unless the topology is
 * asymmetric, in which case groups with more capacity pick up more load.
 */
static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
{
	struct sched_group *sg = sd->groups;

	WARN_ON(!sg);

	do {
		int cpu, max_cpu = -1;

		sg->group_weight = cpumask_weight(sched_group_span(sg));

		if (!(sd->flags & SD_ASYM_PACKING))
			goto next;

		for_each_cpu(cpu, sched_group_span(sg)) {
			if (max_cpu < 0)
				max_cpu = cpu;
			else if (sched_asym_prefer(cpu, max_cpu))
				max_cpu = cpu;
		}
		sg->asym_prefer_cpu = max_cpu;

next:
		sg = sg->next;
	} while (sg != sd->groups);

	if (cpu != group_balance_cpu(sg))
		return;

	update_group_capacity(sd, cpu);
}

/*
 * Initializers for sched domains. Non-inlined to reduce accumulated stack
 * pressure in build_sched_domains().
 */
static int default_relax_domain_level = -1;
int sched_domain_level_max;

static int __init setup_relax_domain_level(char *str)
{
	if (kstrtoint(str, 0, &default_relax_domain_level))
		pr_warn("Unable to set relax_domain_level\n");

	return 1;
}
__setup("relax_domain_level=", setup_relax_domain_level);

static void set_domain_attribute(struct sched_domain *sd,
				 struct sched_domain_attr *attr)
{
	int request;

	if (!attr || attr->relax_domain_level < 0) {
		if (default_relax_domain_level < 0)
			return;
		else
			request = default_relax_domain_level;
	} else
		request = attr->relax_domain_level;
	if (request < sd->level) {
		/* Turn off idle balance on this domain: */
		sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
	} else {
		/* Turn on idle balance on this domain: */
		sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
	}
}


static void __sdt_free(const struct cpumask *cpu_map);
static int __sdt_alloc(const struct cpumask *cpu_map);

static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
				 const struct cpumask *cpu_map)
{
	switch (what) {
	case sa_rootdomain:
		if (!atomic_read(&d->rd->refcount))
			free_rootdomain(&d->rd->rcu);
		/* Fall through */
	case sa_sd:
		free_percpu(d->sd);
		/* Fall through */
	case sa_sd_storage:
		__sdt_free(cpu_map);
		/* Fall through */
	case sa_none:
		break;
	}
}

static enum s_alloc
__visit_domain_allocation_hell(struct s_data *d, const struct cpumask *cpu_map)
{
	memset(d, 0, sizeof(*d));

	if (__sdt_alloc(cpu_map))
		return sa_sd_storage;
	d->sd = alloc_percpu(struct sched_domain *);
	if (!d->sd)
		return sa_sd_storage;
	d->rd = alloc_rootdomain();
	if (!d->rd)
		return sa_sd;

	return sa_rootdomain;
}

/*
 * NULL the sd_data elements we've used to build the sched_domain and
 * sched_group structure so that the subsequent __free_domain_allocs()
 * will not free the data we're using.
 */
static void claim_allocations(int cpu, struct sched_domain *sd)
{
	struct sd_data *sdd = sd->private;

	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
	*per_cpu_ptr(sdd->sd, cpu) = NULL;

	if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))
		*per_cpu_ptr(sdd->sds, cpu) = NULL;

	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
		*per_cpu_ptr(sdd->sg, cpu) = NULL;

	if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref))
		*per_cpu_ptr(sdd->sgc, cpu) = NULL;
}

#ifdef CONFIG_NUMA
enum numa_topology_type sched_numa_topology_type;

static int sched_domains_numa_levels;
static int sched_domains_curr_level;

int sched_max_numa_distance;
static int *sched_domains_numa_distance;
static struct cpumask ***sched_domains_numa_masks;
#endif
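
/*
 * Only the SD flags below may be requested by a topology level's
 * ->sd_flags() callback; they describe hardware properties of the level
 * (shared SMT capacity, shared cache/package resources, NUMA, asym packing,
 * shared power domain). Behavioural flags are set by sd_init() itself; any
 * other flag returned by a topology level triggers the WARN_ONCE() there
 * and is cleared.
 */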
#define TOPOLOGY_SD_FLAGS		\
	(SD_SHARE_CPUCAPACITY	|	\
	 SD_SHARE_PKG_RESOURCES |	\
	 SD_NUMA		|	\
	 SD_ASYM_PACKING	|	\
	 SD_SHARE_POWERDOMAIN)

static struct sched_domain *
sd_init(struct sched_domain_topology_level *tl,
	const struct cpumask *cpu_map,
	struct sched_domain *child, int dflags, int cpu)
{
	struct sd_data *sdd = &tl->data;
	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
	int sd_id, sd_weight, sd_flags = 0;

#ifdef CONFIG_NUMA
	/*
	 * Ugly hack to pass state to sd_numa_mask()...
	 */
	sched_domains_curr_level = tl->numa_level;
#endif

	sd_weight = cpumask_weight(tl->mask(cpu));

	if (tl->sd_flags)
		sd_flags = (*tl->sd_flags)();
	if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
		      "wrong sd_flags in topology description\n"))
		sd_flags &= ~TOPOLOGY_SD_FLAGS;

	/* Apply detected topology flags */
	sd_flags |= dflags;

	*sd = (struct sched_domain){
		.min_interval		= sd_weight,
		.max_interval		= 2*sd_weight,
		.busy_factor		= 32,
		.imbalance_pct		= 125,

		.cache_nice_tries	= 0,

		.flags			= 1*SD_LOAD_BALANCE
					| 1*SD_BALANCE_NEWIDLE
					| 1*SD_BALANCE_EXEC
					| 1*SD_BALANCE_FORK
					| 0*SD_BALANCE_WAKE
					| 1*SD_WAKE_AFFINE
					| 0*SD_SHARE_CPUCAPACITY
					| 0*SD_SHARE_PKG_RESOURCES
					| 0*SD_SERIALIZE
					| 1*SD_PREFER_SIBLING
					| 0*SD_NUMA
					| sd_flags
					,

		.last_balance		= jiffies,
		.balance_interval	= sd_weight,
		.max_newidle_lb_cost	= 0,
		.next_decay_max_lb_cost	= jiffies,
		.child			= child,
#ifdef CONFIG_SCHED_DEBUG
		.name			= tl->name,
#endif
	};

	cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
	sd_id = cpumask_first(sched_domain_span(sd));

	/*
	 * Convert topological properties into behaviour.
	 */
	if (sd->flags & SD_ASYM_CPUCAPACITY) {
		struct sched_domain *t = sd;

		/*
		 * Don't attempt to spread across CPUs of different capacities.
		 */
		if (sd->child)
			sd->child->flags &= ~SD_PREFER_SIBLING;

		for_each_lower_domain(t)
			t->flags |= SD_BALANCE_WAKE;
	}

	if (sd->flags & SD_SHARE_CPUCAPACITY) {
		sd->imbalance_pct = 110;

	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
		sd->imbalance_pct = 117;
		sd->cache_nice_tries = 1;

#ifdef CONFIG_NUMA
	} else if (sd->flags & SD_NUMA) {
		sd->cache_nice_tries = 2;

		sd->flags &= ~SD_PREFER_SIBLING;
		sd->flags |= SD_SERIALIZE;
		if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
			sd->flags &= ~(SD_BALANCE_EXEC |
				       SD_BALANCE_FORK |
				       SD_WAKE_AFFINE);
		}

#endif
	} else {
		sd->cache_nice_tries = 1;
	}

	/*
	 * For all levels sharing cache; connect a sched_domain_shared
	 * instance.
	 */
	if (sd->flags & SD_SHARE_PKG_RESOURCES) {
		sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
		atomic_inc(&sd->shared->ref);
		atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
	}

	sd->private = sdd;

	return sd;
}

/*
 * Topology list, bottom-up.
 */
static struct sched_domain_topology_level default_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

static struct sched_domain_topology_level *sched_domain_topology =
	default_topology;

#define for_each_sd_topology(tl)			\
	for (tl = sched_domain_topology; tl->mask; tl++)

void set_sched_topology(struct sched_domain_topology_level *tl)
{
	if (WARN_ON_ONCE(sched_smp_initialized))
		return;

	sched_domain_topology = tl;
}

#ifdef CONFIG_NUMA

static const struct cpumask *sd_numa_mask(int cpu)
{
	return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
}

static void sched_numa_warn(const char *str)
{
	static int done = false;
	int i, j;

	if (done)
		return;

	done = true;

	printk(KERN_WARNING "ERROR: %s\n\n", str);

	for (i = 0; i < nr_node_ids; i++) {
		printk(KERN_WARNING "  ");
		for (j = 0; j < nr_node_ids; j++)
			printk(KERN_CONT "%02d ", node_distance(i, j));
		printk(KERN_CONT "\n");
	}
	printk(KERN_WARNING "\n");
}

bool find_numa_distance(int distance)
{
	int i;

	if (distance == node_distance(0, 0))
		return true;

	for (i = 0; i < sched_domains_numa_levels; i++) {
		if (sched_domains_numa_distance[i] == distance)
			return true;
	}

	return false;
}
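
/*
 * Classify the NUMA interconnect topology (consumed by the NUMA balancing
 * code):
 *
 *  - NUMA_DIRECT:        at most two distance levels; all nodes are directly
 *                        connected;
 *  - NUMA_GLUELESS_MESH: the most distant node pairs can still reach each
 *                        other through an intermediate node (some node C is
 *                        closer than the maximum distance to both A and B);
 *  - NUMA_BACKPLANE:     the most distant nodes can only be reached through
 *                        a backplane, not via another node.
 */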
static void init_numa_topology_type(void)
{
	int a, b, c, n;

	n = sched_max_numa_distance;

	if (sched_domains_numa_levels <= 2) {
		sched_numa_topology_type = NUMA_DIRECT;
		return;
	}

	for_each_online_node(a) {
		for_each_online_node(b) {
			/* Find two nodes furthest removed from each other: */
			if (node_distance(a, b) < n)
				continue;

			/* Is there an intermediary node between a and b? */
			for_each_online_node(c) {
				if (node_distance(a, c) < n &&
				    node_distance(b, c) < n) {
					sched_numa_topology_type =
							NUMA_GLUELESS_MESH;
					return;
				}
			}

			sched_numa_topology_type = NUMA_BACKPLANE;
			return;
		}
	}
}

void sched_init_numa(void)
{
	int next_distance, curr_distance = node_distance(0, 0);
	struct sched_domain_topology_level *tl;
	int level = 0;
	int i, j, k;

	sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL);
	if (!sched_domains_numa_distance)
		return;

	/* Includes the NUMA identity (node-local) distance at level 0: */
	sched_domains_numa_distance[level++] = curr_distance;
	sched_domains_numa_levels = level;

	/*
	 * O(nr_nodes^2) deduplicating selection sort -- in order to find the
	 * unique distances in the node_distance() table.
	 *
	 * Assumes a symmetric distance table: node_distance(i, j) ==
	 * node_distance(j, i). Violations are only reported when
	 * sched_debug() is enabled.
	 */
	next_distance = curr_distance;
	for (i = 0; i < nr_node_ids; i++) {
		for (j = 0; j < nr_node_ids; j++) {
			for (k = 0; k < nr_node_ids; k++) {
				int distance = node_distance(i, k);

				if (distance > curr_distance &&
				    (distance < next_distance ||
				     next_distance == curr_distance))
					next_distance = distance;

				/*
				 * While not a strong assumption, it would be
				 * nice to know about cases where node A is
				 * connected to B, but B is not equally
				 * connected to A.
				 */
				if (sched_debug() && node_distance(k, i) != distance)
					sched_numa_warn("Node-distance not symmetric");

				if (sched_debug() && i && !find_numa_distance(distance))
					sched_numa_warn("Node-0 not representative");
			}
			if (next_distance != curr_distance) {
				sched_domains_numa_distance[level++] = next_distance;
				sched_domains_numa_levels = level;
				curr_distance = next_distance;
			} else break;
		}

		/*
		 * Only keep scanning all nodes when sched_debug() is set;
		 * otherwise node 0 is taken as representative.
		 */
		if (!sched_debug())
			break;
	}

	/*
	 * 'level' now contains the number of unique distances and
	 * sched_domains_numa_distance[] the actual distance values.
	 *
	 * Reset sched_domains_numa_levels to 0 while the per-level masks are
	 * being built, so sched_domains_numa_masks_set()/clear() never index
	 * a partially populated array; it is set back to 'level' once
	 * everything is in place.
	 */
	sched_domains_numa_levels = 0;

	sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
	if (!sched_domains_numa_masks)
		return;

	/*
	 * Now for each level, construct a mask per node which contains all
	 * CPUs of nodes within that level's distance.
	 */
	for (i = 0; i < level; i++) {
		sched_domains_numa_masks[i] =
			kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
		if (!sched_domains_numa_masks[i])
			return;

		for (j = 0; j < nr_node_ids; j++) {
			struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
			if (!mask)
				return;

			sched_domains_numa_masks[i][j] = mask;

			for_each_node(k) {
				if (node_distance(j, k) > sched_domains_numa_distance[i])
					continue;

				cpumask_or(mask, mask, cpumask_of_node(k));
			}
		}
	}

	/* Count the default topology levels: */
	for (i = 0; sched_domain_topology[i].mask; i++);

	tl = kzalloc((i + level + 1) *
			sizeof(struct sched_domain_topology_level), GFP_KERNEL);
	if (!tl)
		return;

	/*
	 * Copy the default topology bits..
	 */
	for (i = 0; sched_domain_topology[i].mask; i++)
		tl[i] = sched_domain_topology[i];

	/*
	 * Add the NUMA identity distance, aka single NODE.
	 */
	tl[i++] = (struct sched_domain_topology_level){
		.mask = sd_numa_mask,
		.numa_level = 0,
		SD_INIT_NAME(NODE)
	};

	/*
	 * .. and append one level per remaining unique NUMA distance.
	 */
	for (j = 1; j < level; i++, j++) {
		tl[i] = (struct sched_domain_topology_level){
			.mask = sd_numa_mask,
			.sd_flags = cpu_numa_flags,
			.flags = SDTL_OVERLAP,
			.numa_level = j,
			SD_INIT_NAME(NUMA)
		};
	}

	sched_domain_topology = tl;

	sched_domains_numa_levels = level;
	sched_max_numa_distance = sched_domains_numa_distance[level - 1];

	init_numa_topology_type();
}

void sched_domains_numa_masks_set(unsigned int cpu)
{
	int node = cpu_to_node(cpu);
	int i, j;

	for (i = 0; i < sched_domains_numa_levels; i++) {
		for (j = 0; j < nr_node_ids; j++) {
			if (node_distance(j, node) <= sched_domains_numa_distance[i])
				cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
		}
	}
}

void sched_domains_numa_masks_clear(unsigned int cpu)
{
	int i, j;

	for (i = 0; i < sched_domains_numa_levels; i++) {
		for (j = 0; j < nr_node_ids; j++)
			cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
	}
}

#endif /* CONFIG_NUMA */

static int __sdt_alloc(const struct cpumask *cpu_map)
{
	struct sched_domain_topology_level *tl;
	int j;

	for_each_sd_topology(tl) {
		struct sd_data *sdd = &tl->data;

		sdd->sd = alloc_percpu(struct sched_domain *);
		if (!sdd->sd)
			return -ENOMEM;

		sdd->sds = alloc_percpu(struct sched_domain_shared *);
		if (!sdd->sds)
			return -ENOMEM;

		sdd->sg = alloc_percpu(struct sched_group *);
		if (!sdd->sg)
			return -ENOMEM;

		sdd->sgc = alloc_percpu(struct sched_group_capacity *);
		if (!sdd->sgc)
			return -ENOMEM;

		for_each_cpu(j, cpu_map) {
			struct sched_domain *sd;
			struct sched_domain_shared *sds;
			struct sched_group *sg;
			struct sched_group_capacity *sgc;

			sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
					  GFP_KERNEL, cpu_to_node(j));
			if (!sd)
				return -ENOMEM;

			*per_cpu_ptr(sdd->sd, j) = sd;

			sds = kzalloc_node(sizeof(struct sched_domain_shared),
					   GFP_KERNEL, cpu_to_node(j));
			if (!sds)
				return -ENOMEM;

			*per_cpu_ptr(sdd->sds, j) = sds;

			sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
					  GFP_KERNEL, cpu_to_node(j));
			if (!sg)
				return -ENOMEM;

			sg->next = sg;

			*per_cpu_ptr(sdd->sg, j) = sg;

			sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(),
					   GFP_KERNEL, cpu_to_node(j));
			if (!sgc)
				return -ENOMEM;

#ifdef CONFIG_SCHED_DEBUG
			sgc->id = j;
#endif

			*per_cpu_ptr(sdd->sgc, j) = sgc;
		}
	}

	return 0;
}

static void __sdt_free(const struct cpumask *cpu_map)
{
	struct sched_domain_topology_level *tl;
	int j;

	for_each_sd_topology(tl) {
		struct sd_data *sdd = &tl->data;

		for_each_cpu(j, cpu_map) {
			struct sched_domain *sd;

			if (sdd->sd) {
				sd = *per_cpu_ptr(sdd->sd, j);
				if (sd && (sd->flags & SD_OVERLAP))
					free_sched_groups(sd->groups, 0);
				kfree(*per_cpu_ptr(sdd->sd, j));
			}

			if (sdd->sds)
				kfree(*per_cpu_ptr(sdd->sds, j));
			if (sdd->sg)
				kfree(*per_cpu_ptr(sdd->sg, j));
			if (sdd->sgc)
				kfree(*per_cpu_ptr(sdd->sgc, j));
		}
		free_percpu(sdd->sd);
		sdd->sd = NULL;
		free_percpu(sdd->sds);
		sdd->sds = NULL;
		free_percpu(sdd->sg);
		sdd->sg = NULL;
		free_percpu(sdd->sgc);
		sdd->sgc = NULL;
	}
}

static struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
		const struct cpumask *cpu_map, struct sched_domain_attr *attr,
		struct sched_domain *child, int dflags, int cpu)
{
	struct sched_domain *sd = sd_init(tl, cpu_map, child, dflags, cpu);

	if (child) {
		sd->level = child->level + 1;
		sched_domain_level_max = max(sched_domain_level_max, sd->level);
		child->parent = sd;

		if (!cpumask_subset(sched_domain_span(child),
				    sched_domain_span(sd))) {
			pr_err("BUG: arch topology borken\n");
#ifdef CONFIG_SCHED_DEBUG
			pr_err("     the %s domain not a subset of the %s domain\n",
					child->name, sd->name);
#endif
			/* Fixup, ensure @sd has at least @child CPUs. */
			cpumask_or(sched_domain_span(sd),
				   sched_domain_span(sd),
				   sched_domain_span(child));
		}

	}
	set_domain_attribute(sd, attr);

	return sd;
}

/*
 * Find the sched_domain_topology_level where all CPU capacities are visible
 * for all CPUs.
 */
static struct sched_domain_topology_level
*asym_cpu_capacity_level(const struct cpumask *cpu_map)
{
	int i, j, asym_level = 0;
	bool asym = false;
	struct sched_domain_topology_level *tl, *asym_tl = NULL;
	unsigned long cap;

	/* Is there any asymmetry? */
	cap = arch_scale_cpu_capacity(cpumask_first(cpu_map));

	for_each_cpu(i, cpu_map) {
		if (arch_scale_cpu_capacity(i) != cap) {
			asym = true;
			break;
		}
	}

	if (!asym)
		return NULL;

	/*
	 * Examine the topology from every CPU's point of view to detect the
	 * lowest sched_domain_topology_level where a highest-capacity CPU is
	 * visible to everyone.
	 */
	for_each_cpu(i, cpu_map) {
		unsigned long max_capacity = arch_scale_cpu_capacity(i);
		int tl_id = 0;

		for_each_sd_topology(tl) {
			if (tl_id < asym_level)
				goto next_level;

			for_each_cpu_and(j, tl->mask(i), cpu_map) {
				unsigned long capacity;

				capacity = arch_scale_cpu_capacity(j);

				if (capacity <= max_capacity)
					continue;

				max_capacity = capacity;
				asym_level = tl_id;
				asym_tl = tl;
			}
next_level:
			tl_id++;
		}
	}

	return asym_tl;
}

/*
 * Build sched domains for a given set of CPUs and attach the sched domains
 * to the individual CPUs.
 */
static int
build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
{
	enum s_alloc alloc_state;
	struct sched_domain *sd;
	struct s_data d;
	struct rq *rq = NULL;
	int i, ret = -ENOMEM;
	struct sched_domain_topology_level *tl_asym;
	bool has_asym = false;

	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
	if (alloc_state != sa_rootdomain)
		goto error;

	tl_asym = asym_cpu_capacity_level(cpu_map);

	/* Set up domains for CPUs specified by the cpu_map: */
	for_each_cpu(i, cpu_map) {
		struct sched_domain_topology_level *tl;

		sd = NULL;
		for_each_sd_topology(tl) {
			int dflags = 0;

			if (tl == tl_asym) {
				dflags |= SD_ASYM_CPUCAPACITY;
				has_asym = true;
			}

			sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);

			if (tl == sched_domain_topology)
				*per_cpu_ptr(d.sd, i) = sd;
			if (tl->flags & SDTL_OVERLAP)
				sd->flags |= SD_OVERLAP;
			if (cpumask_equal(cpu_map, sched_domain_span(sd)))
				break;
		}
	}

	/* Build the groups for the domains: */
	for_each_cpu(i, cpu_map) {
		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
			sd->span_weight = cpumask_weight(sched_domain_span(sd));
			if (sd->flags & SD_OVERLAP) {
				if (build_overlap_sched_groups(sd, i))
					goto error;
			} else {
				if (build_sched_groups(sd, i))
					goto error;
			}
		}
	}

	/* Calculate CPU capacity for physical packages and nodes: */
	for (i = nr_cpumask_bits-1; i >= 0; i--) {
		if (!cpumask_test_cpu(i, cpu_map))
			continue;

		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
			claim_allocations(i, sd);
			init_sched_groups_capacity(i, sd);
		}
	}

	/* Attach the domains: */
	rcu_read_lock();
	for_each_cpu(i, cpu_map) {
		rq = cpu_rq(i);
		sd = *per_cpu_ptr(d.sd, i);

		/* Record the maximum CPU capacity seen in this root domain: */
		if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
			WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);

		cpu_attach_domain(sd, d.rd, i);
	}
	rcu_read_unlock();

	if (has_asym)
		static_branch_enable_cpuslocked(&sched_asym_cpucapacity);

	if (rq && sched_debug_enabled) {
		pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
			cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
	}

	ret = 0;
error:
	__free_domain_allocs(&d, alloc_state, cpu_map);

	return ret;
}

/* Current sched domains: */
static cpumask_var_t *doms_cur;

/* Number of sched domains in 'doms_cur': */
static int ndoms_cur;

/* Attributes of custom domains in 'doms_cur': */
static struct sched_domain_attr *dattr_cur;

/*
 * Special case: if allocating the doms_cur partition (array of cpumasks)
 * fails, fall back to a single sched domain, as determined by the single
 * cpumask fallback_doms.
 */
static cpumask_var_t fallback_doms;

/*
 * arch_update_cpu_topology() lets virtualized architectures update the
 * CPU core maps. It is supposed to return 1 if the topology changed
 * or 0 if it stayed the same.
 */
int __weak arch_update_cpu_topology(void)
{
	return 0;
}

cpumask_var_t *alloc_sched_domains(unsigned int ndoms)
{
	int i;
	cpumask_var_t *doms;

	doms = kmalloc_array(ndoms, sizeof(*doms), GFP_KERNEL);
	if (!doms)
		return NULL;
	for (i = 0; i < ndoms; i++) {
		if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
			free_sched_domains(doms, i);
			return NULL;
		}
	}
	return doms;
}

void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
{
	unsigned int i;

	for (i = 0; i < ndoms; i++)
		free_cpumask_var(doms[i]);
	kfree(doms);
}

/*
 * Set up scheduler domains and groups. For now this just excludes isolated
 * CPUs, but could be used to exclude other special cases in the future.
 */
int sched_init_domains(const struct cpumask *cpu_map)
{
	int err;

	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_KERNEL);
	zalloc_cpumask_var(&sched_domains_tmpmask2, GFP_KERNEL);
	zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);

	arch_update_cpu_topology();
	ndoms_cur = 1;
	doms_cur = alloc_sched_domains(ndoms_cur);
	if (!doms_cur)
		doms_cur = &fallback_doms;
	cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
	err = build_sched_domains(doms_cur[0], NULL);
	register_sched_domain_sysctl();

	return err;
}

/*
 * Detach sched domains from a group of CPUs specified in cpu_map.
 * These CPUs will now be attached to the NULL domain.
 */
static void detach_destroy_domains(const struct cpumask *cpu_map)
{
	int i;

	rcu_read_lock();
	for_each_cpu(i, cpu_map)
		cpu_attach_domain(NULL, &def_root_domain, i);
	rcu_read_unlock();
}

/* handle null as "default" */
static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
			struct sched_domain_attr *new, int idx_new)
{
	struct sched_domain_attr tmp;

	/* Fast path: */
	if (!new && !cur)
		return 1;

	tmp = SD_ATTR_INIT;

	return !memcmp(cur ? (cur + idx_cur) : &tmp,
		       new ? (new + idx_new) : &tmp,
		       sizeof(struct sched_domain_attr));
}

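/*
 * partition_sched_domains() repartitions the scheduler domains: doms_new[]
 * describes the new set of exclusive, non-overlapping cpumasks and dattr_new
 * the matching attributes. Domains in doms_cur[] without an identical
 * counterpart in doms_new[] are destroyed, domains in doms_new[] without an
 * identical counterpart in doms_cur[] are built, and (under EAS) perf
 * domains are rebuilt for root domains that changed. Passing doms_new ==
 * NULL rebuilds a single domain spanning all active housekeeping CPUs.
 * Called with the hotplug lock held, e.g. from the cpuset code.
 */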
void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
			     struct sched_domain_attr *dattr_new)
{
	bool __maybe_unused has_eas = false;
	int i, j, n;
	int new_topology;

	mutex_lock(&sched_domains_mutex);

	/* Always unregister in case we don't destroy any domains: */
	unregister_sched_domain_sysctl();

	/* Let the architecture update CPU core mappings: */
	new_topology = arch_update_cpu_topology();

	if (!doms_new) {
		WARN_ON_ONCE(dattr_new);
		n = 0;
		doms_new = alloc_sched_domains(1);
		if (doms_new) {
			n = 1;
			cpumask_and(doms_new[0], cpu_active_mask,
				    housekeeping_cpumask(HK_FLAG_DOMAIN));
		}
	} else {
		n = ndoms_new;
	}

	/* Destroy deleted domains: */
	for (i = 0; i < ndoms_cur; i++) {
		for (j = 0; j < n && !new_topology; j++) {
			if (cpumask_equal(doms_cur[i], doms_new[j]) &&
			    dattrs_equal(dattr_cur, i, dattr_new, j))
				goto match1;
		}
		/* No match - a current sched domain not in new doms_new[] */
		detach_destroy_domains(doms_cur[i]);
match1:
		;
	}

	n = ndoms_cur;
	if (!doms_new) {
		n = 0;
		doms_new = &fallback_doms;
		cpumask_and(doms_new[0], cpu_active_mask,
			    housekeeping_cpumask(HK_FLAG_DOMAIN));
	}

	/* Build new domains: */
	for (i = 0; i < ndoms_new; i++) {
		for (j = 0; j < n && !new_topology; j++) {
			if (cpumask_equal(doms_new[i], doms_cur[j]) &&
			    dattrs_equal(dattr_new, i, dattr_cur, j))
				goto match2;
		}
		/* No match - add a new doms_new */
		build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
match2:
		;
	}

#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
	/* Build perf domains: */
	for (i = 0; i < ndoms_new; i++) {
		for (j = 0; j < n && !sched_energy_update; j++) {
			if (cpumask_equal(doms_new[i], doms_cur[j]) &&
			    cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) {
				has_eas = true;
				goto match3;
			}
		}
		/* No match - add perf domains for a new root domain */
		has_eas |= build_perf_domains(doms_new[i]);
match3:
		;
	}
	sched_energy_set(has_eas);
#endif

	/* Remember the new sched domains: */
	if (doms_cur != &fallback_doms)
		free_sched_domains(doms_cur, ndoms_cur);

	kfree(dattr_cur);
	doms_cur = doms_new;
	dattr_cur = dattr_new;
	ndoms_cur = ndoms_new;

	register_sched_domain_sysctl();

	mutex_unlock(&sched_domains_mutex);
}