// SPDX-License-Identifier: GPL-2.0
/*
 * Scheduler topology setup/handling methods
 */
#include "sched.h"
6
7DEFINE_MUTEX(sched_domains_mutex);

/* Protected by sched_domains_mutex: */
static cpumask_var_t sched_domains_tmpmask;
static cpumask_var_t sched_domains_tmpmask2;
12
13#ifdef CONFIG_SCHED_DEBUG
14
15static int __init sched_debug_setup(char *str)
16{
17 sched_debug_enabled = true;
18
19 return 0;
20}
21early_param("sched_debug", sched_debug_setup);
22
23static inline bool sched_debug(void)
24{
25 return sched_debug_enabled;
26}
27
28static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
29 struct cpumask *groupmask)
30{
31 struct sched_group *group = sd->groups;
32
33 cpumask_clear(groupmask);
34
35 printk(KERN_DEBUG "%*s domain-%d: ", level, "", level);
36
37 if (!(sd->flags & SD_LOAD_BALANCE)) {
38 printk("does not load-balance\n");
39 if (sd->parent)
40 printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent");
41 return -1;
42 }
43
44 printk(KERN_CONT "span=%*pbl level=%s\n",
45 cpumask_pr_args(sched_domain_span(sd)), sd->name);
46
47 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
48 printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu);
49 }
50 if (group && !cpumask_test_cpu(cpu, sched_group_span(group))) {
51 printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);
52 }
53
54 printk(KERN_DEBUG "%*s groups:", level + 1, "");
55 do {
56 if (!group) {
57 printk("\n");
58 printk(KERN_ERR "ERROR: group is NULL\n");
59 break;
60 }
61
62 if (!cpumask_weight(sched_group_span(group))) {
63 printk(KERN_CONT "\n");
64 printk(KERN_ERR "ERROR: empty group\n");
65 break;
66 }
67
68 if (!(sd->flags & SD_OVERLAP) &&
69 cpumask_intersects(groupmask, sched_group_span(group))) {
70 printk(KERN_CONT "\n");
71 printk(KERN_ERR "ERROR: repeated CPUs\n");
72 break;
73 }
74
75 cpumask_or(groupmask, groupmask, sched_group_span(group));
76
77 printk(KERN_CONT " %d:{ span=%*pbl",
78 group->sgc->id,
79 cpumask_pr_args(sched_group_span(group)));
80
81 if ((sd->flags & SD_OVERLAP) &&
82 !cpumask_equal(group_balance_mask(group), sched_group_span(group))) {
83 printk(KERN_CONT " mask=%*pbl",
84 cpumask_pr_args(group_balance_mask(group)));
85 }
86
87 if (group->sgc->capacity != SCHED_CAPACITY_SCALE)
88 printk(KERN_CONT " cap=%lu", group->sgc->capacity);
89
90 if (group == sd->groups && sd->child &&
91 !cpumask_equal(sched_domain_span(sd->child),
92 sched_group_span(group))) {
93 printk(KERN_ERR "ERROR: domain->groups does not match domain->child\n");
94 }
95
96 printk(KERN_CONT " }");
97
98 group = group->next;
99
100 if (group != sd->groups)
101 printk(KERN_CONT ",");
102
103 } while (group != sd->groups);
104 printk(KERN_CONT "\n");
105
106 if (!cpumask_equal(sched_domain_span(sd), groupmask))
107 printk(KERN_ERR "ERROR: groups don't span domain->span\n");
108
109 if (sd->parent &&
110 !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
111 printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n");
112 return 0;
113}
114
115static void sched_domain_debug(struct sched_domain *sd, int cpu)
116{
117 int level = 0;
118
119 if (!sched_debug_enabled)
120 return;
121
122 if (!sd) {
123 printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
124 return;
125 }
126
127 printk(KERN_DEBUG "CPU%d attaching sched-domain(s):\n", cpu);
128
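	/* Walk up the domain hierarchy, printing one entry per level. */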
129 for (;;) {
130 if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))
131 break;
132 level++;
133 sd = sd->parent;
134 if (!sd)
135 break;
136 }
137}
138#else
139
140# define sched_debug_enabled 0
141# define sched_domain_debug(sd, cpu) do { } while (0)
142static inline bool sched_debug(void)
143{
144 return false;
145}
146#endif
147
148static int sd_degenerate(struct sched_domain *sd)
149{
150 if (cpumask_weight(sched_domain_span(sd)) == 1)
151 return 1;

	/* Following flags need at least 2 groups */
154 if (sd->flags & (SD_LOAD_BALANCE |
155 SD_BALANCE_NEWIDLE |
156 SD_BALANCE_FORK |
157 SD_BALANCE_EXEC |
158 SD_SHARE_CPUCAPACITY |
159 SD_ASYM_CPUCAPACITY |
160 SD_SHARE_PKG_RESOURCES |
161 SD_SHARE_POWERDOMAIN)) {
162 if (sd->groups != sd->groups->next)
163 return 0;
164 }

	/* Following flags don't use groups */
167 if (sd->flags & (SD_WAKE_AFFINE))
168 return 0;
169
170 return 1;
171}
172
173static int
174sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
175{
176 unsigned long cflags = sd->flags, pflags = parent->flags;
177
178 if (sd_degenerate(parent))
179 return 1;
180
181 if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
182 return 0;

	/* Flags needing groups don't count if only 1 group in parent */
185 if (parent->groups == parent->groups->next) {
186 pflags &= ~(SD_LOAD_BALANCE |
187 SD_BALANCE_NEWIDLE |
188 SD_BALANCE_FORK |
189 SD_BALANCE_EXEC |
190 SD_ASYM_CPUCAPACITY |
191 SD_SHARE_CPUCAPACITY |
192 SD_SHARE_PKG_RESOURCES |
193 SD_PREFER_SIBLING |
194 SD_SHARE_POWERDOMAIN);
195 if (nr_node_ids == 1)
196 pflags &= ~SD_SERIALIZE;
197 }
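	/*
	 * Flags set in the parent but missing in the child mean the parent
	 * still adds behaviour and is not degenerate.
	 */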
198 if (~cflags & pflags)
199 return 0;
200
201 return 1;
202}
203
204#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
205DEFINE_STATIC_KEY_FALSE(sched_energy_present);
206unsigned int sysctl_sched_energy_aware = 1;
207DEFINE_MUTEX(sched_energy_mutex);
208bool sched_energy_update;
209
210#ifdef CONFIG_PROC_SYSCTL
211int sched_energy_aware_handler(struct ctl_table *table, int write,
212 void __user *buffer, size_t *lenp, loff_t *ppos)
213{
214 int ret, state;
215
216 if (write && !capable(CAP_SYS_ADMIN))
217 return -EPERM;
218
219 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
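	/*
	 * On a successful write, rebuild the sched domains (and hence the
	 * perf domains) only if the requested state differs from the
	 * current state of the sched_energy_present static key.
	 */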
220 if (!ret && write) {
221 state = static_branch_unlikely(&sched_energy_present);
222 if (state != sysctl_sched_energy_aware) {
223 mutex_lock(&sched_energy_mutex);
224 sched_energy_update = 1;
225 rebuild_sched_domains();
226 sched_energy_update = 0;
227 mutex_unlock(&sched_energy_mutex);
228 }
229 }
230
231 return ret;
232}
233#endif
234
235static void free_pd(struct perf_domain *pd)
236{
237 struct perf_domain *tmp;
238
239 while (pd) {
240 tmp = pd->next;
241 kfree(pd);
242 pd = tmp;
243 }
244}
245
246static struct perf_domain *find_pd(struct perf_domain *pd, int cpu)
247{
248 while (pd) {
249 if (cpumask_test_cpu(cpu, perf_domain_span(pd)))
250 return pd;
251 pd = pd->next;
252 }
253
254 return NULL;
255}
256
257static struct perf_domain *pd_init(int cpu)
258{
259 struct em_perf_domain *obj = em_cpu_get(cpu);
260 struct perf_domain *pd;
261
262 if (!obj) {
263 if (sched_debug())
264 pr_info("%s: no EM found for CPU%d\n", __func__, cpu);
265 return NULL;
266 }
267
268 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
269 if (!pd)
270 return NULL;
271 pd->em_pd = obj;
272
273 return pd;
274}
275
276static void perf_domain_debug(const struct cpumask *cpu_map,
277 struct perf_domain *pd)
278{
279 if (!sched_debug() || !pd)
280 return;
281
282 printk(KERN_DEBUG "root_domain %*pbl:", cpumask_pr_args(cpu_map));
283
284 while (pd) {
285 printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_cstate=%d }",
286 cpumask_first(perf_domain_span(pd)),
287 cpumask_pr_args(perf_domain_span(pd)),
288 em_pd_nr_cap_states(pd->em_pd));
289 pd = pd->next;
290 }
291
292 printk(KERN_CONT "\n");
293}
294
295static void destroy_perf_domain_rcu(struct rcu_head *rp)
296{
297 struct perf_domain *pd;
298
299 pd = container_of(rp, struct perf_domain, rcu);
300 free_pd(pd);
301}
302
303static void sched_energy_set(bool has_eas)
304{
305 if (!has_eas && static_branch_unlikely(&sched_energy_present)) {
306 if (sched_debug())
307 pr_info("%s: stopping EAS\n", __func__);
308 static_branch_disable_cpuslocked(&sched_energy_present);
309 } else if (has_eas && !static_branch_unlikely(&sched_energy_present)) {
310 if (sched_debug())
311 pr_info("%s: starting EAS\n", __func__);
312 static_branch_enable_cpuslocked(&sched_energy_present);
313 }
314}
315

/*
 * EAS can be used on a root domain if it meets all the following conditions:
 *    1. an Energy Model (EM) is available;
 *    2. the SD_ASYM_CPUCAPACITY flag is set in the sched_domain hierarchy;
 *    3. the EM complexity is low enough to keep scheduling overheads low;
 *    4. schedutil is driving the frequency of all CPUs of the rd.
 *
 * The complexity of the Energy Model is defined as:
 *
 *              C = nr_pd * (nr_cpus + nr_cs)
 *
 * with parameters defined as:
 *  - nr_pd:    the number of performance domains
 *  - nr_cpus:  the number of CPUs
 *  - nr_cs:    the sum of the number of capacity states of all performance
 *              domains (for example, on a system with 2 performance domains,
 *              with 10 capacity states each, nr_cs = 2 * 10 = 20).
 *
 * It is generally not a good idea to use such a model on very complex
 * platforms because of the associated scheduling overheads. The arbitrary
 * constraint below prevents that.
 */
#define EM_MAX_COMPLEXITY 2048
340
341extern struct cpufreq_governor schedutil_gov;
342static bool build_perf_domains(const struct cpumask *cpu_map)
343{
344 int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map);
345 struct perf_domain *pd = NULL, *tmp;
346 int cpu = cpumask_first(cpu_map);
347 struct root_domain *rd = cpu_rq(cpu)->rd;
348 struct cpufreq_policy *policy;
349 struct cpufreq_governor *gov;
350
351 if (!sysctl_sched_energy_aware)
352 goto free;

	/* EAS is enabled for asymmetric CPU capacity topologies. */
355 if (!per_cpu(sd_asym_cpucapacity, cpu)) {
356 if (sched_debug()) {
357 pr_info("rd %*pbl: CPUs do not have asymmetric capacities\n",
358 cpumask_pr_args(cpu_map));
359 }
360 goto free;
361 }
362
363 for_each_cpu(i, cpu_map) {
364
365 if (find_pd(pd, i))
366 continue;
367
368
369 policy = cpufreq_cpu_get(i);
370 if (!policy)
371 goto free;
372 gov = policy->governor;
373 cpufreq_cpu_put(policy);
374 if (gov != &schedutil_gov) {
375 if (rd->pd)
376 pr_warn("rd %*pbl: Disabling EAS, schedutil is mandatory\n",
377 cpumask_pr_args(cpu_map));
378 goto free;
379 }
380
381
382 tmp = pd_init(i);
383 if (!tmp)
384 goto free;
385 tmp->next = pd;
386 pd = tmp;
387
388
389
390
391
392 nr_pd++;
393 nr_cs += em_pd_nr_cap_states(pd->em_pd);
394 }
395
396
397 if (nr_pd * (nr_cs + nr_cpus) > EM_MAX_COMPLEXITY) {
398 WARN(1, "rd %*pbl: Failed to start EAS, EM complexity is too high\n",
399 cpumask_pr_args(cpu_map));
400 goto free;
401 }
402
403 perf_domain_debug(cpu_map, pd);
404
405
406 tmp = rd->pd;
407 rcu_assign_pointer(rd->pd, pd);
408 if (tmp)
409 call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
410
411 return !!pd;
412
413free:
414 free_pd(pd);
415 tmp = rd->pd;
416 rcu_assign_pointer(rd->pd, NULL);
417 if (tmp)
418 call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
419
420 return false;
421}
422#else
423static void free_pd(struct perf_domain *pd) { }
424#endif
425
426static void free_rootdomain(struct rcu_head *rcu)
427{
428 struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
429
430 cpupri_cleanup(&rd->cpupri);
431 cpudl_cleanup(&rd->cpudl);
432 free_cpumask_var(rd->dlo_mask);
433 free_cpumask_var(rd->rto_mask);
434 free_cpumask_var(rd->online);
435 free_cpumask_var(rd->span);
436 free_pd(rd->pd);
437 kfree(rd);
438}
439
440void rq_attach_root(struct rq *rq, struct root_domain *rd)
441{
442 struct root_domain *old_rd = NULL;
443 unsigned long flags;
444
445 raw_spin_lock_irqsave(&rq->lock, flags);
446
447 if (rq->rd) {
448 old_rd = rq->rd;
449
450 if (cpumask_test_cpu(rq->cpu, old_rd->online))
451 set_rq_offline(rq);
452
453 cpumask_clear_cpu(rq->cpu, old_rd->span);

		/*
		 * If we dont want to free the old_rd yet then
		 * set old_rd to NULL to skip the freeing later
		 * in this function:
		 */
460 if (!atomic_dec_and_test(&old_rd->refcount))
461 old_rd = NULL;
462 }
463
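	/* Take a reference on the new root domain before publishing it. */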
464 atomic_inc(&rd->refcount);
465 rq->rd = rd;
466
467 cpumask_set_cpu(rq->cpu, rd->span);
468 if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
469 set_rq_online(rq);
470
471 raw_spin_unlock_irqrestore(&rq->lock, flags);
472
473 if (old_rd)
474 call_rcu(&old_rd->rcu, free_rootdomain);
475}
476
477void sched_get_rd(struct root_domain *rd)
478{
479 atomic_inc(&rd->refcount);
480}
481
482void sched_put_rd(struct root_domain *rd)
483{
484 if (!atomic_dec_and_test(&rd->refcount))
485 return;
486
487 call_rcu(&rd->rcu, free_rootdomain);
488}
489
490static int init_rootdomain(struct root_domain *rd)
491{
492 if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))
493 goto out;
494 if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL))
495 goto free_span;
496 if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
497 goto free_online;
498 if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
499 goto free_dlo_mask;
500
501#ifdef HAVE_RT_PUSH_IPI
502 rd->rto_cpu = -1;
503 raw_spin_lock_init(&rd->rto_lock);
504 init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
505#endif
506
507 init_dl_bw(&rd->dl_bw);
508 if (cpudl_init(&rd->cpudl) != 0)
509 goto free_rto_mask;
510
511 if (cpupri_init(&rd->cpupri) != 0)
512 goto free_cpudl;
513 return 0;
514
515free_cpudl:
516 cpudl_cleanup(&rd->cpudl);
517free_rto_mask:
518 free_cpumask_var(rd->rto_mask);
519free_dlo_mask:
520 free_cpumask_var(rd->dlo_mask);
521free_online:
522 free_cpumask_var(rd->online);
523free_span:
524 free_cpumask_var(rd->span);
525out:
526 return -ENOMEM;
527}
528
/*
 * By default the system creates a single root-domain with all CPUs as
 * members (mimicking the global state we have today).
 */
533struct root_domain def_root_domain;
534
535void init_defrootdomain(void)
536{
537 init_rootdomain(&def_root_domain);
538
539 atomic_set(&def_root_domain.refcount, 1);
540}
541
542static struct root_domain *alloc_rootdomain(void)
543{
544 struct root_domain *rd;
545
546 rd = kzalloc(sizeof(*rd), GFP_KERNEL);
547 if (!rd)
548 return NULL;
549
550 if (init_rootdomain(rd) != 0) {
551 kfree(rd);
552 return NULL;
553 }
554
555 return rd;
556}
557
558static void free_sched_groups(struct sched_group *sg, int free_sgc)
559{
560 struct sched_group *tmp, *first;
561
562 if (!sg)
563 return;
564
565 first = sg;
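	/* The group list is circular; walk it exactly once. */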
566 do {
567 tmp = sg->next;
568
569 if (free_sgc && atomic_dec_and_test(&sg->sgc->ref))
570 kfree(sg->sgc);
571
572 if (atomic_dec_and_test(&sg->ref))
573 kfree(sg);
574 sg = tmp;
575 } while (sg != first);
576}
577
578static void destroy_sched_domain(struct sched_domain *sd)
579{
	/*
	 * A normal sched domain may have multiple group references, an
	 * overlapping domain, having private groups, only one. Iterate
	 * no more than once to destroy overlapping groups.
	 */
585 free_sched_groups(sd->groups, 1);
586
587 if (sd->shared && atomic_dec_and_test(&sd->shared->ref))
588 kfree(sd->shared);
589 kfree(sd);
590}
591
592static void destroy_sched_domains_rcu(struct rcu_head *rcu)
593{
594 struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
595
596 while (sd) {
597 struct sched_domain *parent = sd->parent;
598 destroy_sched_domain(sd);
599 sd = parent;
600 }
601}
602
603static void destroy_sched_domains(struct sched_domain *sd)
604{
605 if (sd)
606 call_rcu(&sd->rcu, destroy_sched_domains_rcu);
607}
608

/*
 * Keep a special pointer to the highest sched_domain that has
 * SD_SHARE_PKG_RESOURCES set (Last Level Cache Domain) for this
 * allows us to avoid some pointer chasing in select_idle_sibling().
 *
 * Also keep a unique ID per domain (we use the first CPU number in
 * the cpumask of the domain), this allows us to quickly tell if
 * two CPUs are in the same cache domain, see cpus_share_cache().
 */
618DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
619DEFINE_PER_CPU(int, sd_llc_size);
620DEFINE_PER_CPU(int, sd_llc_id);
621DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
622DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
623DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
624DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
625DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
626
627static void update_top_cache_domain(int cpu)
628{
629 struct sched_domain_shared *sds = NULL;
630 struct sched_domain *sd;
631 int id = cpu;
632 int size = 1;
633
634 sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
635 if (sd) {
636 id = cpumask_first(sched_domain_span(sd));
637 size = cpumask_weight(sched_domain_span(sd));
638 sds = sd->shared;
639 }
640
641 rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
642 per_cpu(sd_llc_size, cpu) = size;
643 per_cpu(sd_llc_id, cpu) = id;
644 rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
645
646 sd = lowest_flag_domain(cpu, SD_NUMA);
647 rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
648
649 sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
650 rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);
651
652 sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY);
653 rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
654}
655

/*
 * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
 * hold the hotplug lock.
 */
660static void
661cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
662{
663 struct rq *rq = cpu_rq(cpu);
664 struct sched_domain *tmp;
665
	/* Remove the sched domains which do not contribute to scheduling. */
667 for (tmp = sd; tmp; ) {
668 struct sched_domain *parent = tmp->parent;
669 if (!parent)
670 break;
671
672 if (sd_parent_degenerate(tmp, parent)) {
673 tmp->parent = parent->parent;
674 if (parent->parent)
675 parent->parent->child = tmp;

			/*
			 * Transfer SD_PREFER_SIBLING down in case of a
			 * degenerate parent; the spans match for this
			 * so the property transfers.
			 */
681 if (parent->flags & SD_PREFER_SIBLING)
682 tmp->flags |= SD_PREFER_SIBLING;
683 destroy_sched_domain(parent);
684 } else
685 tmp = tmp->parent;
686 }
687
688 if (sd && sd_degenerate(sd)) {
689 tmp = sd;
690 sd = sd->parent;
691 destroy_sched_domain(tmp);
692 if (sd)
693 sd->child = NULL;
694 }
695
696 sched_domain_debug(sd, cpu);
697
698 rq_attach_root(rq, rd);
699 tmp = rq->sd;
700 rcu_assign_pointer(rq->sd, sd);
701 dirty_sched_domain_sysctl(cpu);
702 destroy_sched_domains(tmp);
703
704 update_top_cache_domain(cpu);
705}
706
707struct s_data {
708 struct sched_domain * __percpu *sd;
709 struct root_domain *rd;
710};
711
712enum s_alloc {
713 sa_rootdomain,
714 sa_sd,
715 sa_sd_storage,
716 sa_none,
717};
718

/*
 * Return the canonical balance CPU for this group, this is the first CPU
 * of this group's balance mask.
 */
int group_balance_cpu(struct sched_group *sg)
{
	return cpumask_first(group_balance_mask(sg));
}
732

/*
 * Usual topologies give every CPU of a domain the same view of the domain's
 * span, so one set of groups can be shared by all CPUs of that domain. On
 * NUMA machines with non-uniform node distances this is no longer true:
 * above the node level each CPU sees a different span, and such domains are
 * marked SD_OVERLAP (built from SDTL_OVERLAP topology levels).
 *
 * For overlapping domains every CPU builds its own circular list of groups
 * from the child domains of its siblings, skipping spans that are already
 * covered. A group's span can therefore contain CPUs whose own view of the
 * topology differs, and only the CPUs whose child domain span exactly
 * matches the group span may act as the balance point of the group. Those
 * CPUs form the group's balance mask (see build_balance_mask()), and
 * group_balance_cpu() - the first CPU of that mask - is the CPU that
 * load-balances on behalf of the group.
 *
 * The sched_group_capacity is keyed off the balance CPU, so all groups that
 * share a balance mask also share their capacity data.
 *
 * Build the balance mask; it contains only those CPUs that can arrive at
 * this group and should be considered to continue balancing.
 */
839static void
840build_balance_mask(struct sched_domain *sd, struct sched_group *sg, struct cpumask *mask)
841{
842 const struct cpumask *sg_span = sched_group_span(sg);
843 struct sd_data *sdd = sd->private;
844 struct sched_domain *sibling;
845 int i;
846
847 cpumask_clear(mask);
848
849 for_each_cpu(i, sg_span) {
850 sibling = *per_cpu_ptr(sdd->sd, i);

		/*
		 * Can happen in the asymmetric case, where these siblings are
		 * unused. The mask will not be empty because those CPUs that
		 * do have the top domain _should_ span the domain.
		 */
857 if (!sibling->child)
858 continue;
859
860
861 if (!cpumask_equal(sg_span, sched_domain_span(sibling->child)))
862 continue;
863
864 cpumask_set_cpu(i, mask);
865 }
866
867
868 WARN_ON_ONCE(cpumask_empty(mask));
869}
870

/*
 * Allocate a sched_group whose span is the child domain's span (or the
 * domain's own span at the lowest level). The group's cpumask storage
 * directly follows the structure, hence the cpumask_size() over-allocation.
 */
876static struct sched_group *
877build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
878{
879 struct sched_group *sg;
880 struct cpumask *sg_span;
881
882 sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
883 GFP_KERNEL, cpu_to_node(cpu));
884
885 if (!sg)
886 return NULL;
887
888 sg_span = sched_group_span(sg);
889 if (sd->child)
890 cpumask_copy(sg_span, sched_domain_span(sd->child));
891 else
892 cpumask_copy(sg_span, sched_domain_span(sd));
893
894 atomic_inc(&sg->ref);
895 return sg;
896}
897
898static void init_overlap_sched_group(struct sched_domain *sd,
899 struct sched_group *sg)
900{
901 struct cpumask *mask = sched_domains_tmpmask2;
902 struct sd_data *sdd = sd->private;
903 struct cpumask *sg_span;
904 int cpu;
905
906 build_balance_mask(sd, sg, mask);
907 cpu = cpumask_first_and(sched_group_span(sg), mask);
908
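	/* Groups sharing a balance mask share one sched_group_capacity. */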
909 sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
910 if (atomic_inc_return(&sg->sgc->ref) == 1)
911 cpumask_copy(group_balance_mask(sg), mask);
912 else
913 WARN_ON_ONCE(!cpumask_equal(group_balance_mask(sg), mask));
914
	/*
	 * Initialize sgc->capacity such that even if we mess up the
	 * domains and break things, we can still boot; the real values
	 * are computed later by update_group_capacity().
	 */
920 sg_span = sched_group_span(sg);
921 sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
922 sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
923 sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
924}
925
926static int
927build_overlap_sched_groups(struct sched_domain *sd, int cpu)
928{
929 struct sched_group *first = NULL, *last = NULL, *sg;
930 const struct cpumask *span = sched_domain_span(sd);
931 struct cpumask *covered = sched_domains_tmpmask;
932 struct sd_data *sdd = sd->private;
933 struct sched_domain *sibling;
934 int i;
935
936 cpumask_clear(covered);
937
938 for_each_cpu_wrap(i, span, cpu) {
939 struct cpumask *sg_span;
940
941 if (cpumask_test_cpu(i, covered))
942 continue;
943
944 sibling = *per_cpu_ptr(sdd->sd, i);

		/*
		 * Asymmetric node setups can result in situations where the
		 * domain tree is of unequal depth, make sure to skip domains
		 * that already cover the entire range.
		 *
		 * In that case build_sched_domains() will have terminated the
		 * iteration early and our sibling sd spans will be empty.
		 * Domains should always include the CPU where we're building
		 * the domains, see for_each_cpu_wrap().
		 */
956 if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
957 continue;
958
959 sg = build_group_from_child_sched_domain(sibling, cpu);
960 if (!sg)
961 goto fail;
962
963 sg_span = sched_group_span(sg);
964 cpumask_or(covered, covered, sg_span);
965
966 init_overlap_sched_group(sd, sg);
967
968 if (!first)
969 first = sg;
970 if (last)
971 last->next = sg;
972 last = sg;
973 last->next = first;
974 }
975 sd->groups = first;
976
977 return 0;
978
979fail:
980 free_sched_groups(first, 0);
981
982 return -ENOMEM;
983}
984

/*
 * The "regular" (non-overlapping) topology: every CPU in a domain sees the
 * same span, so the groups of a domain can be shared by all its CPUs.
 *
 * Each group corresponds to the span of a child domain (or to a single CPU
 * at the lowest level), and the group and its sched_group_capacity are
 * stored in the per-CPU sd_data slots of the group's first CPU. get_group()
 * looks that storage up, takes references on it, and initializes it the
 * first time the group is visited; the groups of a domain are then linked
 * into a circular list by build_sched_groups().
 *
 * For these groups the balance mask is simply the group span, and the group
 * capacity defaults to SCHED_CAPACITY_SCALE per CPU until
 * update_group_capacity() computes the real values.
 */
1057static struct sched_group *get_group(int cpu, struct sd_data *sdd)
1058{
1059 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
1060 struct sched_domain *child = sd->child;
1061 struct sched_group *sg;
1062 bool already_visited;
1063
1064 if (child)
1065 cpu = cpumask_first(sched_domain_span(child));
1066
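	/* The group reuses the sg/sgc storage of its first CPU. */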
1067 sg = *per_cpu_ptr(sdd->sg, cpu);
1068 sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
1069
	/* Increase refcounts for claim_allocations: */
	already_visited = atomic_inc_return(&sg->ref) > 1;
	/* sgc visits should follow a similar trend as sg */
	WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1));

	/* If we have already visited that group, it's already initialized. */
	if (already_visited)
		return sg;
1078
1079 if (child) {
1080 cpumask_copy(sched_group_span(sg), sched_domain_span(child));
1081 cpumask_copy(group_balance_mask(sg), sched_group_span(sg));
1082 } else {
1083 cpumask_set_cpu(cpu, sched_group_span(sg));
1084 cpumask_set_cpu(cpu, group_balance_mask(sg));
1085 }
1086
1087 sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
1088 sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
1089 sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
1090
1091 return sg;
1092}
1093

/*
 * build_sched_groups will build a circular linked list of the groups
 * covered by the given span, will set each group's ->cpumask correctly,
 * and will initialize their ->sgc.
 *
 * Assumes the sched_domain tree is fully constructed.
 */
1101static int
1102build_sched_groups(struct sched_domain *sd, int cpu)
1103{
1104 struct sched_group *first = NULL, *last = NULL;
1105 struct sd_data *sdd = sd->private;
1106 const struct cpumask *span = sched_domain_span(sd);
1107 struct cpumask *covered;
1108 int i;
1109
1110 lockdep_assert_held(&sched_domains_mutex);
1111 covered = sched_domains_tmpmask;
1112
1113 cpumask_clear(covered);
1114
1115 for_each_cpu_wrap(i, span, cpu) {
1116 struct sched_group *sg;
1117
1118 if (cpumask_test_cpu(i, covered))
1119 continue;
1120
1121 sg = get_group(i, sdd);
1122
1123 cpumask_or(covered, covered, sched_group_span(sg));
1124
1125 if (!first)
1126 first = sg;
1127 if (last)
1128 last->next = sg;
1129 last = sg;
1130 }
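	/* Close the circular list of groups. */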
1131 last->next = first;
1132 sd->groups = first;
1133
1134 return 0;
1135}
1136

/*
 * Initialize sched groups cpu_capacity.
 *
 * cpu_capacity indicates the capacity of sched group, which is used while
 * distributing the load between different sched groups in a sched domain.
 * Typically cpu_capacity for all the groups in a sched domain will be same
 * unless there are asymmetries in the topology. If there are asymmetries,
 * group having more cpu_capacity will pickup more load compared to the
 * group having less cpu_capacity.
 */
1147static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
1148{
1149 struct sched_group *sg = sd->groups;
1150
1151 WARN_ON(!sg);
1152
1153 do {
1154 int cpu, max_cpu = -1;
1155
1156 sg->group_weight = cpumask_weight(sched_group_span(sg));
1157
1158 if (!(sd->flags & SD_ASYM_PACKING))
1159 goto next;
1160
1161 for_each_cpu(cpu, sched_group_span(sg)) {
1162 if (max_cpu < 0)
1163 max_cpu = cpu;
1164 else if (sched_asym_prefer(cpu, max_cpu))
1165 max_cpu = cpu;
1166 }
1167 sg->asym_prefer_cpu = max_cpu;
1168
1169next:
1170 sg = sg->next;
1171 } while (sg != sd->groups);
1172
1173 if (cpu != group_balance_cpu(sg))
1174 return;
1175
1176 update_group_capacity(sd, cpu);
1177}
1178

/*
 * Initializers for schedule domains
 * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
 */
1184static int default_relax_domain_level = -1;
1185int sched_domain_level_max;
1186
1187static int __init setup_relax_domain_level(char *str)
1188{
1189 if (kstrtoint(str, 0, &default_relax_domain_level))
1190 pr_warn("Unable to set relax_domain_level\n");
1191
1192 return 1;
1193}
1194__setup("relax_domain_level=", setup_relax_domain_level);
1195
1196static void set_domain_attribute(struct sched_domain *sd,
1197 struct sched_domain_attr *attr)
1198{
1199 int request;
1200
1201 if (!attr || attr->relax_domain_level < 0) {
1202 if (default_relax_domain_level < 0)
1203 return;
1204 else
1205 request = default_relax_domain_level;
1206 } else
1207 request = attr->relax_domain_level;
1208 if (request < sd->level) {
1209
1210 sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
1211 } else {
1212
1213 sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
1214 }
1215}
1216
1217static void __sdt_free(const struct cpumask *cpu_map);
1218static int __sdt_alloc(const struct cpumask *cpu_map);
1219
1220static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
1221 const struct cpumask *cpu_map)
1222{
1223 switch (what) {
1224 case sa_rootdomain:
1225 if (!atomic_read(&d->rd->refcount))
1226 free_rootdomain(&d->rd->rcu);
1227
1228 case sa_sd:
1229 free_percpu(d->sd);
1230
1231 case sa_sd_storage:
1232 __sdt_free(cpu_map);
1233
1234 case sa_none:
1235 break;
1236 }
1237}
1238
1239static enum s_alloc
1240__visit_domain_allocation_hell(struct s_data *d, const struct cpumask *cpu_map)
1241{
1242 memset(d, 0, sizeof(*d));
1243
1244 if (__sdt_alloc(cpu_map))
1245 return sa_sd_storage;
1246 d->sd = alloc_percpu(struct sched_domain *);
1247 if (!d->sd)
1248 return sa_sd_storage;
1249 d->rd = alloc_rootdomain();
1250 if (!d->rd)
1251 return sa_sd;
1252
1253 return sa_rootdomain;
1254}
1255
/*
 * NULL the sd_data elements we've used to build the sched_domain and
 * sched_group structure so that the subsequent __free_domain_allocs()
 * will not free the data we're using.
 */
1261static void claim_allocations(int cpu, struct sched_domain *sd)
1262{
1263 struct sd_data *sdd = sd->private;
1264
1265 WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
1266 *per_cpu_ptr(sdd->sd, cpu) = NULL;
1267
1268 if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))
1269 *per_cpu_ptr(sdd->sds, cpu) = NULL;
1270
1271 if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
1272 *per_cpu_ptr(sdd->sg, cpu) = NULL;
1273
1274 if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref))
1275 *per_cpu_ptr(sdd->sgc, cpu) = NULL;
1276}
1277
1278#ifdef CONFIG_NUMA
1279enum numa_topology_type sched_numa_topology_type;
1280
1281static int sched_domains_numa_levels;
1282static int sched_domains_curr_level;
1283
1284int sched_max_numa_distance;
1285static int *sched_domains_numa_distance;
1286static struct cpumask ***sched_domains_numa_masks;
1287#endif
1288

/*
 * SD_flags allowed in topology descriptions.
 *
 * These flags are purely descriptive of the topology and do not prescribe
 * behaviour. Behaviour is artificial and mapped in the below sd_init()
 * function:
 *
 *   SD_SHARE_CPUCAPACITY   - describes SMT topologies
 *   SD_SHARE_PKG_RESOURCES - describes shared caches
 *   SD_NUMA                - describes NUMA topologies
 *   SD_SHARE_POWERDOMAIN   - describes shared power domain
 *
 * Odd one out, which beside describing the topology has a quirk also
 * prescribes the desired behaviour that goes along with it:
 *
 *   SD_ASYM_PACKING        - describes SMT quirks
 */
1306#define TOPOLOGY_SD_FLAGS \
1307 (SD_SHARE_CPUCAPACITY | \
1308 SD_SHARE_PKG_RESOURCES | \
1309 SD_NUMA | \
1310 SD_ASYM_PACKING | \
1311 SD_SHARE_POWERDOMAIN)
1312
1313static struct sched_domain *
1314sd_init(struct sched_domain_topology_level *tl,
1315 const struct cpumask *cpu_map,
1316 struct sched_domain *child, int dflags, int cpu)
1317{
1318 struct sd_data *sdd = &tl->data;
1319 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
1320 int sd_id, sd_weight, sd_flags = 0;
1321
1322#ifdef CONFIG_NUMA
	/*
	 * Ugly hack to pass state to sd_numa_mask()...
	 */
1326 sched_domains_curr_level = tl->numa_level;
1327#endif
1328
1329 sd_weight = cpumask_weight(tl->mask(cpu));
1330
1331 if (tl->sd_flags)
1332 sd_flags = (*tl->sd_flags)();
1333 if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
1334 "wrong sd_flags in topology description\n"))
1335 sd_flags &= ~TOPOLOGY_SD_FLAGS;
1336
1337
1338 sd_flags |= dflags;
1339
1340 *sd = (struct sched_domain){
1341 .min_interval = sd_weight,
1342 .max_interval = 2*sd_weight,
1343 .busy_factor = 32,
1344 .imbalance_pct = 125,
1345
1346 .cache_nice_tries = 0,
1347 .busy_idx = 0,
1348 .idle_idx = 0,
1349 .newidle_idx = 0,
1350 .wake_idx = 0,
1351 .forkexec_idx = 0,
1352
1353 .flags = 1*SD_LOAD_BALANCE
1354 | 1*SD_BALANCE_NEWIDLE
1355 | 1*SD_BALANCE_EXEC
1356 | 1*SD_BALANCE_FORK
1357 | 0*SD_BALANCE_WAKE
1358 | 1*SD_WAKE_AFFINE
1359 | 0*SD_SHARE_CPUCAPACITY
1360 | 0*SD_SHARE_PKG_RESOURCES
1361 | 0*SD_SERIALIZE
1362 | 1*SD_PREFER_SIBLING
1363 | 0*SD_NUMA
1364 | sd_flags
1365 ,
1366
1367 .last_balance = jiffies,
1368 .balance_interval = sd_weight,
1369 .max_newidle_lb_cost = 0,
1370 .next_decay_max_lb_cost = jiffies,
1371 .child = child,
1372#ifdef CONFIG_SCHED_DEBUG
1373 .name = tl->name,
1374#endif
1375 };
1376
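	/* Restrict the domain span to the CPUs being built (cpu_map). */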
1377 cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
1378 sd_id = cpumask_first(sched_domain_span(sd));
1379

	/*
	 * Convert topological properties into behaviour.
	 */
1384 if (sd->flags & SD_ASYM_CPUCAPACITY) {
1385 struct sched_domain *t = sd;
1386
		/*
		 * Don't attempt to spread across CPUs of different capacities.
		 */
1390 if (sd->child)
1391 sd->child->flags &= ~SD_PREFER_SIBLING;
1392
1393 for_each_lower_domain(t)
1394 t->flags |= SD_BALANCE_WAKE;
1395 }
1396
1397 if (sd->flags & SD_SHARE_CPUCAPACITY) {
1398 sd->imbalance_pct = 110;
1399
1400 } else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
1401 sd->imbalance_pct = 117;
1402 sd->cache_nice_tries = 1;
1403 sd->busy_idx = 2;
1404
1405#ifdef CONFIG_NUMA
1406 } else if (sd->flags & SD_NUMA) {
1407 sd->cache_nice_tries = 2;
1408 sd->busy_idx = 3;
1409 sd->idle_idx = 2;
1410
1411 sd->flags &= ~SD_PREFER_SIBLING;
1412 sd->flags |= SD_SERIALIZE;
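		/*
		 * Beyond the reclaim distance, remote balancing at exec, fork
		 * and wake-up time is not considered worthwhile.
		 */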
1413 if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
1414 sd->flags &= ~(SD_BALANCE_EXEC |
1415 SD_BALANCE_FORK |
1416 SD_WAKE_AFFINE);
1417 }
1418
1419#endif
1420 } else {
1421 sd->cache_nice_tries = 1;
1422 sd->busy_idx = 2;
1423 sd->idle_idx = 1;
1424 }
1425

	/*
	 * For all levels sharing cache; connect a sched_domain_shared
	 * instance.
	 */
1430 if (sd->flags & SD_SHARE_PKG_RESOURCES) {
1431 sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
1432 atomic_inc(&sd->shared->ref);
1433 atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
1434 }
1435
1436 sd->private = sdd;
1437
1438 return sd;
1439}
1440
/*
 * Topology list, bottom-up.
 */
1444static struct sched_domain_topology_level default_topology[] = {
1445#ifdef CONFIG_SCHED_SMT
1446 { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
1447#endif
1448#ifdef CONFIG_SCHED_MC
1449 { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
1450#endif
1451 { cpu_cpu_mask, SD_INIT_NAME(DIE) },
1452 { NULL, },
1453};
1454
1455static struct sched_domain_topology_level *sched_domain_topology =
1456 default_topology;
1457
1458#define for_each_sd_topology(tl) \
1459 for (tl = sched_domain_topology; tl->mask; tl++)
1460
1461void set_sched_topology(struct sched_domain_topology_level *tl)
1462{
1463 if (WARN_ON_ONCE(sched_smp_initialized))
1464 return;
1465
1466 sched_domain_topology = tl;
1467}
1468
1469#ifdef CONFIG_NUMA
1470
1471static const struct cpumask *sd_numa_mask(int cpu)
1472{
1473 return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
1474}
1475
1476static void sched_numa_warn(const char *str)
1477{
1478 static int done = false;
1479 int i,j;
1480
1481 if (done)
1482 return;
1483
1484 done = true;
1485
1486 printk(KERN_WARNING "ERROR: %s\n\n", str);
1487
1488 for (i = 0; i < nr_node_ids; i++) {
1489 printk(KERN_WARNING " ");
1490 for (j = 0; j < nr_node_ids; j++)
1491 printk(KERN_CONT "%02d ", node_distance(i,j));
1492 printk(KERN_CONT "\n");
1493 }
1494 printk(KERN_WARNING "\n");
1495}
1496
1497bool find_numa_distance(int distance)
1498{
1499 int i;
1500
1501 if (distance == node_distance(0, 0))
1502 return true;
1503
1504 for (i = 0; i < sched_domains_numa_levels; i++) {
1505 if (sched_domains_numa_distance[i] == distance)
1506 return true;
1507 }
1508
1509 return false;
1510}
1511

/*
 * A system can have three types of NUMA topology:
 *  NUMA_DIRECT: all nodes are directly connected, or not a NUMA system
 *  NUMA_GLUELESS_MESH: some nodes reachable through intermediary nodes
 *  NUMA_BACKPLANE: nodes can reach other nodes only through a backplane
 *
 * The difference between a glueless mesh topology and a backplane topology
 * lies in whether communication between not directly connected nodes goes
 * through intermediary nodes (where programs could run), or through backplane
 * controllers. This affects placement of programs.
 *
 * The type of topology can be discerned with the following tests:
 * - If the maximum distance between any nodes is 1 hop, the system
 *   is directly connected.
 * - If for two nodes A and B, located N > 1 hops away from each other,
 *   there is an intermediary node C, which is < N hops away from both
 *   nodes A and B, the system is a glueless mesh.
 */
1531static void init_numa_topology_type(void)
1532{
1533 int a, b, c, n;
1534
1535 n = sched_max_numa_distance;
1536
1537 if (sched_domains_numa_levels <= 2) {
1538 sched_numa_topology_type = NUMA_DIRECT;
1539 return;
1540 }
1541
1542 for_each_online_node(a) {
1543 for_each_online_node(b) {
1544
1545 if (node_distance(a, b) < n)
1546 continue;
1547
1548
1549 for_each_online_node(c) {
1550 if (node_distance(a, c) < n &&
1551 node_distance(b, c) < n) {
1552 sched_numa_topology_type =
1553 NUMA_GLUELESS_MESH;
1554 return;
1555 }
1556 }
1557
1558 sched_numa_topology_type = NUMA_BACKPLANE;
1559 return;
1560 }
1561 }
1562}
1563
1564void sched_init_numa(void)
1565{
1566 int next_distance, curr_distance = node_distance(0, 0);
1567 struct sched_domain_topology_level *tl;
1568 int level = 0;
1569 int i, j, k;
1570
1571 sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL);
1572 if (!sched_domains_numa_distance)
1573 return;
1574
1575
1576 sched_domains_numa_distance[level++] = curr_distance;
1577 sched_domains_numa_levels = level;
1578

	/*
	 * O(nr_nodes^2) deduplicating selection sort -- in embedded spaces
	 * this gives a O(nr_nodes^3) - can be optimized if need be.
	 */
1586 next_distance = curr_distance;
1587 for (i = 0; i < nr_node_ids; i++) {
1588 for (j = 0; j < nr_node_ids; j++) {
1589 for (k = 0; k < nr_node_ids; k++) {
1590 int distance = node_distance(i, k);
1591
1592 if (distance > curr_distance &&
1593 (distance < next_distance ||
1594 next_distance == curr_distance))
1595 next_distance = distance;
1596
1597
1598
1599
1600
1601
1602 if (sched_debug() && node_distance(k, i) != distance)
1603 sched_numa_warn("Node-distance not symmetric");
1604
1605 if (sched_debug() && i && !find_numa_distance(distance))
1606 sched_numa_warn("Node-0 not representative");
1607 }
1608 if (next_distance != curr_distance) {
1609 sched_domains_numa_distance[level++] = next_distance;
1610 sched_domains_numa_levels = level;
1611 curr_distance = next_distance;
1612 } else break;
1613 }
1614
1615
1616
1617
1618 if (!sched_debug())
1619 break;
1620 }
1621

	/*
	 * 'level' contains the number of unique distances
	 *
	 * The sched_domains_numa_distance[] array includes the actual distance
	 * numbers.
	 */

	/*
	 * Temporarily reset sched_domains_numa_levels to 0: if one of the
	 * sched_domains_numa_masks[][] allocations below fails part way,
	 * the array must not be iterated beyond the entries that actually
	 * exist. It is set back to 'level' once everything is in place.
	 */
1638 sched_domains_numa_levels = 0;
1639
1640 sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
1641 if (!sched_domains_numa_masks)
1642 return;
1643

	/*
	 * Now for each level, construct a mask per node which contains all
	 * CPUs of nodes that are that many hops away from us.
	 */
1648 for (i = 0; i < level; i++) {
1649 sched_domains_numa_masks[i] =
1650 kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
1651 if (!sched_domains_numa_masks[i])
1652 return;
1653
1654 for (j = 0; j < nr_node_ids; j++) {
1655 struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
1656 if (!mask)
1657 return;
1658
1659 sched_domains_numa_masks[i][j] = mask;
1660
1661 for_each_node(k) {
1662 if (node_distance(j, k) > sched_domains_numa_distance[i])
1663 continue;
1664
1665 cpumask_or(mask, mask, cpumask_of_node(k));
1666 }
1667 }
1668 }
1669
1670
1671 for (i = 0; sched_domain_topology[i].mask; i++);
1672
1673 tl = kzalloc((i + level + 1) *
1674 sizeof(struct sched_domain_topology_level), GFP_KERNEL);
1675 if (!tl)
1676 return;
1677
1678
1679
1680
1681 for (i = 0; sched_domain_topology[i].mask; i++)
1682 tl[i] = sched_domain_topology[i];
1683
1684
1685
1686
1687 tl[i++] = (struct sched_domain_topology_level){
1688 .mask = sd_numa_mask,
1689 .numa_level = 0,
1690 SD_INIT_NAME(NODE)
1691 };
1692
1693
1694
1695
1696 for (j = 1; j < level; i++, j++) {
1697 tl[i] = (struct sched_domain_topology_level){
1698 .mask = sd_numa_mask,
1699 .sd_flags = cpu_numa_flags,
1700 .flags = SDTL_OVERLAP,
1701 .numa_level = j,
1702 SD_INIT_NAME(NUMA)
1703 };
1704 }
1705
1706 sched_domain_topology = tl;
1707
1708 sched_domains_numa_levels = level;
1709 sched_max_numa_distance = sched_domains_numa_distance[level - 1];
1710
1711 init_numa_topology_type();
1712}
1713
1714void sched_domains_numa_masks_set(unsigned int cpu)
1715{
1716 int node = cpu_to_node(cpu);
1717 int i, j;
1718
1719 for (i = 0; i < sched_domains_numa_levels; i++) {
1720 for (j = 0; j < nr_node_ids; j++) {
1721 if (node_distance(j, node) <= sched_domains_numa_distance[i])
1722 cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
1723 }
1724 }
1725}
1726
1727void sched_domains_numa_masks_clear(unsigned int cpu)
1728{
1729 int i, j;
1730
1731 for (i = 0; i < sched_domains_numa_levels; i++) {
1732 for (j = 0; j < nr_node_ids; j++)
1733 cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
1734 }
1735}
1736
1737#endif
1738
1739static int __sdt_alloc(const struct cpumask *cpu_map)
1740{
1741 struct sched_domain_topology_level *tl;
1742 int j;
1743
1744 for_each_sd_topology(tl) {
1745 struct sd_data *sdd = &tl->data;
1746
1747 sdd->sd = alloc_percpu(struct sched_domain *);
1748 if (!sdd->sd)
1749 return -ENOMEM;
1750
1751 sdd->sds = alloc_percpu(struct sched_domain_shared *);
1752 if (!sdd->sds)
1753 return -ENOMEM;
1754
1755 sdd->sg = alloc_percpu(struct sched_group *);
1756 if (!sdd->sg)
1757 return -ENOMEM;
1758
1759 sdd->sgc = alloc_percpu(struct sched_group_capacity *);
1760 if (!sdd->sgc)
1761 return -ENOMEM;
1762
1763 for_each_cpu(j, cpu_map) {
1764 struct sched_domain *sd;
1765 struct sched_domain_shared *sds;
1766 struct sched_group *sg;
1767 struct sched_group_capacity *sgc;
1768
1769 sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
1770 GFP_KERNEL, cpu_to_node(j));
1771 if (!sd)
1772 return -ENOMEM;
1773
1774 *per_cpu_ptr(sdd->sd, j) = sd;
1775
1776 sds = kzalloc_node(sizeof(struct sched_domain_shared),
1777 GFP_KERNEL, cpu_to_node(j));
1778 if (!sds)
1779 return -ENOMEM;
1780
1781 *per_cpu_ptr(sdd->sds, j) = sds;
1782
1783 sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
1784 GFP_KERNEL, cpu_to_node(j));
1785 if (!sg)
1786 return -ENOMEM;
1787
1788 sg->next = sg;
1789
1790 *per_cpu_ptr(sdd->sg, j) = sg;
1791
1792 sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(),
1793 GFP_KERNEL, cpu_to_node(j));
1794 if (!sgc)
1795 return -ENOMEM;
1796
1797#ifdef CONFIG_SCHED_DEBUG
1798 sgc->id = j;
1799#endif
1800
1801 *per_cpu_ptr(sdd->sgc, j) = sgc;
1802 }
1803 }
1804
1805 return 0;
1806}
1807
1808static void __sdt_free(const struct cpumask *cpu_map)
1809{
1810 struct sched_domain_topology_level *tl;
1811 int j;
1812
1813 for_each_sd_topology(tl) {
1814 struct sd_data *sdd = &tl->data;
1815
1816 for_each_cpu(j, cpu_map) {
1817 struct sched_domain *sd;
1818
1819 if (sdd->sd) {
1820 sd = *per_cpu_ptr(sdd->sd, j);
1821 if (sd && (sd->flags & SD_OVERLAP))
1822 free_sched_groups(sd->groups, 0);
1823 kfree(*per_cpu_ptr(sdd->sd, j));
1824 }
1825
1826 if (sdd->sds)
1827 kfree(*per_cpu_ptr(sdd->sds, j));
1828 if (sdd->sg)
1829 kfree(*per_cpu_ptr(sdd->sg, j));
1830 if (sdd->sgc)
1831 kfree(*per_cpu_ptr(sdd->sgc, j));
1832 }
1833 free_percpu(sdd->sd);
1834 sdd->sd = NULL;
1835 free_percpu(sdd->sds);
1836 sdd->sds = NULL;
1837 free_percpu(sdd->sg);
1838 sdd->sg = NULL;
1839 free_percpu(sdd->sgc);
1840 sdd->sgc = NULL;
1841 }
1842}
1843
1844static struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
1845 const struct cpumask *cpu_map, struct sched_domain_attr *attr,
1846 struct sched_domain *child, int dflags, int cpu)
1847{
1848 struct sched_domain *sd = sd_init(tl, cpu_map, child, dflags, cpu);
1849
1850 if (child) {
1851 sd->level = child->level + 1;
1852 sched_domain_level_max = max(sched_domain_level_max, sd->level);
1853 child->parent = sd;
1854
1855 if (!cpumask_subset(sched_domain_span(child),
1856 sched_domain_span(sd))) {
1857 pr_err("BUG: arch topology borken\n");
1858#ifdef CONFIG_SCHED_DEBUG
1859 pr_err(" the %s domain not a subset of the %s domain\n",
1860 child->name, sd->name);
1861#endif
1862
1863 cpumask_or(sched_domain_span(sd),
1864 sched_domain_span(sd),
1865 sched_domain_span(child));
1866 }
1867
1868 }
1869 set_domain_attribute(sd, attr);
1870
1871 return sd;
1872}
1873

/*
 * Find the sched_domain_topology_level where all CPU capacities are visible
 * for all CPUs.
 */
1878static struct sched_domain_topology_level
1879*asym_cpu_capacity_level(const struct cpumask *cpu_map)
1880{
1881 int i, j, asym_level = 0;
1882 bool asym = false;
1883 struct sched_domain_topology_level *tl, *asym_tl = NULL;
1884 unsigned long cap;
1885
1886
1887 cap = arch_scale_cpu_capacity(NULL, cpumask_first(cpu_map));
1888
1889 for_each_cpu(i, cpu_map) {
1890 if (arch_scale_cpu_capacity(NULL, i) != cap) {
1891 asym = true;
1892 break;
1893 }
1894 }
1895
1896 if (!asym)
1897 return NULL;

	/*
	 * Examine topology from all CPU's point of views to detect the lowest
	 * sched_domain_topology_level where a highest capacity CPU is visible
	 * to everyone.
	 */
1904 for_each_cpu(i, cpu_map) {
1905 unsigned long max_capacity = arch_scale_cpu_capacity(NULL, i);
1906 int tl_id = 0;
1907
1908 for_each_sd_topology(tl) {
1909 if (tl_id < asym_level)
1910 goto next_level;
1911
1912 for_each_cpu_and(j, tl->mask(i), cpu_map) {
1913 unsigned long capacity;
1914
1915 capacity = arch_scale_cpu_capacity(NULL, j);
1916
1917 if (capacity <= max_capacity)
1918 continue;
1919
1920 max_capacity = capacity;
1921 asym_level = tl_id;
1922 asym_tl = tl;
1923 }
1924next_level:
1925 tl_id++;
1926 }
1927 }
1928
1929 return asym_tl;
1930}
1931

/*
 * Build sched domains for a given set of CPUs and attach the sched domains
 * to the individual CPUs.
 */
1937static int
1938build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
1939{
1940 enum s_alloc alloc_state;
1941 struct sched_domain *sd;
1942 struct s_data d;
1943 struct rq *rq = NULL;
1944 int i, ret = -ENOMEM;
1945 struct sched_domain_topology_level *tl_asym;
1946 bool has_asym = false;
1947
1948 alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
1949 if (alloc_state != sa_rootdomain)
1950 goto error;
1951
1952 tl_asym = asym_cpu_capacity_level(cpu_map);

	/* Set up domains for CPUs specified by the cpu_map: */
1955 for_each_cpu(i, cpu_map) {
1956 struct sched_domain_topology_level *tl;
1957
1958 sd = NULL;
1959 for_each_sd_topology(tl) {
1960 int dflags = 0;
1961
1962 if (tl == tl_asym) {
1963 dflags |= SD_ASYM_CPUCAPACITY;
1964 has_asym = true;
1965 }
1966
1967 sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);
1968
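			/* Remember the lowest level as this CPU's base domain. */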
1969 if (tl == sched_domain_topology)
1970 *per_cpu_ptr(d.sd, i) = sd;
1971 if (tl->flags & SDTL_OVERLAP)
1972 sd->flags |= SD_OVERLAP;
1973 if (cpumask_equal(cpu_map, sched_domain_span(sd)))
1974 break;
1975 }
1976 }

	/* Build the groups for the domains: */
1979 for_each_cpu(i, cpu_map) {
1980 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
1981 sd->span_weight = cpumask_weight(sched_domain_span(sd));
1982 if (sd->flags & SD_OVERLAP) {
1983 if (build_overlap_sched_groups(sd, i))
1984 goto error;
1985 } else {
1986 if (build_sched_groups(sd, i))
1987 goto error;
1988 }
1989 }
1990 }

	/* Calculate CPU capacity for physical packages and nodes: */
1993 for (i = nr_cpumask_bits-1; i >= 0; i--) {
1994 if (!cpumask_test_cpu(i, cpu_map))
1995 continue;
1996
1997 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
1998 claim_allocations(i, sd);
1999 init_sched_groups_capacity(i, sd);
2000 }
2001 }

	/* Attach the domains: */
2004 rcu_read_lock();
2005 for_each_cpu(i, cpu_map) {
2006 rq = cpu_rq(i);
2007 sd = *per_cpu_ptr(d.sd, i);
2008
2009
2010 if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
2011 WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
2012
2013 cpu_attach_domain(sd, d.rd, i);
2014 }
2015 rcu_read_unlock();
2016
2017 if (has_asym)
2018 static_branch_enable_cpuslocked(&sched_asym_cpucapacity);
2019
2020 if (rq && sched_debug_enabled) {
2021 pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
2022 cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
2023 }
2024
2025 ret = 0;
2026error:
2027 __free_domain_allocs(&d, alloc_state, cpu_map);
2028
2029 return ret;
2030}
2031

/* Current sched domains: */
static cpumask_var_t *doms_cur;

/* Number of sched domains in 'doms_cur': */
static int ndoms_cur;

/* Attributes of custom domains in 'doms_cur': */
static struct sched_domain_attr *dattr_cur;
2040
/*
 * Special case: If a kmalloc() of a doms_cur partition (array of
 * cpumask) fails, then fallback to a single sched domain,
 * as determined by the single cpumask fallback_doms.
 */
2046static cpumask_var_t fallback_doms;

/*
 * arch_update_cpu_topology lets virtualized architectures update the
 * CPU core maps. It is supposed to return 1 if the topology changed
 * or 0 if it stayed the same.
 */
2053int __weak arch_update_cpu_topology(void)
2054{
2055 return 0;
2056}
2057
2058cpumask_var_t *alloc_sched_domains(unsigned int ndoms)
2059{
2060 int i;
2061 cpumask_var_t *doms;
2062
2063 doms = kmalloc_array(ndoms, sizeof(*doms), GFP_KERNEL);
2064 if (!doms)
2065 return NULL;
2066 for (i = 0; i < ndoms; i++) {
2067 if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
2068 free_sched_domains(doms, i);
2069 return NULL;
2070 }
2071 }
2072 return doms;
2073}
2074
2075void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
2076{
2077 unsigned int i;
2078 for (i = 0; i < ndoms; i++)
2079 free_cpumask_var(doms[i]);
2080 kfree(doms);
2081}
2082
/*
 * Set up scheduler domains and groups. For now this just excludes isolated
 * CPUs, but could be used to exclude other special cases in the future.
 */
2087int sched_init_domains(const struct cpumask *cpu_map)
2088{
2089 int err;
2090
2091 zalloc_cpumask_var(&sched_domains_tmpmask, GFP_KERNEL);
2092 zalloc_cpumask_var(&sched_domains_tmpmask2, GFP_KERNEL);
2093 zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);
2094
2095 arch_update_cpu_topology();
2096 ndoms_cur = 1;
2097 doms_cur = alloc_sched_domains(ndoms_cur);
2098 if (!doms_cur)
2099 doms_cur = &fallback_doms;
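	/* Exclude CPUs isolated from domain scheduling (housekeeping mask). */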
2100 cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
2101 err = build_sched_domains(doms_cur[0], NULL);
2102 register_sched_domain_sysctl();
2103
2104 return err;
2105}
2106
/*
 * Detach sched domains from a group of CPUs specified in cpu_map.
 * These CPUs will now be attached to the NULL domain.
 */
2111static void detach_destroy_domains(const struct cpumask *cpu_map)
2112{
2113 int i;
2114
2115 rcu_read_lock();
2116 for_each_cpu(i, cpu_map)
2117 cpu_attach_domain(NULL, &def_root_domain, i);
2118 rcu_read_unlock();
2119}
2120
2121
2122static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
2123 struct sched_domain_attr *new, int idx_new)
2124{
2125 struct sched_domain_attr tmp;
2126
2127
2128 if (!new && !cur)
2129 return 1;
2130
2131 tmp = SD_ATTR_INIT;
2132
2133 return !memcmp(cur ? (cur + idx_cur) : &tmp,
2134 new ? (new + idx_new) : &tmp,
2135 sizeof(struct sched_domain_attr));
2136}
2137
2138

/*
 * Partition sched domains as specified by the 'ndoms_new'
 * cpumasks in the array doms_new[] of cpumasks. This compares
 * doms_new[] to the current sched domain partitioning, doms_cur[].
 * It destroys each deleted domain and builds each new domain.
 *
 * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'.
 * The masks don't intersect (don't overlap.) We should setup one
 * sched domain for each mask. CPUs not in any of the cpumasks will
 * not be load balanced. If the same cpumask appears both in the
 * current 'doms_cur' domains and in the new 'doms_new', we can leave
 * it as it is.
 *
 * The passed in 'doms_new' should be allocated using
 * alloc_sched_domains. This routine takes ownership of it and will
 * free_sched_domains it when done with it. If the caller failed the
 * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1,
 * and partition_sched_domains() will fall back to the single partition
 * 'fallback_doms' and force the domains to be rebuilt.
 *
 * If doms_new == NULL it will be replaced with a single domain spanning
 * all active housekeeping CPUs. ndoms_new == 0 is a special case for
 * destroying existing domains; it will not create the default domain.
 *
 * Call with hotplug lock held.
 */
2164void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
2165 struct sched_domain_attr *dattr_new)
2166{
2167 bool __maybe_unused has_eas = false;
2168 int i, j, n;
2169 int new_topology;
2170
2171 mutex_lock(&sched_domains_mutex);
2172
2173
2174 unregister_sched_domain_sysctl();
2175
2176
2177 new_topology = arch_update_cpu_topology();
2178
2179 if (!doms_new) {
2180 WARN_ON_ONCE(dattr_new);
2181 n = 0;
2182 doms_new = alloc_sched_domains(1);
2183 if (doms_new) {
2184 n = 1;
2185 cpumask_and(doms_new[0], cpu_active_mask,
2186 housekeeping_cpumask(HK_FLAG_DOMAIN));
2187 }
2188 } else {
2189 n = ndoms_new;
2190 }

	/* Destroy deleted domains: */
2193 for (i = 0; i < ndoms_cur; i++) {
2194 for (j = 0; j < n && !new_topology; j++) {
2195 if (cpumask_equal(doms_cur[i], doms_new[j]) &&
2196 dattrs_equal(dattr_cur, i, dattr_new, j))
2197 goto match1;
2198 }
2199
2200 detach_destroy_domains(doms_cur[i]);
2201match1:
2202 ;
2203 }
2204
2205 n = ndoms_cur;
2206 if (!doms_new) {
2207 n = 0;
2208 doms_new = &fallback_doms;
2209 cpumask_and(doms_new[0], cpu_active_mask,
2210 housekeeping_cpumask(HK_FLAG_DOMAIN));
2211 }

	/* Build new domains: */
2214 for (i = 0; i < ndoms_new; i++) {
2215 for (j = 0; j < n && !new_topology; j++) {
2216 if (cpumask_equal(doms_new[i], doms_cur[j]) &&
2217 dattrs_equal(dattr_new, i, dattr_cur, j))
2218 goto match2;
2219 }
2220
2221 build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
2222match2:
2223 ;
2224 }
2225
2226#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
2227
2228 for (i = 0; i < ndoms_new; i++) {
2229 for (j = 0; j < n && !sched_energy_update; j++) {
2230 if (cpumask_equal(doms_new[i], doms_cur[j]) &&
2231 cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) {
2232 has_eas = true;
2233 goto match3;
2234 }
2235 }
2236
2237 has_eas |= build_perf_domains(doms_new[i]);
2238match3:
2239 ;
2240 }
2241 sched_energy_set(has_eas);
2242#endif
2243
2244
2245 if (doms_cur != &fallback_doms)
2246 free_sched_domains(doms_cur, ndoms_cur);
2247
2248 kfree(dattr_cur);
2249 doms_cur = doms_new;
2250 dattr_cur = dattr_new;
2251 ndoms_cur = ndoms_new;
2252
2253 register_sched_domain_sysctl();
2254
2255 mutex_unlock(&sched_domains_mutex);
2256}
2257