// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * x86 SMP booting functions
 *
 * Bring up secondary CPUs: kick them with INIT/SIPI (or NMI for CPU0),
 * wait for them to call in, then validate and wire up their topology
 * in the scheduler's sibling/core/die maps.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/smp.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task_stack.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
#include <linux/err.h>
#include <linux/nmi.h>
#include <linux/tboot.h>
#include <linux/gfp.h>
#include <linux/cpuidle.h>
#include <linux/numa.h>
#include <linux/pgtable.h>
#include <linux/overflow.h>
#include <linux/syscore_ops.h>

#include <asm/acpi.h>
#include <asm/desc.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/realmode.h>
#include <asm/cpu.h>
#include <asm/numa.h>
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
#include <asm/mwait.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/fpu/api.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
#include <asm/i8259.h>
#include <asm/misc.h>
#include <asm/qspinlock.h>
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
#include <asm/spec-ctrl.h>
#include <asm/hw_irq.h>
#include <asm/stackprotector.h>

#ifdef CONFIG_ACPI_CPPC_LIB
#include <acpi/cppc_acpi.h>
#endif

/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);

/* representing HT and core siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);

/* representing HT, core, and die siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
EXPORT_PER_CPU_SYMBOL(cpu_die_map);

/* CPUs which share the last level cache */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* CPUs which share an L2 cache */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);

/* Logical package management */
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
static unsigned int logical_die __read_mostly;

/* Maximum number of SMT threads on any online CPU */
int __read_mostly __max_smt_threads = 1;

/* Flag to indicate if a complete sched domain rebuild is required */
bool x86_topology_update;
int arch_update_cpu_topology(void)
{
	int retval = x86_topology_update;

	x86_topology_update = false;
	return retval;
}

static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
	unsigned long flags;

	spin_lock_irqsave(&rtc_lock, flags);
	CMOS_WRITE(0xa, 0xf);
	spin_unlock_irqrestore(&rtc_lock, flags);
	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
							start_eip >> 4;
	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
							start_eip & 0xf;
}

static inline void smpboot_restore_warm_reset_vector(void)
{
	unsigned long flags;

	/*
	 * Paranoid: Set warm reset code and vector here back
	 * to default values.
	 */
	spin_lock_irqsave(&rtc_lock, flags);
	CMOS_WRITE(0, 0xf);
	spin_unlock_irqrestore(&rtc_lock, flags);

	*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}

static void init_freq_invariance(bool secondary, bool cppc_ready);

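/*
 * Report back to the Boot Processor.
 * Running on AP.
 */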
static void smp_callin(void)
{
	int cpuid;

	/*
	 * If waken up by an INIT in an 82489DX configuration
	 * cpu_callout_mask guarantees we don't get here before
	 * an INIT_deassert IPI reaches our local APIC, so it is
	 * now safe to touch our local APIC.
	 */
	cpuid = smp_processor_id();

	/*
	 * The boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */
	apic_ap_setup();

	/*
	 * Save our processor parameters. Note: this information
	 * is needed for clock calibration.
	 */
	smp_store_cpu_info(cpuid);

	/*
	 * The topology information must be up to date before
	 * calibrate_delay() and notify_cpu_starting().
	 */
	set_cpu_sibling_map(raw_smp_processor_id());

	init_freq_invariance(true, false);

	/*
	 * Get our bogomips.
	 * Update loops_per_jiffy in cpu_data. Previous call to
	 * smp_store_cpu_info() stored a value that is close but not as
	 * accurate as the value just calculated.
	 */
	calibrate_delay();
	cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
	pr_debug("Stack at about %p\n", &cpuid);

	wmb();

	notify_cpu_starting(cpuid);

	/*
	 * Allow the master to continue.
	 */
	cpumask_set_cpu(cpuid, cpu_callin_mask);
}

static int cpu0_logical_apicid;
static int enable_start_cpu0;

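/*
 * Activate a secondary processor.
 */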
static void notrace start_secondary(void *unused)
{
	/*
	 * Don't put *anything* except direct CPU state initialization
	 * before cpu_init(), SMP booting is too fragile that we want to
	 * limit the things done here to the most necessary things.
	 */
	cr4_init();

#ifdef CONFIG_X86_32
	/* switch away from the initial page table */
	load_cr3(swapper_pg_dir);
	__flush_tlb_all();
#endif
	cpu_init_secondary();
	rcu_cpu_starting(raw_smp_processor_id());
	x86_cpuinit.early_percpu_clock_init();
	smp_callin();

	enable_start_cpu0 = 0;

	/* otherwise gcc will move up smp_processor_id() before cpu_init() */
	barrier();

	/*
	 * Check TSC synchronization with the boot CPU:
	 */
	check_tsc_sync_target();

	speculative_store_bypass_ht_init();

	/*
	 * Lock vector_lock, set CPU online and bring the vector
	 * allocator online. Online must be set with vector_lock held
	 * to prevent a concurrent irq setup/teardown from seeing a
	 * half valid vector space.
	 */
	lock_vector_lock();
	set_cpu_online(smp_processor_id(), true);
	lapic_online();
	unlock_vector_lock();
	cpu_set_state_online(smp_processor_id());
	x86_platform.nmi_init();

	/* enable local interrupts */
	local_irq_enable();

	x86_cpuinit.setup_percpu_clockev();

	wmb();
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

/**
 * topology_is_primary_thread - Check whether CPU is the primary SMT thread
 * @cpu:	CPU to check
 */
bool topology_is_primary_thread(unsigned int cpu)
{
	return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu));
}

/**
 * topology_smt_supported - Check whether SMT is supported by the CPUs
 */
bool topology_smt_supported(void)
{
	return smp_num_siblings > 1;
}

/**
 * topology_phys_to_logical_pkg - Map a physical package id to a logical one
 * @phys_pkg:	The physical package id to map
 *
 * Returns logical package id or -1 if not found
 */
int topology_phys_to_logical_pkg(unsigned int phys_pkg)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct cpuinfo_x86 *c = &cpu_data(cpu);

		if (c->initialized && c->phys_proc_id == phys_pkg)
			return c->logical_proc_id;
	}
	return -1;
}
EXPORT_SYMBOL(topology_phys_to_logical_pkg);

/**
 * topology_phys_to_logical_die - Map a physical die id to a logical one
 * @die_id:	The physical die id to map
 * @cur_cpu:	The CPU for which the mapping is done
 *
 * Returns logical die id or -1 if not found
 */
int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
{
	int cpu;
	int proc_id = cpu_data(cur_cpu).phys_proc_id;

	for_each_possible_cpu(cpu) {
		struct cpuinfo_x86 *c = &cpu_data(cpu);

		if (c->initialized && c->cpu_die_id == die_id &&
		    c->phys_proc_id == proc_id)
			return c->logical_die_id;
	}
	return -1;
}
EXPORT_SYMBOL(topology_phys_to_logical_die);

/**
 * topology_update_package_map - Update the physical to logical package map
 * @pkg:	The physical package id as retrieved via CPUID
 * @cpu:	The cpu for which this is updated
 */
int topology_update_package_map(unsigned int pkg, unsigned int cpu)
{
	int new;

	/* Already available somewhere? */
	new = topology_phys_to_logical_pkg(pkg);
	if (new >= 0)
		goto found;

	new = logical_packages++;
	if (new != pkg) {
		pr_info("CPU %u Converting physical %u to logical package %u\n",
			cpu, pkg, new);
	}
found:
	cpu_data(cpu).logical_proc_id = new;
	return 0;
}

/**
 * topology_update_die_map - Update the physical to logical die map
 * @die:	The die id as retrieved via CPUID
 * @cpu:	The cpu for which this is updated
 */
int topology_update_die_map(unsigned int die, unsigned int cpu)
{
	int new;

	/* Already available somewhere? */
	new = topology_phys_to_logical_die(die, cpu);
	if (new >= 0)
		goto found;

	new = logical_die++;
	if (new != die) {
		pr_info("CPU %u Converting physical %u to logical die %u\n",
			cpu, die, new);
	}
found:
	cpu_data(cpu).logical_die_id = new;
	return 0;
}

void __init smp_store_boot_cpu_info(void)
{
	int id = 0; /* CPU 0 */
	struct cpuinfo_x86 *c = &cpu_data(id);

	*c = boot_cpu_data;
	c->cpu_index = id;
	topology_update_package_map(c->phys_proc_id, id);
	topology_update_die_map(c->cpu_die_id, id);
	c->initialized = true;
}

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */
void smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = &cpu_data(id);

	/* Copy boot_cpu_data only on the first bringup */
	if (!c->initialized)
		*c = boot_cpu_data;
	c->cpu_index = id;

	/*
	 * Re-run CPU identification on the AP to pick up its own feature
	 * state, which may differ from that of the boot CPU.
	 */
	identify_secondary_cpu(c);
	c->initialized = true;
}

static bool
topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

	return (cpu_to_node(cpu1) == cpu_to_node(cpu2));
}

static bool
topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
{
	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

	return !WARN_ONCE(!topology_same_node(c, o),
		"sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
		"[node: %d != %d]. Ignoring dependency.\n",
		cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
}

#define link_mask(mfunc, c1, c2)					\
do {									\
	cpumask_set_cpu((c1), mfunc(c2));				\
	cpumask_set_cpu((c2), mfunc(c1));				\
} while (0)

static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

		if (c->phys_proc_id == o->phys_proc_id &&
		    c->cpu_die_id == o->cpu_die_id &&
		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
			if (c->cpu_core_id == o->cpu_core_id)
				return topology_sane(c, o, "smt");

			if ((c->cu_id != 0xff) &&
			    (o->cu_id != 0xff) &&
			    (c->cu_id == o->cu_id))
				return topology_sane(c, o, "smt");
		}

	} else if (c->phys_proc_id == o->phys_proc_id &&
		   c->cpu_die_id == o->cpu_die_id &&
		   c->cpu_core_id == o->cpu_core_id) {
		return topology_sane(c, o, "smt");
	}

	return false;
}

static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
	if (c->phys_proc_id == o->phys_proc_id &&
	    c->cpu_die_id == o->cpu_die_id)
		return true;
	return false;
}

static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

	/* If the arch didn't set up l2c_id, fall back to SMT */
	if (per_cpu(cpu_l2c_id, cpu1) == BAD_APICID)
		return match_smt(c, o);

	/* Do not match if L2 cache id does not match: */
	if (per_cpu(cpu_l2c_id, cpu1) != per_cpu(cpu_l2c_id, cpu2))
		return false;

	return topology_sane(c, o, "l2c");
}

/*
 * Unlike the other levels, we do not enforce keeping a
 * multicore group inside a NUMA node.  If this happens, we will
 * discard the MC level of the topology later.
 */
static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
	if (c->phys_proc_id == o->phys_proc_id)
		return true;
	return false;
}

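/*
 * Define intel_cod_cpu[] for Intel COD (Cluster-on-Die) CPUs.
 *
 * Any Intel CPU that has multiple nodes per package and does not
 * match intel_cod_cpu[] has the SNC (Sub-NUMA Cluster) topology.
 *
 * When in SNC mode, these CPUs enumerate an LLC that is shared
 * by multiple NUMA nodes. The LLC is shared for off-package data
 * access but private to the NUMA node (half of the package) for
 * on-package access. CPUID (the source of the information about
 * the LLC) can only report the cache as shared or unshared, but
 * not this particular configuration.
 */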
static const struct x86_cpu_id intel_cod_cpu[] = {
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, 0),	/* COD */
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, 0),	/* COD */
	X86_MATCH_INTEL_FAM6_MODEL(ANY, 1),		/* SNC */
	{}
};

static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
	const struct x86_cpu_id *id = x86_match_cpu(intel_cod_cpu);
	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
	bool intel_snc = id && id->driver_data;

	/* Do not match if we do not have a valid APICID for cpu: */
	if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
		return false;

	/* Do not match if LLC id does not match: */
	if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2))
		return false;

	/*
	 * Allow the SNC topology without warning. Return of false
	 * means 'c' does not share the LLC of 'o'. This will be
	 * reflected to userspace.
	 */
	if (match_pkg(c, o) && !topology_same_node(c, o) && intel_snc)
		return false;

	return topology_sane(c, o, "llc");
}

#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_CLUSTER) || defined(CONFIG_SCHED_MC)
static inline int x86_sched_itmt_flags(void)
{
	return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
}

#ifdef CONFIG_SCHED_MC
static int x86_core_flags(void)
{
	return cpu_core_flags() | x86_sched_itmt_flags();
}
#endif
#ifdef CONFIG_SCHED_SMT
static int x86_smt_flags(void)
{
	return cpu_smt_flags() | x86_sched_itmt_flags();
}
#endif
#ifdef CONFIG_SCHED_CLUSTER
static int x86_cluster_flags(void)
{
	return cpu_cluster_flags() | x86_sched_itmt_flags();
}
#endif
#endif

static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_CLUSTER
	{ cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) },
#endif
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
	{ NULL, },
};

static struct sched_domain_topology_level x86_hybrid_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

static struct sched_domain_topology_level x86_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_CLUSTER
	{ cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) },
#endif
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

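/*
 * Set if a package/die has multiple NUMA nodes inside.
 * AMD Magny-Cours, Intel Cluster-on-Die, and Intel
 * Sub-NUMA Clustering have this.
 */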
static bool x86_has_numa_in_package;

void set_cpu_sibling_map(int cpu)
{
	bool has_smt = smp_num_siblings > 1;
	bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	struct cpuinfo_x86 *o;
	int i, threads;

	cpumask_set_cpu(cpu, cpu_sibling_setup_mask);

	if (!has_mp) {
		cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
		cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
		cpumask_set_cpu(cpu, cpu_l2c_shared_mask(cpu));
		cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
		cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
		c->booted_cores = 1;
		return;
	}

	for_each_cpu(i, cpu_sibling_setup_mask) {
		o = &cpu_data(i);

		if (match_pkg(c, o) && !topology_same_node(c, o))
			x86_has_numa_in_package = true;

		if ((i == cpu) || (has_smt && match_smt(c, o)))
			link_mask(topology_sibling_cpumask, cpu, i);

		if ((i == cpu) || (has_mp && match_llc(c, o)))
			link_mask(cpu_llc_shared_mask, cpu, i);

		if ((i == cpu) || (has_mp && match_l2c(c, o)))
			link_mask(cpu_l2c_shared_mask, cpu, i);

		if ((i == cpu) || (has_mp && match_die(c, o)))
			link_mask(topology_die_cpumask, cpu, i);
	}

	threads = cpumask_weight(topology_sibling_cpumask(cpu));
	if (threads > __max_smt_threads)
		__max_smt_threads = threads;

	for_each_cpu(i, topology_sibling_cpumask(cpu))
		cpu_data(i).smt_active = threads > 1;

	/*
	 * This needs a separate iteration over the cpus because we rely on all
	 * topology_sibling_cpumask links to be set-up.
	 */
	for_each_cpu(i, cpu_sibling_setup_mask) {
		o = &cpu_data(i);

		if ((i == cpu) || (has_mp && match_pkg(c, o))) {
			link_mask(topology_core_cpumask, cpu, i);

			/*
			 * Does this new cpu bringup a new core?
			 */
			if (threads == 1) {
				/*
				 * for each core in package, increment
				 * the booted_cores for this new cpu
				 */
				if (cpumask_first(
				    topology_sibling_cpumask(i)) == i)
					c->booted_cores++;
				/*
				 * increment the core count for all
				 * the other cpus in this package
				 */
				if (i != cpu)
					cpu_data(i).booted_cores++;
			} else if (i != cpu && !c->booted_cores)
				c->booted_cores = cpu_data(i).booted_cores;
		}
	}
}

/* maps the cpu to the sched domain representing multi-core */
const struct cpumask *cpu_coregroup_mask(int cpu)
{
	return cpu_llc_shared_mask(cpu);
}

const struct cpumask *cpu_clustergroup_mask(int cpu)
{
	return cpu_l2c_shared_mask(cpu);
}

static void impress_friends(void)
{
	int cpu;
	unsigned long bogosum = 0;
	/*
	 * Allow the user to impress friends.
	 */
	pr_debug("Before bogomips\n");
	for_each_possible_cpu(cpu)
		if (cpumask_test_cpu(cpu, cpu_callout_mask))
			bogosum += cpu_data(cpu).loops_per_jiffy;
	pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
		num_online_cpus(),
		bogosum/(500000/HZ),
		(bogosum/(5000/HZ))%100);

	pr_debug("Before bogocount - setting activated=1\n");
}

void __inquire_remote_apic(int apicid)
{
	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	const char * const names[] = { "ID", "VERSION", "SPIV" };
	int timeout;
	u32 status;

	pr_info("Inquiring remote APIC 0x%x...\n", apicid);

	for (i = 0; i < ARRAY_SIZE(regs); i++) {
		pr_info("... APIC 0x%x %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		status = safe_apic_wait_icr_idle();
		if (status)
			pr_cont("a previous APIC delivery may have failed\n");

		apic_icr_write(APIC_DM_REMRD | regs[i], apicid);

		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			pr_cont("%08x\n", status);
			break;
		default:
			pr_cont("failed\n");
		}
	}
}

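/*
 * The Multiprocessor Specification 1.4 (1997) example code suggests
 * that there should be a 10ms delay between the BSP asserting INIT
 * and de-asserting INIT, when starting a remote processor.
 * But that slows boot and resume on modern processors, which include
 * many cores and don't require that delay.
 *
 * Cmdline "cpu_init_udelay=" is available to over-ride this delay.
 * Modern processor families are quirked to remove the delay entirely.
 */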
#define UDELAY_10MS_DEFAULT 10000

static unsigned int init_udelay = UINT_MAX;

static int __init cpu_init_udelay(char *str)
{
	get_option(&str, &init_udelay);

	return 0;
}
early_param("cpu_init_udelay", cpu_init_udelay);

static void __init smp_quirk_init_udelay(void)
{
	/* if cmdline changed it from default, leave it alone */
	if (init_udelay != UINT_MAX)
		return;

	/* if modern processor, use no delay */
	if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
	    ((boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) && (boot_cpu_data.x86 >= 0x18)) ||
	    ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) {
		init_udelay = 0;
		return;
	}
	/* else, use legacy delay */
	init_udelay = UDELAY_10MS_DEFAULT;
}

/*
 * Wake up a secondary CPU (or the offlined CPU0) by sending it an NMI.
 */
int
wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
{
	u32 dm = apic->dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
	unsigned long send_status, accept_status = 0;
	int maxlvt;

	/* Target chip */
	/* Boot on the stack */
	/* Kick the second */
	apic_icr_write(APIC_DM_NMI | dm, apicid);

	pr_debug("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	/*
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(200);
	if (APIC_INTEGRATED(boot_cpu_apic_version)) {
		maxlvt = lapic_get_maxlvt();
		if (maxlvt > 3)			/* Due to the Pentium erratum 3AP.  */
			apic_write(APIC_ESR, 0);
		accept_status = (apic_read(APIC_ESR) & 0xEF);
	}
	pr_debug("NMI sent\n");

	if (send_status)
		pr_err("APIC never delivered???\n");
	if (accept_status)
		pr_err("APIC delivery error (%lx)\n", accept_status);

	return (send_status | accept_status);
}

static int
wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
	unsigned long send_status = 0, accept_status = 0;
	int maxlvt, num_starts, j;

	maxlvt = lapic_get_maxlvt();

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(boot_cpu_apic_version)) {
		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	pr_debug("Asserting INIT\n");

	/*
	 * Turn INIT on target chip
	 */
	/*
	 * Send IPI
	 */
	apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
		       phys_apicid);

	pr_debug("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	udelay(init_udelay);

	pr_debug("Deasserting INIT\n");

	/* Target chip */
	/* Send IPI */
	apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);

	pr_debug("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	mb();

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(boot_cpu_apic_version))
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Run STARTUP IPI loop.
	 */
	pr_debug("#startup loops: %d\n", num_starts);

	for (j = 1; j <= num_starts; j++) {
		pr_debug("Sending STARTUP #%d\n", j);
		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		pr_debug("After apic_write\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		/* Boot on the stack */
		/* Kick the second */
		apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
			       phys_apicid);

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		if (init_udelay == 0)
			udelay(10);
		else
			udelay(300);

		pr_debug("Startup point 1\n");

		pr_debug("Waiting for send to finish...\n");
		send_status = safe_apic_wait_icr_idle();

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		if (init_udelay == 0)
			udelay(10);
		else
			udelay(200);

		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
			apic_write(APIC_ESR, 0);
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	pr_debug("After Startup\n");

	if (send_status)
		pr_err("APIC never delivered???\n");
	if (accept_status)
		pr_err("APIC delivery error (%lx)\n", accept_status);

	return (send_status | accept_status);
}

/* reduce the number of lines printed when booting a large machine */
static void announce_cpu(int cpu, int apicid)
{
	static int current_node = NUMA_NO_NODE;
	int node = early_cpu_to_node(cpu);
	static int width, node_width;

	if (!width)
		width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */

	if (!node_width)
		node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */

	if (cpu == 1)
		printk(KERN_INFO "x86: Booting SMP configuration:\n");

	if (system_state < SYSTEM_RUNNING) {
		if (node != current_node) {
			if (current_node > (-1))
				pr_cont("\n");
			current_node = node;

			printk(KERN_INFO ".... node %*s#%d, CPUs: ",
			       node_width - num_digits(node), " ", node);
		}

		/* Add padding for the BSP */
		if (cpu == 1)
			pr_cont("%*s", width + 1, " ");

		pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu);

	} else
		pr_info("Booting Node %d Processor %d APIC 0x%x\n",
			node, cpu, apicid);
}

/* NMI handler used to wake up the offlined CPU0 (the boot processor) */
static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
{
	int cpu;

	cpu = smp_processor_id();
	if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
		return NMI_HANDLED;

	return NMI_DONE;
}

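/*
 * Wake up AP by INIT, INIT, STARTUP sequence.
 *
 * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS
 * boot-strap code which is not a desired behavior for waking up BSP. To
 * void the boot-strap code, wake up CPU0 by NMI instead.
 */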
static int
wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
			int *cpu0_nmi_registered)
{
	int id;
	int boot_error;

	preempt_disable();

	/*
	 * Wake up AP by INIT, INIT, STARTUP sequence.
	 */
	if (cpu) {
		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
		goto out;
	}

	/*
	 * Wake up BSP by nmi.
	 *
	 * Register a NMI handler to help wake up CPU0.
	 */
	boot_error = register_nmi_handler(NMI_LOCAL,
					  wakeup_cpu0_nmi, 0, "wake_cpu0");

	if (!boot_error) {
		enable_start_cpu0 = 1;
		*cpu0_nmi_registered = 1;
		id = apic->dest_mode_logical ? cpu0_logical_apicid : apicid;
		boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
	}

out:
	preempt_enable();

	return boot_error;
}

int common_cpu_up(unsigned int cpu, struct task_struct *idle)
{
	int ret;

	/* Just in case we booted with a single CPU. */
	alternatives_enable_smp();

	per_cpu(current_task, cpu) = idle;
	cpu_init_stack_canary(cpu, idle);

	/* Initialize the interrupt stack(s) */
	ret = irq_init_percpu_irqstack(cpu);
	if (ret)
		return ret;

#ifdef CONFIG_X86_32
	/* Stack for startup_32 can be just as for start_secondary onwards */
	per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#else
	initial_gs = per_cpu_offset(cpu);
#endif
	return 0;
}

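/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
 * Returns zero if CPU booted OK, else error code from
 * ->wakeup_secondary_cpu.
 */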
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
		       int *cpu0_nmi_registered)
{
	/* start_ip had better be page-aligned! */
	unsigned long start_ip = real_mode_header->trampoline_start;

	unsigned long boot_error = 0;
	unsigned long timeout;

	idle->thread.sp = (unsigned long)task_pt_regs(idle);
	early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
	initial_code = (unsigned long)start_secondary;
	initial_stack = idle->thread.sp;

	/* Enable the espfix hack for this CPU */
	init_espfix_ap(cpu);

	/* So we see what's up */
	announce_cpu(cpu, apicid);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */
	if (x86_platform.legacy.warm_reset) {

		pr_debug("Setting warm reset code and vector.\n");

		smpboot_setup_warm_reset_vector(start_ip);
		/*
		 * Be paranoid about clearing APIC errors.
		 */
		if (APIC_INTEGRATED(boot_cpu_apic_version)) {
			apic_write(APIC_ESR, 0);
			apic_read(APIC_ESR);
		}
	}

	/*
	 * AP might wait on cpu_callout_mask in cpu_init() with
	 * cpu_initialized_mask set if previous attempt to online
	 * it timed-out. Clear cpu_initialized_mask so that after
	 * INIT/SIPI it could start with a clean state.
	 */
	cpumask_clear_cpu(cpu, cpu_initialized_mask);
	smp_mb();

	/*
	 * Wake up a CPU in difference cases:
	 * - Use the method in the APIC driver if it's defined
	 * Otherwise,
	 * - Use an INIT boot APIC message for APs or NMI for BSP.
	 */
	if (apic->wakeup_secondary_cpu)
		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
	else
		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
						     cpu0_nmi_registered);

	if (!boot_error) {
		/*
		 * Wait 10s total for first sign of life from AP
		 */
		boot_error = -1;
		timeout = jiffies + 10*HZ;
		while (time_before(jiffies, timeout)) {
			if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
				/*
				 * Tell AP to proceed with initialization
				 */
				cpumask_set_cpu(cpu, cpu_callout_mask);
				boot_error = 0;
				break;
			}
			schedule();
		}
	}

	if (!boot_error) {
		/*
		 * Wait till AP completes initial initialization
		 */
		while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
			/*
			 * Allow AP to start initializing.
			 */
			schedule();
		}
	}

	if (x86_platform.legacy.warm_reset) {
		/*
		 * Cleanup possible dangling ends...
		 */
		smpboot_restore_warm_reset_vector();
	}

	return boot_error;
}

int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int apicid = apic->cpu_present_to_apicid(cpu);
	int cpu0_nmi_registered = 0;
	unsigned long flags;
	int err, ret = 0;

	lockdep_assert_irqs_enabled();

	pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);

	if (apicid == BAD_APICID ||
	    !physid_isset(apicid, phys_cpu_present_map) ||
	    !apic->apic_id_valid(apicid)) {
		pr_err("%s: bad cpu %d\n", __func__, cpu);
		return -EINVAL;
	}

	/*
	 * Already booted CPU?
	 */
	if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
		pr_debug("do_boot_cpu %d Already started\n", cpu);
		return -ENOSYS;
	}

	/*
	 * Save current MTRR state in case it was changed since early boot
	 * (in setup_arch()) to avoid problems in mtrr_ap_init()
	 */
	mtrr_save_state();

	/* x86 CPUs take themselves offline, so delayed offline is OK. */
	err = cpu_check_up_prepare(cpu);
	if (err && err != -EBUSY)
		return err;

	/* the FPU context is blank, nobody can own it */
	per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;

	err = common_cpu_up(cpu, tidle);
	if (err)
		return err;

	err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
	if (err) {
		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
		ret = -EIO;
		goto unreg_nmi;
	}

	/*
	 * Check TSC synchronization with the AP (keep irqs disabled
	 * while doing so):
	 */
	local_irq_save(flags);
	check_tsc_sync_source(cpu);
	local_irq_restore(flags);

	while (!cpu_online(cpu)) {
		cpu_relax();
		touch_nmi_watchdog();
	}

unreg_nmi:
	/*
	 * Clean up the nmi handler. Do this after the callin and callout sync
	 * to avoid impact of possible long unregister time.
	 */
	if (cpu0_nmi_registered)
		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");

	return ret;
}

/**
 * arch_disable_smp_support() - disables SMP support for x86 at runtime
 */
void arch_disable_smp_support(void)
{
	disable_ioapic_support();
}

/*
 * Fall back to non SMP mode after errors.
 *
 * RED-PEN audit/test this more. I bet there is more state messed up here.
 */
static __init void disable_smp(void)
{
	pr_info("SMP disabled\n");

	disable_ioapic_support();

	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));

	if (smp_found_config)
		physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
	else
		physid_set_mask_of_physid(0, &phys_cpu_present_map);
	cpumask_set_cpu(0, topology_sibling_cpumask(0));
	cpumask_set_cpu(0, topology_core_cpumask(0));
	cpumask_set_cpu(0, topology_die_cpumask(0));
}

/*
 * Various sanity checks.
 */
static void __init smp_sanity_check(void)
{
	preempt_disable();

#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32)
	if (def_to_bigsmp && nr_cpu_ids > 8) {
		unsigned int cpu;
		unsigned nr;

		pr_warn("More than 8 CPUs detected - skipping them\n"
			"Use CONFIG_X86_BIGSMP\n");

		nr = 0;
		for_each_present_cpu(cpu) {
			if (nr >= 8)
				set_cpu_present(cpu, false);
			nr++;
		}

		nr = 0;
		for_each_possible_cpu(cpu) {
			if (nr >= 8)
				set_cpu_possible(cpu, false);
			nr++;
		}

		nr_cpu_ids = 8;
	}
#endif

	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
		pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n",
			hard_smp_processor_id());

		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 */
	if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
		pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n",
			  boot_cpu_physical_apicid);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}
	preempt_enable();
}

static void __init smp_cpu_index_default(void)
{
	int i;
	struct cpuinfo_x86 *c;

	for_each_possible_cpu(i) {
		c = &cpu_data(i);
		/* mark all to hotplug */
		c->cpu_index = nr_cpu_ids;
	}
}

static void __init smp_get_logical_apicid(void)
{
	if (x2apic_mode)
		cpu0_logical_apicid = apic_read(APIC_LDR);
	else
		cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
}

void __init smp_prepare_cpus_common(void)
{
	unsigned int i;

	smp_cpu_index_default();

	/*
	 * Setup boot CPU information
	 */
	smp_store_boot_cpu_info(); /* Final full version of the data */
	cpumask_copy(cpu_callin_mask, cpumask_of(0));
	mb();

	for_each_possible_cpu(i) {
		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_l2c_shared_map, i), GFP_KERNEL);
	}

	/*
	 * Set 'default' x86 topology, this matches default_topology() in that
	 * it has NUMA nodes as a topology level. See also
	 * native_smp_cpus_done().
	 *
	 * Must be done before set_cpu_sibling_map() is ran.
	 */
	set_sched_topology(x86_topology);

	set_cpu_sibling_map(0);
}

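/*
 * Prepare for SMP bootup.
 * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
 *            for common interface support.
 */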
void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
	smp_prepare_cpus_common();

	init_freq_invariance(false, false);
	smp_sanity_check();

	switch (apic_intr_mode) {
	case APIC_PIC:
	case APIC_VIRTUAL_WIRE_NO_CONFIG:
		disable_smp();
		return;
	case APIC_SYMMETRIC_IO_NO_ROUTING:
		disable_smp();
		/* Setup local timer */
		x86_init.timers.setup_percpu_clockev();
		return;
	case APIC_VIRTUAL_WIRE:
	case APIC_SYMMETRIC_IO:
		break;
	}

	/* Setup local timer */
	x86_init.timers.setup_percpu_clockev();

	smp_get_logical_apicid();

	pr_info("CPU0: ");
	print_cpu_info(&cpu_data(0));

	uv_system_init();

	set_mtrr_aps_delayed_init();

	smp_quirk_init_udelay();

	speculative_store_bypass_ht_init();
}

void arch_thaw_secondary_cpus_begin(void)
{
	set_mtrr_aps_delayed_init();
}

void arch_thaw_secondary_cpus_end(void)
{
	mtrr_aps_init();
}

/*
 * Early setup to make printk work.
 */
void __init native_smp_prepare_boot_cpu(void)
{
	int me = smp_processor_id();
	switch_to_new_gdt(me);
	/* already set me in cpu_online_mask in boot_cpu_init() */
	cpumask_set_cpu(me, cpu_callout_mask);
	cpu_set_state_online(me);
	native_pv_lock_init();
}

void __init calculate_max_logical_packages(void)
{
	int ncpus;

	/*
	 * Today neither Intel nor AMD support heterogeneous systems so
	 * extrapolate the boot cpu's data to all packages.
	 */
	ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
	pr_info("Max logical packages: %u\n", __max_logical_packages);
}

void __init native_smp_cpus_done(unsigned int max_cpus)
{
	pr_debug("Boot done\n");

	calculate_max_logical_packages();

	/* XXX for now assume numa-in-package and hybrid don't overlap */
	if (x86_has_numa_in_package)
		set_sched_topology(x86_numa_in_package_topology);
	if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
		set_sched_topology(x86_hybrid_topology);

	nmi_selftest();
	impress_friends();
	mtrr_aps_init();
}

static int __initdata setup_possible_cpus = -1;
static int __init _setup_possible_cpus(char *str)
{
	get_option(&str, &setup_possible_cpus);
	return 0;
}
early_param("possible_cpus", _setup_possible_cpus);

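/*
 * cpu_possible_mask should be static: it cannot change as CPUs are
 * onlined or offlined, because per-cpu data structures are allocated
 * at init time for each possible CPU and are not reallocated on
 * arrival or departure. cpu_present_mask, on the other hand, can
 * change dynamically.
 *
 * With CONFIG_HOTPLUG_CPU, the possible map also includes disabled_cpus
 * (CPUs reported by the BIOS but not enabled at boot) so they can be
 * hotplugged later; the "possible_cpus=" command line overrides this.
 * Without CPU hotplug, cpu_possible_mask is simply the set of CPUs
 * detected at boot, capped by maxcpus.
 */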
__init void prefill_possible_map(void)
{
	int i, possible;

	/* No boot processor was found in mptable or ACPI MADT */
	if (!num_processors) {
		if (boot_cpu_has(X86_FEATURE_APIC)) {
			int apicid = boot_cpu_physical_apicid;
			int cpu = hard_smp_processor_id();

			pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);

			/* Make sure boot cpu is enumerated */
			if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
			    apic->apic_id_valid(apicid))
				generic_processor_info(apicid, boot_cpu_apic_version);
		}

		if (!num_processors)
			num_processors = 1;
	}

	i = setup_max_cpus ?: 1;
	if (setup_possible_cpus == -1) {
		possible = num_processors;
#ifdef CONFIG_HOTPLUG_CPU
		if (setup_max_cpus)
			possible += disabled_cpus;
#else
		if (possible > i)
			possible = i;
#endif
	} else
		possible = setup_possible_cpus;

	total_cpus = max_t(int, possible, num_processors + disabled_cpus);

	/* nr_cpu_ids could be reduced via nr_cpus= */
	if (possible > nr_cpu_ids) {
		pr_warn("%d Processors exceeds NR_CPUS limit of %u\n",
			possible, nr_cpu_ids);
		possible = nr_cpu_ids;
	}

#ifdef CONFIG_HOTPLUG_CPU
	if (!setup_max_cpus)
#endif
	if (possible > i) {
		pr_warn("%d Processors exceeds max_cpus limit of %u\n",
			possible, setup_max_cpus);
		possible = i;
	}

	nr_cpu_ids = possible;

	pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
		possible, max_t(int, possible - num_processors, 0));

	reset_cpu_possible_mask();

	for (i = 0; i < possible; i++)
		set_cpu_possible(i, true);
}

#ifdef CONFIG_HOTPLUG_CPU

/* Recompute SMT state for all CPUs on offline */
static void recompute_smt_state(void)
{
	int max_threads, cpu;

	max_threads = 0;
	for_each_online_cpu (cpu) {
		int threads = cpumask_weight(topology_sibling_cpumask(cpu));

		if (threads > max_threads)
			max_threads = threads;
	}
	__max_smt_threads = max_threads;
}

static void remove_siblinginfo(int cpu)
{
	int sibling;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	for_each_cpu(sibling, topology_core_cpumask(cpu)) {
		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
		/*
		 * last thread sibling in this cpu core going down
		 */
		if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1)
			cpu_data(sibling).booted_cores--;
	}

	for_each_cpu(sibling, topology_die_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));

	for_each_cpu(sibling, topology_sibling_cpumask(cpu)) {
		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
		if (cpumask_weight(topology_sibling_cpumask(sibling)) == 1)
			cpu_data(sibling).smt_active = false;
	}

	for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
		cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
	for_each_cpu(sibling, cpu_l2c_shared_mask(cpu))
		cpumask_clear_cpu(cpu, cpu_l2c_shared_mask(sibling));
	cpumask_clear(cpu_llc_shared_mask(cpu));
	cpumask_clear(cpu_l2c_shared_mask(cpu));
	cpumask_clear(topology_sibling_cpumask(cpu));
	cpumask_clear(topology_core_cpumask(cpu));
	cpumask_clear(topology_die_cpumask(cpu));
	c->cpu_core_id = 0;
	c->booted_cores = 0;
	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
	recompute_smt_state();
}

static void remove_cpu_from_maps(int cpu)
{
	set_cpu_online(cpu, false);
	cpumask_clear_cpu(cpu, cpu_callout_mask);
	cpumask_clear_cpu(cpu, cpu_callin_mask);
	/* was set by cpu_init() */
	cpumask_clear_cpu(cpu, cpu_initialized_mask);
	numa_remove_cpu(cpu);
}

void cpu_disable_common(void)
{
	int cpu = smp_processor_id();

	remove_siblinginfo(cpu);

	/* It's now safe to remove this processor from the online map */
	lock_vector_lock();
	remove_cpu_from_maps(cpu);
	unlock_vector_lock();
	fixup_irqs();
	lapic_offline();
}

int native_cpu_disable(void)
{
	int ret;

	ret = lapic_can_unplug_cpu();
	if (ret)
		return ret;

	cpu_disable_common();

	/*
	 * Disable the local APIC. Otherwise IPI broadcasts will reach
	 * it. It still responds normally to INIT, NMI, SMI, and SIPI
	 * messages.
	 *
	 * Disabling the APIC must happen after cpu_disable_common()
	 * which invokes fixup_irqs().
	 *
	 * Disabling the APIC preserves already set bits in IRR, but
	 * an interrupt arriving after disabling the local APIC does not
	 * set the corresponding IRR bit.
	 *
	 * fixup_irqs() scans IRR for set bits so it can raise a not
	 * yet handled interrupt on the new destination CPU via an IPI
	 * but obviously it can't do so for IRR bits which are not set.
	 * IOW, interrupts arriving after disabling the local APIC will
	 * be lost.
	 */
	apic_soft_disable();

	return 0;
}

int common_cpu_die(unsigned int cpu)
{
	int ret = 0;

	/* We don't do anything here: idle task is faking death itself. */

	/* They ack this in play_dead() by setting CPU_DEAD */
	if (cpu_wait_death(cpu, 5)) {
		if (system_state == SYSTEM_RUNNING)
			pr_info("CPU %u is now offline\n", cpu);
	} else {
		pr_err("CPU %u didn't die...\n", cpu);
		ret = -1;
	}

	return ret;
}

void native_cpu_die(unsigned int cpu)
{
	common_cpu_die(cpu);
}

void play_dead_common(void)
{
	idle_task_exit();

	/* Ack it */
	(void)cpu_report_death();

	/*
	 * With physical CPU hotplug, we should halt the cpu
	 */
	local_irq_disable();
}

/**
 * cond_wakeup_cpu0 - Wake up CPU0 if needed.
 *
 * If NMI wants to wake up CPU0, start CPU0.
 */
void cond_wakeup_cpu0(void)
{
	if (smp_processor_id() == 0 && enable_start_cpu0)
		start_cpu0();
}
EXPORT_SYMBOL_GPL(cond_wakeup_cpu0);

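/*
 * We need to flush the caches before going to sleep, lest we have
 * dirty data in our caches when we come back up.
 */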
static inline void mwait_play_dead(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0;
	unsigned int highest_subcstate = 0;
	void *mwait_ptr;
	int i;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
	    boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		return;
	if (!this_cpu_has(X86_FEATURE_MWAIT))
		return;
	if (!this_cpu_has(X86_FEATURE_CLFLUSH))
		return;
	if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
		return;

	eax = CPUID_MWAIT_LEAF;
	ecx = 0;
	native_cpuid(&eax, &ebx, &ecx, &edx);

	/*
	 * eax will be 0 if EDX enumeration is not valid.
	 * Initialized below to cstate, sub_cstate value when EDX is valid.
	 */
	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
		eax = 0;
	} else {
		edx >>= MWAIT_SUBSTATE_SIZE;
		for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
			if (edx & MWAIT_SUBSTATE_MASK) {
				highest_cstate = i;
				highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
			}
		}
		eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
			(highest_subcstate - 1);
	}

	/*
	 * This should be a memory location in a cache line which is
	 * unlikely to be touched by other processors.  The actual
	 * content is immaterial as it is not actually modified in any way.
	 */
	mwait_ptr = &current_thread_info()->flags;

	wbinvd();

	while (1) {
		/*
		 * The CLFLUSH is a workaround for erratum AAI65 for
		 * the Xeon 7400 series.  It's not clear it is actually
		 * needed, but it should be harmless in either case.
		 * The WBINVD is insufficient due to the spurious-wakeup
		 * case where we return around the loop.
		 */
		mb();
		clflush(mwait_ptr);
		mb();
		__monitor(mwait_ptr, 0, 0);
		mb();
		__mwait(eax, 0);

		cond_wakeup_cpu0();
	}
}

void hlt_play_dead(void)
{
	if (__this_cpu_read(cpu_info.x86) >= 4)
		wbinvd();

	while (1) {
		native_halt();

		cond_wakeup_cpu0();
	}
}

void native_play_dead(void)
{
	play_dead_common();
	tboot_shutdown(TB_SHUTDOWN_WFS);

	mwait_play_dead();
	if (cpuidle_play_dead())
		hlt_play_dead();
}

#else
int native_cpu_disable(void)
{
	return -ENOSYS;
}

void native_cpu_die(unsigned int cpu)
{
	/* We said "no" in __cpu_disable */
	BUG();
}

void native_play_dead(void)
{
	BUG();
}

#endif

#ifdef CONFIG_X86_64

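/*
 * APERF/MPERF frequency ratio computation.
 *
 * The scheduler wants to do frequency invariant accounting and needs a <1
 * ratio to account for the 'current' frequency, corresponding to
 * freq_curr / freq_max.
 *
 * Since the frequency on x86 is controlled by a micro-controller and our
 * P-state setting is little more than a request/hint, we observe the
 * effective frequency via the APERF/MPERF MSRs: MPERF counts at a fixed
 * (base) frequency while APERF counts at the delivered frequency, so over
 * an interval
 *
 *	freq_curr = base_freq * (delta_APERF / delta_MPERF)
 *
 * and the per-tick scale used by the scheduler is
 *
 *	freq_scale = (delta_APERF / delta_MPERF) / arch_max_freq_ratio
 *
 * where arch_max_freq_ratio approximates freq_max / base_freq (the turbo
 * ratio), determined per CPU family from MSRs or CPPC below.
 */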
DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);

static DEFINE_PER_CPU(u64, arch_prev_aperf);
static DEFINE_PER_CPU(u64, arch_prev_mperf);
static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;

void arch_set_max_freq_ratio(bool turbo_disabled)
{
	arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
					arch_turbo_freq_ratio;
}
EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);

static bool turbo_disabled(void)
{
	u64 misc_en;
	int err;

	err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
	if (err)
		return false;

	return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
}

static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
	int err;

	err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 16) & 0x3F;	/* max P state */
	*turbo_freq = *turbo_freq & 0x3F;	/* 1C turbo    */

	return true;
}

#define X86_MATCH(model)					\
	X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,		\
		INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)

static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
	X86_MATCH(XEON_PHI_KNL),
	X86_MATCH(XEON_PHI_KNM),
	{}
};

static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
	X86_MATCH(SKYLAKE_X),
	{}
};

static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
	X86_MATCH(ATOM_GOLDMONT),
	X86_MATCH(ATOM_GOLDMONT_D),
	X86_MATCH(ATOM_GOLDMONT_PLUS),
	{}
};

static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
				   int num_delta_fratio)
{
	int fratio, delta_fratio, found;
	int err, i;
	u64 msr;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state */

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
	if (err)
		return false;

	fratio = (msr >> 8) & 0xFF;
	i = 16;
	found = 0;
	do {
		if (found >= num_delta_fratio) {
			*turbo_freq = fratio;
			return true;
		}

		delta_fratio = (msr >> (i + 5)) & 0x7;

		if (delta_fratio) {
			found += 1;
			fratio -= delta_fratio;
		}

		i += 8;
	} while (i < 64);

	return true;
}

static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
{
	u64 ratios, counts;
	u32 group_size;
	int err, i;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state */

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
	if (err)
		return false;

	for (i = 0; i < 64; i += 8) {
		group_size = (counts >> i) & 0xFF;
		if (group_size >= size) {
			*turbo_freq = (ratios >> i) & 0xFF;
			return true;
		}
	}

	return false;
}

static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
	u64 msr;
	int err;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state */
	*turbo_freq = (msr >> 24) & 0xFF;	/* 4C turbo    */

	/* The CPU may have less than 4 turbo ratios, lookup the 1C turbo */
	if (!*turbo_freq)
		*turbo_freq = msr & 0xFF;	/* 1C turbo    */

	return true;
}

static bool intel_set_max_freq_ratio(void)
{
	u64 base_freq, turbo_freq;
	u64 turbo_ratio;

	if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
		goto out;

	if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
		goto out;

	if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
	    knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
		goto out;

	if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
		goto out;

	if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
		goto out;

	return false;

out:
	/*
	 * Some hypervisors advertise X86_FEATURE_APERFMPERF
	 * but then fill all MSR's with zeroes.
	 * Some CPUs have turbo boost but don't declare any turbo ratio
	 * in MSR_TURBO_RATIO_LIMIT.
	 */
	if (!base_freq || !turbo_freq) {
		pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
		return false;
	}

	turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
	if (!turbo_ratio) {
		pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
		return false;
	}

	arch_turbo_freq_ratio = turbo_ratio;
	arch_set_max_freq_ratio(turbo_disabled());

	return true;
}

#ifdef CONFIG_ACPI_CPPC_LIB
static bool amd_set_max_freq_ratio(void)
{
	struct cppc_perf_caps perf_caps;
	u64 highest_perf, nominal_perf;
	u64 perf_ratio;
	int rc;

	rc = cppc_get_perf_caps(0, &perf_caps);
	if (rc) {
		pr_debug("Could not retrieve perf counters (%d)\n", rc);
		return false;
	}

	highest_perf = amd_get_highest_perf();
	nominal_perf = perf_caps.nominal_perf;

	if (!highest_perf || !nominal_perf) {
		pr_debug("Could not retrieve highest or nominal performance\n");
		return false;
	}

	perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
	/* midpoint between max_boost and max_P */
	perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
	if (!perf_ratio) {
		pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
		return false;
	}

	arch_turbo_freq_ratio = perf_ratio;
	arch_set_max_freq_ratio(false);

	return true;
}
#else
static bool amd_set_max_freq_ratio(void)
{
	return false;
}
#endif

static void init_counter_refs(void)
{
	u64 aperf, mperf;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);

	this_cpu_write(arch_prev_aperf, aperf);
	this_cpu_write(arch_prev_mperf, mperf);
}

#ifdef CONFIG_PM_SLEEP
static struct syscore_ops freq_invariance_syscore_ops = {
	.resume = init_counter_refs,
};

static void register_freq_invariance_syscore_ops(void)
{
	/* Bail out if already registered. */
	if (freq_invariance_syscore_ops.node.prev)
		return;

	register_syscore_ops(&freq_invariance_syscore_ops);
}
#else
static inline void register_freq_invariance_syscore_ops(void) {}
#endif

static void init_freq_invariance(bool secondary, bool cppc_ready)
{
	bool ret = false;

	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
		return;

	if (secondary) {
		if (static_branch_likely(&arch_scale_freq_key)) {
			init_counter_refs();
		}
		return;
	}

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		ret = intel_set_max_freq_ratio();
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		/* Requires CPPC; retried later from init_freq_invariance_cppc() */
		if (!cppc_ready) {
			return;
		}
		ret = amd_set_max_freq_ratio();
	}

	if (ret) {
		init_counter_refs();
		static_branch_enable(&arch_scale_freq_key);
		register_freq_invariance_syscore_ops();
		pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
	} else {
		pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
	}
}

#ifdef CONFIG_ACPI_CPPC_LIB
static DEFINE_MUTEX(freq_invariance_lock);

void init_freq_invariance_cppc(void)
{
	static bool secondary;

	mutex_lock(&freq_invariance_lock);

	init_freq_invariance(secondary, true);
	secondary = true;

	mutex_unlock(&freq_invariance_lock);
}
#endif

static void disable_freq_invariance_workfn(struct work_struct *work)
{
	static_branch_disable(&arch_scale_freq_key);
}

static DECLARE_WORK(disable_freq_invariance_work,
		    disable_freq_invariance_workfn);
DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;

void arch_scale_freq_tick(void)
{
	u64 freq_scale;
	u64 aperf, mperf;
	u64 acnt, mcnt;

	if (!arch_scale_freq_invariant())
		return;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);

	acnt = aperf - this_cpu_read(arch_prev_aperf);
	mcnt = mperf - this_cpu_read(arch_prev_mperf);

	this_cpu_write(arch_prev_aperf, aperf);
	this_cpu_write(arch_prev_mperf, mperf);

	if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
		goto error;

	if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
		goto error;

	freq_scale = div64_u64(acnt, mcnt);
	if (!freq_scale)
		goto error;

	if (freq_scale > SCHED_CAPACITY_SCALE)
		freq_scale = SCHED_CAPACITY_SCALE;

	this_cpu_write(arch_freq_scale, freq_scale);
	return;

error:
	pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
	schedule_work(&disable_freq_invariance_work);
}
#else
static inline void init_freq_invariance(bool secondary, bool cppc_ready)
{
}
#endif /* CONFIG_X86_64 */