// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * x86 SMP booting functions
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/smp.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task_stack.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
#include <linux/err.h>
#include <linux/nmi.h>
#include <linux/tboot.h>
#include <linux/gfp.h>
#include <linux/cpuidle.h>
#include <linux/numa.h>
#include <linux/pgtable.h>
#include <linux/overflow.h>
#include <linux/syscore_ops.h>

#include <asm/acpi.h>
#include <asm/desc.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/realmode.h>
#include <asm/cpu.h>
#include <asm/numa.h>
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
#include <asm/mwait.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/fpu/api.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
#include <asm/i8259.h>
#include <asm/misc.h>
#include <asm/qspinlock.h>
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
#include <asm/spec-ctrl.h>
#include <asm/hw_irq.h>
#include <asm/stackprotector.h>
85
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);

/* representing HT and core siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);

/* representing HT, core and die siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
EXPORT_PER_CPU_SYMBOL(cpu_die_map);

/* CPUs sharing the last level cache */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* CPUs sharing the L2 cache */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);

/* Logical package and die management */
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
static unsigned int logical_die __read_mostly;

/* Maximum number of SMT threads on any online core */
int __read_mostly __max_smt_threads = 1;

/* Flag to indicate if a complete sched domain rebuild is required */
bool x86_topology_update;
117
int arch_update_cpu_topology(void)
{
        int retval = x86_topology_update;

        x86_topology_update = false;
        return retval;
}

static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
        unsigned long flags;

        spin_lock_irqsave(&rtc_lock, flags);
        /* Flag a warm reset in the CMOS shutdown status byte */
        CMOS_WRITE(0xa, 0xf);
        spin_unlock_irqrestore(&rtc_lock, flags);
        /* Point the warm-reset vector at the real-mode trampoline */
        *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
                                                        start_eip >> 4;
        *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
                                                        start_eip & 0xf;
}

static inline void smpboot_restore_warm_reset_vector(void)
{
        unsigned long flags;

        /*
         * Paranoid: Set warm reset code and vector here back
         * to default values.
         */
        spin_lock_irqsave(&rtc_lock, flags);
        CMOS_WRITE(0, 0xf);
        spin_unlock_irqrestore(&rtc_lock, flags);

        *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
153
154
155
156
157
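/*
 * Report back to the boot processor.  Runs on the AP: set up the local
 * APIC, store this CPU's data, build the sibling maps, calibrate the
 * delay loop and finally mark this CPU in cpu_callin_mask so the BSP
 * can continue.
 */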
static void smp_callin(void)
{
        int cpuid;

        /*
         * The boot CPU is waiting for us: we only need our CPU id to
         * find ourselves.
         */
        cpuid = smp_processor_id();

        /*
         * The boot CPU has finished the init stage and is spinning on
         * cpu_callin_mask until we finish.  We are free to set up this
         * CPU, first the APIC.
         */
        apic_ap_setup();

        /*
         * Save our processor parameters.  Note: this information
         * is needed for clock calibration.
         */
        smp_store_cpu_info(cpuid);

        /*
         * The topology information must be up to date before
         * calibrate_delay() and notify_cpu_starting().
         */
        set_cpu_sibling_map(raw_smp_processor_id());

        init_freq_invariance(true, false);

        /*
         * Get our bogomips.  Update loops_per_jiffy in cpu_data;
         * smp_store_cpu_info() stored a value that is close but not as
         * accurate as the value just calculated.
         */
        calibrate_delay();
        cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
        pr_debug("Stack at about %p\n", &cpuid);

        wmb();

        notify_cpu_starting(cpuid);

        /*
         * Allow the master to continue.
         */
        cpumask_set_cpu(cpuid, cpu_callin_mask);
}
211
static int cpu0_logical_apicid;
static int enable_start_cpu0;
214
215
216
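/*
 * Activate a secondary processor.  This is the C entry point reached
 * from the real-mode trampoline via initial_code.
 */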
static void notrace start_secondary(void *unused)
{
        /*
         * Don't put *anything* except direct CPU state initialization
         * before cpu_init(); SMP booting is too fragile to allow more
         * than the strictly necessary here.
         */
        cr4_init();

#ifdef CONFIG_X86_32
        /* switch away from the initial page table */
        load_cr3(swapper_pg_dir);
        __flush_tlb_all();
#endif
        cpu_init_secondary();
        rcu_cpu_starting(raw_smp_processor_id());
        x86_cpuinit.early_percpu_clock_init();
        smp_callin();

        enable_start_cpu0 = 0;

        /* otherwise gcc will move up smp_processor_id() before cpu_init() */
        barrier();

        /*
         * Check TSC synchronization with the boot CPU.
         */
        check_tsc_sync_target();

        speculative_store_bypass_ht_init();

        /*
         * Lock vector_lock, set CPU online and bring the vector
         * allocator online.  Online must be set with vector_lock held
         * to prevent a concurrent irq setup/teardown from seeing a
         * half-valid vector space.
         */
        lock_vector_lock();
        set_cpu_online(smp_processor_id(), true);
        lapic_online();
        unlock_vector_lock();
        cpu_set_state_online(smp_processor_id());
        x86_platform.nmi_init();

        /* enable local interrupts */
        local_irq_enable();

        x86_cpuinit.setup_percpu_clockev();

        wmb();
        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
268
/**
 * topology_is_primary_thread - Check whether CPU is the primary SMT thread
 * @cpu:	CPU to check
 */
bool topology_is_primary_thread(unsigned int cpu)
{
        return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu));
}

/**
 * topology_smt_supported - Check whether SMT is supported by the CPUs
 */
bool topology_smt_supported(void)
{
        return smp_num_siblings > 1;
}
285
286
287
288
289
290
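/**
 * topology_phys_to_logical_pkg - Map a physical package id to a logical one
 * @phys_pkg:	The physical package id to map
 *
 * Returns logical package id or -1 if not found
 */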
291int topology_phys_to_logical_pkg(unsigned int phys_pkg)
292{
293 int cpu;
294
295 for_each_possible_cpu(cpu) {
296 struct cpuinfo_x86 *c = &cpu_data(cpu);
297
298 if (c->initialized && c->phys_proc_id == phys_pkg)
299 return c->logical_proc_id;
300 }
301 return -1;
302}
303EXPORT_SYMBOL(topology_phys_to_logical_pkg);
304
305
306
307
308
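/**
 * topology_phys_to_logical_die - Map a physical die id to a logical one
 * @die_id:	The physical die id to map
 * @cur_cpu:	The CPU for which the mapping is done
 *
 * Returns logical die id or -1 if not found
 */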
309int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
310{
311 int cpu;
312 int proc_id = cpu_data(cur_cpu).phys_proc_id;
313
314 for_each_possible_cpu(cpu) {
315 struct cpuinfo_x86 *c = &cpu_data(cpu);
316
317 if (c->initialized && c->cpu_die_id == die_id &&
318 c->phys_proc_id == proc_id)
319 return c->logical_die_id;
320 }
321 return -1;
322}
323EXPORT_SYMBOL(topology_phys_to_logical_die);
324
325
326
327
328
329
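/**
 * topology_update_package_map - Update the physical to logical package map
 * @pkg:	The physical package id as retrieved via CPUID
 * @cpu:	The cpu for which this is updated
 */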
330int topology_update_package_map(unsigned int pkg, unsigned int cpu)
331{
332 int new;
333
334
335 new = topology_phys_to_logical_pkg(pkg);
336 if (new >= 0)
337 goto found;
338
339 new = logical_packages++;
340 if (new != pkg) {
341 pr_info("CPU %u Converting physical %u to logical package %u\n",
342 cpu, pkg, new);
343 }
344found:
345 cpu_data(cpu).logical_proc_id = new;
346 return 0;
347}
348
349
350
351
352
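/**
 * topology_update_die_map - Update the physical to logical die map
 * @die:	The die id as retrieved via CPUID
 * @cpu:	The cpu for which this is updated
 */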
353int topology_update_die_map(unsigned int die, unsigned int cpu)
354{
355 int new;
356
357
358 new = topology_phys_to_logical_die(die, cpu);
359 if (new >= 0)
360 goto found;
361
362 new = logical_die++;
363 if (new != die) {
364 pr_info("CPU %u Converting physical %u to logical die %u\n",
365 cpu, die, new);
366 }
367found:
368 cpu_data(cpu).logical_die_id = new;
369 return 0;
370}
371
372void __init smp_store_boot_cpu_info(void)
373{
374 int id = 0;
375 struct cpuinfo_x86 *c = &cpu_data(id);
376
377 *c = boot_cpu_data;
378 c->cpu_index = id;
379 topology_update_package_map(c->phys_proc_id, id);
380 topology_update_die_map(c->cpu_die_id, id);
381 c->initialized = true;
382}
383
384
385
386
387
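/*
 * The bootstrap kernel entry code has set these up.  Save them for a
 * given CPU: copy boot_cpu_data only on the first bringup and let
 * identify_secondary_cpu() refine the per-CPU data afterwards.
 */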
388void smp_store_cpu_info(int id)
389{
390 struct cpuinfo_x86 *c = &cpu_data(id);
391
392
393 if (!c->initialized)
394 *c = boot_cpu_data;
395 c->cpu_index = id;
396
397
398
399
400 identify_secondary_cpu(c);
401 c->initialized = true;
402}
403
404static bool
405topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
406{
407 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
408
409 return (cpu_to_node(cpu1) == cpu_to_node(cpu2));
410}
411
412static bool
413topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
414{
415 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
416
417 return !WARN_ONCE(!topology_same_node(c, o),
418 "sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
419 "[node: %d != %d]. Ignoring dependency.\n",
420 cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
421}
422
423#define link_mask(mfunc, c1, c2) \
424do { \
425 cpumask_set_cpu((c1), mfunc(c2)); \
426 cpumask_set_cpu((c2), mfunc(c1)); \
427} while (0)
428
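/*
 * Topology matching helpers used by set_cpu_sibling_map().  match_smt()
 * recognizes two CPUs as SMT siblings; on TOPOEXT (AMD) parts the
 * compute-unit id is honoured in addition to the core id.
 */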
429static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
430{
431 if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
432 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
433
434 if (c->phys_proc_id == o->phys_proc_id &&
435 c->cpu_die_id == o->cpu_die_id &&
436 per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
437 if (c->cpu_core_id == o->cpu_core_id)
438 return topology_sane(c, o, "smt");
439
440 if ((c->cu_id != 0xff) &&
441 (o->cu_id != 0xff) &&
442 (c->cu_id == o->cu_id))
443 return topology_sane(c, o, "smt");
444 }
445
446 } else if (c->phys_proc_id == o->phys_proc_id &&
447 c->cpu_die_id == o->cpu_die_id &&
448 c->cpu_core_id == o->cpu_core_id) {
449 return topology_sane(c, o, "smt");
450 }
451
452 return false;
453}
454
455static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
456{
457 if (c->phys_proc_id == o->phys_proc_id &&
458 c->cpu_die_id == o->cpu_die_id)
459 return true;
460 return false;
461}
462
463static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
464{
465 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
466
467
468 if (per_cpu(cpu_l2c_id, cpu1) == BAD_APICID)
469 return match_smt(c, o);
470
471
472 if (per_cpu(cpu_l2c_id, cpu1) != per_cpu(cpu_l2c_id, cpu2))
473 return false;
474
475 return topology_sane(c, o, "l2c");
476}
477
478
479
480
481
482
483static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
484{
485 if (c->phys_proc_id == o->phys_proc_id)
486 return true;
487 return false;
488}
489
/*
 * Cluster-on-Die / Sub-NUMA-Cluster quirk table.
 *
 * On Haswell-EP and Broadwell-EP, Cluster-on-Die (COD) splits a package
 * into multiple NUMA nodes while the last level cache is still enumerated
 * per package.  Later Intel parts implement Sub-NUMA Clustering (SNC)
 * instead, where the enumerated LLC can span multiple NUMA nodes.  The
 * driver_data field below distinguishes SNC (1) from COD (0); match_llc()
 * uses it to decide whether an LLC that crosses node boundaries must be
 * ignored for scheduler topology purposes.
 */
static const struct x86_cpu_id intel_cod_cpu[] = {
        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, 0),	/* COD */
        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, 0),	/* COD */
        X86_MATCH_INTEL_FAM6_MODEL(ANY, 1),		/* SNC */
        {}
};
510
511static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
512{
513 const struct x86_cpu_id *id = x86_match_cpu(intel_cod_cpu);
514 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
515 bool intel_snc = id && id->driver_data;
516
517
518 if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
519 return false;
520
521
522 if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2))
523 return false;
524
525
526
527
528
529
530 if (match_pkg(c, o) && !topology_same_node(c, o) && intel_snc)
531 return false;
532
533 return topology_sane(c, o, "llc");
534}
535
536
537#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_CLUSTER) || defined(CONFIG_SCHED_MC)
538static inline int x86_sched_itmt_flags(void)
539{
540 return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
541}
542
543#ifdef CONFIG_SCHED_MC
544static int x86_core_flags(void)
545{
546 return cpu_core_flags() | x86_sched_itmt_flags();
547}
548#endif
549#ifdef CONFIG_SCHED_SMT
550static int x86_smt_flags(void)
551{
552 return cpu_smt_flags() | x86_sched_itmt_flags();
553}
554#endif
555#ifdef CONFIG_SCHED_CLUSTER
556static int x86_cluster_flags(void)
557{
558 return cpu_cluster_flags() | x86_sched_itmt_flags();
559}
560#endif
561#endif
562
563static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
564#ifdef CONFIG_SCHED_SMT
565 { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
566#endif
567#ifdef CONFIG_SCHED_CLUSTER
568 { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) },
569#endif
570#ifdef CONFIG_SCHED_MC
571 { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
572#endif
573 { NULL, },
574};
575
576static struct sched_domain_topology_level x86_hybrid_topology[] = {
577#ifdef CONFIG_SCHED_SMT
578 { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
579#endif
580#ifdef CONFIG_SCHED_MC
581 { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
582#endif
583 { cpu_cpu_mask, SD_INIT_NAME(DIE) },
584 { NULL, },
585};
586
587static struct sched_domain_topology_level x86_topology[] = {
588#ifdef CONFIG_SCHED_SMT
589 { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
590#endif
591#ifdef CONFIG_SCHED_CLUSTER
592 { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) },
593#endif
594#ifdef CONFIG_SCHED_MC
595 { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
596#endif
597 { cpu_cpu_mask, SD_INIT_NAME(DIE) },
598 { NULL, },
599};
600
601
602
603
604
605
606static bool x86_has_numa_in_package;
607
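/*
 * Build the SMT sibling, LLC, L2 and core/die masks for @cpu against all
 * CPUs that have already gone through set_cpu_sibling_map(), and keep
 * booted_cores and __max_smt_threads up to date.
 */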
608void set_cpu_sibling_map(int cpu)
609{
610 bool has_smt = smp_num_siblings > 1;
611 bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
612 struct cpuinfo_x86 *c = &cpu_data(cpu);
613 struct cpuinfo_x86 *o;
614 int i, threads;
615
616 cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
617
618 if (!has_mp) {
619 cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
620 cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
621 cpumask_set_cpu(cpu, cpu_l2c_shared_mask(cpu));
622 cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
623 cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
624 c->booted_cores = 1;
625 return;
626 }
627
628 for_each_cpu(i, cpu_sibling_setup_mask) {
629 o = &cpu_data(i);
630
631 if (match_pkg(c, o) && !topology_same_node(c, o))
632 x86_has_numa_in_package = true;
633
634 if ((i == cpu) || (has_smt && match_smt(c, o)))
635 link_mask(topology_sibling_cpumask, cpu, i);
636
637 if ((i == cpu) || (has_mp && match_llc(c, o)))
638 link_mask(cpu_llc_shared_mask, cpu, i);
639
640 if ((i == cpu) || (has_mp && match_l2c(c, o)))
641 link_mask(cpu_l2c_shared_mask, cpu, i);
642
643 if ((i == cpu) || (has_mp && match_die(c, o)))
644 link_mask(topology_die_cpumask, cpu, i);
645 }
646
647 threads = cpumask_weight(topology_sibling_cpumask(cpu));
648 if (threads > __max_smt_threads)
649 __max_smt_threads = threads;
650
651 for_each_cpu(i, topology_sibling_cpumask(cpu))
652 cpu_data(i).smt_active = threads > 1;
653
654
655
656
657
658 for_each_cpu(i, cpu_sibling_setup_mask) {
659 o = &cpu_data(i);
660
661 if ((i == cpu) || (has_mp && match_pkg(c, o))) {
662 link_mask(topology_core_cpumask, cpu, i);
663
664
665
666
667 if (threads == 1) {
668
669
670
671
672 if (cpumask_first(
673 topology_sibling_cpumask(i)) == i)
674 c->booted_cores++;
675
676
677
678
679 if (i != cpu)
680 cpu_data(i).booted_cores++;
681 } else if (i != cpu && !c->booted_cores)
682 c->booted_cores = cpu_data(i).booted_cores;
683 }
684 }
685}
686
687
688const struct cpumask *cpu_coregroup_mask(int cpu)
689{
690 return cpu_llc_shared_mask(cpu);
691}
692
693const struct cpumask *cpu_clustergroup_mask(int cpu)
694{
695 return cpu_l2c_shared_mask(cpu);
696}
697
698static void impress_friends(void)
699{
700 int cpu;
701 unsigned long bogosum = 0;
702
703
704
705 pr_debug("Before bogomips\n");
706 for_each_possible_cpu(cpu)
707 if (cpumask_test_cpu(cpu, cpu_callout_mask))
708 bogosum += cpu_data(cpu).loops_per_jiffy;
709 pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
710 num_online_cpus(),
711 bogosum/(500000/HZ),
712 (bogosum/(5000/HZ))%100);
713
714 pr_debug("Before bogocount - setting activated=1\n");
715}
716
717void __inquire_remote_apic(int apicid)
718{
719 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
720 const char * const names[] = { "ID", "VERSION", "SPIV" };
721 int timeout;
722 u32 status;
723
724 pr_info("Inquiring remote APIC 0x%x...\n", apicid);
725
726 for (i = 0; i < ARRAY_SIZE(regs); i++) {
727 pr_info("... APIC 0x%x %s: ", apicid, names[i]);
728
729
730
731
732 status = safe_apic_wait_icr_idle();
733 if (status)
734 pr_cont("a previous APIC delivery may have failed\n");
735
736 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
737
738 timeout = 0;
739 do {
740 udelay(100);
741 status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
742 } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
743
744 switch (status) {
745 case APIC_ICR_RR_VALID:
746 status = apic_read(APIC_RRR);
747 pr_cont("%08x\n", status);
748 break;
749 default:
750 pr_cont("failed\n");
751 }
752 }
753}
754
/*
 * The Multiprocessor Specification 1.4 (1997) example code suggests that
 * there should be a 10ms delay between the BSP asserting INIT and
 * de-asserting INIT when starting a remote processor.  That slows boot
 * and resume on modern processors, which do not require the delay, so
 * modern families are quirked to drop it entirely.
 *
 * The "cpu_init_udelay=" command line parameter overrides this delay.
 */
#define UDELAY_10MS_DEFAULT 10000

static unsigned int init_udelay = UINT_MAX;
768
769static int __init cpu_init_udelay(char *str)
770{
771 get_option(&str, &init_udelay);
772
773 return 0;
774}
775early_param("cpu_init_udelay", cpu_init_udelay);
776
777static void __init smp_quirk_init_udelay(void)
778{
779
780 if (init_udelay != UINT_MAX)
781 return;
782
783
784 if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
785 ((boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) && (boot_cpu_data.x86 >= 0x18)) ||
786 ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) {
787 init_udelay = 0;
788 return;
789 }
790
791 init_udelay = UDELAY_10MS_DEFAULT;
792}
793
794
795
796
797
798
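/*
 * Wake up an AP by sending it an NMI rather than the INIT/STARTUP
 * sequence; used here to wake a soft-offlined CPU0 without making it
 * re-run the BIOS boot-strap code.
 */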
799int
800wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
801{
802 u32 dm = apic->dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
803 unsigned long send_status, accept_status = 0;
804 int maxlvt;
805
806
807
808
809 apic_icr_write(APIC_DM_NMI | dm, apicid);
810
811 pr_debug("Waiting for send to finish...\n");
812 send_status = safe_apic_wait_icr_idle();
813
814
815
816
817 udelay(200);
818 if (APIC_INTEGRATED(boot_cpu_apic_version)) {
819 maxlvt = lapic_get_maxlvt();
820 if (maxlvt > 3)
821 apic_write(APIC_ESR, 0);
822 accept_status = (apic_read(APIC_ESR) & 0xEF);
823 }
824 pr_debug("NMI sent\n");
825
826 if (send_status)
827 pr_err("APIC never delivered???\n");
828 if (accept_status)
829 pr_err("APIC delivery error (%lx)\n", accept_status);
830
831 return (send_status | accept_status);
832}
833
834static int
835wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
836{
837 unsigned long send_status = 0, accept_status = 0;
838 int maxlvt, num_starts, j;
839
840 maxlvt = lapic_get_maxlvt();
841
842
843
844
845 if (APIC_INTEGRATED(boot_cpu_apic_version)) {
846 if (maxlvt > 3)
847 apic_write(APIC_ESR, 0);
848 apic_read(APIC_ESR);
849 }
850
851 pr_debug("Asserting INIT\n");
852
853
854
855
856
857
858
859 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
860 phys_apicid);
861
862 pr_debug("Waiting for send to finish...\n");
863 send_status = safe_apic_wait_icr_idle();
864
865 udelay(init_udelay);
866
867 pr_debug("Deasserting INIT\n");
868
869
870
871 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
872
873 pr_debug("Waiting for send to finish...\n");
874 send_status = safe_apic_wait_icr_idle();
875
876 mb();
877
878
879
880
881
882
883
884 if (APIC_INTEGRATED(boot_cpu_apic_version))
885 num_starts = 2;
886 else
887 num_starts = 0;
888
889
890
891
892 pr_debug("#startup loops: %d\n", num_starts);
893
894 for (j = 1; j <= num_starts; j++) {
895 pr_debug("Sending STARTUP #%d\n", j);
896 if (maxlvt > 3)
897 apic_write(APIC_ESR, 0);
898 apic_read(APIC_ESR);
899 pr_debug("After apic_write\n");
900
901
902
903
904
905
906
907
908 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
909 phys_apicid);
910
911
912
913
914 if (init_udelay == 0)
915 udelay(10);
916 else
917 udelay(300);
918
919 pr_debug("Startup point 1\n");
920
921 pr_debug("Waiting for send to finish...\n");
922 send_status = safe_apic_wait_icr_idle();
923
924
925
926
927 if (init_udelay == 0)
928 udelay(10);
929 else
930 udelay(200);
931
932 if (maxlvt > 3)
933 apic_write(APIC_ESR, 0);
934 accept_status = (apic_read(APIC_ESR) & 0xEF);
935 if (send_status || accept_status)
936 break;
937 }
938 pr_debug("After Startup\n");
939
940 if (send_status)
941 pr_err("APIC never delivered???\n");
942 if (accept_status)
943 pr_err("APIC delivery error (%lx)\n", accept_status);
944
945 return (send_status | accept_status);
946}
947
948
949static void announce_cpu(int cpu, int apicid)
950{
951 static int current_node = NUMA_NO_NODE;
952 int node = early_cpu_to_node(cpu);
953 static int width, node_width;
954
955 if (!width)
956 width = num_digits(num_possible_cpus()) + 1;
957
958 if (!node_width)
959 node_width = num_digits(num_possible_nodes()) + 1;
960
961 if (cpu == 1)
962 printk(KERN_INFO "x86: Booting SMP configuration:\n");
963
964 if (system_state < SYSTEM_RUNNING) {
965 if (node != current_node) {
966 if (current_node > (-1))
967 pr_cont("\n");
968 current_node = node;
969
970 printk(KERN_INFO ".... node %*s#%d, CPUs: ",
971 node_width - num_digits(node), " ", node);
972 }
973
974
975 if (cpu == 1)
976 pr_cont("%*s", width + 1, " ");
977
978 pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu);
979
980 } else
981 pr_info("Booting Node %d Processor %d APIC 0x%x\n",
982 node, cpu, apicid);
983}
984
985static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
986{
987 int cpu;
988
989 cpu = smp_processor_id();
990 if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
991 return NMI_HANDLED;
992
993 return NMI_DONE;
994}
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
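/*
 * Wake up an AP either with the INIT/INIT/STARTUP sequence (cpu != 0)
 * or, when onlining CPU0, with an NMI: a STARTUP IPI to the BSP would
 * execute the BIOS boot-strap code, which is not what we want.  For the
 * NMI path the wakeup_cpu0_nmi handler is registered and this is
 * reported back via @cpu0_nmi_registered.
 */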
1008static int
1009wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
1010 int *cpu0_nmi_registered)
1011{
1012 int id;
1013 int boot_error;
1014
1015 preempt_disable();
1016
1017
1018
1019
1020 if (cpu) {
1021 boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
1022 goto out;
1023 }
1024
1025
1026
1027
1028
1029
1030 boot_error = register_nmi_handler(NMI_LOCAL,
1031 wakeup_cpu0_nmi, 0, "wake_cpu0");
1032
1033 if (!boot_error) {
1034 enable_start_cpu0 = 1;
1035 *cpu0_nmi_registered = 1;
1036 id = apic->dest_mode_logical ? cpu0_logical_apicid : apicid;
1037 boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
1038 }
1039
1040out:
1041 preempt_enable();
1042
1043 return boot_error;
1044}
1045
1046int common_cpu_up(unsigned int cpu, struct task_struct *idle)
1047{
1048 int ret;
1049
1050
1051 alternatives_enable_smp();
1052
1053 per_cpu(current_task, cpu) = idle;
1054 cpu_init_stack_canary(cpu, idle);
1055
1056
1057 ret = irq_init_percpu_irqstack(cpu);
1058 if (ret)
1059 return ret;
1060
1061#ifdef CONFIG_X86_32
1062
1063 per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
1064#else
1065 initial_gs = per_cpu_offset(cpu);
1066#endif
1067 return 0;
1068}
1069
1070
1071
1072
1073
1074
1075
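/*
 * NOTE - on most systems @apicid is a PHYSICAL APIC id, but on clustered
 * APIC systems it is a LOGICAL APIC id.
 * Returns zero if the startup sequence was successfully sent, otherwise
 * the error code from the wakeup method.
 */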
1076static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
1077 int *cpu0_nmi_registered)
1078{
1079
1080 unsigned long start_ip = real_mode_header->trampoline_start;
1081
1082 unsigned long boot_error = 0;
1083 unsigned long timeout;
1084
1085 idle->thread.sp = (unsigned long)task_pt_regs(idle);
1086 early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
1087 initial_code = (unsigned long)start_secondary;
1088 initial_stack = idle->thread.sp;
1089
1090
1091 init_espfix_ap(cpu);
1092
1093
1094 announce_cpu(cpu, apicid);
1095
1096
1097
1098
1099
1100
1101 if (x86_platform.legacy.warm_reset) {
1102
1103 pr_debug("Setting warm reset code and vector.\n");
1104
1105 smpboot_setup_warm_reset_vector(start_ip);
1106
1107
1108
1109 if (APIC_INTEGRATED(boot_cpu_apic_version)) {
1110 apic_write(APIC_ESR, 0);
1111 apic_read(APIC_ESR);
1112 }
1113 }
1114
1115
1116
1117
1118
1119
1120
1121 cpumask_clear_cpu(cpu, cpu_initialized_mask);
1122 smp_mb();
1123
1124
1125
1126
1127
1128
1129
1130 if (apic->wakeup_secondary_cpu)
1131 boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
1132 else
1133 boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
1134 cpu0_nmi_registered);
1135
1136 if (!boot_error) {
1137
1138
1139
1140 boot_error = -1;
1141 timeout = jiffies + 10*HZ;
1142 while (time_before(jiffies, timeout)) {
1143 if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
1144
1145
1146
1147 cpumask_set_cpu(cpu, cpu_callout_mask);
1148 boot_error = 0;
1149 break;
1150 }
1151 schedule();
1152 }
1153 }
1154
1155 if (!boot_error) {
1156
1157
1158
1159 while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
1160
1161
1162
1163
1164
1165
1166 schedule();
1167 }
1168 }
1169
1170 if (x86_platform.legacy.warm_reset) {
1171
1172
1173
1174 smpboot_restore_warm_reset_vector();
1175 }
1176
1177 return boot_error;
1178}
1179
1180int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
1181{
1182 int apicid = apic->cpu_present_to_apicid(cpu);
1183 int cpu0_nmi_registered = 0;
1184 unsigned long flags;
1185 int err, ret = 0;
1186
1187 lockdep_assert_irqs_enabled();
1188
1189 pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
1190
1191 if (apicid == BAD_APICID ||
1192 !physid_isset(apicid, phys_cpu_present_map) ||
1193 !apic->apic_id_valid(apicid)) {
1194 pr_err("%s: bad cpu %d\n", __func__, cpu);
1195 return -EINVAL;
1196 }
1197
1198
1199
1200
1201 if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
1202 pr_debug("do_boot_cpu %d Already started\n", cpu);
1203 return -ENOSYS;
1204 }
1205
1206
1207
1208
1209
1210 mtrr_save_state();
1211
1212
1213 err = cpu_check_up_prepare(cpu);
1214 if (err && err != -EBUSY)
1215 return err;
1216
1217
1218 per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
1219
1220 err = common_cpu_up(cpu, tidle);
1221 if (err)
1222 return err;
1223
1224 err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
1225 if (err) {
1226 pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
1227 ret = -EIO;
1228 goto unreg_nmi;
1229 }
1230
1231
1232
1233
1234
1235 local_irq_save(flags);
1236 check_tsc_sync_source(cpu);
1237 local_irq_restore(flags);
1238
1239 while (!cpu_online(cpu)) {
1240 cpu_relax();
1241 touch_nmi_watchdog();
1242 }
1243
1244unreg_nmi:
1245
1246
1247
1248
1249 if (cpu0_nmi_registered)
1250 unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
1251
1252 return ret;
1253}
1254
1255
1256
1257
1258void arch_disable_smp_support(void)
1259{
1260 disable_ioapic_support();
1261}
1262
1263
1264
1265
1266
1267
1268static __init void disable_smp(void)
1269{
1270 pr_info("SMP disabled\n");
1271
1272 disable_ioapic_support();
1273
1274 init_cpu_present(cpumask_of(0));
1275 init_cpu_possible(cpumask_of(0));
1276
1277 if (smp_found_config)
1278 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1279 else
1280 physid_set_mask_of_physid(0, &phys_cpu_present_map);
1281 cpumask_set_cpu(0, topology_sibling_cpumask(0));
1282 cpumask_set_cpu(0, topology_core_cpumask(0));
1283 cpumask_set_cpu(0, topology_die_cpumask(0));
1284}
1285
1286
1287
1288
1289static void __init smp_sanity_check(void)
1290{
1291 preempt_disable();
1292
1293#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32)
1294 if (def_to_bigsmp && nr_cpu_ids > 8) {
1295 unsigned int cpu;
1296 unsigned nr;
1297
1298 pr_warn("More than 8 CPUs detected - skipping them\n"
1299 "Use CONFIG_X86_BIGSMP\n");
1300
1301 nr = 0;
1302 for_each_present_cpu(cpu) {
1303 if (nr >= 8)
1304 set_cpu_present(cpu, false);
1305 nr++;
1306 }
1307
1308 nr = 0;
1309 for_each_possible_cpu(cpu) {
1310 if (nr >= 8)
1311 set_cpu_possible(cpu, false);
1312 nr++;
1313 }
1314
1315 nr_cpu_ids = 8;
1316 }
1317#endif
1318
1319 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
1320 pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n",
1321 hard_smp_processor_id());
1322
1323 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1324 }
1325
1326
1327
1328
1329
1330 if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
1331 pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n",
1332 boot_cpu_physical_apicid);
1333 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1334 }
1335 preempt_enable();
1336}
1337
1338static void __init smp_cpu_index_default(void)
1339{
1340 int i;
1341 struct cpuinfo_x86 *c;
1342
1343 for_each_possible_cpu(i) {
1344 c = &cpu_data(i);
1345
1346 c->cpu_index = nr_cpu_ids;
1347 }
1348}
1349
1350static void __init smp_get_logical_apicid(void)
1351{
1352 if (x2apic_mode)
1353 cpu0_logical_apicid = apic_read(APIC_LDR);
1354 else
1355 cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
1356}
1357
1358void __init smp_prepare_cpus_common(void)
1359{
1360 unsigned int i;
1361
1362 smp_cpu_index_default();
1363
1364
1365
1366
1367 smp_store_boot_cpu_info();
1368 cpumask_copy(cpu_callin_mask, cpumask_of(0));
1369 mb();
1370
1371 for_each_possible_cpu(i) {
1372 zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
1373 zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
1374 zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
1375 zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
1376 zalloc_cpumask_var(&per_cpu(cpu_l2c_shared_map, i), GFP_KERNEL);
1377 }
1378
1379
1380
1381
1382
1383
1384
1385
1386 set_sched_topology(x86_topology);
1387
1388 set_cpu_sibling_map(0);
1389}
1390
1391
1392
1393
1394
1395
1396void __init native_smp_prepare_cpus(unsigned int max_cpus)
1397{
1398 smp_prepare_cpus_common();
1399
1400 init_freq_invariance(false, false);
1401 smp_sanity_check();
1402
1403 switch (apic_intr_mode) {
1404 case APIC_PIC:
1405 case APIC_VIRTUAL_WIRE_NO_CONFIG:
1406 disable_smp();
1407 return;
1408 case APIC_SYMMETRIC_IO_NO_ROUTING:
1409 disable_smp();
1410
1411 x86_init.timers.setup_percpu_clockev();
1412 return;
1413 case APIC_VIRTUAL_WIRE:
1414 case APIC_SYMMETRIC_IO:
1415 break;
1416 }
1417
1418
1419 x86_init.timers.setup_percpu_clockev();
1420
1421 smp_get_logical_apicid();
1422
1423 pr_info("CPU0: ");
1424 print_cpu_info(&cpu_data(0));
1425
1426 uv_system_init();
1427
1428 set_mtrr_aps_delayed_init();
1429
1430 smp_quirk_init_udelay();
1431
1432 speculative_store_bypass_ht_init();
1433}
1434
1435void arch_thaw_secondary_cpus_begin(void)
1436{
1437 set_mtrr_aps_delayed_init();
1438}
1439
1440void arch_thaw_secondary_cpus_end(void)
1441{
1442 mtrr_aps_init();
1443}
1444
1445
1446
1447
1448void __init native_smp_prepare_boot_cpu(void)
1449{
1450 int me = smp_processor_id();
1451 switch_to_new_gdt(me);
1452
1453 cpumask_set_cpu(me, cpu_callout_mask);
1454 cpu_set_state_online(me);
1455 native_pv_lock_init();
1456}
1457
1458void __init calculate_max_logical_packages(void)
1459{
1460 int ncpus;
1461
1462
1463
1464
1465
1466 ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
1467 __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
1468 pr_info("Max logical packages: %u\n", __max_logical_packages);
1469}
1470
1471void __init native_smp_cpus_done(unsigned int max_cpus)
1472{
1473 pr_debug("Boot done\n");
1474
1475 calculate_max_logical_packages();
1476
1477
1478 if (x86_has_numa_in_package)
1479 set_sched_topology(x86_numa_in_package_topology);
1480 if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
1481 set_sched_topology(x86_hybrid_topology);
1482
1483 nmi_selftest();
1484 impress_friends();
1485 mtrr_aps_init();
1486}
1487
1488static int __initdata setup_possible_cpus = -1;
1489static int __init _setup_possible_cpus(char *str)
1490{
1491 get_option(&str, &setup_possible_cpus);
1492 return 0;
1493}
1494early_param("possible_cpus", _setup_possible_cpus);
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
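/*
 * cpu_possible_mask should be static: it cannot change once CPUs are
 * onlined or offlined, because per-CPU allocations are sized from it.
 * Set it up here from the number of processors reported by the firmware,
 * the "possible_cpus=" command line override and the number of disabled
 * (hotpluggable) CPUs.
 */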
1514__init void prefill_possible_map(void)
1515{
1516 int i, possible;
1517
1518
1519 if (!num_processors) {
1520 if (boot_cpu_has(X86_FEATURE_APIC)) {
1521 int apicid = boot_cpu_physical_apicid;
1522 int cpu = hard_smp_processor_id();
1523
1524 pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);
1525
1526
1527 if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
1528 apic->apic_id_valid(apicid))
1529 generic_processor_info(apicid, boot_cpu_apic_version);
1530 }
1531
1532 if (!num_processors)
1533 num_processors = 1;
1534 }
1535
1536 i = setup_max_cpus ?: 1;
1537 if (setup_possible_cpus == -1) {
1538 possible = num_processors;
1539#ifdef CONFIG_HOTPLUG_CPU
1540 if (setup_max_cpus)
1541 possible += disabled_cpus;
1542#else
1543 if (possible > i)
1544 possible = i;
1545#endif
1546 } else
1547 possible = setup_possible_cpus;
1548
1549 total_cpus = max_t(int, possible, num_processors + disabled_cpus);
1550
1551
1552 if (possible > nr_cpu_ids) {
1553 pr_warn("%d Processors exceeds NR_CPUS limit of %u\n",
1554 possible, nr_cpu_ids);
1555 possible = nr_cpu_ids;
1556 }
1557
1558#ifdef CONFIG_HOTPLUG_CPU
1559 if (!setup_max_cpus)
1560#endif
1561 if (possible > i) {
1562 pr_warn("%d Processors exceeds max_cpus limit of %u\n",
1563 possible, setup_max_cpus);
1564 possible = i;
1565 }
1566
1567 nr_cpu_ids = possible;
1568
1569 pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
1570 possible, max_t(int, possible - num_processors, 0));
1571
1572 reset_cpu_possible_mask();
1573
1574 for (i = 0; i < possible; i++)
1575 set_cpu_possible(i, true);
1576}
1577
1578#ifdef CONFIG_HOTPLUG_CPU
1579
1580
1581static void recompute_smt_state(void)
1582{
1583 int max_threads, cpu;
1584
1585 max_threads = 0;
1586 for_each_online_cpu (cpu) {
1587 int threads = cpumask_weight(topology_sibling_cpumask(cpu));
1588
1589 if (threads > max_threads)
1590 max_threads = threads;
1591 }
1592 __max_smt_threads = max_threads;
1593}
1594
1595static void remove_siblinginfo(int cpu)
1596{
1597 int sibling;
1598 struct cpuinfo_x86 *c = &cpu_data(cpu);
1599
1600 for_each_cpu(sibling, topology_core_cpumask(cpu)) {
1601 cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
1602
1603
1604
1605 if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1)
1606 cpu_data(sibling).booted_cores--;
1607 }
1608
1609 for_each_cpu(sibling, topology_die_cpumask(cpu))
1610 cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));
1611
1612 for_each_cpu(sibling, topology_sibling_cpumask(cpu)) {
1613 cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
1614 if (cpumask_weight(topology_sibling_cpumask(sibling)) == 1)
1615 cpu_data(sibling).smt_active = false;
1616 }
1617
1618 for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
1619 cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
1620 for_each_cpu(sibling, cpu_l2c_shared_mask(cpu))
1621 cpumask_clear_cpu(cpu, cpu_l2c_shared_mask(sibling));
1622 cpumask_clear(cpu_llc_shared_mask(cpu));
1623 cpumask_clear(cpu_l2c_shared_mask(cpu));
1624 cpumask_clear(topology_sibling_cpumask(cpu));
1625 cpumask_clear(topology_core_cpumask(cpu));
1626 cpumask_clear(topology_die_cpumask(cpu));
1627 c->cpu_core_id = 0;
1628 c->booted_cores = 0;
1629 cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
1630 recompute_smt_state();
1631}
1632
1633static void remove_cpu_from_maps(int cpu)
1634{
1635 set_cpu_online(cpu, false);
1636 cpumask_clear_cpu(cpu, cpu_callout_mask);
1637 cpumask_clear_cpu(cpu, cpu_callin_mask);
1638
1639 cpumask_clear_cpu(cpu, cpu_initialized_mask);
1640 numa_remove_cpu(cpu);
1641}
1642
1643void cpu_disable_common(void)
1644{
1645 int cpu = smp_processor_id();
1646
1647 remove_siblinginfo(cpu);
1648
1649
1650 lock_vector_lock();
1651 remove_cpu_from_maps(cpu);
1652 unlock_vector_lock();
1653 fixup_irqs();
1654 lapic_offline();
1655}
1656
1657int native_cpu_disable(void)
1658{
1659 int ret;
1660
1661 ret = lapic_can_unplug_cpu();
1662 if (ret)
1663 return ret;
1664
1665 cpu_disable_common();
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685 apic_soft_disable();
1686
1687 return 0;
1688}
1689
1690int common_cpu_die(unsigned int cpu)
1691{
1692 int ret = 0;
1693
1694
1695
1696
1697 if (cpu_wait_death(cpu, 5)) {
1698 if (system_state == SYSTEM_RUNNING)
1699 pr_info("CPU %u is now offline\n", cpu);
1700 } else {
1701 pr_err("CPU %u didn't die...\n", cpu);
1702 ret = -1;
1703 }
1704
1705 return ret;
1706}
1707
1708void native_cpu_die(unsigned int cpu)
1709{
1710 common_cpu_die(cpu);
1711}
1712
1713void play_dead_common(void)
1714{
1715 idle_task_exit();
1716
1717
1718 (void)cpu_report_death();
1719
1720
1721
1722
1723 local_irq_disable();
1724}
1725
1726
1727
1728
1729
1730
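/**
 * cond_wakeup_cpu0 - Wake up CPU0 if needed.
 *
 * If NMI wants to wake up CPU0, start CPU0.
 */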
1731void cond_wakeup_cpu0(void)
1732{
1733 if (smp_processor_id() == 0 && enable_start_cpu0)
1734 start_cpu0();
1735}
1736EXPORT_SYMBOL_GPL(cond_wakeup_cpu0);
1737
1738
1739
1740
1741
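/*
 * We need to flush the caches before going to sleep, lest we have
 * dirty data in our caches when we come back up.
 */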
1742static inline void mwait_play_dead(void)
1743{
1744 unsigned int eax, ebx, ecx, edx;
1745 unsigned int highest_cstate = 0;
1746 unsigned int highest_subcstate = 0;
1747 void *mwait_ptr;
1748 int i;
1749
1750 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
1751 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
1752 return;
1753 if (!this_cpu_has(X86_FEATURE_MWAIT))
1754 return;
1755 if (!this_cpu_has(X86_FEATURE_CLFLUSH))
1756 return;
1757 if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
1758 return;
1759
1760 eax = CPUID_MWAIT_LEAF;
1761 ecx = 0;
1762 native_cpuid(&eax, &ebx, &ecx, &edx);
1763
1764
1765
1766
1767
1768 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
1769 eax = 0;
1770 } else {
1771 edx >>= MWAIT_SUBSTATE_SIZE;
1772 for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
1773 if (edx & MWAIT_SUBSTATE_MASK) {
1774 highest_cstate = i;
1775 highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
1776 }
1777 }
1778 eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
1779 (highest_subcstate - 1);
1780 }
1781
        /*
         * This should be a memory location in a cache line which is
         * unlikely to be touched by other processors.  The actual
         * content is immaterial as it is not actually modified in any way.
         */
        mwait_ptr = &current_thread_info()->flags;
1788
1789 wbinvd();
1790
1791 while (1) {
1792
1793
1794
1795
1796
1797
1798
1799 mb();
1800 clflush(mwait_ptr);
1801 mb();
1802 __monitor(mwait_ptr, 0, 0);
1803 mb();
1804 __mwait(eax, 0);
1805
1806 cond_wakeup_cpu0();
1807 }
1808}
1809
1810void hlt_play_dead(void)
1811{
1812 if (__this_cpu_read(cpu_info.x86) >= 4)
1813 wbinvd();
1814
1815 while (1) {
1816 native_halt();
1817
1818 cond_wakeup_cpu0();
1819 }
1820}
1821
1822void native_play_dead(void)
1823{
1824 play_dead_common();
1825 tboot_shutdown(TB_SHUTDOWN_WFS);
1826
1827 mwait_play_dead();
1828 if (cpuidle_play_dead())
1829 hlt_play_dead();
1830}
1831
1832#else
1833int native_cpu_disable(void)
1834{
1835 return -ENOSYS;
1836}
1837
1838void native_cpu_die(unsigned int cpu)
1839{
1840
1841 BUG();
1842}
1843
1844void native_play_dead(void)
1845{
1846 BUG();
1847}
1848
1849#endif
1850
1851#ifdef CONFIG_X86_64
/*
 * APERF/MPERF frequency ratio computation.
 *
 * The scheduler wants frequency-invariant load accounting and needs a
 * ratio <= 1 representing freq_curr / freq_max on every tick.
 *
 * Because the effective frequency on x86 is chosen by the hardware and
 * the OS only issues hints, the current frequency is observed through
 * the APERF/MPERF MSRs: over a tick,
 *
 *   freq_curr = base_freq * delta_APERF / delta_MPERF
 *
 * freq_max is approximated by the ratio of a plausible turbo frequency
 * to the base (maximum non-turbo) frequency; that ratio is determined
 * per CPU model below and kept, scaled by SCHED_CAPACITY_SCALE, in
 * arch_turbo_freq_ratio / arch_max_freq_ratio.  arch_scale_freq_tick()
 * then computes, roughly,
 *
 *   freq_scale = SCHED_CAPACITY_SCALE *
 *                (delta_APERF / delta_MPERF) / (turbo_freq / base_freq)
 *
 * clamped to SCHED_CAPACITY_SCALE.
 */
1886DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
1887
1888static DEFINE_PER_CPU(u64, arch_prev_aperf);
1889static DEFINE_PER_CPU(u64, arch_prev_mperf);
1890static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
1891static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
1892
1893void arch_set_max_freq_ratio(bool turbo_disabled)
1894{
1895 arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
1896 arch_turbo_freq_ratio;
1897}
1898EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
1899
1900static bool turbo_disabled(void)
1901{
1902 u64 misc_en;
1903 int err;
1904
1905 err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
1906 if (err)
1907 return false;
1908
1909 return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
1910}
1911
1912static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
1913{
1914 int err;
1915
1916 err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
1917 if (err)
1918 return false;
1919
1920 err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
1921 if (err)
1922 return false;
1923
1924 *base_freq = (*base_freq >> 16) & 0x3F;
1925 *turbo_freq = *turbo_freq & 0x3F;
1926
1927 return true;
1928}
1929
1930#define X86_MATCH(model) \
1931 X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \
1932 INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
1933
1934static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
1935 X86_MATCH(XEON_PHI_KNL),
1936 X86_MATCH(XEON_PHI_KNM),
1937 {}
1938};
1939
1940static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
1941 X86_MATCH(SKYLAKE_X),
1942 {}
1943};
1944
1945static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
1946 X86_MATCH(ATOM_GOLDMONT),
1947 X86_MATCH(ATOM_GOLDMONT_D),
1948 X86_MATCH(ATOM_GOLDMONT_PLUS),
1949 {}
1950};
1951
1952static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
1953 int num_delta_fratio)
1954{
1955 int fratio, delta_fratio, found;
1956 int err, i;
1957 u64 msr;
1958
1959 err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
1960 if (err)
1961 return false;
1962
1963 *base_freq = (*base_freq >> 8) & 0xFF;
1964
1965 err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
1966 if (err)
1967 return false;
1968
1969 fratio = (msr >> 8) & 0xFF;
1970 i = 16;
1971 found = 0;
1972 do {
1973 if (found >= num_delta_fratio) {
1974 *turbo_freq = fratio;
1975 return true;
1976 }
1977
1978 delta_fratio = (msr >> (i + 5)) & 0x7;
1979
1980 if (delta_fratio) {
1981 found += 1;
1982 fratio -= delta_fratio;
1983 }
1984
1985 i += 8;
1986 } while (i < 64);
1987
1988 return true;
1989}
1990
1991static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
1992{
1993 u64 ratios, counts;
1994 u32 group_size;
1995 int err, i;
1996
1997 err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
1998 if (err)
1999 return false;
2000
2001 *base_freq = (*base_freq >> 8) & 0xFF;
2002
2003 err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
2004 if (err)
2005 return false;
2006
2007 err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
2008 if (err)
2009 return false;
2010
2011 for (i = 0; i < 64; i += 8) {
2012 group_size = (counts >> i) & 0xFF;
2013 if (group_size >= size) {
2014 *turbo_freq = (ratios >> i) & 0xFF;
2015 return true;
2016 }
2017 }
2018
2019 return false;
2020}
2021
2022static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
2023{
2024 u64 msr;
2025 int err;
2026
2027 err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
2028 if (err)
2029 return false;
2030
2031 err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
2032 if (err)
2033 return false;
2034
2035 *base_freq = (*base_freq >> 8) & 0xFF;
2036 *turbo_freq = (msr >> 24) & 0xFF;
2037
2038
2039 if (!*turbo_freq)
2040 *turbo_freq = msr & 0xFF;
2041
2042 return true;
2043}
2044
2045static bool intel_set_max_freq_ratio(void)
2046{
2047 u64 base_freq, turbo_freq;
2048 u64 turbo_ratio;
2049
2050 if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
2051 goto out;
2052
2053 if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
2054 skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
2055 goto out;
2056
2057 if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
2058 knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
2059 goto out;
2060
2061 if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
2062 skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
2063 goto out;
2064
2065 if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
2066 goto out;
2067
2068 return false;
2069
2070out:
2071
2072
2073
2074
2075
2076
2077 if (!base_freq || !turbo_freq) {
2078 pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
2079 return false;
2080 }
2081
2082 turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
2083 if (!turbo_ratio) {
2084 pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
2085 return false;
2086 }
2087
2088 arch_turbo_freq_ratio = turbo_ratio;
2089 arch_set_max_freq_ratio(turbo_disabled());
2090
2091 return true;
2092}
2093
2094static void init_counter_refs(void)
2095{
2096 u64 aperf, mperf;
2097
2098 rdmsrl(MSR_IA32_APERF, aperf);
2099 rdmsrl(MSR_IA32_MPERF, mperf);
2100
2101 this_cpu_write(arch_prev_aperf, aperf);
2102 this_cpu_write(arch_prev_mperf, mperf);
2103}
2104
2105#ifdef CONFIG_PM_SLEEP
2106static struct syscore_ops freq_invariance_syscore_ops = {
2107 .resume = init_counter_refs,
2108};
2109
2110static void register_freq_invariance_syscore_ops(void)
2111{
2112
2113 if (freq_invariance_syscore_ops.node.prev)
2114 return;
2115
2116 register_syscore_ops(&freq_invariance_syscore_ops);
2117}
2118#else
2119static inline void register_freq_invariance_syscore_ops(void) {}
2120#endif
2121
2122void init_freq_invariance(bool secondary, bool cppc_ready)
2123{
2124 bool ret = false;
2125
2126 if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
2127 return;
2128
2129 if (secondary) {
2130 if (static_branch_likely(&arch_scale_freq_key)) {
2131 init_counter_refs();
2132 }
2133 return;
2134 }
2135
2136 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
2137 ret = intel_set_max_freq_ratio();
2138 else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
2139 if (!cppc_ready) {
2140 return;
2141 }
2142 ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
2143 }
2144
2145 if (ret) {
2146 init_counter_refs();
2147 static_branch_enable(&arch_scale_freq_key);
2148 register_freq_invariance_syscore_ops();
2149 pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
2150 } else {
2151 pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
2152 }
2153}
2154
2155static void disable_freq_invariance_workfn(struct work_struct *work)
2156{
2157 static_branch_disable(&arch_scale_freq_key);
2158}
2159
2160static DECLARE_WORK(disable_freq_invariance_work,
2161 disable_freq_invariance_workfn);
2162
2163DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
2164
2165void arch_scale_freq_tick(void)
2166{
2167 u64 freq_scale;
2168 u64 aperf, mperf;
2169 u64 acnt, mcnt;
2170
2171 if (!arch_scale_freq_invariant())
2172 return;
2173
2174 rdmsrl(MSR_IA32_APERF, aperf);
2175 rdmsrl(MSR_IA32_MPERF, mperf);
2176
2177 acnt = aperf - this_cpu_read(arch_prev_aperf);
2178 mcnt = mperf - this_cpu_read(arch_prev_mperf);
2179
2180 this_cpu_write(arch_prev_aperf, aperf);
2181 this_cpu_write(arch_prev_mperf, mperf);
2182
2183 if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
2184 goto error;
2185
2186 if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
2187 goto error;
2188
2189 freq_scale = div64_u64(acnt, mcnt);
2190 if (!freq_scale)
2191 goto error;
2192
2193 if (freq_scale > SCHED_CAPACITY_SCALE)
2194 freq_scale = SCHED_CAPACITY_SCALE;
2195
2196 this_cpu_write(arch_freq_scale, freq_scale);
2197 return;
2198
2199error:
2200 pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
2201 schedule_work(&disable_freq_invariance_work);
2202}
2203#endif
2204