1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
41
42#include <linux/init.h>
43#include <linux/smp.h>
44#include <linux/export.h>
45#include <linux/sched.h>
46#include <linux/sched/topology.h>
47#include <linux/sched/hotplug.h>
48#include <linux/sched/task_stack.h>
49#include <linux/percpu.h>
50#include <linux/memblock.h>
51#include <linux/err.h>
52#include <linux/nmi.h>
53#include <linux/tboot.h>
54#include <linux/gfp.h>
55#include <linux/cpuidle.h>
56#include <linux/numa.h>
57#include <linux/pgtable.h>
58#include <linux/overflow.h>
59#include <linux/syscore_ops.h>
60
61#include <asm/acpi.h>
62#include <asm/desc.h>
63#include <asm/nmi.h>
64#include <asm/irq.h>
65#include <asm/realmode.h>
66#include <asm/cpu.h>
67#include <asm/numa.h>
68#include <asm/tlbflush.h>
69#include <asm/mtrr.h>
70#include <asm/mwait.h>
71#include <asm/apic.h>
72#include <asm/io_apic.h>
73#include <asm/fpu/internal.h>
74#include <asm/setup.h>
75#include <asm/uv/uv.h>
76#include <linux/mc146818rtc.h>
77#include <asm/i8259.h>
78#include <asm/misc.h>
79#include <asm/qspinlock.h>
80#include <asm/intel-family.h>
81#include <asm/cpu_device_id.h>
82#include <asm/spec-ctrl.h>
83#include <asm/hw_irq.h>
84#include <asm/stackprotector.h>
85
86#ifdef CONFIG_ACPI_CPPC_LIB
87#include <acpi/cppc_acpi.h>
88#endif
89
90
/* Per-CPU mask of SMT siblings: CPUs sharing a core with this CPU. */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);

/* Per-CPU mask of CPUs in the same physical package. */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);

/* Per-CPU mask of CPUs in the same die. */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
EXPORT_PER_CPU_SYMBOL(cpu_die_map);

/* Per-CPU mask of CPUs sharing the last level cache with this CPU. */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Per-CPU copy of this CPU's cpuinfo data. */
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);

/* Logical package/die management (physical ids remapped to dense logical ids). */
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
static unsigned int logical_die __read_mostly;

/* Maximum number of SMT threads seen on any core so far. */
int __read_mostly __max_smt_threads = 1;

/* Set when the topology changed and the scheduler domains need a rebuild. */
bool x86_topology_update;
119
120int arch_update_cpu_topology(void)
121{
122 int retval = x86_topology_update;
123
124 x86_topology_update = false;
125 return retval;
126}
127
/*
 * Set up the warm reset vector so that a CPU coming out of INIT jumps to
 * the trampoline at @start_eip: CMOS shutdown code 0xA requests a warm
 * reset, and the BIOS then jumps through the segment:offset stored at
 * TRAMPOLINE_PHYS_HIGH/LOW (40:67 in low memory).
 */
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
	unsigned long flags;

	spin_lock_irqsave(&rtc_lock, flags);
	CMOS_WRITE(0xa, 0xf);
	spin_unlock_irqrestore(&rtc_lock, flags);
	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
							start_eip >> 4;
	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
							start_eip & 0xf;
}
140
/*
 * Undo smpboot_setup_warm_reset_vector(): clear the CMOS shutdown code
 * and zero the trampoline vector in low memory.
 */
static inline void smpboot_restore_warm_reset_vector(void)
{
	unsigned long flags;

	/*
	 * Paranoid: set warm reset code and vector here back
	 * to default values.
	 */
	spin_lock_irqsave(&rtc_lock, flags);
	CMOS_WRITE(0, 0xf);
	spin_unlock_irqrestore(&rtc_lock, flags);

	*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
155
/* Forward declaration; defined later in this file (frequency invariance). */
static void init_freq_invariance(bool secondary, bool cppc_ready);
161
/*
 * Report back to the Boot Processor during boot time or to the caller
 * processor during CPU online.
 */
static void smp_callin(void)
{
	int cpuid;

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	cpuid = smp_processor_id();

	/*
	 * The boot CPU has finished the init stage and is spinning on
	 * callin_map until we finish. Set up the local APIC for this AP.
	 */
	apic_ap_setup();

	/*
	 * Save our processor parameters. Note: this information
	 * may also be used by code in other places (e.g. sibling maps).
	 */
	smp_store_cpu_info(cpuid);

	/*
	 * The topology information must be up to date before
	 * calibrate_delay() and notify_cpu_starting().
	 */
	set_cpu_sibling_map(raw_smp_processor_id());

	init_freq_invariance(true, false);

	/*
	 * Get our bogomips.
	 * Update loops_per_jiffy in cpu_data. Previous call to
	 * smp_store_cpu_info() stored a value that is close but not as
	 * accurate as the value just calculated.
	 */
	calibrate_delay();
	cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
	pr_debug("Stack at about %p\n", &cpuid);

	wmb();

	notify_cpu_starting(cpuid);

	/*
	 * Allow the master to continue: signal that this CPU has
	 * completed the callin phase.
	 */
	cpumask_set_cpu(cpuid, cpu_callin_mask);
}
215
/* Logical APIC id of CPU0, cached for the NMI wakeup path. */
static int cpu0_logical_apicid;
/* Set while CPU0 may be re-onlined via an NMI wakeup (see wakeup_cpu0_nmi()). */
static int enable_start_cpu0;
218
219
220
/*
 * Activate a secondary processor: everything the AP executes from the
 * trampoline handoff until it enters the idle loop.
 */
static void notrace start_secondary(void *unused)
{
	/*
	 * Don't put *anything* except direct CPU state initialization
	 * before cpu_init_secondary(); SMP booting is fragile and the
	 * work done here must stay minimal.
	 */
	cr4_init();

#ifdef CONFIG_X86_32
	/* switch away from the initial page table */
	load_cr3(swapper_pg_dir);
	__flush_tlb_all();
#endif
	cpu_init_secondary();
	rcu_cpu_starting(raw_smp_processor_id());
	x86_cpuinit.early_percpu_clock_init();
	smp_callin();

	enable_start_cpu0 = 0;

	/* otherwise gcc may move up smp_processor_id() before cpu_init() */
	barrier();

	/*
	 * Check TSC synchronization with the boot CPU.
	 */
	check_tsc_sync_target();

	speculative_store_bypass_ht_init();

	/*
	 * Lock vector_lock, set CPU online and bring the vector
	 * allocator online. Online must be set with vector_lock held
	 * to prevent a concurrent irq setup/teardown from seeing a
	 * half valid vector space.
	 */
	lock_vector_lock();
	set_cpu_online(smp_processor_id(), true);
	lapic_online();
	unlock_vector_lock();
	cpu_set_state_online(smp_processor_id());
	x86_platform.nmi_init();

	/* enable local interrupts */
	local_irq_enable();

	x86_cpuinit.setup_percpu_clockev();

	wmb();
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
272
273
274
275
276
/*
 * topology_is_primary_thread - Check whether CPU is the primary SMT thread
 * @cpu:	CPU to check
 */
bool topology_is_primary_thread(unsigned int cpu)
{
	return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu));
}
281
282
283
284
/*
 * topology_smt_supported - Check whether SMT is supported by the CPUs
 */
bool topology_smt_supported(void)
{
	return smp_num_siblings > 1;
}
289
290
291
292
293
294
295int topology_phys_to_logical_pkg(unsigned int phys_pkg)
296{
297 int cpu;
298
299 for_each_possible_cpu(cpu) {
300 struct cpuinfo_x86 *c = &cpu_data(cpu);
301
302 if (c->initialized && c->phys_proc_id == phys_pkg)
303 return c->logical_proc_id;
304 }
305 return -1;
306}
307EXPORT_SYMBOL(topology_phys_to_logical_pkg);
308
309
310
311
312
313int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
314{
315 int cpu;
316 int proc_id = cpu_data(cur_cpu).phys_proc_id;
317
318 for_each_possible_cpu(cpu) {
319 struct cpuinfo_x86 *c = &cpu_data(cpu);
320
321 if (c->initialized && c->cpu_die_id == die_id &&
322 c->phys_proc_id == proc_id)
323 return c->logical_die_id;
324 }
325 return -1;
326}
327EXPORT_SYMBOL(topology_phys_to_logical_die);
328
329
330
331
332
333
334int topology_update_package_map(unsigned int pkg, unsigned int cpu)
335{
336 int new;
337
338
339 new = topology_phys_to_logical_pkg(pkg);
340 if (new >= 0)
341 goto found;
342
343 new = logical_packages++;
344 if (new != pkg) {
345 pr_info("CPU %u Converting physical %u to logical package %u\n",
346 cpu, pkg, new);
347 }
348found:
349 cpu_data(cpu).logical_proc_id = new;
350 return 0;
351}
352
353
354
355
356
357int topology_update_die_map(unsigned int die, unsigned int cpu)
358{
359 int new;
360
361
362 new = topology_phys_to_logical_die(die, cpu);
363 if (new >= 0)
364 goto found;
365
366 new = logical_die++;
367 if (new != die) {
368 pr_info("CPU %u Converting physical %u to logical die %u\n",
369 cpu, die, new);
370 }
371found:
372 cpu_data(cpu).logical_die_id = new;
373 return 0;
374}
375
/*
 * Copy boot_cpu_data into CPU0's cpuinfo slot and seed the logical
 * package/die maps. Called once during early boot.
 */
void __init smp_store_boot_cpu_info(void)
{
	int id = 0; /* CPU 0 */
	struct cpuinfo_x86 *c = &cpu_data(id);

	*c = boot_cpu_data;
	c->cpu_index = id;
	topology_update_package_map(c->phys_proc_id, id);
	topology_update_die_map(c->cpu_die_id, id);
	c->initialized = true;
}
387
388
389
390
391
/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU.
 */
void smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = &cpu_data(id);

	/* Copy boot_cpu_data only on the first bringup */
	if (!c->initialized)
		*c = boot_cpu_data;
	c->cpu_index = id;

	/*
	 * During boot time, CPU_INFO may not be ready yet; this fills in
	 * the per-CPU details from CPUID on the running CPU.
	 */
	identify_secondary_cpu(c);
	c->initialized = true;
}
407
408static bool
409topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
410{
411 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
412
413 return (cpu_to_node(cpu1) == cpu_to_node(cpu2));
414}
415
/*
 * Sanity-check that two CPUs claimed as @name-siblings are on the same
 * NUMA node; warn once and return false (ignore the link) otherwise.
 */
static bool
topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
{
	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

	return !WARN_ONCE(!topology_same_node(c, o),
		"sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
		"[node: %d != %d]. Ignoring dependency.\n",
		cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
}
426
/* Symmetrically link two CPUs in the cpumask returned by @mfunc. */
#define link_mask(mfunc, c1, c2)					\
do {									\
	cpumask_set_cpu((c1), mfunc(c2));				\
	cpumask_set_cpu((c2), mfunc(c1));				\
} while (0)
432
/*
 * Decide whether two CPUs are SMT siblings.
 *
 * With X86_FEATURE_TOPOEXT (AMD extended topology), CPUs must share
 * package, die and LLC; they are siblings if they share a core id, or
 * if they share a valid compute-unit id. Without TOPOEXT the simple
 * package/die/core id comparison applies.
 */
static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

		if (c->phys_proc_id == o->phys_proc_id &&
		    c->cpu_die_id == o->cpu_die_id &&
		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
			if (c->cpu_core_id == o->cpu_core_id)
				return topology_sane(c, o, "smt");

			/* 0xff marks an invalid compute-unit id */
			if ((c->cu_id != 0xff) &&
			    (o->cu_id != 0xff) &&
			    (c->cu_id == o->cu_id))
				return topology_sane(c, o, "smt");
		}

	} else if (c->phys_proc_id == o->phys_proc_id &&
		   c->cpu_die_id == o->cpu_die_id &&
		   c->cpu_core_id == o->cpu_core_id) {
		return topology_sane(c, o, "smt");
	}

	return false;
}
458
459static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
460{
461 if (c->phys_proc_id == o->phys_proc_id &&
462 c->cpu_die_id == o->cpu_die_id)
463 return true;
464 return false;
465}
466
467
468
469
470
471
472static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
473{
474 if (c->phys_proc_id == o->phys_proc_id)
475 return true;
476 return false;
477}
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
/*
 * Intel CPUs where the LLC can span NUMA node boundaries:
 * driver_data == 0 for Cluster-on-Die parts, 1 flags Sub-NUMA-Cluster
 * capable models (consumed as 'intel_snc' in match_llc()).
 */
static const struct x86_cpu_id intel_cod_cpu[] = {
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, 0),	/* COD */
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, 0),	/* COD */
	X86_MATCH_INTEL_FAM6_MODEL(ANY, 1),		/* SNC */
	{}
};
499
/*
 * Decide whether two CPUs share a last-level cache for scheduling
 * purposes, based on the per-CPU cpu_llc_id.
 */
static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
	const struct x86_cpu_id *id = x86_match_cpu(intel_cod_cpu);
	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
	bool intel_snc = id && id->driver_data;

	/* Do not match if we do not have a valid APICID for cpu: */
	if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
		return false;

	/* Do not match if LLC id does not match: */
	if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2))
		return false;

	/*
	 * Allow the SNC topology without warning. Return of false
	 * means 'c' does not share the LLC of 'o'. This will be
	 * reflected to userspace.
	 */
	if (match_pkg(c, o) && !topology_same_node(c, o) && intel_snc)
		return false;

	return topology_sane(c, o, "llc");
}
524
525
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
/* Add SD_ASYM_PACKING when ITMT (Turbo Boost Max 3.0) is enabled. */
static inline int x86_sched_itmt_flags(void)
{
	return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
}

#ifdef CONFIG_SCHED_MC
/* Scheduler domain flags for the MC (core group) level. */
static int x86_core_flags(void)
{
	return cpu_core_flags() | x86_sched_itmt_flags();
}
#endif
#ifdef CONFIG_SCHED_SMT
/* Scheduler domain flags for the SMT level. */
static int x86_smt_flags(void)
{
	return cpu_smt_flags() | x86_sched_itmt_flags();
}
#endif
#endif
545
/* Topology used when NUMA nodes are contained within a package (no DIE level). */
static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
	{ NULL, },
};
555
/* Default x86 scheduler topology: SMT -> MC -> DIE. */
static struct sched_domain_topology_level x86_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};
566
/*
 * Set if a package spans multiple NUMA nodes; switches the scheduler to
 * x86_numa_in_package_topology in native_smp_cpus_done().
 */
static bool x86_has_numa_in_package;
573
/*
 * Build the sibling/LLC/die/core cpumasks for @cpu against all CPUs that
 * already went through this function, and maintain booted_cores counts.
 */
void set_cpu_sibling_map(int cpu)
{
	bool has_smt = smp_num_siblings > 1;
	bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	struct cpuinfo_x86 *o;
	int i, threads;

	cpumask_set_cpu(cpu, cpu_sibling_setup_mask);

	if (!has_mp) {
		/* Uniprocessor package: every mask is just this CPU. */
		cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
		cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
		cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
		cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
		c->booted_cores = 1;
		return;
	}

	for_each_cpu(i, cpu_sibling_setup_mask) {
		o = &cpu_data(i);

		if (match_pkg(c, o) && !topology_same_node(c, o))
			x86_has_numa_in_package = true;

		if ((i == cpu) || (has_smt && match_smt(c, o)))
			link_mask(topology_sibling_cpumask, cpu, i);

		if ((i == cpu) || (has_mp && match_llc(c, o)))
			link_mask(cpu_llc_shared_mask, cpu, i);

		if ((i == cpu) || (has_mp && match_die(c, o)))
			link_mask(topology_die_cpumask, cpu, i);
	}

	threads = cpumask_weight(topology_sibling_cpumask(cpu));
	if (threads > __max_smt_threads)
		__max_smt_threads = threads;

	for_each_cpu(i, topology_sibling_cpumask(cpu))
		cpu_data(i).smt_active = threads > 1;

	/*
	 * This needs a separate iteration over the cpus because we rely on all
	 * topology_sibling_cpumask links to be set-up.
	 */
	for_each_cpu(i, cpu_sibling_setup_mask) {
		o = &cpu_data(i);

		if ((i == cpu) || (has_mp && match_pkg(c, o))) {
			link_mask(topology_core_cpumask, cpu, i);

			/*
			 * Does this new cpu bringup a new core?
			 */
			if (threads == 1) {
				/*
				 * for each core in package, increment
				 * the booted_cores for this new cpu
				 */
				if (cpumask_first(
				    topology_sibling_cpumask(i)) == i)
					c->booted_cores++;
				/*
				 * increment the core count for all
				 * the other cpus in this package
				 */
				if (i != cpu)
					cpu_data(i).booted_cores++;
			} else if (i != cpu && !c->booted_cores)
				c->booted_cores = cpu_data(i).booted_cores;
		}
	}
}
648
649
/* MC-level scheduler mask: CPUs sharing the last-level cache with @cpu. */
const struct cpumask *cpu_coregroup_mask(int cpu)
{
	return cpu_llc_shared_mask(cpu);
}
654
/* Sum and report the combined BogoMIPS of all CPUs that called in. */
static void impress_friends(void)
{
	int cpu;
	unsigned long bogosum = 0;

	/*
	 * Allow the user to impress friends.
	 */
	pr_debug("Before bogomips\n");
	for_each_possible_cpu(cpu)
		if (cpumask_test_cpu(cpu, cpu_callout_mask))
			bogosum += cpu_data(cpu).loops_per_jiffy;
	pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
		num_online_cpus(),
		bogosum/(500000/HZ),
		(bogosum/(5000/HZ))%100);

	pr_debug("Before bogocount - setting activated=1\n");
}
673
/*
 * Debug helper: read the ID, VERSION and SPIV registers of a remote APIC
 * via remote-read ICR commands and print the results.
 */
void __inquire_remote_apic(int apicid)
{
	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	const char * const names[] = { "ID", "VERSION", "SPIV" };
	int timeout;
	u32 status;

	pr_info("Inquiring remote APIC 0x%x...\n", apicid);

	for (i = 0; i < ARRAY_SIZE(regs); i++) {
		pr_info("... APIC 0x%x %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		status = safe_apic_wait_icr_idle();
		if (status)
			pr_cont("a previous APIC delivery may have failed\n");

		apic_icr_write(APIC_DM_REMRD | regs[i], apicid);

		/* Poll for remote-read completion, up to ~100ms. */
		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			pr_cont("%08x\n", status);
			break;
		default:
			pr_cont("failed\n");
		}
	}
}
711
/*
 * The Multiprocessor Specification 1.4 (1997) example code suggests
 * that there should be a 10ms delay between the BSP asserting INIT
 * and de-asserting INIT, when starting a remote processor.
 * But that slows boot and resume on modern processors, which include
 * many cores and don't require that delay.
 *
 * Cmdline "cpu_init_udelay=" is available to override this. The default
 * is resolved in smp_quirk_init_udelay().
 */
#define UDELAY_10MS_DEFAULT 10000

static unsigned int init_udelay = UINT_MAX;

static int __init cpu_init_udelay(char *str)
{
	get_option(&str, &init_udelay);

	return 0;
}
early_param("cpu_init_udelay", cpu_init_udelay);
733
/* Resolve the INIT de-assert delay if not overridden on the command line. */
static void __init smp_quirk_init_udelay(void)
{
	/* if cmdline changed it from default, leave it alone */
	if (init_udelay != UINT_MAX)
		return;

	/* if modern processor, use no delay */
	if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
	    ((boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) && (boot_cpu_data.x86 >= 0x18)) ||
	    ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) {
		init_udelay = 0;
		return;
	}
	/* else, use legacy delay */
	init_udelay = UDELAY_10MS_DEFAULT;
}
750
751
752
753
754
755
/*
 * Wake up an AP by sending it an NMI. Used e.g. to re-online CPU0.
 * Returns non-zero on delivery failure (send or accept error).
 */
int
wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
{
	u32 dm = apic->dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
	unsigned long send_status, accept_status = 0;
	int maxlvt;

	/* Target chip */
	/* Boot on the stack */
	/* Kick the second */
	apic_icr_write(APIC_DM_NMI | dm, apicid);

	pr_debug("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	/*
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(200);
	if (APIC_INTEGRATED(boot_cpu_apic_version)) {
		maxlvt = lapic_get_maxlvt();
		if (maxlvt > 3)			/* Due to the Pentium erratum 3AP.  */
			apic_write(APIC_ESR, 0);
		accept_status = (apic_read(APIC_ESR) & 0xEF);
	}
	pr_debug("NMI sent\n");

	if (send_status)
		pr_err("APIC never delivered???\n");
	if (accept_status)
		pr_err("APIC delivery error (%lx)\n", accept_status);

	return (send_status | accept_status);
}
790
/*
 * Boot an AP with the classic INIT / de-assert INIT / STARTUP-STARTUP
 * sequence. @start_eip is the real-mode trampoline entry and must be
 * 4K-aligned (it is encoded into the STARTUP vector as eip >> 12).
 */
static int
wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
	unsigned long send_status = 0, accept_status = 0;
	int maxlvt, num_starts, j;

	maxlvt = lapic_get_maxlvt();

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(boot_cpu_apic_version)) {
		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	pr_debug("Asserting INIT\n");

	/*
	 * Turn INIT on target chip
	 */
	/*
	 * Send IPI
	 */
	apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
		       phys_apicid);

	pr_debug("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	udelay(init_udelay);

	pr_debug("Deasserting INIT\n");

	/* Target chip */
	/* Send IPI */
	apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);

	pr_debug("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	mb();

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(boot_cpu_apic_version))
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Run STARTUP IPI loop.
	 */
	pr_debug("#startup loops: %d\n", num_starts);

	for (j = 1; j <= num_starts; j++) {
		pr_debug("Sending STARTUP #%d\n", j);
		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		pr_debug("After apic_write\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		/* Boot on the stack */
		/* Kick the second */
		apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
			       phys_apicid);

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		if (init_udelay == 0)
			udelay(10);
		else
			udelay(300);

		pr_debug("Startup point 1\n");

		pr_debug("Waiting for send to finish...\n");
		send_status = safe_apic_wait_icr_idle();

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		if (init_udelay == 0)
			udelay(10);
		else
			udelay(200);

		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
			apic_write(APIC_ESR, 0);
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	pr_debug("After Startup\n");

	if (send_status)
		pr_err("APIC never delivered???\n");
	if (accept_status)
		pr_err("APIC delivery error (%lx)\n", accept_status);

	return (send_status | accept_status);
}
904
905
/* reduce the number of lines printed when booting a large machine */
static void announce_cpu(int cpu, int apicid)
{
	static int current_node = NUMA_NO_NODE;
	int node = early_cpu_to_node(cpu);
	static int width, node_width;

	if (!width)
		width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */

	if (!node_width)
		node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */

	if (cpu == 1)
		printk(KERN_INFO "x86: Booting SMP configuration:\n");

	if (system_state < SYSTEM_RUNNING) {
		/* Compact per-node listing while booting. */
		if (node != current_node) {
			if (current_node > (-1))
				pr_cont("\n");
			current_node = node;

			printk(KERN_INFO ".... node %*s#%d, CPUs:  ",
			       node_width - num_digits(node), " ", node);
		}

		/* Add padding for the BSP */
		if (cpu == 1)
			pr_cont("%*s", width + 1, " ");

		pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu);

	} else
		pr_info("Booting Node %d Processor %d APIC 0x%x\n",
			node, cpu, apicid);
}
941
942static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
943{
944 int cpu;
945
946 cpu = smp_processor_id();
947 if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
948 return NMI_HANDLED;
949
950 return NMI_DONE;
951}
952
953
954
955
956
957
958
959
960
961
962
963
964
/*
 * Wake up AP by INIT, INIT, STARTUP sequence; CPU0 is special and is
 * woken via NMI instead, after installing a temporary NMI handler whose
 * registration is reported back through @cpu0_nmi_registered so the
 * caller can unregister it.
 */
static int
wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
			int *cpu0_nmi_registered)
{
	int id;
	int boot_error;

	preempt_disable();

	/*
	 * Wake up AP by INIT, INIT, STARTUP sequence.
	 */
	if (cpu) {
		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
		goto out;
	}

	/*
	 * Wake up BSP by nmi.
	 *
	 * Register a NMI handler to help wake up CPU0.
	 */
	boot_error = register_nmi_handler(NMI_LOCAL,
					  wakeup_cpu0_nmi, 0, "wake_cpu0");

	if (!boot_error) {
		enable_start_cpu0 = 1;
		*cpu0_nmi_registered = 1;
		id = apic->dest_mode_logical ? cpu0_logical_apicid : apicid;
		boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
	}

out:
	preempt_enable();

	return boot_error;
}
1002
/*
 * Architecture-common preparation before waking @cpu: patch in SMP
 * alternatives, point per-CPU current/stack state at @idle and set up
 * the per-CPU IRQ stack. Returns 0 or a negative errno.
 */
int common_cpu_up(unsigned int cpu, struct task_struct *idle)
{
	int ret;

	/* Just in case we booted with a single CPU. */
	alternatives_enable_smp();

	per_cpu(current_task, cpu) = idle;
	cpu_init_stack_canary(cpu, idle);

	/* Initialize the interrupt stack(s) */
	ret = irq_init_percpu_irqstack(cpu);
	if (ret)
		return ret;

#ifdef CONFIG_X86_32
	/* Stack for startup_32 can be just as for start_secondary onwards */
	per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#else
	initial_gs = per_cpu_offset(cpu);
#endif
	return 0;
}
1026
1027
1028
1029
1030
1031
1032
/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
 * Returns zero if @cpu was successfully booted through the trampoline
 * and reached the callin phase.
 */
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
		       int *cpu0_nmi_registered)
{
	/* start_ip had better be page-aligned! */
	unsigned long start_ip = real_mode_header->trampoline_start;

	unsigned long boot_error = 0;
	unsigned long timeout;

	idle->thread.sp = (unsigned long)task_pt_regs(idle);
	early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
	initial_code = (unsigned long)start_secondary;
	initial_stack  = idle->thread.sp;

	/* Enable the espfix hack for this CPU */
	init_espfix_ap(cpu);

	/* So we see what's up */
	announce_cpu(cpu, apicid);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */
	if (x86_platform.legacy.warm_reset) {

		pr_debug("Setting warm reset code and vector.\n");

		smpboot_setup_warm_reset_vector(start_ip);
		/*
		 * Be paranoid about clearing APIC errors.
		 */
		if (APIC_INTEGRATED(boot_cpu_apic_version)) {
			apic_write(APIC_ESR, 0);
			apic_read(APIC_ESR);
		}
	}

	/*
	 * AP might wait on cpu_callout_mask in cpu_init() with
	 * cpu_initialized_mask set if previous attempt to online
	 * it timed-out. Clear cpu_initialized_mask so that after
	 * INIT/SIPI it could start with a clean state.
	 */
	cpumask_clear_cpu(cpu, cpu_initialized_mask);
	smp_mb();

	/*
	 * Wake up a CPU in difference cases:
	 * - Use the method in the APIC driver if it's defined
	 * Otherwise,
	 * - Use an INIT boot APIC message for APs or NMI for BSP.
	 */
	if (apic->wakeup_secondary_cpu)
		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
	else
		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
						     cpu0_nmi_registered);

	if (!boot_error) {
		/*
		 * Wait 10s total for first sign of life from AP
		 */
		boot_error = -1;
		timeout = jiffies + 10*HZ;
		while (time_before(jiffies, timeout)) {
			if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
				/*
				 * Tell AP to proceed with initialization
				 */
				cpumask_set_cpu(cpu, cpu_callout_mask);
				boot_error = 0;
				break;
			}
			schedule();
		}
	}

	if (!boot_error) {
		/*
		 * Wait till AP completes initial initialization
		 */
		while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
			/*
			 * Allow other tasks to run while we wait for the
			 * AP to come online. This also gives a chance
			 * for the MTRR work(triggered by the AP coming online)
			 * to be completed in the stop machine context.
			 */
			schedule();
		}
	}

	if (x86_platform.legacy.warm_reset) {
		/*
		 * Cleanup possible dangling ends...
		 */
		smpboot_restore_warm_reset_vector();
	}

	return boot_error;
}
1136
/*
 * Bring a CPU online: validate its APIC id, prepare common state, boot it
 * through do_boot_cpu(), then synchronize TSC and wait until it is marked
 * online. Returns 0 on success or a negative errno.
 */
int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int apicid = apic->cpu_present_to_apicid(cpu);
	int cpu0_nmi_registered = 0;
	unsigned long flags;
	int err, ret = 0;

	lockdep_assert_irqs_enabled();

	pr_debug("++++++++++++++++++++=_---CPU UP  %u\n", cpu);

	if (apicid == BAD_APICID ||
	    !physid_isset(apicid, phys_cpu_present_map) ||
	    !apic->apic_id_valid(apicid)) {
		pr_err("%s: bad cpu %d\n", __func__, cpu);
		return -EINVAL;
	}

	/*
	 * Already booted cpu?
	 */
	if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
		pr_debug("do_boot_cpu %d Already started\n", cpu);
		return -ENOSYS;
	}

	/*
	 * Save current MTRR state in case it was changed since early boot
	 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
	 */
	mtrr_save_state();

	/* x86 CPUs take themselves offline, so delayed offline is OK. */
	err = cpu_check_up_prepare(cpu);
	if (err && err != -EBUSY)
		return err;

	/* the FPU context is blank, nobody can own it */
	per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;

	err = common_cpu_up(cpu, tidle);
	if (err)
		return err;

	err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
	if (err) {
		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
		ret = -EIO;
		goto unreg_nmi;
	}

	/*
	 * Check TSC synchronization with the AP (keep irqs disabled
	 * while doing so):
	 */
	local_irq_save(flags);
	check_tsc_sync_source(cpu);
	local_irq_restore(flags);

	while (!cpu_online(cpu)) {
		cpu_relax();
		touch_nmi_watchdog();
	}

unreg_nmi:
	/*
	 * Clean up the nmi handler. Do this after the callin and callout sync
	 * to avoid impact of possible long unregister time.
	 */
	if (cpu0_nmi_registered)
		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");

	return ret;
}
1211
1212
1213
1214
/*
 * Fall back to non SMP mode after errors.
 */
void arch_disable_smp_support(void)
{
	disable_ioapic_support();
}
1219
1220
1221
1222
1223
1224
/*
 * Disable the SMP bootup code in case of errors or lack of hardware
 * support: shrink present/possible masks to CPU0 and point its
 * topology masks at itself.
 */
static __init void disable_smp(void)
{
	pr_info("SMP disabled\n");

	disable_ioapic_support();

	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));

	if (smp_found_config)
		physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
	else
		physid_set_mask_of_physid(0, &phys_cpu_present_map);
	cpumask_set_cpu(0, topology_sibling_cpumask(0));
	cpumask_set_cpu(0, topology_core_cpumask(0));
	cpumask_set_cpu(0, topology_die_cpumask(0));
}
1242
1243
1244
1245
/*
 * Various sanity checks.
 */
static void __init smp_sanity_check(void)
{
	preempt_disable();

#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32)
	if (def_to_bigsmp && nr_cpu_ids > 8) {
		unsigned int cpu;
		unsigned nr;

		pr_warn("More than 8 CPUs detected - skipping them\n"
			"Use CONFIG_X86_BIGSMP\n");

		/* Cap the present mask to the first 8 CPUs. */
		nr = 0;
		for_each_present_cpu(cpu) {
			if (nr >= 8)
				set_cpu_present(cpu, false);
			nr++;
		}

		/* And the possible mask likewise. */
		nr = 0;
		for_each_possible_cpu(cpu) {
			if (nr >= 8)
				set_cpu_possible(cpu, false);
			nr++;
		}

		nr_cpu_ids = 8;
	}
#endif

	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
		pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n",
			hard_smp_processor_id());

		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 */
	if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
		pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n",
			  boot_cpu_physical_apicid);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}
	preempt_enable();
}
1294
1295static void __init smp_cpu_index_default(void)
1296{
1297 int i;
1298 struct cpuinfo_x86 *c;
1299
1300 for_each_possible_cpu(i) {
1301 c = &cpu_data(i);
1302
1303 c->cpu_index = nr_cpu_ids;
1304 }
1305}
1306
/* Cache CPU0's logical APIC id from APIC_LDR for the NMI wakeup path. */
static void __init smp_get_logical_apicid(void)
{
	if (x2apic_mode)
		cpu0_logical_apicid = apic_read(APIC_LDR);
	else
		cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
}
1314
1315
1316
1317
1318
1319
/*
 * Prepare for SMP bootup.
 * @max_cpus: configured maximum number of CPUs
 *
 * Allocates topology cpumasks, builds CPU0's sibling map, sanity-checks
 * the configuration and sets up interrupt/timer delivery depending on
 * the resolved APIC interrupt mode.
 */
void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned int i;

	smp_cpu_index_default();

	/*
	 * Setup boot CPU information
	 */
	smp_store_boot_cpu_info(); /* Final full version of the data */
	cpumask_copy(cpu_callin_mask, cpumask_of(0));
	mb();

	for_each_possible_cpu(i) {
		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
	}

	/*
	 * Set 'default' x86 topology, this matches default_topology() in that
	 * it has NUMA nodes as a topology level. See also
	 * native_smp_cpus_done().
	 *
	 * Must be done before set_cpus_sibling_map() is ran.
	 */
	set_sched_topology(x86_topology);

	set_cpu_sibling_map(0);
	init_freq_invariance(false, false);
	smp_sanity_check();

	switch (apic_intr_mode) {
	case APIC_PIC:
	case APIC_VIRTUAL_WIRE_NO_CONFIG:
		disable_smp();
		return;
	case APIC_SYMMETRIC_IO_NO_ROUTING:
		disable_smp();
		/* Setup local timer */
		x86_init.timers.setup_percpu_clockev();
		return;
	case APIC_VIRTUAL_WIRE:
	case APIC_SYMMETRIC_IO:
		break;
	}

	/* Setup local timer */
	x86_init.timers.setup_percpu_clockev();

	smp_get_logical_apicid();

	pr_info("CPU0: ");
	print_cpu_info(&cpu_data(0));

	uv_system_init();

	set_mtrr_aps_delayed_init();

	smp_quirk_init_udelay();

	speculative_store_bypass_ht_init();
}
1384
/* Defer MTRR initialization for APs being thawed after suspend/resume. */
void arch_thaw_secondary_cpus_begin(void)
{
	set_mtrr_aps_delayed_init();
}
1389
/* Perform the deferred MTRR initialization once all APs are thawed. */
void arch_thaw_secondary_cpus_end(void)
{
	mtrr_aps_init();
}
1394
1395
1396
1397
/*
 * Early setup to make printk work.
 */
void __init native_smp_prepare_boot_cpu(void)
{
	int me = smp_processor_id();
	switch_to_new_gdt(me);
	/* already set me in cpu_online_mask in boot_cpu_init() */
	cpumask_set_cpu(me, cpu_callout_mask);
	cpu_set_state_online(me);
	native_pv_lock_init();
}
1407
/* Derive __max_logical_packages from total_cpus and per-package CPU count. */
void __init calculate_max_logical_packages(void)
{
	int ncpus;

	/*
	 * Today neither Intel nor AMD support heterogeneous systems so
	 * extrapolate the boot cpu's data to all packages.
	 */
	ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
	pr_info("Max logical packages: %u\n", __max_logical_packages);
}
1420
/* Finalize SMP bringup after all secondary CPUs have been booted. */
void __init native_smp_cpus_done(unsigned int max_cpus)
{
	pr_debug("Boot done\n");

	calculate_max_logical_packages();

	/* Switch topology if a package spans multiple NUMA nodes. */
	if (x86_has_numa_in_package)
		set_sched_topology(x86_numa_in_package_topology);

	nmi_selftest();
	impress_friends();
	mtrr_aps_init();
}
1434
/* "possible_cpus=" command-line override; -1 means auto-detect. */
static int __initdata setup_possible_cpus = -1;
static int __init _setup_possible_cpus(char *str)
{
	get_option(&str, &setup_possible_cpus);
	return 0;
}
early_param("possible_cpus", _setup_possible_cpus);
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
/*
 * cpu_possible_mask should be static: it is initialized here, once, from
 * the detected/configured processor count, clamped by NR_CPUS and the
 * "maxcpus="/"possible_cpus=" command-line limits. With CONFIG_HOTPLUG_CPU
 * disabled CPUs are included so they can be onlined later.
 */
__init void prefill_possible_map(void)
{
	int i, possible;

	/* No boot processor was found in mptable or ACPI MADT */
	if (!num_processors) {
		if (boot_cpu_has(X86_FEATURE_APIC)) {
			int apicid = boot_cpu_physical_apicid;
			int cpu = hard_smp_processor_id();

			pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);

			/* Make sure boot cpu is enumerated */
			if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
			    apic->apic_id_valid(apicid))
				generic_processor_info(apicid, boot_cpu_apic_version);
		}

		if (!num_processors)
			num_processors = 1;
	}

	i = setup_max_cpus ?: 1;
	if (setup_possible_cpus == -1) {
		possible = num_processors;
#ifdef CONFIG_HOTPLUG_CPU
		if (setup_max_cpus)
			possible += disabled_cpus;
#else
		if (possible > i)
			possible = i;
#endif
	} else
		possible = setup_possible_cpus;

	total_cpus = max_t(int, possible, num_processors + disabled_cpus);

	/* nr_cpu_ids could be reduced via nr_cpus= */
	if (possible > nr_cpu_ids) {
		pr_warn("%d Processors exceeds NR_CPUS limit of %u\n",
			possible, nr_cpu_ids);
		possible = nr_cpu_ids;
	}

#ifdef CONFIG_HOTPLUG_CPU
	if (!setup_max_cpus)
#endif
	if (possible > i) {
		pr_warn("%d Processors exceeds max_cpus limit of %u\n",
			possible, setup_max_cpus);
		possible = i;
	}

	nr_cpu_ids = possible;

	pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
		possible, max_t(int, possible - num_processors, 0));

	reset_cpu_possible_mask();

	for (i = 0; i < possible; i++)
		set_cpu_possible(i, true);
}
1524
1525#ifdef CONFIG_HOTPLUG_CPU
1526
1527
1528static void recompute_smt_state(void)
1529{
1530 int max_threads, cpu;
1531
1532 max_threads = 0;
1533 for_each_online_cpu (cpu) {
1534 int threads = cpumask_weight(topology_sibling_cpumask(cpu));
1535
1536 if (threads > max_threads)
1537 max_threads = threads;
1538 }
1539 __max_smt_threads = max_threads;
1540}
1541
/* Tear down all topology links and accounting for an offlining CPU. */
static void remove_siblinginfo(int cpu)
{
	int sibling;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	for_each_cpu(sibling, topology_core_cpumask(cpu)) {
		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
		/*/
		 * last thread sibling in this cpu core going down
		 */
		if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1)
			cpu_data(sibling).booted_cores--;
	}

	for_each_cpu(sibling, topology_die_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));

	for_each_cpu(sibling, topology_sibling_cpumask(cpu)) {
		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
		/* sibling has just one thread left: SMT is gone for it */
		if (cpumask_weight(topology_sibling_cpumask(sibling)) == 1)
			cpu_data(sibling).smt_active = false;
	}

	for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
		cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
	cpumask_clear(cpu_llc_shared_mask(cpu));
	cpumask_clear(topology_sibling_cpumask(cpu));
	cpumask_clear(topology_core_cpumask(cpu));
	cpumask_clear(topology_die_cpumask(cpu));
	c->cpu_core_id = 0;
	c->booted_cores = 0;
	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
	recompute_smt_state();
}
1576
/* Remove the CPU from every bookkeeping mask and the NUMA maps. */
static void remove_cpu_from_maps(int cpu)
{
	set_cpu_online(cpu, false);
	cpumask_clear_cpu(cpu, cpu_callout_mask);
	cpumask_clear_cpu(cpu, cpu_callin_mask);
	/* was set by cpu_init() */
	cpumask_clear_cpu(cpu, cpu_initialized_mask);
	numa_remove_cpu(cpu);
}
1586
/*
 * Common teardown for a CPU that is being taken offline.  Runs on the
 * dying CPU itself (note smp_processor_id()): removes it from the
 * topology and global masks, migrates/fixes up its interrupts and
 * takes the local APIC offline.
 */
void cpu_disable_common(void)
{
	int cpu = smp_processor_id();

	remove_siblinginfo(cpu);

	/*
	 * Hold the vector lock so interrupt descriptors and vector
	 * allocations cannot change while the CPU leaves the maps.
	 */
	lock_vector_lock();
	remove_cpu_from_maps(cpu);
	unlock_vector_lock();
	fixup_irqs();
	lapic_offline();
}
1600
/*
 * Disable the calling CPU for hotplug removal.
 *
 * Returns 0 on success, or the negative error from
 * lapic_can_unplug_cpu() if the local APIC cannot be unplugged.
 */
int native_cpu_disable(void)
{
	int ret;

	ret = lapic_can_unplug_cpu();
	if (ret)
		return ret;

	cpu_disable_common();

	/*
	 * Soft-disable the local APIC of this dying CPU so it no longer
	 * accepts interrupts once it has been removed from the online
	 * and vector maps above.
	 *
	 * NOTE(review): the original comment block here was lost; it
	 * presumably explained why a soft disable (rather than a full
	 * hardware disable) is required — e.g. so the CPU can still be
	 * woken for a subsequent online.  Confirm against upstream
	 * history before relying on details.
	 */
	apic_soft_disable();

	return 0;
}
1633
1634int common_cpu_die(unsigned int cpu)
1635{
1636 int ret = 0;
1637
1638
1639
1640
1641 if (cpu_wait_death(cpu, 5)) {
1642 if (system_state == SYSTEM_RUNNING)
1643 pr_info("CPU %u is now offline\n", cpu);
1644 } else {
1645 pr_err("CPU %u didn't die...\n", cpu);
1646 ret = -1;
1647 }
1648
1649 return ret;
1650}
1651
/* Native hook: just perform the common wait-for-death handling. */
void native_cpu_die(unsigned int cpu)
{
	common_cpu_die(cpu);
}
1656
/*
 * Common preamble for a CPU entering its "dead" loop: detach the idle
 * task, acknowledge death to the waiting CPU, and shut off interrupts
 * before the caller parks the CPU (mwait/hlt/cpuidle).
 */
void play_dead_common(void)
{
	idle_task_exit();

	/* Ack the death so common_cpu_die()'s cpu_wait_death() succeeds */
	(void)cpu_report_death();

	/*
	 * From this point on the CPU must not take interrupts; it only
	 * spins in its play-dead loop until (potentially) woken again.
	 */
	local_irq_disable();
}
1669
1670
1671
1672
1673
1674
1675void cond_wakeup_cpu0(void)
1676{
1677 if (smp_processor_id() == 0 && enable_start_cpu0)
1678 start_cpu0();
1679}
1680EXPORT_SYMBOL_GPL(cond_wakeup_cpu0);
1681
1682
1683
1684
1685
1686static inline void mwait_play_dead(void)
1687{
1688 unsigned int eax, ebx, ecx, edx;
1689 unsigned int highest_cstate = 0;
1690 unsigned int highest_subcstate = 0;
1691 void *mwait_ptr;
1692 int i;
1693
1694 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
1695 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
1696 return;
1697 if (!this_cpu_has(X86_FEATURE_MWAIT))
1698 return;
1699 if (!this_cpu_has(X86_FEATURE_CLFLUSH))
1700 return;
1701 if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
1702 return;
1703
1704 eax = CPUID_MWAIT_LEAF;
1705 ecx = 0;
1706 native_cpuid(&eax, &ebx, &ecx, &edx);
1707
1708
1709
1710
1711
1712 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
1713 eax = 0;
1714 } else {
1715 edx >>= MWAIT_SUBSTATE_SIZE;
1716 for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
1717 if (edx & MWAIT_SUBSTATE_MASK) {
1718 highest_cstate = i;
1719 highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
1720 }
1721 }
1722 eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
1723 (highest_subcstate - 1);
1724 }
1725
1726
1727
1728
1729
1730
1731 mwait_ptr = ¤t_thread_info()->flags;
1732
1733 wbinvd();
1734
1735 while (1) {
1736
1737
1738
1739
1740
1741
1742
1743 mb();
1744 clflush(mwait_ptr);
1745 mb();
1746 __monitor(mwait_ptr, 0, 0);
1747 mb();
1748 __mwait(eax, 0);
1749
1750 cond_wakeup_cpu0();
1751 }
1752}
1753
1754void hlt_play_dead(void)
1755{
1756 if (__this_cpu_read(cpu_info.x86) >= 4)
1757 wbinvd();
1758
1759 while (1) {
1760 native_halt();
1761
1762 cond_wakeup_cpu0();
1763 }
1764}
1765
/*
 * Native "play dead" entry: common teardown, notify tboot, then park
 * the CPU — preferring MWAIT, then cpuidle's dead state, finally HLT.
 */
void native_play_dead(void)
{
	play_dead_common();
	tboot_shutdown(TB_SHUTDOWN_WFS);

	/* mwait_play_dead() only returns if MWAIT is not usable */
	mwait_play_dead();
	if (cpuidle_play_dead())
		hlt_play_dead();
}
1775
1776#else
/* CPU hotplug not configured: disabling a CPU is unsupported. */
int native_cpu_disable(void)
{
	return -ENOSYS;
}
1781
void native_cpu_die(unsigned int cpu)
{
	/* We said "no" in __cpu_disable */
	BUG();
}
1787
/* Without hotplug a CPU can never reach play_dead. */
void native_play_dead(void)
{
	BUG();
}
1792
1793#endif
1794
1795#ifdef CONFIG_X86_64
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
/* Enabled once frequency invariance is successfully initialized */
DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);

/* Per-CPU APERF/MPERF snapshots from the previous scale-tick */
static DEFINE_PER_CPU(u64, arch_prev_aperf);
static DEFINE_PER_CPU(u64, arch_prev_mperf);
/* max-turbo/base ratio, fixed point with SCHED_CAPACITY_SCALE == 1.0 */
static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
1836
1837void arch_set_max_freq_ratio(bool turbo_disabled)
1838{
1839 arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
1840 arch_turbo_freq_ratio;
1841}
1842EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
1843
1844static bool turbo_disabled(void)
1845{
1846 u64 misc_en;
1847 int err;
1848
1849 err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
1850 if (err)
1851 return false;
1852
1853 return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
1854}
1855
1856static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
1857{
1858 int err;
1859
1860 err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
1861 if (err)
1862 return false;
1863
1864 err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
1865 if (err)
1866 return false;
1867
1868 *base_freq = (*base_freq >> 16) & 0x3F;
1869 *turbo_freq = *turbo_freq & 0x3F;
1870
1871 return true;
1872}
1873
/* Shorthand: Intel family 6 model with APERFMPERF support */
#define X86_MATCH(model)					\
	X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6,		\
		INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)

/* Parts using the Knights Landing style turbo-ratio-limit encoding */
static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
	X86_MATCH(XEON_PHI_KNL),
	X86_MATCH(XEON_PHI_KNM),
	{}
};

/* Parts using the Skylake-X grouped turbo-ratio-limit encoding */
static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
	X86_MATCH(SKYLAKE_X),
	{}
};

/* Goldmont parts: SKX-style encoding with group size 1 */
static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
	X86_MATCH(ATOM_GOLDMONT),
	X86_MATCH(ATOM_GOLDMONT_D),
	X86_MATCH(ATOM_GOLDMONT_PLUS),
	{}
};
1895
/*
 * Knights Landing/Mill: derive base and turbo ratios.  The base ratio
 * comes from MSR_PLATFORM_INFO bits 15:8.  KNL's
 * MSR_TURBO_RATIO_LIMIT encodes a starting ratio plus a sequence of
 * 3-bit deltas; walk them, subtracting, until @num_delta_fratio
 * non-zero deltas have been applied, and report the resulting ratio.
 *
 * NOTE(review): the exact bit layout (delta at bits i+5..i+7, step 8)
 * matches the KNL turbo MSR as used here — verify against the Intel
 * SDM before changing.
 */
static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
				int num_delta_fratio)
{
	int fratio, delta_fratio, found;
	int err, i;
	u64 msr;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state ratio */

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
	if (err)
		return false;

	fratio = (msr >> 8) & 0xFF;
	i = 16;
	found = 0;
	do {
		if (found >= num_delta_fratio) {
			*turbo_freq = fratio;
			return true;
		}

		delta_fratio = (msr >> (i + 5)) & 0x7;

		if (delta_fratio) {
			found += 1;
			fratio -= delta_fratio;
		}

		i += 8;
	} while (i < 64);

	/* Ran out of fields: report whatever ratio the walk ended on */
	return true;
}
1934
/*
 * Skylake-X style: MSR_TURBO_RATIO_LIMIT holds per-group turbo ratios
 * (one byte each) and MSR_TURBO_RATIO_LIMIT1 the matching group core
 * counts.  Return the turbo ratio of the first group covering at
 * least @size active cores; base ratio comes from MSR_PLATFORM_INFO.
 */
static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
{
	u64 ratios, counts;
	u32 group_size;
	int err, i;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state ratio */

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
	if (err)
		return false;

	for (i = 0; i < 64; i += 8) {
		group_size = (counts >> i) & 0xFF;
		if (group_size >= size) {
			*turbo_freq = (ratios >> i) & 0xFF;
			return true;
		}
	}

	/* No group large enough was advertised */
	return false;
}
1965
/*
 * Generic Intel Core: base ratio from MSR_PLATFORM_INFO bits 15:8,
 * turbo ratio from MSR_TURBO_RATIO_LIMIT.  Prefers the 4-core turbo
 * field (bits 31:24) and falls back to the 1-core field (bits 7:0)
 * when the former reads zero — presumably on parts with fewer than
 * four cores (NOTE(review): confirm against the SDM's
 * MSR_TURBO_RATIO_LIMIT layout).
 */
static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
{
	u64 msr;
	int err;

	err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
	if (err)
		return false;

	err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
	if (err)
		return false;

	*base_freq = (*base_freq >> 8) & 0xFF;	/* max P state ratio */
	*turbo_freq = (msr >> 24) & 0xFF;	/* 4C turbo ratio */

	/* The CPU may have fewer than 4 cores: use the 1C turbo ratio */
	if (!*turbo_freq)
		*turbo_freq = msr & 0xFF;

	return true;
}
1988
/*
 * Determine the turbo/base frequency ratio on Intel parts, trying the
 * model-specific MSR layouts in order (Silvermont, Goldmont, KNL,
 * SKX, then generic Core).  On success, stores the ratio (scaled by
 * SCHED_CAPACITY_SCALE) in arch_turbo_freq_ratio and applies it via
 * arch_set_max_freq_ratio().  Returns false if no method worked or
 * the values were unusable.
 */
static bool intel_set_max_freq_ratio(void)
{
	u64 base_freq, turbo_freq;
	u64 turbo_ratio;

	if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
		goto out;

	if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
		goto out;

	if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
	    knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
		goto out;

	if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
	    skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
		goto out;

	if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
		goto out;

	return false;

out:
	/*
	 * Some environments (e.g. hypervisors) advertise the feature
	 * but return zeroes from the MSRs; a zero base or turbo
	 * frequency makes the ratio meaningless, so bail out.
	 */
	if (!base_freq || !turbo_freq) {
		pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
		return false;
	}

	turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
	if (!turbo_ratio) {
		pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
		return false;
	}

	arch_turbo_freq_ratio = turbo_ratio;
	arch_set_max_freq_ratio(turbo_disabled());

	return true;
}
2037
2038#ifdef CONFIG_ACPI_CPPC_LIB
/*
 * AMD: compute the max-frequency ratio from ACPI CPPC performance
 * capabilities — highest perf (from amd_get_highest_perf()) over
 * nominal perf, scaled by SCHED_CAPACITY_SCALE.  The result is
 * averaged with 1.0 before use.  Returns false if CPPC data is
 * unavailable or unusable.
 */
static bool amd_set_max_freq_ratio(void)
{
	struct cppc_perf_caps perf_caps;
	u64 highest_perf, nominal_perf;
	u64 perf_ratio;
	int rc;

	rc = cppc_get_perf_caps(0, &perf_caps);
	if (rc) {
		pr_debug("Could not retrieve perf counters (%d)\n", rc);
		return false;
	}

	highest_perf = amd_get_highest_perf();
	nominal_perf = perf_caps.nominal_perf;

	if (!highest_perf || !nominal_perf) {
		pr_debug("Could not retrieve highest or nominal performance\n");
		return false;
	}

	perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf);
	/*
	 * Average with SCHED_CAPACITY_SCALE (1.0) — NOTE(review): the
	 * original rationale comment was lost; presumably this damps
	 * the boost ratio. Confirm against upstream history.
	 */
	perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1;
	if (!perf_ratio) {
		pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n");
		return false;
	}

	arch_turbo_freq_ratio = perf_ratio;
	arch_set_max_freq_ratio(false);

	return true;
}
2073#else
/* Without CPPC support the AMD ratio cannot be determined. */
static bool amd_set_max_freq_ratio(void)
{
	return false;
}
2078#endif
2079
2080static void init_counter_refs(void)
2081{
2082 u64 aperf, mperf;
2083
2084 rdmsrl(MSR_IA32_APERF, aperf);
2085 rdmsrl(MSR_IA32_MPERF, mperf);
2086
2087 this_cpu_write(arch_prev_aperf, aperf);
2088 this_cpu_write(arch_prev_mperf, mperf);
2089}
2090
2091#ifdef CONFIG_PM_SLEEP
/* Re-baseline the APERF/MPERF reference counters on resume */
static struct syscore_ops freq_invariance_syscore_ops = {
	.resume	= init_counter_refs,
};

static void register_freq_invariance_syscore_ops(void)
{
	/* Bail out if already registered (list node is linked in) */
	if (freq_invariance_syscore_ops.node.prev)
		return;

	register_syscore_ops(&freq_invariance_syscore_ops);
}
#else
static inline void register_freq_invariance_syscore_ops(void) {}
#endif
2107
/*
 * Initialize scheduler frequency invariance.
 *
 * @secondary:  true when called for a non-boot CPU — then only take a
 *              counter snapshot if invariance is already enabled.
 * @cppc_ready: true when ACPI CPPC data is available; on AMD the
 *              setup is deferred until it is (see
 *              init_freq_invariance_cppc()).
 */
static void init_freq_invariance(bool secondary, bool cppc_ready)
{
	bool ret = false;

	if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
		return;

	if (secondary) {
		if (static_branch_likely(&arch_scale_freq_key)) {
			init_counter_refs();
		}
		return;
	}

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		ret = intel_set_max_freq_ratio();
	else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		/* Retried later from init_freq_invariance_cppc() */
		if (!cppc_ready) {
			return;
		}
		ret = amd_set_max_freq_ratio();
	}

	if (ret) {
		init_counter_refs();
		static_branch_enable(&arch_scale_freq_key);
		register_freq_invariance_syscore_ops();
		pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
	} else {
		pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
	}
}
2140
2141#ifdef CONFIG_ACPI_CPPC_LIB
/* Serializes the first-caller-does-setup logic below */
static DEFINE_MUTEX(freq_invariance_lock);

/*
 * Called once CPPC data becomes available.  The first caller performs
 * the full (boot-CPU) initialization; every later caller is treated
 * as a secondary CPU.  The mutex makes that ordering race-free.
 */
void init_freq_invariance_cppc(void)
{
	static bool secondary;

	mutex_lock(&freq_invariance_lock);

	init_freq_invariance(secondary, true);
	secondary = true;

	mutex_unlock(&freq_invariance_lock);
}
2155#endif
2156
/*
 * Deferred disable of frequency invariance; scheduled from
 * arch_scale_freq_tick() (which runs in a context where the static
 * branch cannot be flipped directly).
 */
static void disable_freq_invariance_workfn(struct work_struct *work)
{
	static_branch_disable(&arch_scale_freq_key);
}

static DECLARE_WORK(disable_freq_invariance_work,
		    disable_freq_invariance_workfn);

/* Per-CPU frequency scale factor; SCHED_CAPACITY_SCALE == 1.0 */
DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
2166
/*
 * Periodic tick hook: update this CPU's frequency scale factor from
 * the APERF/MPERF deltas since the last tick.
 *
 * Computes, in SCHED_CAPACITY_SCALE fixed point:
 *   freq_scale = (acnt / mcnt) / arch_max_freq_ratio
 * via (acnt << 2*SHIFT) / (mcnt * arch_max_freq_ratio), clamped to
 * 1.0.  Any overflow, zero delta, or zero result disables invariance
 * entirely (deferred to a workqueue).
 */
void arch_scale_freq_tick(void)
{
	u64 freq_scale = SCHED_CAPACITY_SCALE;
	u64 aperf, mperf;
	u64 acnt, mcnt;

	if (!arch_scale_freq_invariant())
		return;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);

	acnt = aperf - this_cpu_read(arch_prev_aperf);
	mcnt = mperf - this_cpu_read(arch_prev_mperf);

	this_cpu_write(arch_prev_aperf, aperf);
	this_cpu_write(arch_prev_mperf, mperf);

	if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
		goto error;

	if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
		goto error;

	freq_scale = div64_u64(acnt, mcnt);
	if (!freq_scale)
		goto error;

	/* A busy CPU never scales above nominal capacity */
	if (freq_scale > SCHED_CAPACITY_SCALE)
		freq_scale = SCHED_CAPACITY_SCALE;

	this_cpu_write(arch_freq_scale, freq_scale);
	return;

error:
	pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
	schedule_work(&disable_freq_invariance_work);
}
2205#else
/* !CONFIG_X86_64: frequency invariance is not supported */
static inline void init_freq_invariance(bool secondary, bool cppc_ready)
{
}
2209#endif
2210