1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
41
42#include <linux/init.h>
43#include <linux/smp.h>
44#include <linux/export.h>
45#include <linux/sched.h>
46#include <linux/sched/topology.h>
47#include <linux/sched/hotplug.h>
48#include <linux/sched/task_stack.h>
49#include <linux/percpu.h>
50#include <linux/memblock.h>
51#include <linux/err.h>
52#include <linux/nmi.h>
53#include <linux/tboot.h>
54#include <linux/stackprotector.h>
55#include <linux/gfp.h>
56#include <linux/cpuidle.h>
57#include <linux/numa.h>
58
59#include <asm/acpi.h>
60#include <asm/desc.h>
61#include <asm/nmi.h>
62#include <asm/irq.h>
63#include <asm/realmode.h>
64#include <asm/cpu.h>
65#include <asm/numa.h>
66#include <asm/pgtable.h>
67#include <asm/tlbflush.h>
68#include <asm/mtrr.h>
69#include <asm/mwait.h>
70#include <asm/apic.h>
71#include <asm/io_apic.h>
72#include <asm/fpu/internal.h>
73#include <asm/setup.h>
74#include <asm/uv/uv.h>
75#include <linux/mc146818rtc.h>
76#include <asm/i8259.h>
77#include <asm/misc.h>
78#include <asm/qspinlock.h>
79#include <asm/intel-family.h>
80#include <asm/cpu_device_id.h>
81#include <asm/spec-ctrl.h>
82#include <asm/hw_irq.h>
83
84
/* Representing the set of HT siblings of each logical CPU. */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);

/* Representing the set of HT and core siblings of each logical CPU. */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);

/* Representing the set of CPUs on the same die as each logical CPU. */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
EXPORT_PER_CPU_SYMBOL(cpu_die_map);

/* CPUs sharing the last level cache with this CPU (see cpu_coregroup_mask()). */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Per CPU bogomips and other parameters. */
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);

/* Logical package management: */
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;	/* next free logical package id */
static unsigned int logical_die __read_mostly;		/* next free logical die id */

/* Maximum number of SMT threads on any online core (see set_cpu_sibling_map()). */
int __read_mostly __max_smt_threads = 1;

/* Set when the scheduler needs to rebuild topology; consumed by arch_update_cpu_topology(). */
bool x86_topology_update;
113
114int arch_update_cpu_topology(void)
115{
116 int retval = x86_topology_update;
117
118 x86_topology_update = false;
119 return retval;
120}
121
/*
 * Set up the warm-reset path for an AP: program the CMOS shutdown status
 * byte (0xA = jump via the 40:67 reset vector) and store the trampoline
 * entry point as a real-mode segment:offset pair in the BIOS data area.
 */
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
	unsigned long flags;

	spin_lock_irqsave(&rtc_lock, flags);
	CMOS_WRITE(0xa, 0xf);	/* shutdown code: jump via reset vector */
	spin_unlock_irqrestore(&rtc_lock, flags);
	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
							start_eip >> 4;
	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
							start_eip & 0xf;
}
134
/*
 * Undo smpboot_setup_warm_reset_vector(): clear the CMOS shutdown code
 * and zap the whole 32-bit warm reset vector in the BIOS data area.
 */
static inline void smpboot_restore_warm_reset_vector(void)
{
	unsigned long flags;

	/*
	 * Paranoid: Set warm reset code and vector here back
	 * to default values.
	 */
	spin_lock_irqsave(&rtc_lock, flags);
	CMOS_WRITE(0, 0xf);
	spin_unlock_irqrestore(&rtc_lock, flags);

	*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
149
150static void init_freq_invariance(bool secondary);
151
152
153
154
155
/*
 * Report back to the Boot Processor during CPU bringup.
 * Runs on the AP with interrupts disabled, before the CPU goes online.
 */
static void smp_callin(void)
{
	int cpuid;

	/*
	 * If waken up by an INIT in an 82489DX configuration
	 * cpu_callout_mask guarantees we don't get here before an
	 * INIT_deassert IPI reaches our local APIC, so it is now safe
	 * to touch our local APIC.
	 */
	cpuid = smp_processor_id();

	/*
	 * The boot CPU has finished the init stage and is spinning on
	 * cpu_callin_mask until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */
	apic_ap_setup();

	/*
	 * Save our processor parameters. Note: this information
	 * is needed for clock calibration.
	 */
	smp_store_cpu_info(cpuid);

	/*
	 * The topology information must be up to date before
	 * calibrate_delay() and notify_cpu_starting().
	 */
	set_cpu_sibling_map(raw_smp_processor_id());

	init_freq_invariance(true);

	/*
	 * Get our bogomips.
	 * Update loops_per_jiffy in cpu_data. Previous call to
	 * smp_store_cpu_info() stored a value that is close but not as
	 * accurate as the value just calculated.
	 */
	calibrate_delay();
	cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
	pr_debug("Stack at about %p\n", &cpuid);

	wmb();

	notify_cpu_starting(cpuid);

	/*
	 * Allow the master to continue.
	 */
	cpumask_set_cpu(cpuid, cpu_callin_mask);
}
209
static int cpu0_logical_apicid;	/* logical APIC id of CPU0, set in smp_get_logical_apicid() */
static int enable_start_cpu0;	/* non-zero while CPU0 is being woken via NMI (see wakeup_cpu0_nmi()) */
212
213
214
/*
 * Activate a secondary processor. This is the C entry point of an AP,
 * reached from the real-mode trampoline via initial_code.
 */
static void notrace start_secondary(void *unused)
{
	/*
	 * Don't put *anything* except direct CPU state initialization
	 * before cr4_init(): later code may depend on CR4 features
	 * being set up first.
	 */
	cr4_init();

#ifdef CONFIG_X86_32
	/* switch away from the initial page table */
	load_cr3(swapper_pg_dir);
	__flush_tlb_all();
#endif
	load_current_idt();
	cpu_init();
	x86_cpuinit.early_percpu_clock_init();
	preempt_disable();
	smp_callin();

	/* CPU0 NMI-wakeup (if any) is complete; drop the flag */
	enable_start_cpu0 = 0;

	/* otherwise gcc will move up smp_processor_id before the cpu_init */
	barrier();

	/*
	 * Check TSC synchronization with the boot CPU:
	 */
	check_tsc_sync_target();

	speculative_store_bypass_ht_init();

	/*
	 * Lock vector_lock, set CPU online and bring the vector
	 * allocator online. Online must be set with vector_lock held
	 * to prevent a concurrent irq setup/teardown from seeing a
	 * half valid vector space.
	 */
	lock_vector_lock();
	set_cpu_online(smp_processor_id(), true);
	lapic_online();
	unlock_vector_lock();
	cpu_set_state_online(smp_processor_id());
	x86_platform.nmi_init();

	/* enable local interrupts */
	local_irq_enable();

	/* must happen before the per-cpu clockevent setup below */
	boot_init_stack_canary();

	x86_cpuinit.setup_percpu_clockev();

	wmb();
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

	/*
	 * Prevent tail call to cpu_startup_entry() because the stack protector
	 * guard has been changed by boot_init_stack_canary() above, which
	 * would make a tail-called function fail its canary check.
	 */
	prevent_tail_call_optimization();
}
278
279
280
281
282
/**
 * topology_is_primary_thread - Check whether CPU is the primary SMT thread
 * @cpu:	CPU to check
 */
bool topology_is_primary_thread(unsigned int cpu)
{
	return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu));
}
287
288
289
290
291bool topology_smt_supported(void)
292{
293 return smp_num_siblings > 1;
294}
295
296
297
298
299
300
301int topology_phys_to_logical_pkg(unsigned int phys_pkg)
302{
303 int cpu;
304
305 for_each_possible_cpu(cpu) {
306 struct cpuinfo_x86 *c = &cpu_data(cpu);
307
308 if (c->initialized && c->phys_proc_id == phys_pkg)
309 return c->logical_proc_id;
310 }
311 return -1;
312}
313EXPORT_SYMBOL(topology_phys_to_logical_pkg);
314
315
316
317
318
319int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu)
320{
321 int cpu;
322 int proc_id = cpu_data(cur_cpu).phys_proc_id;
323
324 for_each_possible_cpu(cpu) {
325 struct cpuinfo_x86 *c = &cpu_data(cpu);
326
327 if (c->initialized && c->cpu_die_id == die_id &&
328 c->phys_proc_id == proc_id)
329 return c->logical_die_id;
330 }
331 return -1;
332}
333EXPORT_SYMBOL(topology_phys_to_logical_die);
334
335
336
337
338
339
340int topology_update_package_map(unsigned int pkg, unsigned int cpu)
341{
342 int new;
343
344
345 new = topology_phys_to_logical_pkg(pkg);
346 if (new >= 0)
347 goto found;
348
349 new = logical_packages++;
350 if (new != pkg) {
351 pr_info("CPU %u Converting physical %u to logical package %u\n",
352 cpu, pkg, new);
353 }
354found:
355 cpu_data(cpu).logical_proc_id = new;
356 return 0;
357}
358
359
360
361
362
363int topology_update_die_map(unsigned int die, unsigned int cpu)
364{
365 int new;
366
367
368 new = topology_phys_to_logical_die(die, cpu);
369 if (new >= 0)
370 goto found;
371
372 new = logical_die++;
373 if (new != die) {
374 pr_info("CPU %u Converting physical %u to logical die %u\n",
375 cpu, die, new);
376 }
377found:
378 cpu_data(cpu).logical_die_id = new;
379 return 0;
380}
381
/*
 * Populate cpu_data(0) from boot_cpu_data and set up its logical
 * package/die ids. Called once during SMP preparation.
 */
void __init smp_store_boot_cpu_info(void)
{
	int id = 0;		/* CPU 0 */
	struct cpuinfo_x86 *c = &cpu_data(id);

	*c = boot_cpu_data;
	c->cpu_index = id;
	topology_update_package_map(c->phys_proc_id, id);
	topology_update_die_map(c->cpu_die_id, id);
	c->initialized = true;
}
393
394
395
396
397
/*
 * Save processor parameters for CPU @id into cpu_data(id).
 * Called on the AP itself during smp_callin().
 */
void smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = &cpu_data(id);

	/* Copy boot_cpu_data only on the first bringup */
	if (!c->initialized)
		*c = boot_cpu_data;
	c->cpu_index = id;

	/*
	 * identify_secondary_cpu() fills in the CPU-specific feature and
	 * topology data for this AP.
	 */
	identify_secondary_cpu(c);
	c->initialized = true;
}
413
414static bool
415topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
416{
417 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
418
419 return (cpu_to_node(cpu1) == cpu_to_node(cpu2));
420}
421
/*
 * Warn (once) when two CPUs that are @name-siblings are not on the same
 * NUMA node; the dependency is then ignored. Returns true when the
 * topology is sane (same node).
 */
static bool
topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
{
	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

	return !WARN_ONCE(!topology_same_node(c, o),
		"sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
		"[node: %d != %d]. Ignoring dependency.\n",
		cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
}
432
/* Link @c1 and @c2 in both directions in the cpumask returned by @mfunc. */
#define link_mask(mfunc, c1, c2)					\
do {									\
	cpumask_set_cpu((c1), mfunc(c2));				\
	cpumask_set_cpu((c2), mfunc(c1));				\
} while (0)
438
/*
 * Decide whether @c and @o are SMT (thread) siblings.
 *
 * With X86_FEATURE_TOPOEXT (AMD), siblings must share package, die and
 * LLC id, and either the same core id or the same compute-unit id
 * (cu_id, where 0xff means "not set"). Without TOPOEXT, matching
 * package, die and core ids suffice.
 */
static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

		if (c->phys_proc_id == o->phys_proc_id &&
		    c->cpu_die_id == o->cpu_die_id &&
		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
			if (c->cpu_core_id == o->cpu_core_id)
				return topology_sane(c, o, "smt");

			if ((c->cu_id != 0xff) &&
			    (o->cu_id != 0xff) &&
			    (c->cu_id == o->cu_id))
				return topology_sane(c, o, "smt");
		}

	} else if (c->phys_proc_id == o->phys_proc_id &&
		   c->cpu_die_id == o->cpu_die_id &&
		   c->cpu_core_id == o->cpu_core_id) {
		return topology_sane(c, o, "smt");
	}

	return false;
}
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
/*
 * CPU models with Sub-NUMA Clustering (SNC), where an LLC can be shared
 * across NUMA nodes. match_llc() suppresses the llc/NUMA sanity warning
 * for these models.
 */
static const struct x86_cpu_id snc_cpu[] = {
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, NULL),
	{}
};
484
/* Decide whether @c and @o share a last level cache. */
static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

	/* Do not match if we do not have a valid APICID for cpu: */
	if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
		return false;

	/* Do not match if LLC id does not match: */
	if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2))
		return false;

	/*
	 * Allow the SNC topology without warning. Return of false
	 * means 'c' does not share the LLC of 'o'. This will be
	 * reflected to userspace.
	 */
	if (!topology_same_node(c, o) && x86_match_cpu(snc_cpu))
		return false;

	return topology_sane(c, o, "llc");
}
507
508
509
510
511
512
513static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
514{
515 if (c->phys_proc_id == o->phys_proc_id)
516 return true;
517 return false;
518}
519
520static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
521{
522 if ((c->phys_proc_id == o->phys_proc_id) &&
523 (c->cpu_die_id == o->cpu_die_id))
524 return true;
525 return false;
526}
527
528
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
/* Add SD_ASYM_PACKING when ITMT (Turbo Boost Max 3.0) is enabled via sysctl. */
static inline int x86_sched_itmt_flags(void)
{
	return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
}

#ifdef CONFIG_SCHED_MC
/* Scheduler domain flags for the MC (core group) level. */
static int x86_core_flags(void)
{
	return cpu_core_flags() | x86_sched_itmt_flags();
}
#endif
#ifdef CONFIG_SCHED_SMT
/* Scheduler domain flags for the SMT level. */
static int x86_smt_flags(void)
{
	return cpu_smt_flags() | x86_sched_itmt_flags();
}
#endif
#endif
548
/*
 * Topology used when NUMA nodes sit inside a package: no DIE level.
 * Installed by native_smp_cpus_done() when x86_has_numa_in_package is set.
 */
static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
	{ NULL, },
};
558
/* Default x86 scheduler topology, installed in native_smp_prepare_cpus(). */
static struct sched_domain_topology_level x86_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};
569
570
571
572
573
574
/* Set by set_cpu_sibling_map() when a package spans multiple NUMA nodes. */
static bool x86_has_numa_in_package;
576
/*
 * Link @cpu into the sibling/llc/core/die cpumasks of all CPUs brought up
 * so far, maintain booted_cores counts and track the maximum SMT thread
 * count. Called on each AP from smp_callin() and for CPU0 at prepare time.
 */
void set_cpu_sibling_map(int cpu)
{
	bool has_smt = smp_num_siblings > 1;
	bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	struct cpuinfo_x86 *o;
	int i, threads;

	cpumask_set_cpu(cpu, cpu_sibling_setup_mask);

	if (!has_mp) {
		/* Uniprocessor package: every mask is just ourselves. */
		cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
		cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
		cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
		cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
		c->booted_cores = 1;
		return;
	}

	for_each_cpu(i, cpu_sibling_setup_mask) {
		o = &cpu_data(i);

		if ((i == cpu) || (has_smt && match_smt(c, o)))
			link_mask(topology_sibling_cpumask, cpu, i);

		if ((i == cpu) || (has_mp && match_llc(c, o)))
			link_mask(cpu_llc_shared_mask, cpu, i);

	}

	/*
	 * This needs a separate iteration over the cpus because we rely on
	 * all topology_sibling_cpumask links to be set up above.
	 */
	for_each_cpu(i, cpu_sibling_setup_mask) {
		o = &cpu_data(i);

		if ((i == cpu) || (has_mp && match_pkg(c, o))) {
			link_mask(topology_core_cpumask, cpu, i);

			/*
			 * Does this new cpu bringup a new core?
			 */
			if (cpumask_weight(
			    topology_sibling_cpumask(cpu)) == 1) {
				/*
				 * for each core in package, increment
				 * the booted_cores for this new cpu
				 */
				if (cpumask_first(
				    topology_sibling_cpumask(i)) == i)
					c->booted_cores++;
				/*
				 * increment the core count for all
				 * the other cpus in this package
				 */
				if (i != cpu)
					cpu_data(i).booted_cores++;
			} else if (i != cpu && !c->booted_cores)
				c->booted_cores = cpu_data(i).booted_cores;
		}
		if (match_pkg(c, o) && !topology_same_node(c, o))
			x86_has_numa_in_package = true;

		if ((i == cpu) || (has_mp && match_die(c, o)))
			link_mask(topology_die_cpumask, cpu, i);
	}

	threads = cpumask_weight(topology_sibling_cpumask(cpu));
	if (threads > __max_smt_threads)
		__max_smt_threads = threads;
}
649
650
/* Scheduler MC level: CPUs sharing the last level cache with @cpu. */
const struct cpumask *cpu_coregroup_mask(int cpu)
{
	const struct cpumask *llc_mask = cpu_llc_shared_mask(cpu);

	return llc_mask;
}
655
/* Sum the bogomips of all booted (called-out) CPUs and print the total. */
static void impress_friends(void)
{
	int cpu;
	unsigned long bogosum = 0;
	/*
	 * Allow the user to impress friends.
	 */
	pr_debug("Before bogomips\n");
	for_each_possible_cpu(cpu)
		if (cpumask_test_cpu(cpu, cpu_callout_mask))
			bogosum += cpu_data(cpu).loops_per_jiffy;
	pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
		num_online_cpus(),
		bogosum/(500000/HZ),
		(bogosum/(5000/HZ))%100);

	pr_debug("Before bogocount - setting activated=1\n");
}
674
/*
 * Debug helper: read and print a few registers (ID, VERSION, SPIV) of a
 * remote APIC using remote-read ICR transactions.
 */
void __inquire_remote_apic(int apicid)
{
	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	const char * const names[] = { "ID", "VERSION", "SPIV" };
	int timeout;
	u32 status;

	pr_info("Inquiring remote APIC 0x%x...\n", apicid);

	for (i = 0; i < ARRAY_SIZE(regs); i++) {
		pr_info("... APIC 0x%x %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		status = safe_apic_wait_icr_idle();
		if (status)
			pr_cont("a previous APIC delivery may have failed\n");

		apic_icr_write(APIC_DM_REMRD | regs[i], apicid);

		/* Poll for the remote read to complete (max ~100ms). */
		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			pr_cont("%08x\n", status);
			break;
		default:
			pr_cont("failed\n");
		}
	}
}
712
713
714
715
716
717
718
719
720
721
722
/*
 * The MP spec suggests a 10ms delay between asserting and de-asserting
 * INIT when starting a remote processor. Modern CPUs don't need it (see
 * smp_quirk_init_udelay()); the "cpu_init_udelay=" command line parameter
 * can override the delay explicitly.
 */
#define UDELAY_10MS_DEFAULT 10000

/* UINT_MAX means "not set on the command line". */
static unsigned int init_udelay = UINT_MAX;

static int __init cpu_init_udelay(char *str)
{
	get_option(&str, &init_udelay);

	return 0;
}
early_param("cpu_init_udelay", cpu_init_udelay);
734
/* Pick the INIT de-assert delay unless the command line already chose one. */
static void __init smp_quirk_init_udelay(void)
{
	/* if cmdline changed it from default, leave it alone */
	if (init_udelay != UINT_MAX)
		return;

	/* if modern processor, use no delay */
	if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
	    ((boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) && (boot_cpu_data.x86 >= 0x18)) ||
	    ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) {
		init_udelay = 0;
		return;
	}

	/* else, use legacy delay */
	init_udelay = UDELAY_10MS_DEFAULT;
}
751
752
753
754
755
756
/*
 * Wake up a CPU by sending it an NMI. Used (via wakeup_cpu_via_init_nmi())
 * to re-awaken an offlined CPU0 without going through the BIOS INIT path.
 * Returns the OR of send and accept status (0 on success).
 */
int
wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
{
	unsigned long send_status, accept_status = 0;
	int maxlvt;

	/* Send the NMI IPI to the target APIC. */
	apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid);

	pr_debug("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	/*
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(200);
	if (APIC_INTEGRATED(boot_cpu_apic_version)) {
		maxlvt = lapic_get_maxlvt();
		if (maxlvt > 3)		/* clear ESR first on these APICs */
			apic_write(APIC_ESR, 0);
		accept_status = (apic_read(APIC_ESR) & 0xEF);
	}
	pr_debug("NMI sent\n");

	if (send_status)
		pr_err("APIC never delivered???\n");
	if (accept_status)
		pr_err("APIC delivery error (%lx)\n", accept_status);

	return (send_status | accept_status);
}
790
/*
 * Wake up an AP with the classic INIT/INIT/STARTUP sequence.
 * @phys_apicid: physical APIC id of the target CPU
 * @start_eip:   physical address of the real-mode trampoline (page aligned,
 *               encoded as a vector in the STARTUP IPI via >> 12)
 * Returns the OR of send and accept status (0 on success).
 */
static int
wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
	unsigned long send_status = 0, accept_status = 0;
	int maxlvt, num_starts, j;

	maxlvt = lapic_get_maxlvt();

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(boot_cpu_apic_version)) {
		if (maxlvt > 3)		/* clear ESR first on these APICs */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	pr_debug("Asserting INIT\n");

	/*
	 * Turn INIT on target chip
	 */
	apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
		       phys_apicid);

	pr_debug("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	udelay(init_udelay);

	pr_debug("Deasserting INIT\n");

	/* De-assert INIT on the target chip */
	apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);

	pr_debug("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	mb();

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(boot_cpu_apic_version))
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Run STARTUP IPI loop.
	 */
	pr_debug("#startup loops: %d\n", num_starts);

	for (j = 1; j <= num_starts; j++) {
		pr_debug("Sending STARTUP #%d\n", j);
		if (maxlvt > 3)		/* clear ESR first on these APICs */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		pr_debug("After apic_write\n");

		/*
		 * STARTUP IPI: kick the target CPU, pointing its
		 * start-of-execution at the trampoline page.
		 */
		apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
			       phys_apicid);

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		if (init_udelay == 0)
			udelay(10);
		else
			udelay(300);

		pr_debug("Startup point 1\n");

		pr_debug("Waiting for send to finish...\n");
		send_status = safe_apic_wait_icr_idle();

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		if (init_udelay == 0)
			udelay(10);
		else
			udelay(200);

		if (maxlvt > 3)		/* clear ESR first on these APICs */
			apic_write(APIC_ESR, 0);
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	pr_debug("After Startup\n");

	if (send_status)
		pr_err("APIC never delivered???\n");
	if (accept_status)
		pr_err("APIC delivery error (%lx)\n", accept_status);

	return (send_status | accept_status);
}
904
905
/* Reduce the number of lines printed when booting a large machine. */
static void announce_cpu(int cpu, int apicid)
{
	static int current_node = NUMA_NO_NODE;
	int node = early_cpu_to_node(cpu);
	static int width, node_width;

	if (!width)
		width = num_digits(num_possible_cpus()) + 1; /* + '#' sign */

	if (!node_width)
		node_width = num_digits(num_possible_nodes()) + 1; /* + '#' */

	if (cpu == 1)
		printk(KERN_INFO "x86: Booting SMP configuration:\n");

	if (system_state < SYSTEM_RUNNING) {
		/* Boot-time: print compact per-node CPU lists. */
		if (node != current_node) {
			if (current_node > (-1))
				pr_cont("\n");
			current_node = node;

			printk(KERN_INFO ".... node %*s#%d, CPUs: ",
			       node_width - num_digits(node), " ", node);
		}

		/* Add padding for the BSP */
		if (cpu == 1)
			pr_cont("%*s", width + 1, " ");

		pr_cont("%*s#%d", width - num_digits(cpu), " ", cpu);

	} else
		/* Runtime hotplug: one full line per CPU. */
		pr_info("Booting Node %d Processor %d APIC 0x%x\n",
			node, cpu, apicid);
}
941
942static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
943{
944 int cpu;
945
946 cpu = smp_processor_id();
947 if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
948 return NMI_HANDLED;
949
950 return NMI_DONE;
951}
952
953
954
955
956
957
958
959
960
961
962
963
964
/*
 * Wake up a CPU: APs via the INIT/INIT/STARTUP sequence; CPU0 (the BSP)
 * via NMI instead, since INIT on the BSP would run the BIOS boot-strap
 * code. For the NMI path a handler is registered and *cpu0_nmi_registered
 * is set so the caller can unregister it later.
 */
static int
wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
			int *cpu0_nmi_registered)
{
	int id;
	int boot_error;

	preempt_disable();

	/*
	 * Wake up AP by INIT, INIT, STARTUP sequence.
	 */
	if (cpu) {
		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
		goto out;
	}

	/*
	 * Wake up BSP by NMI.
	 *
	 * Register an NMI handler to help wake up CPU0.
	 */
	boot_error = register_nmi_handler(NMI_LOCAL,
					  wakeup_cpu0_nmi, 0, "wake_cpu0");

	if (!boot_error) {
		enable_start_cpu0 = 1;
		*cpu0_nmi_registered = 1;
		if (apic->dest_logical == APIC_DEST_LOGICAL)
			id = cpu0_logical_apicid;
		else
			id = apicid;
		boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
	}

out:
	preempt_enable();

	return boot_error;
}
1005
/*
 * Common preparations before booting CPU @cpu with idle task @idle:
 * enable SMP alternatives, install the idle task and set up the per-CPU
 * IRQ stack. Returns 0 on success or a negative error.
 */
int common_cpu_up(unsigned int cpu, struct task_struct *idle)
{
	int ret;

	/* Just in case we booted with a single CPU. */
	alternatives_enable_smp();

	per_cpu(current_task, cpu) = idle;

	/* Initialize the interrupt stack(s) */
	ret = irq_init_percpu_irqstack(cpu);
	if (ret)
		return ret;

#ifdef CONFIG_X86_32
	/* Stack for startup_32 can be just as for start_secondary onwards */
	per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#else
	/* 64-bit APs pick up their per-cpu base via initial_gs */
	initial_gs = per_cpu_offset(cpu);
#endif
	return 0;
}
1028
1029
1030
1031
1032
1033
1034
/*
 * Boot one AP: set up its initial stack/GDT/entry point, kick it via
 * APIC, then wait for it to report in via cpu_initialized_mask and
 * cpu_callin_mask. Returns zero if the CPU booted OK, else an error.
 */
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
		       int *cpu0_nmi_registered)
{
	/* start_ip had better be page-aligned! */
	unsigned long start_ip = real_mode_header->trampoline_start;

	unsigned long boot_error = 0;
	unsigned long timeout;

	idle->thread.sp = (unsigned long)task_pt_regs(idle);
	early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
	initial_code = (unsigned long)start_secondary;
	initial_stack = idle->thread.sp;

	/* Enable the espfix hack for this CPU */
	init_espfix_ap(cpu);

	/* So we see what's up */
	announce_cpu(cpu, apicid);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */
	if (x86_platform.legacy.warm_reset) {

		pr_debug("Setting warm reset code and vector.\n");

		smpboot_setup_warm_reset_vector(start_ip);
		/*
		 * Be paranoid about clearing APIC errors.
		 */
		if (APIC_INTEGRATED(boot_cpu_apic_version)) {
			apic_write(APIC_ESR, 0);
			apic_read(APIC_ESR);
		}
	}

	/*
	 * AP might wait on cpu_callout_mask in cpu_init() with
	 * cpu_initialized_mask cleared. Clear it here so that the AP's
	 * init sequencing below observes a consistent state.
	 */
	cpumask_clear_cpu(cpu, cpu_initialized_mask);
	smp_mb();

	/*
	 * Wake up a CPU in difference cases:
	 * - Use the method in the APIC driver if it's defined
	 * Otherwise,
	 * - Use an INIT boot APIC message for APs or NMI for BSP.
	 */
	if (apic->wakeup_secondary_cpu)
		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
	else
		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
						     cpu0_nmi_registered);

	if (!boot_error) {
		/*
		 * Wait 10s total for first sign of life from AP
		 */
		boot_error = -1;
		timeout = jiffies + 10*HZ;
		while (time_before(jiffies, timeout)) {
			if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
				/*
				 * Tell AP to proceed with initialization
				 */
				cpumask_set_cpu(cpu, cpu_callout_mask);
				boot_error = 0;
				break;
			}
			schedule();
		}
	}

	if (!boot_error) {
		/*
		 * Wait till AP completes initial initialization
		 */
		while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
			/*
			 * Allow other tasks to run while we wait for the
			 * AP to come online. This also gives a chance
			 * for the MTRR work(triggered by the AP coming online)
			 * to be completed in the stop machine context.
			 */
			schedule();
		}
	}

	if (x86_platform.legacy.warm_reset) {
		/*
		 * Cleanup possible dangling ends...
		 */
		smpboot_restore_warm_reset_vector();
	}

	return boot_error;
}
1138
/*
 * Bring up CPU @cpu with idle task @tidle. Validates the APIC id, boots
 * the CPU via do_boot_cpu(), checks TSC sync against it and waits until
 * it is fully online. Returns 0 on success or a negative error.
 */
int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int apicid = apic->cpu_present_to_apicid(cpu);
	int cpu0_nmi_registered = 0;
	unsigned long flags;
	int err, ret = 0;

	lockdep_assert_irqs_enabled();

	pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);

	if (apicid == BAD_APICID ||
	    !physid_isset(apicid, phys_cpu_present_map) ||
	    !apic->apic_id_valid(apicid)) {
		pr_err("%s: bad cpu %d\n", __func__, cpu);
		return -EINVAL;
	}

	/*
	 * Already booted CPU?
	 */
	if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
		pr_debug("do_boot_cpu %d Already started\n", cpu);
		return -ENOSYS;
	}

	/*
	 * Save current MTRR state in case it was changed since early boot
	 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
	 */
	mtrr_save_state();

	/* x86 CPUs take themselves offline, so delayed offline is OK. */
	err = cpu_check_up_prepare(cpu);
	if (err && err != -EBUSY)
		return err;

	/* the FPU context is blank, nobody can own it */
	per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;

	err = common_cpu_up(cpu, tidle);
	if (err)
		return err;

	err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
	if (err) {
		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
		ret = -EIO;
		goto unreg_nmi;
	}

	/*
	 * Check TSC synchronization with the AP (keep irqs disabled
	 * while doing so):
	 */
	local_irq_save(flags);
	check_tsc_sync_source(cpu);
	local_irq_restore(flags);

	while (!cpu_online(cpu)) {
		cpu_relax();
		touch_nmi_watchdog();
	}

unreg_nmi:
	/*
	 * Clean up the NMI handler. Do this after the callin and callout sync
	 * to avoid impact of possible long unregister time.
	 */
	if (cpu0_nmi_registered)
		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");

	return ret;
}
1213
1214
1215
1216
/**
 * arch_disable_smp_support() - Disable SMP support on x86 at runtime
 */
void arch_disable_smp_support(void)
{
	disable_ioapic_support();
}
1221
1222
1223
1224
1225
1226
/*
 * Fall back to non-SMP mode: restrict present/possible masks to CPU0
 * and set up its topology masks as a singleton.
 */
static __init void disable_smp(void)
{
	pr_info("SMP disabled\n");

	disable_ioapic_support();

	init_cpu_present(cpumask_of(0));
	init_cpu_possible(cpumask_of(0));

	if (smp_found_config)
		physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
	else
		physid_set_mask_of_physid(0, &phys_cpu_present_map);
	cpumask_set_cpu(0, topology_sibling_cpumask(0));
	cpumask_set_cpu(0, topology_core_cpumask(0));
	cpumask_set_cpu(0, topology_die_cpumask(0));
}
1244
1245
1246
1247
/*
 * Various sanity checks before starting the APs.
 */
static void __init smp_sanity_check(void)
{
	preempt_disable();

#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32)
	/* 32-bit non-bigsmp kernels can address at most 8 CPUs. */
	if (def_to_bigsmp && nr_cpu_ids > 8) {
		unsigned int cpu;
		unsigned nr;

		pr_warn("More than 8 CPUs detected - skipping them\n"
			"Use CONFIG_X86_BIGSMP\n");

		nr = 0;
		for_each_present_cpu(cpu) {
			if (nr >= 8)
				set_cpu_present(cpu, false);
			nr++;
		}

		nr = 0;
		for_each_possible_cpu(cpu) {
			if (nr >= 8)
				set_cpu_possible(cpu, false);
			nr++;
		}

		nr_cpu_ids = 8;
	}
#endif

	/* The boot CPU must be in the physical present map. */
	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
		pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n",
			hard_smp_processor_id());

		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 */
	if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
		pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n",
			  boot_cpu_physical_apicid);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}
	preempt_enable();
}
1296
1297static void __init smp_cpu_index_default(void)
1298{
1299 int i;
1300 struct cpuinfo_x86 *c;
1301
1302 for_each_possible_cpu(i) {
1303 c = &cpu_data(i);
1304
1305 c->cpu_index = nr_cpu_ids;
1306 }
1307}
1308
1309static void __init smp_get_logical_apicid(void)
1310{
1311 if (x2apic_mode)
1312 cpu0_logical_apicid = apic_read(APIC_LDR);
1313 else
1314 cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
1315}
1316
1317
1318
1319
1320
1321
/*
 * Prepare for SMP bootup: finalize boot CPU info, allocate the per-CPU
 * topology masks, install the scheduler topology and perform sanity
 * checks / APIC mode handling.
 * @max_cpus: configured maximum number of CPUs (unused here)
 */
void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned int i;

	smp_cpu_index_default();

	/*
	 * Setup boot CPU information
	 */
	smp_store_boot_cpu_info(); /* Final full version of the data */
	cpumask_copy(cpu_callin_mask, cpumask_of(0));
	mb();

	for_each_possible_cpu(i) {
		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
	}

	/*
	 * Set the default x86 topology; native_smp_cpus_done() may later
	 * replace it with x86_numa_in_package_topology when a package
	 * turns out to contain multiple NUMA nodes.
	 */
	set_sched_topology(x86_topology);

	set_cpu_sibling_map(0);
	init_freq_invariance(false);
	smp_sanity_check();

	switch (apic_intr_mode) {
	case APIC_PIC:
	case APIC_VIRTUAL_WIRE_NO_CONFIG:
		disable_smp();
		return;
	case APIC_SYMMETRIC_IO_NO_ROUTING:
		disable_smp();
		/* Setup local timer */
		x86_init.timers.setup_percpu_clockev();
		return;
	case APIC_VIRTUAL_WIRE:
	case APIC_SYMMETRIC_IO:
		break;
	}

	/* Setup local timer */
	x86_init.timers.setup_percpu_clockev();

	smp_get_logical_apicid();

	pr_info("CPU0: ");
	print_cpu_info(&cpu_data(0));

	uv_system_init();

	set_mtrr_aps_delayed_init();

	smp_quirk_init_udelay();

	speculative_store_bypass_ht_init();
}
1386
/* Delay MTRR init on the APs until all of them are brought back up. */
void arch_enable_nonboot_cpus_begin(void)
{
	set_mtrr_aps_delayed_init();
}
1391
/* All non-boot CPUs are back up: perform the delayed MTRR init. */
void arch_enable_nonboot_cpus_end(void)
{
	mtrr_aps_init();
}
1396
1397
1398
1399
/*
 * Early setup of the boot CPU: switch to its proper GDT, mark it as
 * called out and online, and initialize paravirt spinlocks.
 */
void __init native_smp_prepare_boot_cpu(void)
{
	int me = smp_processor_id();
	switch_to_new_gdt(me);
	/* boot CPU is already in cpu_online_mask */
	cpumask_set_cpu(me, cpu_callout_mask);
	cpu_set_state_online(me);
	native_pv_lock_init();
}
1409
void __init calculate_max_logical_packages(void)
{
	int ncpus;

	/*
	 * Extrapolate the boot CPU's data (cores * SMT threads) to all
	 * packages to derive an upper bound on the number of packages.
	 */
	ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
	pr_info("Max logical packages: %u\n", __max_logical_packages);
}
1422
/* Finalize SMP bringup once all CPUs have been booted. */
void __init native_smp_cpus_done(unsigned int max_cpus)
{
	pr_debug("Boot done\n");

	calculate_max_logical_packages();

	/* A package spanned NUMA nodes: drop the DIE topology level. */
	if (x86_has_numa_in_package)
		set_sched_topology(x86_numa_in_package_topology);

	nmi_selftest();
	impress_friends();
	mtrr_aps_init();
}
1436
/* "possible_cpus=" command line override; -1 means "not set". */
static int __initdata setup_possible_cpus = -1;
static int __init _setup_possible_cpus(char *str)
{
	get_option(&str, &setup_possible_cpus);
	return 0;
}
early_param("possible_cpus", _setup_possible_cpus);
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
/*
 * Compute and populate cpu_possible_mask from the number of enumerated
 * processors, hotplug-disabled CPUs and the "possible_cpus="/"maxcpus="
 * command line limits.
 */
__init void prefill_possible_map(void)
{
	int i, possible;

	/* No processor was found in mptable or ACPI MADT */
	if (!num_processors) {
		if (boot_cpu_has(X86_FEATURE_APIC)) {
			int apicid = boot_cpu_physical_apicid;
			int cpu = hard_smp_processor_id();

			pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);

			/* Make sure boot cpu is enumerated */
			if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
			    apic->apic_id_valid(apicid))
				generic_processor_info(apicid, boot_cpu_apic_version);
		}

		if (!num_processors)
			num_processors = 1;
	}

	i = setup_max_cpus ?: 1;
	if (setup_possible_cpus == -1) {
		possible = num_processors;
#ifdef CONFIG_HOTPLUG_CPU
		if (setup_max_cpus)
			possible += disabled_cpus;
#else
		if (possible > i)
			possible = i;
#endif
	} else
		possible = setup_possible_cpus;

	total_cpus = max_t(int, possible, num_processors + disabled_cpus);

	/* nr_cpu_ids could be reduced via nr_cpus= parameter */
	if (possible > nr_cpu_ids) {
		pr_warn("%d Processors exceeds NR_CPUS limit of %u\n",
			possible, nr_cpu_ids);
		possible = nr_cpu_ids;
	}

#ifdef CONFIG_HOTPLUG_CPU
	if (!setup_max_cpus)
#endif
	if (possible > i) {
		pr_warn("%d Processors exceeds max_cpus limit of %u\n",
			possible, setup_max_cpus);
		possible = i;
	}

	nr_cpu_ids = possible;

	pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
		possible, max_t(int, possible - num_processors, 0));

	reset_cpu_possible_mask();

	for (i = 0; i < possible; i++)
		set_cpu_possible(i, true);
}
1526
1527#ifdef CONFIG_HOTPLUG_CPU
1528
1529
1530static void recompute_smt_state(void)
1531{
1532 int max_threads, cpu;
1533
1534 max_threads = 0;
1535 for_each_online_cpu (cpu) {
1536 int threads = cpumask_weight(topology_sibling_cpumask(cpu));
1537
1538 if (threads > max_threads)
1539 max_threads = threads;
1540 }
1541 __max_smt_threads = max_threads;
1542}
1543
/*
 * Remove @cpu from all topology masks of its siblings, fix up
 * booted_cores accounting and clear its own masks.
 */
static void remove_siblinginfo(int cpu)
{
	int sibling;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	for_each_cpu(sibling, topology_core_cpumask(cpu)) {
		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
		/*
		 * last thread sibling in this cpu core going down
		 */
		if (cpumask_weight(topology_sibling_cpumask(cpu)) == 1)
			cpu_data(sibling).booted_cores--;
	}

	for_each_cpu(sibling, topology_die_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_die_cpumask(sibling));
	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
	for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
		cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
	cpumask_clear(cpu_llc_shared_mask(cpu));
	cpumask_clear(topology_sibling_cpumask(cpu));
	cpumask_clear(topology_core_cpumask(cpu));
	cpumask_clear(topology_die_cpumask(cpu));
	c->cpu_core_id = 0;
	c->booted_cores = 0;
	cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
	recompute_smt_state();
}
1573
/* Remove @cpu from the online/callout/callin/initialized/NUMA maps. */
static void remove_cpu_from_maps(int cpu)
{
	set_cpu_online(cpu, false);
	cpumask_clear_cpu(cpu, cpu_callout_mask);
	cpumask_clear_cpu(cpu, cpu_callin_mask);
	/* was set by cpu_init() */
	cpumask_clear_cpu(cpu, cpu_initialized_mask);
	numa_remove_cpu(cpu);
}
1583
/* Common teardown when taking the current CPU offline. */
void cpu_disable_common(void)
{
	int cpu = smp_processor_id();

	remove_siblinginfo(cpu);

	/* It's now safe to remove this processor from the online map */
	lock_vector_lock();
	remove_cpu_from_maps(cpu);
	unlock_vector_lock();
	fixup_irqs();
	lapic_offline();
}
1597
/* Take the current CPU offline; returns 0 or an error from the lapic check. */
int native_cpu_disable(void)
{
	int ret;

	ret = lapic_can_unplug_cpu();
	if (ret)
		return ret;

	/*
	 * Disable the local APIC. Otherwise IPI broadcasts will reach
	 * it. It still responds normally to INIT, NMI, SMI, and SIPI
	 * messages.
	 */
	apic_soft_disable();
	cpu_disable_common();

	return 0;
}
1616
1617int common_cpu_die(unsigned int cpu)
1618{
1619 int ret = 0;
1620
1621
1622
1623
1624 if (cpu_wait_death(cpu, 5)) {
1625 if (system_state == SYSTEM_RUNNING)
1626 pr_info("CPU %u is now offline\n", cpu);
1627 } else {
1628 pr_err("CPU %u didn't die...\n", cpu);
1629 ret = -1;
1630 }
1631
1632 return ret;
1633}
1634
/*
 * Native ->cpu_die() callback: wait for the CPU to die.  The result of
 * common_cpu_die() is deliberately discarded -- a timeout was already
 * logged there.
 */
void native_cpu_die(unsigned int cpu)
{
	(void)common_cpu_die(cpu);
}
1639
/*
 * Common preparation before a CPU parks itself: detach from the idle
 * task, report death to the CPU waiting in common_cpu_die(), and shut
 * interrupts off for good.
 */
void play_dead_common(void)
{
	idle_task_exit();

	/* Ack it: tells common_cpu_die() we are gone */
	(void)cpu_report_death();

	/*
	 * From this point on, interrupts must stay disabled; the CPU
	 * only leaves via a wakeup mechanism (NMI/INIT) or power off.
	 */
	local_irq_disable();
}
1652
1653static bool wakeup_cpu0(void)
1654{
1655 if (smp_processor_id() == 0 && enable_start_cpu0)
1656 return true;
1657
1658 return false;
1659}
1660
1661
1662
1663
1664
1665static inline void mwait_play_dead(void)
1666{
1667 unsigned int eax, ebx, ecx, edx;
1668 unsigned int highest_cstate = 0;
1669 unsigned int highest_subcstate = 0;
1670 void *mwait_ptr;
1671 int i;
1672
1673 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
1674 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
1675 return;
1676 if (!this_cpu_has(X86_FEATURE_MWAIT))
1677 return;
1678 if (!this_cpu_has(X86_FEATURE_CLFLUSH))
1679 return;
1680 if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
1681 return;
1682
1683 eax = CPUID_MWAIT_LEAF;
1684 ecx = 0;
1685 native_cpuid(&eax, &ebx, &ecx, &edx);
1686
1687
1688
1689
1690
1691 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
1692 eax = 0;
1693 } else {
1694 edx >>= MWAIT_SUBSTATE_SIZE;
1695 for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
1696 if (edx & MWAIT_SUBSTATE_MASK) {
1697 highest_cstate = i;
1698 highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
1699 }
1700 }
1701 eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
1702 (highest_subcstate - 1);
1703 }
1704
1705
1706
1707
1708
1709
1710 mwait_ptr = ¤t_thread_info()->flags;
1711
1712 wbinvd();
1713
1714 while (1) {
1715
1716
1717
1718
1719
1720
1721
1722 mb();
1723 clflush(mwait_ptr);
1724 mb();
1725 __monitor(mwait_ptr, 0, 0);
1726 mb();
1727 __mwait(eax, 0);
1728
1729
1730
1731 if (wakeup_cpu0())
1732 start_cpu0();
1733 }
1734}
1735
/*
 * Last-resort play_dead: loop on HLT forever.  Write back and
 * invalidate caches first on CPUs with on-chip cache (family >= 4),
 * since a dead CPU's cache may no longer be snooped.
 */
void hlt_play_dead(void)
{
	if (__this_cpu_read(cpu_info.x86) >= 4)
		wbinvd();

	while (1) {
		native_halt();

		/* If an NMI wants to wake up CPU0, restart it. */
		if (wakeup_cpu0())
			start_cpu0();
	}
}
1750
/*
 * Native ->play_dead() callback: common teardown, tboot notification,
 * then park the CPU -- preferring MWAIT, then a cpuidle driver's
 * play_dead, finally plain HLT.
 */
void native_play_dead(void)
{
	play_dead_common();
	tboot_shutdown(TB_SHUTDOWN_WFS);

	mwait_play_dead();	/* returns only if MWAIT is unusable */
	if (cpuidle_play_dead())
		hlt_play_dead();
}
1760
1761#else
/* CPU hotplug is not configured: offlining is never possible. */
int native_cpu_disable(void)
{
	return -ENOSYS;
}
1766
void native_cpu_die(unsigned int cpu)
{
	/* Unreachable: native_cpu_disable() already said "no". */
	BUG();
}
1772
/* Must never be reached when CPU hotplug is disabled. */
void native_play_dead(void)
{
	BUG();
}
1777
1778#endif
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
/* Enabled once frequency-invariant accounting is usable (see below). */
DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);

/* Per-CPU APERF/MPERF snapshots taken at the previous scale-freq tick. */
static DEFINE_PER_CPU(u64, arch_prev_aperf);
static DEFINE_PER_CPU(u64, arch_prev_mperf);
/* turbo/base ratio and the currently applied max ratio, both scaled by
 * SCHED_CAPACITY_SCALE; default to 1:1 until measured. */
static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
1820
1821void arch_set_max_freq_ratio(bool turbo_disabled)
1822{
1823 arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
1824 arch_turbo_freq_ratio;
1825}
1826
1827static bool turbo_disabled(void)
1828{
1829 u64 misc_en;
1830 int err;
1831
1832 err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
1833 if (err)
1834 return false;
1835
1836 return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
1837}
1838
1839static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
1840{
1841 int err;
1842
1843 err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
1844 if (err)
1845 return false;
1846
1847 err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
1848 if (err)
1849 return false;
1850
1851 *base_freq = (*base_freq >> 16) & 0x3F;
1852 *turbo_freq = *turbo_freq & 0x3F;
1853
1854 return true;
1855}
1856
1857#include <asm/cpu_device_id.h>
1858#include <asm/intel-family.h>
1859
/* Match Intel family-6 @model parts that expose APERF/MPERF. */
#define ICPU(model) \
	{X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF, 0}

/* Xeon Phi: MSR_TURBO_RATIO_LIMIT holds delta-encoded ratios. */
static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
	ICPU(INTEL_FAM6_XEON_PHI_KNL),
	ICPU(INTEL_FAM6_XEON_PHI_KNM),
	{}
};

/* Skylake-X: turbo ratios are grouped by active-core-count buckets. */
static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
	ICPU(INTEL_FAM6_SKYLAKE_X),
	{}
};

/* Goldmont: SKX-style MSR layout, read with group size 1. */
static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
	ICPU(INTEL_FAM6_ATOM_GOLDMONT),
	ICPU(INTEL_FAM6_ATOM_GOLDMONT_D),
	ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS),
	{}
};
1880
1881static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
1882 int num_delta_fratio)
1883{
1884 int fratio, delta_fratio, found;
1885 int err, i;
1886 u64 msr;
1887
1888 err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
1889 if (err)
1890 return false;
1891
1892 *base_freq = (*base_freq >> 8) & 0xFF;
1893
1894 err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
1895 if (err)
1896 return false;
1897
1898 fratio = (msr >> 8) & 0xFF;
1899 i = 16;
1900 found = 0;
1901 do {
1902 if (found >= num_delta_fratio) {
1903 *turbo_freq = fratio;
1904 return true;
1905 }
1906
1907 delta_fratio = (msr >> (i + 5)) & 0x7;
1908
1909 if (delta_fratio) {
1910 found += 1;
1911 fratio -= delta_fratio;
1912 }
1913
1914 i += 8;
1915 } while (i < 64);
1916
1917 return true;
1918}
1919
1920static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
1921{
1922 u64 ratios, counts;
1923 u32 group_size;
1924 int err, i;
1925
1926 err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
1927 if (err)
1928 return false;
1929
1930 *base_freq = (*base_freq >> 8) & 0xFF;
1931
1932 err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
1933 if (err)
1934 return false;
1935
1936 err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
1937 if (err)
1938 return false;
1939
1940 for (i = 0; i < 64; i += 8) {
1941 group_size = (counts >> i) & 0xFF;
1942 if (group_size >= size) {
1943 *turbo_freq = (ratios >> i) & 0xFF;
1944 return true;
1945 }
1946 }
1947
1948 return false;
1949}
1950
1951static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
1952{
1953 u64 msr;
1954 int err;
1955
1956 err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
1957 if (err)
1958 return false;
1959
1960 err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
1961 if (err)
1962 return false;
1963
1964 *base_freq = (*base_freq >> 8) & 0xFF;
1965 *turbo_freq = (msr >> 24) & 0xFF;
1966
1967
1968 if (!*turbo_freq)
1969 *turbo_freq = msr & 0xFF;
1970
1971 return true;
1972}
1973
1974static bool intel_set_max_freq_ratio(void)
1975{
1976 u64 base_freq, turbo_freq;
1977
1978 if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
1979 goto out;
1980
1981 if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
1982 skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
1983 goto out;
1984
1985 if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
1986 knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
1987 goto out;
1988
1989 if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
1990 skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
1991 goto out;
1992
1993 if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
1994 goto out;
1995
1996 return false;
1997
1998out:
1999
2000
2001
2002
2003 if (!base_freq) {
2004 pr_debug("Couldn't determine cpu base frequency, necessary for scale-invariant accounting.\n");
2005 return false;
2006 }
2007
2008 arch_turbo_freq_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE,
2009 base_freq);
2010 arch_set_max_freq_ratio(turbo_disabled());
2011 return true;
2012}
2013
/*
 * Seed this CPU's APERF/MPERF snapshots so arch_scale_freq_tick() has
 * a valid baseline for its first delta computation.
 */
static void init_counter_refs(void)
{
	u64 aperf, mperf;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);

	this_cpu_write(arch_prev_aperf, aperf);
	this_cpu_write(arch_prev_mperf, mperf);
}
2024
2025static void init_freq_invariance(bool secondary)
2026{
2027 bool ret = false;
2028
2029 if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
2030 return;
2031
2032 if (secondary) {
2033 if (static_branch_likely(&arch_scale_freq_key)) {
2034 init_counter_refs();
2035 }
2036 return;
2037 }
2038
2039 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
2040 ret = intel_set_max_freq_ratio();
2041
2042 if (ret) {
2043 init_counter_refs();
2044 static_branch_enable(&arch_scale_freq_key);
2045 } else {
2046 pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
2047 }
2048}
2049
/* Per-CPU frequency scale factor consumed by the scheduler; starts at 1:1. */
DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
2051
/*
 * Scheduler tick hook: update this CPU's arch_freq_scale from the
 * APERF/MPERF deltas since the previous tick, i.e. roughly
 * (delta_APERF << SHIFT) / (delta_MPERF * max_freq_ratio), clamped to
 * SCHED_CAPACITY_SCALE.
 */
void arch_scale_freq_tick(void)
{
	u64 freq_scale;
	u64 aperf, mperf;
	u64 acnt, mcnt;

	if (!arch_scale_freq_invariant())
		return;

	rdmsrl(MSR_IA32_APERF, aperf);
	rdmsrl(MSR_IA32_MPERF, mperf);

	acnt = aperf - this_cpu_read(arch_prev_aperf);
	mcnt = mperf - this_cpu_read(arch_prev_mperf);
	/* No MPERF progress (e.g. counters unchanged): keep old scale. */
	if (!mcnt)
		return;

	this_cpu_write(arch_prev_aperf, aperf);
	this_cpu_write(arch_prev_mperf, mperf);

	/*
	 * Shift by 2*SHIFT because arch_max_freq_ratio (folded into
	 * mcnt below) already carries one SCHED_CAPACITY_SCALE factor.
	 */
	acnt <<= 2*SCHED_CAPACITY_SHIFT;
	mcnt *= arch_max_freq_ratio;

	freq_scale = div64_u64(acnt, mcnt);

	if (freq_scale > SCHED_CAPACITY_SCALE)
		freq_scale = SCHED_CAPACITY_SCALE;

	this_cpu_write(arch_freq_scale, freq_scale);
}
2082