1
2#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
3
4#include <linux/kernel.h>
5#include <linux/sched.h>
6#include <linux/sched/clock.h>
7#include <linux/init.h>
8#include <linux/export.h>
9#include <linux/timer.h>
10#include <linux/acpi_pmtmr.h>
11#include <linux/cpufreq.h>
12#include <linux/delay.h>
13#include <linux/clocksource.h>
14#include <linux/percpu.h>
15#include <linux/timex.h>
16#include <linux/static_key.h>
17#include <linux/static_call.h>
18
19#include <asm/hpet.h>
20#include <asm/timer.h>
21#include <asm/vgtod.h>
22#include <asm/time.h>
23#include <asm/delay.h>
24#include <asm/hypervisor.h>
25#include <asm/nmi.h>
26#include <asm/x86_init.h>
27#include <asm/geode.h>
28#include <asm/apic.h>
29#include <asm/intel-family.h>
30#include <asm/i8259.h>
31#include <asm/uv/uv.h>
32
/* CPU frequency in kHz, filled in by the calibration code below. */
unsigned int __read_mostly cpu_khz;
EXPORT_SYMBOL(cpu_khz);

/* TSC frequency in kHz, filled in by the calibration code below. */
unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);

/* Scale factor between kHz and MHz (for printing) and between kHz and Hz. */
#define KHZ 1000

/*
 * Non-zero once the TSC has been marked unstable, either through
 * mark_tsc_unstable() or through the clocksource ->mark_unstable callback.
 */
static int __read_mostly tsc_unstable;
/* Value of the "tsc_early_khz" early parameter; 0 when not supplied. */
static unsigned int __initdata tsc_early_khz;

/* Switches native_sched_clock() from the jiffies fallback to the TSC. */
static DEFINE_STATIC_KEY_FALSE(__use_tsc);

/* Set when the TSC is considered reliable ("tsc=reliable" or CPU feature). */
int tsc_clocksource_reliable;

/* ART -> TSC conversion parameters, filled in by detect_art(). */
static u32 art_to_tsc_numerator;
static u32 art_to_tsc_denominator;
static u64 art_to_tsc_offset;
/* Clocksource reported by convert_art_to_tsc()/convert_art_ns_to_tsc(). */
struct clocksource *art_related_clocksource;
55
/*
 * Per-CPU cycles -> nanoseconds conversion state: two copies of the
 * conversion parameters plus a latch sequence counter.  Readers pick the
 * copy selected by the low bit of the sequence (see cyc2ns_read_begin()).
 */
struct cyc2ns {
	struct cyc2ns_data data[2];
	seqcount_latch_t   seq;

};

static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
63
/*
 * Handler for the "tsc_early_khz" early parameter: parse @buf as an
 * unsigned integer (base auto-detected) into tsc_early_khz.
 *
 * Returns the kstrtouint() result.
 */
static int __init tsc_early_khz_setup(char *buf)
{
	return kstrtouint(buf, 0, &tsc_early_khz);
}
early_param("tsc_early_khz", tsc_early_khz_setup);
69
/*
 * Snapshot this CPU's current cyc2ns conversion parameters into @data.
 *
 * Preemption is disabled (notrace variant) and is left disabled on return;
 * the caller must pair this with cyc2ns_read_end().
 */
__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
{
	int seq, idx;

	preempt_disable_notrace();

	do {
		seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
		/* The low bit of the sequence selects which copy to read. */
		idx = seq & 1;

		data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
		data->cyc2ns_mul    = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
		data->cyc2ns_shift  = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);

		/* Retry if the sequence changed while the copy was being read. */
	} while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
}
86
/*
 * Counterpart of cyc2ns_read_begin(): re-enables the preemption that
 * cyc2ns_read_begin() disabled.
 */
__always_inline void cyc2ns_read_end(void)
{
	preempt_enable_notrace();
}
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
117{
118 struct cyc2ns_data data;
119 unsigned long long ns;
120
121 cyc2ns_read_begin(&data);
122
123 ns = data.cyc2ns_offset;
124 ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
125
126 cyc2ns_read_end();
127
128 return ns;
129}
130
/*
 * Install a new cycles -> ns conversion for @cpu.
 *
 * @khz:     new TSC frequency in kHz
 * @cpu:     CPU whose per-CPU cyc2ns data is updated
 * @tsc_now: TSC value at which the switch-over happens
 *
 * The new offset is chosen so that the new parameters map @tsc_now to the
 * same nanosecond value as the old ones.  Note that the old value is
 * computed with cycles_2_ns(), i.e. with the parameters of the CPU this
 * code runs on.
 */
static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
	unsigned long long ns_now;
	struct cyc2ns_data data;
	struct cyc2ns *c2n;

	/* Nanosecond value of @tsc_now under the current parameters. */
	ns_now = cycles_2_ns(tsc_now);

	/* Derive mult/shift for converting @khz cycles into nanoseconds. */
	clocks_calc_mult_shift(&data.cyc2ns_mul, &data.cyc2ns_shift, khz,
			       NSEC_PER_MSEC, 0);

	/*
	 * Keep the shift below 32: use 31 and halve the multiplier, which
	 * describes the same ratio.
	 * NOTE(review): presumably a consumer of cyc2ns_shift cannot handle
	 * a shift of 32 -- confirm which one.
	 */
	if (data.cyc2ns_shift == 32) {
		data.cyc2ns_shift = 31;
		data.cyc2ns_mul >>= 1;
	}

	/* Pick the offset that keeps the conversion continuous at @tsc_now. */
	data.cyc2ns_offset = ns_now -
		mul_u64_u32_shr(tsc_now, data.cyc2ns_mul, data.cyc2ns_shift);

	c2n = per_cpu_ptr(&cyc2ns, cpu);

	/*
	 * Latch update: bump the sequence, write one copy, bump it again and
	 * write the other copy.
	 */
	raw_write_seqcount_latch(&c2n->seq);
	c2n->data[0] = data;
	raw_write_seqcount_latch(&c2n->seq);
	c2n->data[1] = data;
}
168
/*
 * Update the cyc2ns conversion of @cpu to frequency @khz at TSC value
 * @tsc_now.  Runs with local interrupts disabled and brackets the update
 * with the sched_clock idle sleep/wakeup events.  A @khz of 0 leaves the
 * conversion parameters untouched.
 */
static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
	unsigned long flags;

	local_irq_save(flags);
	sched_clock_idle_sleep_event();

	if (khz)
		__set_cyc2ns_scale(khz, cpu, tsc_now);

	sched_clock_idle_wakeup_event();
	local_irq_restore(flags);
}
182
183
184
185
/*
 * Initialize the cyc2ns state of the CPU this runs on: set up its latch
 * sequence counter and install the conversion for the current tsc_khz,
 * anchored at the current TSC value.
 */
static void __init cyc2ns_init_boot_cpu(void)
{
	struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);

	seqcount_latch_init(&c2n->seq);
	__set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc());
}
193
194
195
196
197
198
199static void __init cyc2ns_init_secondary_cpus(void)
200{
201 unsigned int cpu, this_cpu = smp_processor_id();
202 struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
203 struct cyc2ns_data *data = c2n->data;
204
205 for_each_possible_cpu(cpu) {
206 if (cpu != this_cpu) {
207 seqcount_latch_init(&c2n->seq);
208 c2n = per_cpu_ptr(&cyc2ns, cpu);
209 c2n->data[0] = data[0];
210 c2n->data[1] = data[1];
211 }
212 }
213}
214
215
216
217
218u64 native_sched_clock(void)
219{
220 if (static_branch_likely(&__use_tsc)) {
221 u64 tsc_now = rdtsc();
222
223
224 return cycles_2_ns(tsc_now);
225 }
226
227
228
229
230
231
232
233
234
235
236
237 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
238}
239
240
241
242
/*
 * Convert the caller-supplied TSC value @tsc to nanoseconds with this CPU's
 * cyc2ns parameters, i.e. the value native_sched_clock() would produce for
 * that TSC reading when the TSC is in use.
 */
u64 native_sched_clock_from_tsc(u64 tsc)
{
	return cycles_2_ns(tsc);
}
247
248
249
/*
 * sched_clock() entry point.  With CONFIG_PARAVIRT it dispatches through
 * paravirt_sched_clock(); otherwise it is a plain alias of
 * native_sched_clock().
 */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}

/* True when the pv_sched_clock static call targets native_sched_clock(). */
bool using_native_sched_clock(void)
{
	return static_call_query(pv_sched_clock) == native_sched_clock;
}
#else
unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));

/* Without paravirt the native implementation is the only one. */
bool using_native_sched_clock(void) { return true; }
#endif
266
/* Returns non-zero if the TSC has been marked unstable. */
int check_tsc_unstable(void)
{
	return tsc_unstable;
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);
272
/*
 * Handler for the "notsc" boot option.
 *
 * With CONFIG_X86_TSC the TSC is only marked unstable; without it the TSC
 * CPU capability is cleared altogether.  Always returns 1.
 */
#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
	mark_tsc_unstable("boot parameter notsc");
	return 1;
}
#else
int __init notsc_setup(char *str)
{
	setup_clear_cpu_cap(X86_FEATURE_TSC);
	return 1;
}
#endif

__setup("notsc", notsc_setup);
292
293static int no_sched_irq_time;
294static int no_tsc_watchdog;
295
296static int __init tsc_setup(char *str)
297{
298 if (!strcmp(str, "reliable"))
299 tsc_clocksource_reliable = 1;
300 if (!strncmp(str, "noirqtime", 9))
301 no_sched_irq_time = 1;
302 if (!strcmp(str, "unstable"))
303 mark_tsc_unstable("boot parameter");
304 if (!strcmp(str, "nowatchdog"))
305 no_tsc_watchdog = 1;
306 return 1;
307}
308
309__setup("tsc=", tsc_setup);
310
/* Number of attempts tsc_read_refs() makes to get an undisturbed sample. */
#define MAX_RETRIES 5
/* Acceptance threshold (in TSC cycles) used while tsc_khz is still 0. */
#define TSC_DEFAULT_THRESHOLD 0x20000

/*
 * Read the reference counter (HPET when @hpet is non-zero, ACPI PM timer
 * otherwise) into *@p, bracketed by two TSC reads.
 *
 * A sample is accepted when the two TSC reads are less than the threshold
 * apart (tsc_khz >> 5 cycles, or TSC_DEFAULT_THRESHOLD if tsc_khz is 0).
 *
 * Returns the second TSC read of the accepted sample, or ULLONG_MAX if all
 * MAX_RETRIES attempts were too slow.  *@p is written on every attempt.
 */
static u64 tsc_read_refs(u64 *p, int hpet)
{
	u64 t1, t2;
	u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD;
	int i;

	for (i = 0; i < MAX_RETRIES; i++) {
		t1 = get_cycles();
		if (hpet)
			*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
		else
			*p = acpi_pm_read_early();
		t2 = get_cycles();
		if ((t2 - t1) < thresh)
			return t2;
	}
	return ULLONG_MAX;
}
335
336
337
338
339static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
340{
341 u64 tmp;
342
343 if (hpet2 < hpet1)
344 hpet2 += 0x100000000ULL;
345 hpet2 -= hpet1;
346 tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
347 do_div(tmp, 1000000);
348 deltatsc = div64_u64(deltatsc, tmp);
349
350 return (unsigned long) deltatsc;
351}
352
353
354
355
356static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
357{
358 u64 tmp;
359
360 if (!pm1 && !pm2)
361 return ULONG_MAX;
362
363 if (pm2 < pm1)
364 pm2 += (u64)ACPI_PM_OVRRUN;
365 pm2 -= pm1;
366 tmp = pm2 * 1000000000LL;
367 do_div(tmp, PMTMR_TICKS_PER_SEC);
368 do_div(deltatsc, tmp);
369
370 return (unsigned long) deltatsc;
371}
372
/*
 * Default PIT calibration parameters: interval in ms, the PIT latch value
 * for that interval, and the minimum number of polling loops required for
 * the measurement to be accepted.
 */
#define CAL_MS 10
#define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS))
#define CAL_PIT_LOOPS 1000

/* Longer-interval parameter set used after repeated PIT failures. */
#define CAL2_MS 50
#define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS))
#define CAL2_PIT_LOOPS 5000
380
381
382
383
384
385
386
387
388
/*
 * Measure the TSC frequency against a PIT countdown.
 *
 * @latch:   PIT count to program
 * @ms:      duration in milliseconds that @latch corresponds to
 * @loopmin: minimum number of polling iterations for a valid measurement
 *
 * Returns the TSC delta over the countdown divided by @ms, or ULONG_MAX if
 * there is no legacy PIC, too few polling iterations completed, or the
 * slowest iteration took more than 10x the fastest one.
 */
static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
{
	u64 tsc, t1, t2, delta;
	unsigned long tscmin, tscmax;
	int pitcnt;

	if (!has_legacy_pic()) {
		/*
		 * No PIT measurement possible: just wait 5 x 10ms and report
		 * failure.
		 * NOTE(review): presumably the delay gives the caller's
		 * HPET/PM timer reference a usable interval -- confirm.
		 */
		udelay(10 * USEC_PER_MSEC);
		udelay(10 * USEC_PER_MSEC);
		udelay(10 * USEC_PER_MSEC);
		udelay(10 * USEC_PER_MSEC);
		udelay(10 * USEC_PER_MSEC);
		return ULONG_MAX;
	}

	/*
	 * Port 0x61: clear bit 1, set bit 0.
	 * NOTE(review): on PC hardware this should be "speaker off, PIT
	 * channel 2 gate on" -- confirm against chipset documentation.
	 */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/* Program the counter: command byte, then @latch low and high byte. */
	outb(0xb0, 0x43);
	outb(latch & 0xff, 0x42);
	outb(latch >> 8, 0x42);

	tsc = t1 = t2 = get_cycles();

	pitcnt = 0;
	tscmax = 0;
	tscmin = ULONG_MAX;
	/* Poll until bit 5 of port 0x61 is set, timing every iteration. */
	while ((inb(0x61) & 0x20) == 0) {
		t2 = get_cycles();
		delta = t2 - tsc;
		tsc = t2;
		if ((unsigned long) delta < tscmin)
			tscmin = (unsigned int) delta;
		if ((unsigned long) delta > tscmax)
			tscmax = (unsigned int) delta;
		pitcnt++;
	}

	/*
	 * Reject the measurement if the loop ran too few times or a single
	 * iteration was more than 10 times slower than the fastest one.
	 */
	if (pitcnt < loopmin || tscmax > 10 * tscmin)
		return ULONG_MAX;

	/* TSC cycles per millisecond. */
	delta = t2 - t1;
	do_div(delta, ms);
	return delta;
}
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
/*
 * Read port 0x42 twice, discard the first byte and compare the second one
 * with @val.  Returns non-zero on a match.
 * NOTE(review): the two reads are presumably the LSB and MSB of the PIT
 * counter -- confirm against the programming mode set by the caller.
 */
static inline int pit_verify_msb(unsigned char val)
{
	/* First byte is ignored. */
	inb(0x42);
	return inb(0x42) == val;
}
496
/*
 * Spin (at most 50000 iterations) while pit_verify_msb(@val) keeps
 * matching, recording the TSC after every successful check.
 *
 * @tscp:   receives the TSC value taken after the last successful check
 * @deltap: receives the distance from the second-to-last recorded TSC value
 *          to a TSC read taken after the loop ended
 *
 * Returns non-zero only if more than 5 checks succeeded.
 */
static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
	int count;
	u64 tsc = 0, prev_tsc = 0;

	for (count = 0; count < 50000; count++) {
		if (!pit_verify_msb(val))
			break;
		prev_tsc = tsc;
		tsc = get_cycles();
	}
	*deltap = get_cycles() - prev_tsc;
	*tscp = tsc;

	/* Require more than 5 successful reads of @val. */
	return count > 5;
}
517
518
519
520
521
522
523
/*
 * Upper bound for the quick PIT calibration: at most MAX_QUICK_PIT_MS
 * milliseconds, expressed as a number of 256-tick steps of the PIT.
 */
#define MAX_QUICK_PIT_MS 50
#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)

/*
 * Fast TSC calibration against the PIT.
 *
 * Watches the value returned by pit_expect_msb() count down from 0xff and
 * stops as soon as the measurement uncertainty (d1 + d2) is below
 * delta >> 11 of the accumulated TSC delta.
 *
 * Returns the frequency in kHz (delta * PIT_TICK_RATE / (i * 256 * 1000)),
 * or 0 if there is no legacy PIC or the calibration failed.
 */
static unsigned long quick_pit_calibrate(void)
{
	int i;
	u64 tsc, delta;
	unsigned long d1, d2;

	if (!has_legacy_pic())
		return 0;

	/* Port 0x61: clear bit 1, set bit 0 (same setup as pit_calibrate_tsc()). */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/* Program the counter: command byte first ... */
	outb(0xb0, 0x43);

	/* ... then start value 0xffff (low byte, high byte). */
	outb(0xff, 0x42);
	outb(0xff, 0x42);

	/*
	 * One dummy read pair; the result is ignored.
	 * NOTE(review): presumably gives the counter time to start -- confirm.
	 */
	pit_verify_msb(0);

	if (pit_expect_msb(0xff, &tsc, &d1)) {
		for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
			if (!pit_expect_msb(0xff-i, &delta, &d2))
				break;

			/* TSC cycles elapsed since the 0xff reference point. */
			delta -= tsc;

			/*
			 * After the first step, extrapolate: if the error
			 * cannot drop below the limit even after the maximum
			 * number of iterations, give up right away.
			 */
			if (i == 1 &&
			    d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
				return 0;

			/* Keep iterating until the error is below delta >> 11. */
			if (d1+d2 >= delta >> 11)
				continue;

			/*
			 * Check the PIT once more: the next value must be
			 * the immediately following one.
			 */
			if (!pit_verify_msb(0xfe - i))
				break;
			goto success;
		}
	}
	pr_info("Fast TSC calibration failed\n");
	return 0;

success:
	/*
	 * i steps of 256 PIT ticks took "delta" TSC cycles:
	 * kHz = delta * PIT_TICK_RATE / (i * 256 * 1000)
	 */
	delta *= PIT_TICK_RATE;
	do_div(delta, i*256*1000);
	pr_info("Fast TSC calibration using PIT\n");
	return delta;
}
617
618
619
620
621
622unsigned long native_calibrate_tsc(void)
623{
624 unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
625 unsigned int crystal_khz;
626
627 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
628 return 0;
629
630 if (boot_cpu_data.cpuid_level < 0x15)
631 return 0;
632
633 eax_denominator = ebx_numerator = ecx_hz = edx = 0;
634
635
636 cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);
637
638 if (ebx_numerator == 0 || eax_denominator == 0)
639 return 0;
640
641 crystal_khz = ecx_hz / 1000;
642
643
644
645
646
647
648 if (crystal_khz == 0 &&
649 boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_D)
650 crystal_khz = 25000;
651
652
653
654
655
656
657 if (crystal_khz != 0)
658 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
659
660
661
662
663
664
665 if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) {
666 unsigned int eax_base_mhz, ebx, ecx, edx;
667
668 cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx);
669 crystal_khz = eax_base_mhz * 1000 *
670 eax_denominator / ebx_numerator;
671 }
672
673 if (crystal_khz == 0)
674 return 0;
675
676
677
678
679
680 if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
681 setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
682
683#ifdef CONFIG_X86_LOCAL_APIC
684
685
686
687
688
689
690 lapic_timer_period = crystal_khz * 1000 / HZ;
691#endif
692
693 return crystal_khz * ebx_numerator / eax_denominator;
694}
695
696static unsigned long cpu_khz_from_cpuid(void)
697{
698 unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx;
699
700 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
701 return 0;
702
703 if (boot_cpu_data.cpuid_level < 0x16)
704 return 0;
705
706 eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0;
707
708 cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx);
709
710 return eax_base_mhz * 1000;
711}
712
713
714
715
716
/*
 * Calibrate the CPU/TSC frequency (kHz) with the PIT, cross-checked against
 * a reference counter (HPET if enabled, ACPI PM timer otherwise).
 *
 * Up to three rounds are run.  Each round samples the reference counter
 * and TSC before and after a PIT calibration, with interrupts disabled.
 * If the lowest PIT result and the lowest reference result agree within
 * 10%, the reference result is returned immediately.  Otherwise the
 * results are evaluated after the loop (see below).
 *
 * Returns the frequency in kHz, or 0 if no method produced a value.
 */
static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
{
	u64 tsc1, tsc2, delta, ref1, ref2;
	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
	unsigned long flags, latch, ms;
	int hpet = is_hpet_enabled(), i, loopmin;

	/* Start with the short (CAL_MS) PIT interval. */
	latch = CAL_LATCH;
	ms = CAL_MS;
	loopmin = CAL_PIT_LOOPS;

	for (i = 0; i < 3; i++) {
		unsigned long tsc_pit_khz;

		/*
		 * Sample reference + TSC, run the PIT calibration, sample
		 * again -- all with interrupts off.
		 */
		local_irq_save(flags);
		tsc1 = tsc_read_refs(&ref1, hpet);
		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
		tsc2 = tsc_read_refs(&ref2, hpet);
		local_irq_restore(flags);

		/* Track the lowest PIT result seen so far. */
		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);

		/* Reference counter did not advance: no reference available. */
		if (ref1 == ref2)
			continue;

		/* One of the reference samples was disturbed. */
		if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
			continue;

		tsc2 = (tsc2 - tsc1) * 1000000LL;
		if (hpet)
			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
		else
			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);

		/* Track the lowest reference result seen so far. */
		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);

		/* PIT result as a percentage of the reference result. */
		delta = ((u64) tsc_pit_min) * 100;
		do_div(delta, tsc_ref_min);

		/*
		 * Both methods agree within 10%: done.  The reference value
		 * is the one returned.
		 */
		if (delta >= 90 && delta <= 110) {
			pr_info("PIT calibration matches %s. %d loops\n",
				hpet ? "HPET" : "PMTIMER", i + 1);
			return tsc_ref_min;
		}

		/*
		 * The PIT has failed in both rounds so far: switch to the
		 * longer CAL2 interval for the last round.
		 */
		if (i == 1 && tsc_pit_min == ULONG_MAX) {
			latch = CAL2_LATCH;
			ms = CAL2_MS;
			loopmin = CAL2_PIT_LOOPS;
		}
	}

	/* No early match: evaluate what has been collected. */
	if (tsc_pit_min == ULONG_MAX) {
		/* The PIT never produced a usable value. */
		pr_warn("Unable to calibrate against PIT\n");

		/* No reference counter either: nothing to return. */
		if (!hpet && !ref1 && !ref2) {
			pr_notice("No reference (HPET/PMTIMER) available\n");
			return 0;
		}

		/* The reference counter failed as well. */
		if (tsc_ref_min == ULONG_MAX) {
			pr_warn("HPET/PMTIMER calibration failed\n");
			return 0;
		}

		/* Fall back to the reference result alone. */
		pr_info("using %s reference calibration\n",
			hpet ? "HPET" : "PMTIMER");

		return tsc_ref_min;
	}

	/* PIT worked but there is no reference counter: use the PIT value. */
	if (!hpet && !ref1 && !ref2) {
		pr_info("Using PIT calibration value\n");
		return tsc_pit_min;
	}

	/* PIT worked but the reference failed: use the PIT value. */
	if (tsc_ref_min == ULONG_MAX) {
		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
		return tsc_pit_min;
	}

	/*
	 * Both produced values but they differ by more than 10%: report the
	 * deviation and prefer the PIT value.
	 */
	pr_warn("PIT calibration deviates from %s: %lu %lu\n",
		hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
	pr_info("Using PIT calibration value\n");
	return tsc_pit_min;
}
865
866
867
868
/*
 * Early CPU frequency calibration (kHz).  Tries, in order: CPUID, MSR and
 * the quick PIT calibration (the latter with interrupts disabled).
 * Returns the first non-zero result, or 0 if all methods fail.
 */
unsigned long native_calibrate_cpu_early(void)
{
	unsigned long flags, khz;

	khz = cpu_khz_from_cpuid();
	if (khz)
		return khz;

	khz = cpu_khz_from_msr();
	if (khz)
		return khz;

	local_irq_save(flags);
	khz = quick_pit_calibrate();
	local_irq_restore(flags);

	return khz;
}
882
883
884
885
886
/*
 * Full CPU frequency calibration (kHz): use the early methods first and
 * fall back to the PIT/HPET/PM timer calibration if they yield nothing.
 */
static unsigned long native_calibrate_cpu(void)
{
	unsigned long khz = native_calibrate_cpu_early();

	return khz ? khz : pit_hpet_ptimer_calibrate_cpu();
}
896
/*
 * Re-run the platform CPU and TSC calibration and rescale CPU 0's
 * loops_per_jiffy from the old to the new cpu_khz.
 *
 * The body is compiled only for !CONFIG_SMP; on SMP kernels this is a
 * no-op.  Nothing is done if the CPU has no TSC.
 */
void recalibrate_cpu_khz(void)
{
#ifndef CONFIG_SMP
	unsigned long cpu_khz_old = cpu_khz;

	if (!boot_cpu_has(X86_FEATURE_TSC))
		return;

	cpu_khz = x86_platform.calibrate_cpu();
	tsc_khz = x86_platform.calibrate_tsc();
	/*
	 * No TSC result: use the CPU result.  Otherwise, if the two differ
	 * by more than 10% of tsc_khz, trust tsc_khz.
	 */
	if (tsc_khz == 0)
		tsc_khz = cpu_khz;
	else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
		cpu_khz = tsc_khz;
	cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
						    cpu_khz_old, cpu_khz);
#endif
}

EXPORT_SYMBOL(recalibrate_cpu_khz);
917
918
/* sched_clock() value recorded by tsc_save_sched_clock_state(). */
static unsigned long long cyc2ns_suspend;

/*
 * Remember the current sched_clock() value so that
 * tsc_restore_sched_clock_state() can continue from it.  Does nothing
 * unless sched_clock is stable.
 */
void tsc_save_sched_clock_state(void)
{
	if (sched_clock_stable())
		cyc2ns_suspend = sched_clock();
}
928
929
930
931
932
933
934
935
936
/*
 * Make sched_clock() continue from the value recorded by
 * tsc_save_sched_clock_state().  Does nothing unless sched_clock is stable.
 *
 * The cyc2ns offset of every possible CPU is rewritten directly, both
 * copies, without going through the latch sequence counter.
 * NOTE(review): this presumably relies on no concurrent readers existing
 * at this point -- confirm against the callers.
 */
void tsc_restore_sched_clock_state(void)
{
	unsigned long long offset;
	unsigned long flags;
	int cpu;

	if (!sched_clock_stable())
		return;

	local_irq_save(flags);

	/*
	 * Zero this CPU's offsets first, so that the sched_clock() read
	 * below is computed without any offset (assuming sched_clock() is
	 * backed by the cyc2ns conversion in this file).
	 */
	this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0);
	this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0);

	/* Offset that maps the current reading onto the saved value. */
	offset = cyc2ns_suspend - sched_clock();

	for_each_possible_cpu(cpu) {
		per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset;
		per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset;
	}

	local_irq_restore(flags);
}
966
967#ifdef CONFIG_CPU_FREQ
968
969
970
971
972
973
974
975
976
977
978
979static unsigned int ref_freq;
980static unsigned long loops_per_jiffy_ref;
981static unsigned long tsc_khz_ref;
982
983static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
984 void *data)
985{
986 struct cpufreq_freqs *freq = data;
987
988 if (num_online_cpus() > 1) {
989 mark_tsc_unstable("cpufreq changes on SMP");
990 return 0;
991 }
992
993 if (!ref_freq) {
994 ref_freq = freq->old;
995 loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy;
996 tsc_khz_ref = tsc_khz;
997 }
998
999 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
1000 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
1001 boot_cpu_data.loops_per_jiffy =
1002 cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
1003
1004 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
1005 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
1006 mark_tsc_unstable("cpufreq changes");
1007
1008 set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc());
1009 }
1010
1011 return 0;
1012}
1013
/* Notifier block registered by cpufreq_register_tsc_scaling(). */
static struct notifier_block time_cpufreq_notifier_block = {
	.notifier_call  = time_cpufreq_notifier
};
1017
1018static int __init cpufreq_register_tsc_scaling(void)
1019{
1020 if (!boot_cpu_has(X86_FEATURE_TSC))
1021 return 0;
1022 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
1023 return 0;
1024 cpufreq_register_notifier(&time_cpufreq_notifier_block,
1025 CPUFREQ_TRANSITION_NOTIFIER);
1026 return 0;
1027}
1028
1029core_initcall(cpufreq_register_tsc_scaling);
1030
1031#endif
1032
/* CPUID leaf that reports the ART/TSC ratio, and the smallest valid denominator. */
#define ART_CPUID_LEAF (0x15)
#define ART_MIN_DENOMINATOR (1)

/*
 * Detect ART support and, if usable, read the ART -> TSC conversion
 * parameters (numerator, denominator, TSC_ADJUST offset) and force
 * X86_FEATURE_ART.
 */
static void __init detect_art(void)
{
	unsigned int unused[2];

	if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
		return;

	/*
	 * Not usable under a hypervisor, without a non-stop TSC, without
	 * TSC_ADJUST, or when tsc_async_resets is set.
	 */
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
	    !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
	    !boot_cpu_has(X86_FEATURE_TSC_ADJUST) ||
	    tsc_async_resets)
		return;

	/* EAX = denominator, EBX = numerator; ECX/EDX are not used. */
	cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
	      &art_to_tsc_numerator, unused, unused+1);

	if (art_to_tsc_denominator < ART_MIN_DENOMINATOR)
		return;

	rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset);

	/* Force the capability bit so that it is reported as set. */
	setup_force_cpu_cap(X86_FEATURE_ART);
}
1068
1069
1070
1071
/*
 * Clocksource ->resume callback: re-verify the TSC_ADJUST state via
 * tsc_verify_tsc_adjust(true).  @cs is unused.
 */
static void tsc_resume(struct clocksource *cs)
{
	tsc_verify_tsc_adjust(true);
}
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
/*
 * Clocksource ->read callback: return the current TSC value using the
 * ordered read variant.  @cs is unused.
 */
static u64 read_tsc(struct clocksource *cs)
{
	return (u64)rdtsc_ordered();
}
1097
/*
 * Clocksource ->mark_unstable callback.  Performs the same bookkeeping as
 * mark_tsc_unstable() (flag, sched_clock stability, irqtime) but does not
 * call clocksource_mark_unstable().  No-op if the TSC is already marked
 * unstable.
 */
static void tsc_cs_mark_unstable(struct clocksource *cs)
{
	if (tsc_unstable)
		return;

	tsc_unstable = 1;
	if (using_native_sched_clock())
		clear_sched_clock_stable();
	disable_sched_clock_irqtime();
	pr_info("Marking TSC unstable due to clocksource watchdog\n");
}
1109
1110static void tsc_cs_tick_stable(struct clocksource *cs)
1111{
1112 if (tsc_unstable)
1113 return;
1114
1115 if (using_native_sched_clock())
1116 sched_clock_tick_stable();
1117}
1118
/*
 * Clocksource ->enable callback: record that the TSC vDSO clock mode is in
 * use.  Always returns 0.  @cs is unused.
 */
static int tsc_cs_enable(struct clocksource *cs)
{
	vclocks_set_used(VDSO_CLOCKMODE_TSC);
	return 0;
}
1124
1125
1126
1127
/*
 * Early TSC clocksource, registered by tsc_init() and unregistered again
 * by init_tsc_clocksource() / tsc_refine_calibration_work().  Rated one
 * below clocksource_tsc and lacks the VALID_FOR_HRES and VERIFY_PERCPU
 * flags.
 */
static struct clocksource clocksource_tsc_early = {
	.name			= "tsc-early",
	.rating			= 299,
	.uncertainty_margin	= 32 * NSEC_PER_MSEC,
	.read			= read_tsc,
	.mask			= CLOCKSOURCE_MASK(64),
	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
				  CLOCK_SOURCE_MUST_VERIFY,
	.vdso_clock_mode	= VDSO_CLOCKMODE_TSC,
	.enable			= tsc_cs_enable,
	.resume			= tsc_resume,
	.mark_unstable		= tsc_cs_mark_unstable,
	.tick_stable		= tsc_cs_tick_stable,
	.list			= LIST_HEAD_INIT(clocksource_tsc_early.list),
};
1143
1144
1145
1146
1147
1148
/*
 * The final TSC clocksource.  The MUST_VERIFY flag may be cleared at boot
 * by tsc_disable_clocksource_watchdog(), and SUSPEND_NONSTOP may be added
 * by init_tsc_clocksource().
 */
static struct clocksource clocksource_tsc = {
	.name			= "tsc",
	.rating			= 300,
	.read			= read_tsc,
	.mask			= CLOCKSOURCE_MASK(64),
	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
				  CLOCK_SOURCE_VALID_FOR_HRES |
				  CLOCK_SOURCE_MUST_VERIFY |
				  CLOCK_SOURCE_VERIFY_PERCPU,
	.vdso_clock_mode	= VDSO_CLOCKMODE_TSC,
	.enable			= tsc_cs_enable,
	.resume			= tsc_resume,
	.mark_unstable		= tsc_cs_mark_unstable,
	.tick_stable		= tsc_cs_tick_stable,
	.list			= LIST_HEAD_INIT(clocksource_tsc.list),
};
1165
/*
 * Mark the TSC as unstable.
 *
 * @reason: text appended to the log message
 *
 * Sets tsc_unstable, clears the sched_clock stable state (when the native
 * sched_clock is in use), disables sched_clock irqtime accounting and marks
 * both TSC clocksources unstable.  Subsequent calls are no-ops.
 */
void mark_tsc_unstable(char *reason)
{
	if (tsc_unstable)
		return;

	tsc_unstable = 1;
	if (using_native_sched_clock())
		clear_sched_clock_stable();
	disable_sched_clock_irqtime();
	pr_info("Marking TSC unstable due to %s\n", reason);

	clocksource_mark_unstable(&clocksource_tsc_early);
	clocksource_mark_unstable(&clocksource_tsc);
}

EXPORT_SYMBOL_GPL(mark_tsc_unstable);
1182
/*
 * Clear CLOCK_SOURCE_MUST_VERIFY on both TSC clocksources, i.e. exempt
 * them from verification.
 */
static void __init tsc_disable_clocksource_watchdog(void)
{
	clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
	clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
}
1188
/*
 * Decide whether the TSC can be treated as reliable and whether the
 * clocksource watchdog can be disabled for it.
 */
static void __init check_system_tsc_reliable(void)
{
#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
	if (is_geode_lx()) {
		/* Bit tested in the low word of MSR_GEODE_BUSCONT_CONF0. */
#define RTSC_SUSP 0x100
		unsigned long res_low, res_high;

		rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
		/* Geode LX with RTSC_SUSP set: treat the TSC as reliable. */
		if (res_low & RTSC_SUSP)
			tsc_clocksource_reliable = 1;
	}
#endif
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
		tsc_clocksource_reliable = 1;

	/*
	 * Disable the clocksource watchdog when the CPU has a constant,
	 * non-stop TSC with TSC_ADJUST and the system has at most two
	 * online nodes.
	 */
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
	    boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
	    boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
	    nr_online_nodes <= 2)
		tsc_disable_clocksource_watchdog();
}
1223
1224
1225
1226
1227
1228int unsynchronized_tsc(void)
1229{
1230 if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
1231 return 1;
1232
1233#ifdef CONFIG_SMP
1234 if (apic_is_clustered_box())
1235 return 1;
1236#endif
1237
1238 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
1239 return 0;
1240
1241 if (tsc_clocksource_reliable)
1242 return 0;
1243
1244
1245
1246
1247 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
1248
1249 if (num_possible_cpus() > 1)
1250 return 1;
1251 }
1252
1253 return 0;
1254}
1255
1256
1257
1258
1259struct system_counterval_t convert_art_to_tsc(u64 art)
1260{
1261 u64 tmp, res, rem;
1262
1263 rem = do_div(art, art_to_tsc_denominator);
1264
1265 res = art * art_to_tsc_numerator;
1266 tmp = rem * art_to_tsc_numerator;
1267
1268 do_div(tmp, art_to_tsc_denominator);
1269 res += tmp + art_to_tsc_offset;
1270
1271 return (struct system_counterval_t) {.cs = art_related_clocksource,
1272 .cycles = res};
1273}
1274EXPORT_SYMBOL(convert_art_to_tsc);
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns)
1298{
1299 u64 tmp, res, rem;
1300
1301 rem = do_div(art_ns, USEC_PER_SEC);
1302
1303 res = art_ns * tsc_khz;
1304 tmp = rem * tsc_khz;
1305
1306 do_div(tmp, USEC_PER_SEC);
1307 res += tmp;
1308
1309 return (struct system_counterval_t) { .cs = art_related_clocksource,
1310 .cycles = res};
1311}
1312EXPORT_SYMBOL(convert_art_ns_to_tsc);
1313
1314
static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);

/*
 * Delayed work that refines tsc_khz against the HPET or ACPI PM timer over
 * an interval of about HZ jiffies, then registers clocksource_tsc and
 * unregisters clocksource_tsc_early.
 *
 * The function runs (at least) twice: the first run records the start
 * sample in the static variables and re-queues itself; the second run
 * takes the stop sample and evaluates it.  A disturbed stop sample
 * restarts the measurement.
 */
static void tsc_refine_calibration_work(struct work_struct *work)
{
	/* State kept across invocations of this work function. */
	static u64 tsc_start = ULLONG_MAX, ref_start;
	static int hpet;
	u64 tsc_stop, ref_stop, delta;
	unsigned long freq;
	int cpu;

	/* Nothing to refine if the TSC is unstable. */
	if (tsc_unstable)
		goto unreg;

	/*
	 * First invocation (or restart): take the start sample and come
	 * back in HZ jiffies.
	 */
	if (tsc_start == ULLONG_MAX) {
restart:
		/* The reference type is (re)selected together with the start sample. */
		hpet = is_hpet_enabled();
		tsc_start = tsc_read_refs(&ref_start, hpet);
		schedule_delayed_work(&tsc_irqwork, HZ);
		return;
	}

	tsc_stop = tsc_read_refs(&ref_stop, hpet);

	/* Reference counter did not advance: no reference available. */
	if (ref_start == ref_stop)
		goto out;

	/* The stop sample was disturbed: start over. */
	if (tsc_stop == ULLONG_MAX)
		goto restart;

	delta = tsc_stop - tsc_start;
	delta *= 1000000LL;
	if (hpet)
		freq = calc_hpet_ref(delta, ref_start, ref_stop);
	else
		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);

	/* Only accept a refinement within 1% of the current value. */
	if (abs(tsc_khz - freq) > tsc_khz/100)
		goto out;

	tsc_khz = freq;
	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
		(unsigned long)tsc_khz / 1000,
		(unsigned long)tsc_khz % 1000);

	/* Propagate the new frequency to the local APIC code. */
	lapic_update_tsc_freq();

	/* Update the cyc2ns conversion of every possible CPU. */
	for_each_possible_cpu(cpu)
		set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);

out:
	if (tsc_unstable)
		goto unreg;

	if (boot_cpu_has(X86_FEATURE_ART))
		art_related_clocksource = &clocksource_tsc;
	clocksource_register_khz(&clocksource_tsc, tsc_khz);
unreg:
	/* The early clocksource is removed on every exit path except re-queue. */
	clocksource_unregister(&clocksource_tsc_early);
}
1403
1404
/*
 * Device initcall: register the final TSC clocksource.
 *
 * If the TSC frequency is flagged as known, clocksource_tsc is registered
 * right away; otherwise the refinement work is scheduled and registers it
 * later.  If the TSC is already unstable only the early clocksource is
 * unregistered.  Does nothing without a TSC or without tsc_khz.
 *
 * Always returns 0.
 */
static int __init init_tsc_clocksource(void)
{
	if (!boot_cpu_has(X86_FEATURE_TSC) || !tsc_khz)
		return 0;

	/* Jumps into the block below, to the unregister step only. */
	if (tsc_unstable)
		goto unreg;

	if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
		clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;

	/* Known frequency: skip the refinement and register directly. */
	if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
		if (boot_cpu_has(X86_FEATURE_ART))
			art_related_clocksource = &clocksource_tsc;
		clocksource_register_khz(&clocksource_tsc, tsc_khz);
unreg:
		clocksource_unregister(&clocksource_tsc_early);
		return 0;
	}

	schedule_delayed_work(&tsc_irqwork, 0);
	return 0;
}

device_initcall(init_tsc_clocksource);
1437
1438static bool __init determine_cpu_tsc_frequencies(bool early)
1439{
1440
1441 WARN_ON(cpu_khz || tsc_khz);
1442
1443 if (early) {
1444 cpu_khz = x86_platform.calibrate_cpu();
1445 if (tsc_early_khz)
1446 tsc_khz = tsc_early_khz;
1447 else
1448 tsc_khz = x86_platform.calibrate_tsc();
1449 } else {
1450
1451 WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);
1452 cpu_khz = pit_hpet_ptimer_calibrate_cpu();
1453 }
1454
1455
1456
1457
1458
1459
1460 if (tsc_khz == 0)
1461 tsc_khz = cpu_khz;
1462 else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
1463 cpu_khz = tsc_khz;
1464
1465 if (tsc_khz == 0)
1466 return false;
1467
1468 pr_info("Detected %lu.%03lu MHz processor\n",
1469 (unsigned long)cpu_khz / KHZ,
1470 (unsigned long)cpu_khz % KHZ);
1471
1472 if (cpu_khz != tsc_khz) {
1473 pr_info("Detected %lu.%03lu MHz TSC",
1474 (unsigned long)tsc_khz / KHZ,
1475 (unsigned long)tsc_khz % KHZ);
1476 }
1477 return true;
1478}
1479
/*
 * Return tsc_khz * 1000 / HZ, i.e. the number of TSC cycles per jiffy.
 * The multiplication is done in 64 bits.
 */
static unsigned long __init get_loops_per_jiffy(void)
{
	u64 lpj = (u64)tsc_khz * KHZ;

	do_div(lpj, HZ);
	return lpj;
}
1487
/*
 * Switch native_sched_clock() over to the TSC: check TSC_ADJUST, set up
 * the boot CPU's cyc2ns conversion and only then enable the __use_tsc
 * static key.
 */
static void __init tsc_enable_sched_clock(void)
{
	/* TSC_ADJUST is handled before cyc2ns gets initialized. */
	tsc_store_and_check_tsc_adjust(true);
	cyc2ns_init_boot_cpu();
	static_branch_enable(&__use_tsc);
}
1495
1496void __init tsc_early_init(void)
1497{
1498 if (!boot_cpu_has(X86_FEATURE_TSC))
1499 return;
1500
1501 if (is_early_uv_system())
1502 return;
1503 if (!determine_cpu_tsc_frequencies(true))
1504 return;
1505 loops_per_jiffy = get_loops_per_jiffy();
1506
1507 tsc_enable_sched_clock();
1508}
1509
/*
 * Main TSC initialization: finish frequency calibration if the early pass
 * failed, set up cyc2ns for the secondary CPUs, enable the TSC based delay
 * loop, run the reliability checks and register the early TSC clocksource.
 */
void __init tsc_init(void)
{
	/* From here on the full calibration replaces the early-only variant. */
	if (x86_platform.calibrate_cpu == native_calibrate_cpu_early)
		x86_platform.calibrate_cpu = native_calibrate_cpu;

	if (!boot_cpu_has(X86_FEATURE_TSC)) {
		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
		return;
	}

	if (!tsc_khz) {
		/* The early pass produced no frequency: try again. */
		if (!determine_cpu_tsc_frequencies(false)) {
			mark_tsc_unstable("could not calculate TSC khz");
			setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
			return;
		}
		tsc_enable_sched_clock();
	}

	cyc2ns_init_secondary_cpus();

	if (!no_sched_irq_time)
		enable_sched_clock_irqtime();

	lpj_fine = get_loops_per_jiffy();
	use_tsc_delay();

	check_system_tsc_reliable();

	if (unsynchronized_tsc()) {
		mark_tsc_unstable("TSCs unsynchronized");
		return;
	}

	/* "tsc=reliable"/"tsc=nowatchdog" or a reliable TSC: no verification. */
	if (tsc_clocksource_reliable || no_tsc_watchdog)
		tsc_disable_clocksource_watchdog();

	clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
	detect_art();
}
1555
#ifdef CONFIG_SMP
/*
 * If the current CPU has a constant TSC, return the loops_per_jiffy value
 * of any other CPU in its topology_core_cpumask().  Returns 0 when the TSC
 * is not constant, there is no mask, or the mask contains no other CPU.
 */
unsigned long calibrate_delay_is_known(void)
{
	int sibling, cpu = smp_processor_id();
	int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
	const struct cpumask *mask = topology_core_cpumask(cpu);

	if (!constant_tsc || !mask)
		return 0;

	sibling = cpumask_any_but(mask, cpu);

	return sibling < nr_cpu_ids ? cpu_data(sibling).loops_per_jiffy : 0;
}
#endif
1578