1
2#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
3
4#include <linux/kernel.h>
5#include <linux/sched.h>
6#include <linux/sched/clock.h>
7#include <linux/init.h>
8#include <linux/export.h>
9#include <linux/timer.h>
10#include <linux/acpi_pmtmr.h>
11#include <linux/cpufreq.h>
12#include <linux/delay.h>
13#include <linux/clocksource.h>
14#include <linux/percpu.h>
15#include <linux/timex.h>
16#include <linux/static_key.h>
17#include <linux/static_call.h>
18
19#include <asm/hpet.h>
20#include <asm/timer.h>
21#include <asm/vgtod.h>
22#include <asm/time.h>
23#include <asm/delay.h>
24#include <asm/hypervisor.h>
25#include <asm/nmi.h>
26#include <asm/x86_init.h>
27#include <asm/geode.h>
28#include <asm/apic.h>
29#include <asm/intel-family.h>
30#include <asm/i8259.h>
31#include <asm/uv/uv.h>
32
33unsigned int __read_mostly cpu_khz;
34EXPORT_SYMBOL(cpu_khz);
35
36unsigned int __read_mostly tsc_khz;
37EXPORT_SYMBOL(tsc_khz);
38
39#define KHZ 1000
40
41
42
43
44static int __read_mostly tsc_unstable;
45static unsigned int __initdata tsc_early_khz;
46
47static DEFINE_STATIC_KEY_FALSE(__use_tsc);
48
49int tsc_clocksource_reliable;
50
51static u32 art_to_tsc_numerator;
52static u32 art_to_tsc_denominator;
53static u64 art_to_tsc_offset;
54struct clocksource *art_related_clocksource;
55
56struct cyc2ns {
57 struct cyc2ns_data data[2];
58 seqcount_latch_t seq;
59
60};
61
62static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
63
64static int __init tsc_early_khz_setup(char *buf)
65{
66 return kstrtouint(buf, 0, &tsc_early_khz);
67}
68early_param("tsc_early_khz", tsc_early_khz_setup);
69
70__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data)
71{
72 int seq, idx;
73
74 preempt_disable_notrace();
75
76 do {
77 seq = this_cpu_read(cyc2ns.seq.seqcount.sequence);
78 idx = seq & 1;
79
80 data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset);
81 data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul);
82 data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift);
83
84 } while (unlikely(seq != this_cpu_read(cyc2ns.seq.seqcount.sequence)));
85}
86
87__always_inline void cyc2ns_read_end(void)
88{
89 preempt_enable_notrace();
90}
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc)
117{
118 struct cyc2ns_data data;
119 unsigned long long ns;
120
121 cyc2ns_read_begin(&data);
122
123 ns = data.cyc2ns_offset;
124 ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
125
126 cyc2ns_read_end();
127
128 return ns;
129}
130
131static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
132{
133 unsigned long long ns_now;
134 struct cyc2ns_data data;
135 struct cyc2ns *c2n;
136
137 ns_now = cycles_2_ns(tsc_now);
138
139
140
141
142
143
144 clocks_calc_mult_shift(&data.cyc2ns_mul, &data.cyc2ns_shift, khz,
145 NSEC_PER_MSEC, 0);
146
147
148
149
150
151
152
153 if (data.cyc2ns_shift == 32) {
154 data.cyc2ns_shift = 31;
155 data.cyc2ns_mul >>= 1;
156 }
157
158 data.cyc2ns_offset = ns_now -
159 mul_u64_u32_shr(tsc_now, data.cyc2ns_mul, data.cyc2ns_shift);
160
161 c2n = per_cpu_ptr(&cyc2ns, cpu);
162
163 raw_write_seqcount_latch(&c2n->seq);
164 c2n->data[0] = data;
165 raw_write_seqcount_latch(&c2n->seq);
166 c2n->data[1] = data;
167}
168
/*
 * IRQ-safe wrapper around __set_cyc2ns_scale().
 *
 * Runs with local interrupts disabled and brackets the update with the
 * sched_clock idle sleep/wakeup events. A @khz of 0 skips the update but
 * still emits both events.
 */
static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
	unsigned long irq_state;

	local_irq_save(irq_state);
	sched_clock_idle_sleep_event();

	if (khz != 0)
		__set_cyc2ns_scale(khz, cpu, tsc_now);

	sched_clock_idle_wakeup_event();
	local_irq_restore(irq_state);
}
182
183
184
185
186static void __init cyc2ns_init_boot_cpu(void)
187{
188 struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
189
190 seqcount_latch_init(&c2n->seq);
191 __set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc());
192}
193
194
195
196
197
198
199static void __init cyc2ns_init_secondary_cpus(void)
200{
201 unsigned int cpu, this_cpu = smp_processor_id();
202 struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
203 struct cyc2ns_data *data = c2n->data;
204
205 for_each_possible_cpu(cpu) {
206 if (cpu != this_cpu) {
207 seqcount_latch_init(&c2n->seq);
208 c2n = per_cpu_ptr(&cyc2ns, cpu);
209 c2n->data[0] = data[0];
210 c2n->data[1] = data[1];
211 }
212 }
213}
214
215
216
217
218u64 native_sched_clock(void)
219{
220 if (static_branch_likely(&__use_tsc)) {
221 u64 tsc_now = rdtsc();
222
223
224 return cycles_2_ns(tsc_now);
225 }
226
227
228
229
230
231
232
233
234
235
236
237 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
238}
239
240
241
242
243u64 native_sched_clock_from_tsc(u64 tsc)
244{
245 return cycles_2_ns(tsc);
246}
247
248
249
250#ifdef CONFIG_PARAVIRT
251unsigned long long sched_clock(void)
252{
253 return paravirt_sched_clock();
254}
255
256bool using_native_sched_clock(void)
257{
258 return static_call_query(pv_sched_clock) == native_sched_clock;
259}
260#else
261unsigned long long
262sched_clock(void) __attribute__((alias("native_sched_clock")));
263
264bool using_native_sched_clock(void) { return true; }
265#endif
266
267int check_tsc_unstable(void)
268{
269 return tsc_unstable;
270}
271EXPORT_SYMBOL_GPL(check_tsc_unstable);
272
273#ifdef CONFIG_X86_TSC
274int __init notsc_setup(char *str)
275{
276 mark_tsc_unstable("boot parameter notsc");
277 return 1;
278}
279#else
280
281
282
283
284int __init notsc_setup(char *str)
285{
286 setup_clear_cpu_cap(X86_FEATURE_TSC);
287 return 1;
288}
289#endif
290
291__setup("notsc", notsc_setup);
292
293static int no_sched_irq_time;
294static int no_tsc_watchdog;
295
296static int __init tsc_setup(char *str)
297{
298 if (!strcmp(str, "reliable"))
299 tsc_clocksource_reliable = 1;
300 if (!strncmp(str, "noirqtime", 9))
301 no_sched_irq_time = 1;
302 if (!strcmp(str, "unstable"))
303 mark_tsc_unstable("boot parameter");
304 if (!strcmp(str, "nowatchdog"))
305 no_tsc_watchdog = 1;
306 return 1;
307}
308
309__setup("tsc=", tsc_setup);
310
311#define MAX_RETRIES 5
312#define TSC_DEFAULT_THRESHOLD 0x20000
313
314
315
316
317static u64 tsc_read_refs(u64 *p, int hpet)
318{
319 u64 t1, t2;
320 u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD;
321 int i;
322
323 for (i = 0; i < MAX_RETRIES; i++) {
324 t1 = get_cycles();
325 if (hpet)
326 *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
327 else
328 *p = acpi_pm_read_early();
329 t2 = get_cycles();
330 if ((t2 - t1) < thresh)
331 return t2;
332 }
333 return ULLONG_MAX;
334}
335
336
337
338
339static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
340{
341 u64 tmp;
342
343 if (hpet2 < hpet1)
344 hpet2 += 0x100000000ULL;
345 hpet2 -= hpet1;
346 tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
347 do_div(tmp, 1000000);
348 deltatsc = div64_u64(deltatsc, tmp);
349
350 return (unsigned long) deltatsc;
351}
352
353
354
355
356static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
357{
358 u64 tmp;
359
360 if (!pm1 && !pm2)
361 return ULONG_MAX;
362
363 if (pm2 < pm1)
364 pm2 += (u64)ACPI_PM_OVRRUN;
365 pm2 -= pm1;
366 tmp = pm2 * 1000000000LL;
367 do_div(tmp, PMTMR_TICKS_PER_SEC);
368 do_div(deltatsc, tmp);
369
370 return (unsigned long) deltatsc;
371}
372
373#define CAL_MS 10
374#define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS))
375#define CAL_PIT_LOOPS 1000
376
377#define CAL2_MS 50
378#define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS))
379#define CAL2_PIT_LOOPS 5000
380
381
382
383
384
385
386
387
388
389static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
390{
391 u64 tsc, t1, t2, delta;
392 unsigned long tscmin, tscmax;
393 int pitcnt;
394
395 if (!has_legacy_pic()) {
396
397
398
399
400
401 udelay(10 * USEC_PER_MSEC);
402 udelay(10 * USEC_PER_MSEC);
403 udelay(10 * USEC_PER_MSEC);
404 udelay(10 * USEC_PER_MSEC);
405 udelay(10 * USEC_PER_MSEC);
406 return ULONG_MAX;
407 }
408
409
410 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
411
412
413
414
415
416
417 outb(0xb0, 0x43);
418 outb(latch & 0xff, 0x42);
419 outb(latch >> 8, 0x42);
420
421 tsc = t1 = t2 = get_cycles();
422
423 pitcnt = 0;
424 tscmax = 0;
425 tscmin = ULONG_MAX;
426 while ((inb(0x61) & 0x20) == 0) {
427 t2 = get_cycles();
428 delta = t2 - tsc;
429 tsc = t2;
430 if ((unsigned long) delta < tscmin)
431 tscmin = (unsigned int) delta;
432 if ((unsigned long) delta > tscmax)
433 tscmax = (unsigned int) delta;
434 pitcnt++;
435 }
436
437
438
439
440
441
442
443
444
445
446 if (pitcnt < loopmin || tscmax > 10 * tscmin)
447 return ULONG_MAX;
448
449
450 delta = t2 - t1;
451 do_div(delta, ms);
452 return delta;
453}
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
/*
 * Read port 0x42 twice, discard the first byte and report whether the
 * second byte equals @val.
 */
static inline int pit_verify_msb(unsigned char val)
{
	/* First read is thrown away. */
	(void)inb(0x42);

	return inb(0x42) == val;
}
496
497static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
498{
499 int count;
500 u64 tsc = 0, prev_tsc = 0;
501
502 for (count = 0; count < 50000; count++) {
503 if (!pit_verify_msb(val))
504 break;
505 prev_tsc = tsc;
506 tsc = get_cycles();
507 }
508 *deltap = get_cycles() - prev_tsc;
509 *tscp = tsc;
510
511
512
513
514
515 return count > 5;
516}
517
518
519
520
521
522
523
524#define MAX_QUICK_PIT_MS 50
525#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
526
527static unsigned long quick_pit_calibrate(void)
528{
529 int i;
530 u64 tsc, delta;
531 unsigned long d1, d2;
532
533 if (!has_legacy_pic())
534 return 0;
535
536
537 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
538
539
540
541
542
543
544
545
546
547
548 outb(0xb0, 0x43);
549
550
551 outb(0xff, 0x42);
552 outb(0xff, 0x42);
553
554
555
556
557
558
559
560 pit_verify_msb(0);
561
562 if (pit_expect_msb(0xff, &tsc, &d1)) {
563 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
564 if (!pit_expect_msb(0xff-i, &delta, &d2))
565 break;
566
567 delta -= tsc;
568
569
570
571
572
573 if (i == 1 &&
574 d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
575 return 0;
576
577
578
579
580 if (d1+d2 >= delta >> 11)
581 continue;
582
583
584
585
586
587
588
589
590 if (!pit_verify_msb(0xfe - i))
591 break;
592 goto success;
593 }
594 }
595 pr_info("Fast TSC calibration failed\n");
596 return 0;
597
598success:
599
600
601
602
603
604
605
606
607
608
609
610
611
612 delta *= PIT_TICK_RATE;
613 do_div(delta, i*256*1000);
614 pr_info("Fast TSC calibration using PIT\n");
615 return delta;
616}
617
618
619
620
621
622unsigned long native_calibrate_tsc(void)
623{
624 unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
625 unsigned int crystal_khz;
626
627 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
628 return 0;
629
630 if (boot_cpu_data.cpuid_level < 0x15)
631 return 0;
632
633 eax_denominator = ebx_numerator = ecx_hz = edx = 0;
634
635
636 cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);
637
638 if (ebx_numerator == 0 || eax_denominator == 0)
639 return 0;
640
641 crystal_khz = ecx_hz / 1000;
642
643
644
645
646
647
648 if (crystal_khz == 0 &&
649 boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_D)
650 crystal_khz = 25000;
651
652
653
654
655
656
657 if (crystal_khz != 0)
658 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
659
660
661
662
663
664
665 if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) {
666 unsigned int eax_base_mhz, ebx, ecx, edx;
667
668 cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx);
669 crystal_khz = eax_base_mhz * 1000 *
670 eax_denominator / ebx_numerator;
671 }
672
673 if (crystal_khz == 0)
674 return 0;
675
676
677
678
679
680 if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
681 setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
682
683#ifdef CONFIG_X86_LOCAL_APIC
684
685
686
687
688
689
690 lapic_timer_period = crystal_khz * 1000 / HZ;
691#endif
692
693 return crystal_khz * ebx_numerator / eax_denominator;
694}
695
696static unsigned long cpu_khz_from_cpuid(void)
697{
698 unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx;
699
700 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
701 return 0;
702
703 if (boot_cpu_data.cpuid_level < 0x16)
704 return 0;
705
706 eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0;
707
708 cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx);
709
710 return eax_base_mhz * 1000;
711}
712
713
714
715
716
717static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
718{
719 u64 tsc1, tsc2, delta, ref1, ref2;
720 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
721 unsigned long flags, latch, ms;
722 int hpet = is_hpet_enabled(), i, loopmin;
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750 latch = CAL_LATCH;
751 ms = CAL_MS;
752 loopmin = CAL_PIT_LOOPS;
753
754 for (i = 0; i < 3; i++) {
755 unsigned long tsc_pit_khz;
756
757
758
759
760
761
762
763 local_irq_save(flags);
764 tsc1 = tsc_read_refs(&ref1, hpet);
765 tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
766 tsc2 = tsc_read_refs(&ref2, hpet);
767 local_irq_restore(flags);
768
769
770 tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
771
772
773 if (ref1 == ref2)
774 continue;
775
776
777 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
778 continue;
779
780 tsc2 = (tsc2 - tsc1) * 1000000LL;
781 if (hpet)
782 tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
783 else
784 tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);
785
786 tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
787
788
789 delta = ((u64) tsc_pit_min) * 100;
790 do_div(delta, tsc_ref_min);
791
792
793
794
795
796
797
798 if (delta >= 90 && delta <= 110) {
799 pr_info("PIT calibration matches %s. %d loops\n",
800 hpet ? "HPET" : "PMTIMER", i + 1);
801 return tsc_ref_min;
802 }
803
804
805
806
807
808
809
810 if (i == 1 && tsc_pit_min == ULONG_MAX) {
811 latch = CAL2_LATCH;
812 ms = CAL2_MS;
813 loopmin = CAL2_PIT_LOOPS;
814 }
815 }
816
817
818
819
820 if (tsc_pit_min == ULONG_MAX) {
821
822 pr_warn("Unable to calibrate against PIT\n");
823
824
825 if (!hpet && !ref1 && !ref2) {
826 pr_notice("No reference (HPET/PMTIMER) available\n");
827 return 0;
828 }
829
830
831 if (tsc_ref_min == ULONG_MAX) {
832 pr_warn("HPET/PMTIMER calibration failed\n");
833 return 0;
834 }
835
836
837 pr_info("using %s reference calibration\n",
838 hpet ? "HPET" : "PMTIMER");
839
840 return tsc_ref_min;
841 }
842
843
844 if (!hpet && !ref1 && !ref2) {
845 pr_info("Using PIT calibration value\n");
846 return tsc_pit_min;
847 }
848
849
850 if (tsc_ref_min == ULONG_MAX) {
851 pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
852 return tsc_pit_min;
853 }
854
855
856
857
858
859
860 pr_warn("PIT calibration deviates from %s: %lu %lu\n",
861 hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
862 pr_info("Using PIT calibration value\n");
863 return tsc_pit_min;
864}
865
866
867
868
/*
 * Early CPU frequency detection, in kHz.
 *
 * Tries, in order: CPUID leaf 0x16, cpu_khz_from_msr(), and the fast PIT
 * calibration (run with interrupts disabled). The first non-zero result
 * wins; 0 is returned when all three fail.
 */
unsigned long native_calibrate_cpu_early(void)
{
	unsigned long flags, khz;

	khz = cpu_khz_from_cpuid();
	if (khz)
		return khz;

	khz = cpu_khz_from_msr();
	if (khz)
		return khz;

	local_irq_save(flags);
	khz = quick_pit_calibrate();
	local_irq_restore(flags);

	return khz;
}
882
883
884
885
886
/*
 * Full CPU frequency detection, in kHz: the early methods first, then the
 * slower PIT/HPET/PM-timer calibration if those yield nothing.
 */
static unsigned long native_calibrate_cpu(void)
{
	unsigned long khz = native_calibrate_cpu_early();

	return khz ? khz : pit_hpet_ptimer_calibrate_cpu();
}
896
897void recalibrate_cpu_khz(void)
898{
899#ifndef CONFIG_SMP
900 unsigned long cpu_khz_old = cpu_khz;
901
902 if (!boot_cpu_has(X86_FEATURE_TSC))
903 return;
904
905 cpu_khz = x86_platform.calibrate_cpu();
906 tsc_khz = x86_platform.calibrate_tsc();
907 if (tsc_khz == 0)
908 tsc_khz = cpu_khz;
909 else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
910 cpu_khz = tsc_khz;
911 cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
912 cpu_khz_old, cpu_khz);
913#endif
914}
915
916EXPORT_SYMBOL(recalibrate_cpu_khz);
917
918
/* sched_clock() value recorded by tsc_save_sched_clock_state(). */
static unsigned long long cyc2ns_suspend;

/*
 * Record the current sched_clock() value so that
 * tsc_restore_sched_clock_state() can continue from it. Skipped when the
 * sched clock is not stable.
 */
void tsc_save_sched_clock_state(void)
{
	if (sched_clock_stable())
		cyc2ns_suspend = sched_clock();
}
928
929
930
931
932
933
934
935
936
937void tsc_restore_sched_clock_state(void)
938{
939 unsigned long long offset;
940 unsigned long flags;
941 int cpu;
942
943 if (!sched_clock_stable())
944 return;
945
946 local_irq_save(flags);
947
948
949
950
951
952
953
954 this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0);
955 this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0);
956
957 offset = cyc2ns_suspend - sched_clock();
958
959 for_each_possible_cpu(cpu) {
960 per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset;
961 per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset;
962 }
963
964 local_irq_restore(flags);
965}
966
967#ifdef CONFIG_CPU_FREQ
968
969
970
971
972
973
974
975
976
977
978
979static unsigned int ref_freq;
980static unsigned long loops_per_jiffy_ref;
981static unsigned long tsc_khz_ref;
982
983static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
984 void *data)
985{
986 struct cpufreq_freqs *freq = data;
987
988 if (num_online_cpus() > 1) {
989 mark_tsc_unstable("cpufreq changes on SMP");
990 return 0;
991 }
992
993 if (!ref_freq) {
994 ref_freq = freq->old;
995 loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy;
996 tsc_khz_ref = tsc_khz;
997 }
998
999 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
1000 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
1001 boot_cpu_data.loops_per_jiffy =
1002 cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
1003
1004 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
1005 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
1006 mark_tsc_unstable("cpufreq changes");
1007
1008 set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc());
1009 }
1010
1011 return 0;
1012}
1013
1014static struct notifier_block time_cpufreq_notifier_block = {
1015 .notifier_call = time_cpufreq_notifier
1016};
1017
1018static int __init cpufreq_register_tsc_scaling(void)
1019{
1020 if (!boot_cpu_has(X86_FEATURE_TSC))
1021 return 0;
1022 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
1023 return 0;
1024 cpufreq_register_notifier(&time_cpufreq_notifier_block,
1025 CPUFREQ_TRANSITION_NOTIFIER);
1026 return 0;
1027}
1028
1029core_initcall(cpufreq_register_tsc_scaling);
1030
1031#endif
1032
1033#define ART_CPUID_LEAF (0x15)
1034#define ART_MIN_DENOMINATOR (1)
1035
1036
1037
1038
1039
1040static void __init detect_art(void)
1041{
1042 unsigned int unused[2];
1043
1044 if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
1045 return;
1046
1047
1048
1049
1050
1051 if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
1052 !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
1053 !boot_cpu_has(X86_FEATURE_TSC_ADJUST) ||
1054 tsc_async_resets)
1055 return;
1056
1057 cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
1058 &art_to_tsc_numerator, unused, unused+1);
1059
1060 if (art_to_tsc_denominator < ART_MIN_DENOMINATOR)
1061 return;
1062
1063 rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset);
1064
1065
1066 setup_force_cpu_cap(X86_FEATURE_ART);
1067}
1068
1069
1070
1071
1072static void tsc_resume(struct clocksource *cs)
1073{
1074 tsc_verify_tsc_adjust(true);
1075}
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093static u64 read_tsc(struct clocksource *cs)
1094{
1095 return (u64)rdtsc_ordered();
1096}
1097
1098static void tsc_cs_mark_unstable(struct clocksource *cs)
1099{
1100 if (tsc_unstable)
1101 return;
1102
1103 tsc_unstable = 1;
1104 if (using_native_sched_clock())
1105 clear_sched_clock_stable();
1106 disable_sched_clock_irqtime();
1107 pr_info("Marking TSC unstable due to clocksource watchdog\n");
1108}
1109
1110static void tsc_cs_tick_stable(struct clocksource *cs)
1111{
1112 if (tsc_unstable)
1113 return;
1114
1115 if (using_native_sched_clock())
1116 sched_clock_tick_stable();
1117}
1118
1119static int tsc_cs_enable(struct clocksource *cs)
1120{
1121 vclocks_set_used(VDSO_CLOCKMODE_TSC);
1122 return 0;
1123}
1124
1125
1126
1127
1128static struct clocksource clocksource_tsc_early = {
1129 .name = "tsc-early",
1130 .rating = 299,
1131 .uncertainty_margin = 32 * NSEC_PER_MSEC,
1132 .read = read_tsc,
1133 .mask = CLOCKSOURCE_MASK(64),
1134 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
1135 CLOCK_SOURCE_MUST_VERIFY,
1136 .vdso_clock_mode = VDSO_CLOCKMODE_TSC,
1137 .enable = tsc_cs_enable,
1138 .resume = tsc_resume,
1139 .mark_unstable = tsc_cs_mark_unstable,
1140 .tick_stable = tsc_cs_tick_stable,
1141 .list = LIST_HEAD_INIT(clocksource_tsc_early.list),
1142};
1143
1144
1145
1146
1147
1148
1149static struct clocksource clocksource_tsc = {
1150 .name = "tsc",
1151 .rating = 300,
1152 .read = read_tsc,
1153 .mask = CLOCKSOURCE_MASK(64),
1154 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
1155 CLOCK_SOURCE_VALID_FOR_HRES |
1156 CLOCK_SOURCE_MUST_VERIFY |
1157 CLOCK_SOURCE_VERIFY_PERCPU,
1158 .vdso_clock_mode = VDSO_CLOCKMODE_TSC,
1159 .enable = tsc_cs_enable,
1160 .resume = tsc_resume,
1161 .mark_unstable = tsc_cs_mark_unstable,
1162 .tick_stable = tsc_cs_tick_stable,
1163 .list = LIST_HEAD_INIT(clocksource_tsc.list),
1164};
1165
1166void mark_tsc_unstable(char *reason)
1167{
1168 if (tsc_unstable)
1169 return;
1170
1171 tsc_unstable = 1;
1172 if (using_native_sched_clock())
1173 clear_sched_clock_stable();
1174 disable_sched_clock_irqtime();
1175 pr_info("Marking TSC unstable due to %s\n", reason);
1176
1177 clocksource_mark_unstable(&clocksource_tsc_early);
1178 clocksource_mark_unstable(&clocksource_tsc);
1179}
1180
1181EXPORT_SYMBOL_GPL(mark_tsc_unstable);
1182
1183static void __init check_system_tsc_reliable(void)
1184{
1185#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
1186 if (is_geode_lx()) {
1187
1188#define RTSC_SUSP 0x100
1189 unsigned long res_low, res_high;
1190
1191 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
1192
1193 if (res_low & RTSC_SUSP)
1194 tsc_clocksource_reliable = 1;
1195 }
1196#endif
1197 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
1198 tsc_clocksource_reliable = 1;
1199}
1200
1201
1202
1203
1204
1205int unsynchronized_tsc(void)
1206{
1207 if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
1208 return 1;
1209
1210#ifdef CONFIG_SMP
1211 if (apic_is_clustered_box())
1212 return 1;
1213#endif
1214
1215 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
1216 return 0;
1217
1218 if (tsc_clocksource_reliable)
1219 return 0;
1220
1221
1222
1223
1224 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
1225
1226 if (num_possible_cpus() > 1)
1227 return 1;
1228 }
1229
1230 return 0;
1231}
1232
1233
1234
1235
1236struct system_counterval_t convert_art_to_tsc(u64 art)
1237{
1238 u64 tmp, res, rem;
1239
1240 rem = do_div(art, art_to_tsc_denominator);
1241
1242 res = art * art_to_tsc_numerator;
1243 tmp = rem * art_to_tsc_numerator;
1244
1245 do_div(tmp, art_to_tsc_denominator);
1246 res += tmp + art_to_tsc_offset;
1247
1248 return (struct system_counterval_t) {.cs = art_related_clocksource,
1249 .cycles = res};
1250}
1251EXPORT_SYMBOL(convert_art_to_tsc);
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns)
1275{
1276 u64 tmp, res, rem;
1277
1278 rem = do_div(art_ns, USEC_PER_SEC);
1279
1280 res = art_ns * tsc_khz;
1281 tmp = rem * tsc_khz;
1282
1283 do_div(tmp, USEC_PER_SEC);
1284 res += tmp;
1285
1286 return (struct system_counterval_t) { .cs = art_related_clocksource,
1287 .cycles = res};
1288}
1289EXPORT_SYMBOL(convert_art_ns_to_tsc);
1290
1291
1292static void tsc_refine_calibration_work(struct work_struct *work);
1293static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308static void tsc_refine_calibration_work(struct work_struct *work)
1309{
1310 static u64 tsc_start = ULLONG_MAX, ref_start;
1311 static int hpet;
1312 u64 tsc_stop, ref_stop, delta;
1313 unsigned long freq;
1314 int cpu;
1315
1316
1317 if (tsc_unstable)
1318 goto unreg;
1319
1320
1321
1322
1323
1324
1325 if (tsc_start == ULLONG_MAX) {
1326restart:
1327
1328
1329
1330
1331 hpet = is_hpet_enabled();
1332 tsc_start = tsc_read_refs(&ref_start, hpet);
1333 schedule_delayed_work(&tsc_irqwork, HZ);
1334 return;
1335 }
1336
1337 tsc_stop = tsc_read_refs(&ref_stop, hpet);
1338
1339
1340 if (ref_start == ref_stop)
1341 goto out;
1342
1343
1344 if (tsc_stop == ULLONG_MAX)
1345 goto restart;
1346
1347 delta = tsc_stop - tsc_start;
1348 delta *= 1000000LL;
1349 if (hpet)
1350 freq = calc_hpet_ref(delta, ref_start, ref_stop);
1351 else
1352 freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
1353
1354
1355 if (abs(tsc_khz - freq) > tsc_khz/100)
1356 goto out;
1357
1358 tsc_khz = freq;
1359 pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
1360 (unsigned long)tsc_khz / 1000,
1361 (unsigned long)tsc_khz % 1000);
1362
1363
1364 lapic_update_tsc_freq();
1365
1366
1367 for_each_possible_cpu(cpu)
1368 set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);
1369
1370out:
1371 if (tsc_unstable)
1372 goto unreg;
1373
1374 if (boot_cpu_has(X86_FEATURE_ART))
1375 art_related_clocksource = &clocksource_tsc;
1376 clocksource_register_khz(&clocksource_tsc, tsc_khz);
1377unreg:
1378 clocksource_unregister(&clocksource_tsc_early);
1379}
1380
1381
1382static int __init init_tsc_clocksource(void)
1383{
1384 if (!boot_cpu_has(X86_FEATURE_TSC) || !tsc_khz)
1385 return 0;
1386
1387 if (tsc_unstable)
1388 goto unreg;
1389
1390 if (tsc_clocksource_reliable || no_tsc_watchdog)
1391 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
1392
1393 if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
1394 clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
1395
1396
1397
1398
1399
1400 if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
1401 if (boot_cpu_has(X86_FEATURE_ART))
1402 art_related_clocksource = &clocksource_tsc;
1403 clocksource_register_khz(&clocksource_tsc, tsc_khz);
1404unreg:
1405 clocksource_unregister(&clocksource_tsc_early);
1406 return 0;
1407 }
1408
1409 schedule_delayed_work(&tsc_irqwork, 0);
1410 return 0;
1411}
1412
1413
1414
1415
1416device_initcall(init_tsc_clocksource);
1417
1418static bool __init determine_cpu_tsc_frequencies(bool early)
1419{
1420
1421 WARN_ON(cpu_khz || tsc_khz);
1422
1423 if (early) {
1424 cpu_khz = x86_platform.calibrate_cpu();
1425 if (tsc_early_khz)
1426 tsc_khz = tsc_early_khz;
1427 else
1428 tsc_khz = x86_platform.calibrate_tsc();
1429 } else {
1430
1431 WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);
1432 cpu_khz = pit_hpet_ptimer_calibrate_cpu();
1433 }
1434
1435
1436
1437
1438
1439
1440 if (tsc_khz == 0)
1441 tsc_khz = cpu_khz;
1442 else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
1443 cpu_khz = tsc_khz;
1444
1445 if (tsc_khz == 0)
1446 return false;
1447
1448 pr_info("Detected %lu.%03lu MHz processor\n",
1449 (unsigned long)cpu_khz / KHZ,
1450 (unsigned long)cpu_khz % KHZ);
1451
1452 if (cpu_khz != tsc_khz) {
1453 pr_info("Detected %lu.%03lu MHz TSC",
1454 (unsigned long)tsc_khz / KHZ,
1455 (unsigned long)tsc_khz % KHZ);
1456 }
1457 return true;
1458}
1459
1460static unsigned long __init get_loops_per_jiffy(void)
1461{
1462 u64 lpj = (u64)tsc_khz * KHZ;
1463
1464 do_div(lpj, HZ);
1465 return lpj;
1466}
1467
1468static void __init tsc_enable_sched_clock(void)
1469{
1470
1471 tsc_store_and_check_tsc_adjust(true);
1472 cyc2ns_init_boot_cpu();
1473 static_branch_enable(&__use_tsc);
1474}
1475
1476void __init tsc_early_init(void)
1477{
1478 if (!boot_cpu_has(X86_FEATURE_TSC))
1479 return;
1480
1481 if (is_early_uv_system())
1482 return;
1483 if (!determine_cpu_tsc_frequencies(true))
1484 return;
1485 loops_per_jiffy = get_loops_per_jiffy();
1486
1487 tsc_enable_sched_clock();
1488}
1489
1490void __init tsc_init(void)
1491{
1492
1493
1494
1495
1496 if (x86_platform.calibrate_cpu == native_calibrate_cpu_early)
1497 x86_platform.calibrate_cpu = native_calibrate_cpu;
1498
1499 if (!boot_cpu_has(X86_FEATURE_TSC)) {
1500 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
1501 return;
1502 }
1503
1504 if (!tsc_khz) {
1505
1506 if (!determine_cpu_tsc_frequencies(false)) {
1507 mark_tsc_unstable("could not calculate TSC khz");
1508 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
1509 return;
1510 }
1511 tsc_enable_sched_clock();
1512 }
1513
1514 cyc2ns_init_secondary_cpus();
1515
1516 if (!no_sched_irq_time)
1517 enable_sched_clock_irqtime();
1518
1519 lpj_fine = get_loops_per_jiffy();
1520 use_tsc_delay();
1521
1522 check_system_tsc_reliable();
1523
1524 if (unsynchronized_tsc()) {
1525 mark_tsc_unstable("TSCs unsynchronized");
1526 return;
1527 }
1528
1529 if (tsc_clocksource_reliable || no_tsc_watchdog)
1530 clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
1531
1532 clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
1533 detect_art();
1534}
1535
1536#ifdef CONFIG_SMP
1537
1538
1539
1540
1541
1542
1543unsigned long calibrate_delay_is_known(void)
1544{
1545 int sibling, cpu = smp_processor_id();
1546 int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
1547 const struct cpumask *mask = topology_core_cpumask(cpu);
1548
1549 if (!constant_tsc || !mask)
1550 return 0;
1551
1552 sibling = cpumask_any_but(mask, cpu);
1553 if (sibling < nr_cpu_ids)
1554 return cpu_data(sibling).loops_per_jiffy;
1555 return 0;
1556}
1557#endif
1558