1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2
3#include <linux/kernel.h>
4#include <linux/sched.h>
5#include <linux/sched/clock.h>
6#include <linux/init.h>
7#include <linux/export.h>
8#include <linux/timer.h>
9#include <linux/acpi_pmtmr.h>
10#include <linux/cpufreq.h>
11#include <linux/delay.h>
12#include <linux/clocksource.h>
13#include <linux/percpu.h>
14#include <linux/timex.h>
15#include <linux/static_key.h>
16
17#include <asm/hpet.h>
18#include <asm/timer.h>
19#include <asm/vgtod.h>
20#include <asm/time.h>
21#include <asm/delay.h>
22#include <asm/hypervisor.h>
23#include <asm/nmi.h>
24#include <asm/x86_init.h>
25#include <asm/geode.h>
26#include <asm/apic.h>
27#include <asm/intel-family.h>
28
/* CPU base frequency in kHz, as calibrated at boot. */
unsigned int __read_mostly cpu_khz;
EXPORT_SYMBOL(cpu_khz);

/* TSC increment frequency in kHz; may differ from cpu_khz. */
unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);

/* Set once the TSC has been found unusable as a time source. */
static int __read_mostly tsc_unstable;

/*
 * TSC disable state:
 *   -1: not yet decided (boot default)
 *    0: TSC is in use (set in tsc_init())
 *    1: disabled via the "notsc" command line option
 */
static int __read_mostly tsc_disabled = -1;

/* Patched true once tsc_init() decides the TSC can back sched_clock(). */
static DEFINE_STATIC_KEY_FALSE(__use_tsc);

/* Non-zero when the TSC needs no clocksource-watchdog verification. */
int tsc_clocksource_reliable;

/* ART (Always Running Timer) to TSC conversion parameters; see detect_art(). */
static u32 art_to_tsc_numerator;
static u32 art_to_tsc_denominator;
static u64 art_to_tsc_offset;
struct clocksource *art_related_clocksource;
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
/*
 * Per-CPU TSC-cycles to nanoseconds conversion state, double buffered so
 * that a writer can publish new scale factors while readers may still be
 * using the old slot.
 */
struct cyc2ns {
	struct cyc2ns_data data[2];	/* the two conversion slots */
	struct cyc2ns_data *head;	/* slot new readers should use */
	struct cyc2ns_data *tail;	/* slot no reader uses any more */
};

static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);
84
/*
 * Acquire a reference on this CPU's current cyc2ns conversion entry.
 * Returns with preemption disabled; must be paired with cyc2ns_read_end().
 */
struct cyc2ns_data *cyc2ns_read_begin(void)
{
	struct cyc2ns_data *head;

	/* Readers must stay on this CPU while holding the reference. */
	preempt_disable();

	head = this_cpu_read(cyc2ns.head);
	/*
	 * Ensure we observe the contents of the entry the head pointer
	 * points to; pairs with the smp_wmb() in cyc2ns_write_end().
	 */
	smp_read_barrier_depends();
	head->__count++;
	barrier();

	return head;
}
102
/*
 * Drop the reference taken by cyc2ns_read_begin().
 */
void cyc2ns_read_end(struct cyc2ns_data *head)
{
	barrier();
	/*
	 * When the last reader of this entry drops its reference, hand the
	 * entry back as tail so that cyc2ns_write_begin() may reuse it.
	 *
	 * __count is only modified from this CPU (readers run with
	 * preemption disabled on their own per-cpu data), so plain
	 * increments/decrements suffice.
	 */
	if (!--head->__count) {
		this_cpu_write(cyc2ns.tail, head);
	}
	preempt_enable();
}
124
125
126
127
128
129
130
131static struct cyc2ns_data *cyc2ns_write_begin(int cpu)
132{
133 struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
134 struct cyc2ns_data *data = c2n->data;
135
136 if (data == c2n->head)
137 data++;
138
139
140
141
142
143
144
145
146 while (c2n->tail == data)
147 cpu_relax();
148
149 return data;
150}
151
static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
{
	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);

	/*
	 * Ensure the @data writes are complete before publishing the
	 * entry; pairs with the smp_read_barrier_depends() in
	 * cyc2ns_read_begin().
	 */
	smp_wmb();

	ACCESS_ONCE(c2n->head) = data;
}
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
/* Reset a conversion entry: zero scale, shift, offset and reference count. */
static void cyc2ns_data_init(struct cyc2ns_data *data)
{
	data->cyc2ns_mul = 0;
	data->cyc2ns_shift = 0;
	data->cyc2ns_offset = 0;
	data->__count = 0;
}
196
197static void cyc2ns_init(int cpu)
198{
199 struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
200
201 cyc2ns_data_init(&c2n->data[0]);
202 cyc2ns_data_init(&c2n->data[1]);
203
204 c2n->head = c2n->data;
205 c2n->tail = c2n->data;
206}
207
/*
 * Convert a TSC cycle count to nanoseconds using this CPU's current
 * cyc2ns data. Hot path for sched_clock().
 */
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	struct cyc2ns_data *data, *tail;
	unsigned long long ns;

	/*
	 * See cyc2ns_read_*() for details; replicated here inline to
	 * avoid the extra instructions of the abstraction. Notably it
	 * allows us to only do the __count and tail update dance when
	 * it is actually needed, i.e. when head != tail (an update is
	 * in flight).
	 */
	preempt_disable_notrace();
	data = this_cpu_read(cyc2ns.head);
	tail = this_cpu_read(cyc2ns.tail);

	if (likely(data == tail)) {
		/* Stable case: no writer active, no refcounting needed. */
		ns = data->cyc2ns_offset;
		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
	} else {
		data->__count++;

		barrier();

		ns = data->cyc2ns_offset;
		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);

		barrier();

		if (!--data->__count)
			this_cpu_write(cyc2ns.tail, data);
	}
	preempt_enable_notrace();

	return ns;
}
244
/*
 * Install a new cycles->ns mult/shift pair for @cpu, derived from @khz,
 * while keeping sched_clock() continuous across the change.
 */
static void set_cyc2ns_scale(unsigned long khz, int cpu)
{
	unsigned long long tsc_now, ns_now;
	struct cyc2ns_data *data;
	unsigned long flags;

	local_irq_save(flags);
	sched_clock_idle_sleep_event();

	if (!khz)
		goto done;

	data = cyc2ns_write_begin(cpu);

	tsc_now = rdtsc();
	ns_now = cycles_2_ns(tsc_now);

	/*
	 * Compute a new multiplier/shift pair so that
	 * ns ~= cycles * mul >> shift, with ns-per-msec precision.
	 */
	clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz,
			       NSEC_PER_MSEC, 0);

	/*
	 * cyc2ns_shift is exported via arch_perf_update_userpage() where
	 * it is not expected to be greater than 31 due to the original
	 * published conversion algorithm shifting a 32-bit value - see
	 * the perf_event_mmap_page documentation in perf_event.h.
	 */
	if (data->cyc2ns_shift == 32) {
		data->cyc2ns_shift = 31;
		data->cyc2ns_mul >>= 1;
	}

	/* Choose the offset so the clock reads ns_now at tsc_now. */
	data->cyc2ns_offset = ns_now -
		mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift);

	cyc2ns_write_end(cpu, data);

done:
	sched_clock_idle_wakeup_event(0);
	local_irq_restore(flags);
}
290
291
292
293u64 native_sched_clock(void)
294{
295 if (static_branch_likely(&__use_tsc)) {
296 u64 tsc_now = rdtsc();
297
298
299 return cycles_2_ns(tsc_now);
300 }
301
302
303
304
305
306
307
308
309
310
311
312 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
313}
314
315
316
317
318u64 native_sched_clock_from_tsc(u64 tsc)
319{
320 return cycles_2_ns(tsc);
321}
322
323
324
325#ifdef CONFIG_PARAVIRT
326unsigned long long sched_clock(void)
327{
328 return paravirt_sched_clock();
329}
330
331bool using_native_sched_clock(void)
332{
333 return pv_time_ops.sched_clock == native_sched_clock;
334}
335#else
336unsigned long long
337sched_clock(void) __attribute__((alias("native_sched_clock")));
338
339bool using_native_sched_clock(void) { return true; }
340#endif
341
342int check_tsc_unstable(void)
343{
344 return tsc_unstable;
345}
346EXPORT_SYMBOL_GPL(check_tsc_unstable);
347
#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
	/*
	 * The TSC cannot be fully disabled when the kernel is built with
	 * CONFIG_X86_TSC; only mark it disabled so it is not used as a
	 * clocksource / for sched_clock().
	 */
	pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
	tsc_disabled = 1;
	return 1;
}
#else
/*
 * disable flag for tsc. Takes effect by clearing the TSC cpu flag
 * in cpu/common.c
 */
int __init notsc_setup(char *str)
{
	setup_clear_cpu_cap(X86_FEATURE_TSC);
	return 1;
}
#endif

__setup("notsc", notsc_setup);
368
369static int no_sched_irq_time;
370
371static int __init tsc_setup(char *str)
372{
373 if (!strcmp(str, "reliable"))
374 tsc_clocksource_reliable = 1;
375 if (!strncmp(str, "noirqtime", 9))
376 no_sched_irq_time = 1;
377 return 1;
378}
379
380__setup("tsc=", tsc_setup);
381
#define MAX_RETRIES	5
#define SMI_TRESHOLD	50000

/*
 * Read TSC and the reference counters. Take care of SMI disturbance.
 * Returns the TSC value of the second read, or ULLONG_MAX if every
 * attempt looked disturbed.
 */
static u64 tsc_read_refs(u64 *p, int hpet)
{
	u64 t1, t2;
	int i;

	for (i = 0; i < MAX_RETRIES; i++) {
		t1 = get_cycles();
		if (hpet)
			*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
		else
			*p = acpi_pm_read_early();
		t2 = get_cycles();
		/*
		 * If the reference read took suspiciously long, an SMI
		 * probably hit in between; retry.
		 */
		if ((t2 - t1) < SMI_TRESHOLD)
			return t2;
	}
	return ULLONG_MAX;
}
405
406
407
408
409static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
410{
411 u64 tmp;
412
413 if (hpet2 < hpet1)
414 hpet2 += 0x100000000ULL;
415 hpet2 -= hpet1;
416 tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
417 do_div(tmp, 1000000);
418 do_div(deltatsc, tmp);
419
420 return (unsigned long) deltatsc;
421}
422
423
424
425
426static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
427{
428 u64 tmp;
429
430 if (!pm1 && !pm2)
431 return ULONG_MAX;
432
433 if (pm2 < pm1)
434 pm2 += (u64)ACPI_PM_OVRRUN;
435 pm2 -= pm1;
436 tmp = pm2 * 1000000000LL;
437 do_div(tmp, PMTMR_TICKS_PER_SEC);
438 do_div(deltatsc, tmp);
439
440 return (unsigned long) deltatsc;
441}
442
/* First round of PIT calibration: 10ms latch, at least 1000 PIT reads. */
#define CAL_MS		10
#define CAL_LATCH	(PIT_TICK_RATE / (1000 / CAL_MS))
#define CAL_PIT_LOOPS	1000

/* Second (slower) round used when the first keeps failing: 50ms latch. */
#define CAL2_MS		50
#define CAL2_LATCH	(PIT_TICK_RATE / (1000 / CAL2_MS))
#define CAL2_PIT_LOOPS	5000

/*
 * Try to calibrate the TSC against the Programmable
 * Interrupt Timer and return the frequency of the TSC
 * in kHz.
 *
 * Return ULONG_MAX on failure to calibrate.
 */
static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
{
	u64 tsc, t1, t2, delta;
	unsigned long tscmin, tscmax;
	int pitcnt;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Setup CTC channel 2 for mode 0 (interrupt on terminal
	 * count), binary count. Program the latch register
	 * (LSB then MSB) to begin the countdown.
	 */
	outb(0xb0, 0x43);
	outb(latch & 0xff, 0x42);
	outb(latch >> 8, 0x42);

	tsc = t1 = t2 = get_cycles();

	/*
	 * Spin until the PIT output goes high, tracking the min/max TSC
	 * delta between consecutive PIT reads so SMI disturbances can
	 * be detected afterwards.
	 */
	pitcnt = 0;
	tscmax = 0;
	tscmin = ULONG_MAX;
	while ((inb(0x61) & 0x20) == 0) {
		t2 = get_cycles();
		delta = t2 - tsc;
		tsc = t2;
		if ((unsigned long) delta < tscmin)
			tscmin = (unsigned int) delta;
		if ((unsigned long) delta > tscmax)
			tscmax = (unsigned int) delta;
		pitcnt++;
	}

	/*
	 * Sanity checks:
	 *
	 * If we were not able to read the PIT more than loopmin
	 * times, then we have been hit by a massive SMI.
	 *
	 * If the maximum is 10 times larger than the minimum,
	 * then we got hit by an SMI as well.
	 */
	if (pitcnt < loopmin || tscmax > 10 * tscmin)
		return ULONG_MAX;

	/* Calculate the PIT value: cycles per millisecond == kHz. */
	delta = t2 - t1;
	do_div(delta, ms);
	return delta;
}
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
/*
 * Read back the current 16-bit PIT channel 2 count and check that its
 * MSB equals @val. Used by the fast calibration to verify the counter
 * is where we expect it.
 */
static inline int pit_verify_msb(unsigned char val)
{
	/* Ignore LSB */
	inb(0x42);
	return inb(0x42) == val;
}
552
/*
 * Spin while the PIT MSB reads @val, recording the TSC. On exit,
 * *tscp is the last TSC read taken while the MSB still matched and
 * *deltap bounds the uncertainty of that reading.
 */
static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
	int count;
	u64 tsc = 0, prev_tsc = 0;

	for (count = 0; count < 50000; count++) {
		if (!pit_verify_msb(val))
			break;
		prev_tsc = tsc;
		tsc = get_cycles();
	}
	*deltap = get_cycles() - prev_tsc;
	*tscp = tsc;

	/*
	 * We require _some_ success, but the quality control
	 * will be based on the error terms on the TSC values.
	 */
	return count > 5;
}
573
574
575
576
577
578
579
580#define MAX_QUICK_PIT_MS 50
581#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
582
583static unsigned long quick_pit_calibrate(void)
584{
585 int i;
586 u64 tsc, delta;
587 unsigned long d1, d2;
588
589
590 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
591
592
593
594
595
596
597
598
599
600
601 outb(0xb0, 0x43);
602
603
604 outb(0xff, 0x42);
605 outb(0xff, 0x42);
606
607
608
609
610
611
612
613 pit_verify_msb(0);
614
615 if (pit_expect_msb(0xff, &tsc, &d1)) {
616 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
617 if (!pit_expect_msb(0xff-i, &delta, &d2))
618 break;
619
620 delta -= tsc;
621
622
623
624
625
626 if (i == 1 &&
627 d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
628 return 0;
629
630
631
632
633 if (d1+d2 >= delta >> 11)
634 continue;
635
636
637
638
639
640
641
642
643 if (!pit_verify_msb(0xfe - i))
644 break;
645 goto success;
646 }
647 }
648 pr_info("Fast TSC calibration failed\n");
649 return 0;
650
651success:
652
653
654
655
656
657
658
659
660
661
662
663
664
665 delta *= PIT_TICK_RATE;
666 do_div(delta, i*256*1000);
667 pr_info("Fast TSC calibration using PIT\n");
668 return delta;
669}
670
671
672
673
674
675unsigned long native_calibrate_tsc(void)
676{
677 unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
678 unsigned int crystal_khz;
679
680 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
681 return 0;
682
683 if (boot_cpu_data.cpuid_level < 0x15)
684 return 0;
685
686 eax_denominator = ebx_numerator = ecx_hz = edx = 0;
687
688
689 cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);
690
691 if (ebx_numerator == 0 || eax_denominator == 0)
692 return 0;
693
694 crystal_khz = ecx_hz / 1000;
695
696 if (crystal_khz == 0) {
697 switch (boot_cpu_data.x86_model) {
698 case INTEL_FAM6_SKYLAKE_MOBILE:
699 case INTEL_FAM6_SKYLAKE_DESKTOP:
700 case INTEL_FAM6_KABYLAKE_MOBILE:
701 case INTEL_FAM6_KABYLAKE_DESKTOP:
702 crystal_khz = 24000;
703 break;
704 case INTEL_FAM6_SKYLAKE_X:
705 case INTEL_FAM6_ATOM_DENVERTON:
706 crystal_khz = 25000;
707 break;
708 case INTEL_FAM6_ATOM_GOLDMONT:
709 crystal_khz = 19200;
710 break;
711 }
712 }
713
714
715
716
717
718
719 setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
720
721
722
723
724
725 if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
726 setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
727
728 return crystal_khz * ebx_numerator / eax_denominator;
729}
730
731static unsigned long cpu_khz_from_cpuid(void)
732{
733 unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx;
734
735 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
736 return 0;
737
738 if (boot_cpu_data.cpuid_level < 0x16)
739 return 0;
740
741 eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0;
742
743 cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx);
744
745 return eax_base_mhz * 1000;
746}
747
748
749
750
751unsigned long native_calibrate_cpu(void)
752{
753 u64 tsc1, tsc2, delta, ref1, ref2;
754 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
755 unsigned long flags, latch, ms, fast_calibrate;
756 int hpet = is_hpet_enabled(), i, loopmin;
757
758 fast_calibrate = cpu_khz_from_cpuid();
759 if (fast_calibrate)
760 return fast_calibrate;
761
762 fast_calibrate = cpu_khz_from_msr();
763 if (fast_calibrate)
764 return fast_calibrate;
765
766 local_irq_save(flags);
767 fast_calibrate = quick_pit_calibrate();
768 local_irq_restore(flags);
769 if (fast_calibrate)
770 return fast_calibrate;
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798 latch = CAL_LATCH;
799 ms = CAL_MS;
800 loopmin = CAL_PIT_LOOPS;
801
802 for (i = 0; i < 3; i++) {
803 unsigned long tsc_pit_khz;
804
805
806
807
808
809
810
811 local_irq_save(flags);
812 tsc1 = tsc_read_refs(&ref1, hpet);
813 tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
814 tsc2 = tsc_read_refs(&ref2, hpet);
815 local_irq_restore(flags);
816
817
818 tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
819
820
821 if (ref1 == ref2)
822 continue;
823
824
825 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
826 continue;
827
828 tsc2 = (tsc2 - tsc1) * 1000000LL;
829 if (hpet)
830 tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
831 else
832 tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);
833
834 tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
835
836
837 delta = ((u64) tsc_pit_min) * 100;
838 do_div(delta, tsc_ref_min);
839
840
841
842
843
844
845
846 if (delta >= 90 && delta <= 110) {
847 pr_info("PIT calibration matches %s. %d loops\n",
848 hpet ? "HPET" : "PMTIMER", i + 1);
849 return tsc_ref_min;
850 }
851
852
853
854
855
856
857
858 if (i == 1 && tsc_pit_min == ULONG_MAX) {
859 latch = CAL2_LATCH;
860 ms = CAL2_MS;
861 loopmin = CAL2_PIT_LOOPS;
862 }
863 }
864
865
866
867
868 if (tsc_pit_min == ULONG_MAX) {
869
870 pr_warn("Unable to calibrate against PIT\n");
871
872
873 if (!hpet && !ref1 && !ref2) {
874 pr_notice("No reference (HPET/PMTIMER) available\n");
875 return 0;
876 }
877
878
879 if (tsc_ref_min == ULONG_MAX) {
880 pr_warn("HPET/PMTIMER calibration failed\n");
881 return 0;
882 }
883
884
885 pr_info("using %s reference calibration\n",
886 hpet ? "HPET" : "PMTIMER");
887
888 return tsc_ref_min;
889 }
890
891
892 if (!hpet && !ref1 && !ref2) {
893 pr_info("Using PIT calibration value\n");
894 return tsc_pit_min;
895 }
896
897
898 if (tsc_ref_min == ULONG_MAX) {
899 pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
900 return tsc_pit_min;
901 }
902
903
904
905
906
907
908 pr_warn("PIT calibration deviates from %s: %lu %lu\n",
909 hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
910 pr_info("Using PIT calibration value\n");
911 return tsc_pit_min;
912}
913
/*
 * Recalibrate cpu_khz/tsc_khz at runtime. Only supported on UP, since
 * loops_per_jiffy is rescaled here for CPU 0 only.
 */
int recalibrate_cpu_khz(void)
{
#ifndef CONFIG_SMP
	unsigned long cpu_khz_old = cpu_khz;

	if (!boot_cpu_has(X86_FEATURE_TSC))
		return -ENODEV;

	cpu_khz = x86_platform.calibrate_cpu();
	tsc_khz = x86_platform.calibrate_tsc();
	if (tsc_khz == 0)
		tsc_khz = cpu_khz;
	else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
		/* More than 10% deviation: trust the TSC value. */
		cpu_khz = tsc_khz;
	cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
						    cpu_khz_old, cpu_khz);

	return 0;
#else
	return -ENODEV;
#endif
}

EXPORT_SYMBOL(recalibrate_cpu_khz);
938
939
940static unsigned long long cyc2ns_suspend;
941
942void tsc_save_sched_clock_state(void)
943{
944 if (!sched_clock_stable())
945 return;
946
947 cyc2ns_suspend = sched_clock();
948}
949
950
951
952
953
954
955
956
957
958void tsc_restore_sched_clock_state(void)
959{
960 unsigned long long offset;
961 unsigned long flags;
962 int cpu;
963
964 if (!sched_clock_stable())
965 return;
966
967 local_irq_save(flags);
968
969
970
971
972
973
974
975 this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0);
976 this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0);
977
978 offset = cyc2ns_suspend - sched_clock();
979
980 for_each_possible_cpu(cpu) {
981 per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset;
982 per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset;
983 }
984
985 local_irq_restore(flags);
986}
987
#ifdef CONFIG_CPU_FREQ

/*
 * Frequency scaling support: adjust the TSC-based timer (tsc_khz and
 * loops_per_jiffy) when the CPU frequency changes.
 *
 * NOTE(review): the reference values below are captured once, at the
 * first transition seen — presumably all CPUs are assumed to scale
 * together; confirm against the cpufreq policy in use.
 */

/* State captured at the first frequency transition, used as reference. */
static unsigned int ref_freq;
static unsigned long loops_per_jiffy_ref;
static unsigned long tsc_khz_ref;

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				 void *data)
{
	struct cpufreq_freqs *freq = data;
	unsigned long *lpj;

	lpj = &boot_cpu_data.loops_per_jiffy;
#ifdef CONFIG_SMP
	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
#endif

	if (!ref_freq) {
		ref_freq = freq->old;
		loops_per_jiffy_ref = *lpj;
		tsc_khz_ref = tsc_khz;
	}
	/* Rescale before an increase and after a decrease of the frequency. */
	if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
		*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
			mark_tsc_unstable("cpufreq changes");

		set_cyc2ns_scale(tsc_khz, freq->cpu);
	}

	return 0;
}
1035
static struct notifier_block time_cpufreq_notifier_block = {
	.notifier_call  = time_cpufreq_notifier
};

static int __init cpufreq_register_tsc_scaling(void)
{
	if (!boot_cpu_has(X86_FEATURE_TSC))
		return 0;
	/* A constant TSC does not vary with the CPU frequency: nothing to do. */
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;
	cpufreq_register_notifier(&time_cpufreq_notifier_block,
				  CPUFREQ_TRANSITION_NOTIFIER);
	return 0;
}

core_initcall(cpufreq_register_tsc_scaling);

#endif /* CONFIG_CPU_FREQ */
1054
#define ART_CPUID_LEAF (0x15)
#define ART_MIN_DENOMINATOR (1)

/*
 * If ART (Always Running Timer) is available, read the CPUID 0x15
 * ART/TSC ratio and the TSC_ADJUST offset, so ART timestamps can later
 * be converted to TSC values (see convert_art_to_tsc()).
 */
static void detect_art(void)
{
	unsigned int unused[2];

	if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
		return;

	/* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required */
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
	    !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
	    !boot_cpu_has(X86_FEATURE_TSC_ADJUST))
		return;

	cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
	      &art_to_tsc_numerator, unused, unused+1);

	/* A zero denominator would make the conversion divide by zero. */
	if (art_to_tsc_denominator < ART_MIN_DENOMINATOR)
		return;

	rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset);

	/* Make this available once the ART related clocksource is registered. */
	setup_force_cpu_cap(X86_FEATURE_ART);
}
1086
1087
1088
1089
1090static struct clocksource clocksource_tsc;
1091
1092static void tsc_resume(struct clocksource *cs)
1093{
1094 tsc_verify_tsc_adjust(true);
1095}
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113static u64 read_tsc(struct clocksource *cs)
1114{
1115 return (u64)rdtsc_ordered();
1116}
1117
/* .mark_unstable callback: invoked by the clocksource watchdog. */
static void tsc_cs_mark_unstable(struct clocksource *cs)
{
	if (tsc_unstable)
		return;

	tsc_unstable = 1;
	if (using_native_sched_clock())
		clear_sched_clock_stable();
	disable_sched_clock_irqtime();
	pr_info("Marking TSC unstable due to clocksource watchdog\n");
}
1129
1130
1131
1132
1133static struct clocksource clocksource_tsc = {
1134 .name = "tsc",
1135 .rating = 300,
1136 .read = read_tsc,
1137 .mask = CLOCKSOURCE_MASK(64),
1138 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
1139 CLOCK_SOURCE_MUST_VERIFY,
1140 .archdata = { .vclock_mode = VCLOCK_TSC },
1141 .resume = tsc_resume,
1142 .mark_unstable = tsc_cs_mark_unstable,
1143};
1144
/*
 * Declare the TSC unstable for the given human-readable @reason and
 * detach it from sched_clock stability and the clocksource core.
 */
void mark_tsc_unstable(char *reason)
{
	if (tsc_unstable)
		return;

	tsc_unstable = 1;
	if (using_native_sched_clock())
		clear_sched_clock_stable();
	disable_sched_clock_irqtime();
	pr_info("Marking TSC unstable due to %s\n", reason);
	/* Change only the rating, when not registered */
	if (clocksource_tsc.mult) {
		clocksource_mark_unstable(&clocksource_tsc);
	} else {
		clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
		clocksource_tsc.rating = 0;
	}
}

EXPORT_SYMBOL_GPL(mark_tsc_unstable);
1165
static void __init check_system_tsc_reliable(void)
{
#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
	if (is_geode_lx()) {
		/* RTSC counts during suspend */
#define RTSC_SUSP 0x100
		unsigned long res_low, res_high;

		rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
		/* Geode_LX - the OLPC CPU has a very reliable TSC */
		if (res_low & RTSC_SUSP)
			tsc_clocksource_reliable = 1;
	}
#endif
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
		tsc_clocksource_reliable = 1;
}
1183
1184
1185
1186
1187
1188int unsynchronized_tsc(void)
1189{
1190 if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
1191 return 1;
1192
1193#ifdef CONFIG_SMP
1194 if (apic_is_clustered_box())
1195 return 1;
1196#endif
1197
1198 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
1199 return 0;
1200
1201 if (tsc_clocksource_reliable)
1202 return 0;
1203
1204
1205
1206
1207 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
1208
1209 if (num_possible_cpus() > 1)
1210 return 1;
1211 }
1212
1213 return 0;
1214}
1215
1216
1217
1218
1219struct system_counterval_t convert_art_to_tsc(u64 art)
1220{
1221 u64 tmp, res, rem;
1222
1223 rem = do_div(art, art_to_tsc_denominator);
1224
1225 res = art * art_to_tsc_numerator;
1226 tmp = rem * art_to_tsc_numerator;
1227
1228 do_div(tmp, art_to_tsc_denominator);
1229 res += tmp + art_to_tsc_offset;
1230
1231 return (struct system_counterval_t) {.cs = art_related_clocksource,
1232 .cycles = res};
1233}
1234EXPORT_SYMBOL(convert_art_to_tsc);
1235
static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);

/**
 * tsc_refine_calibration_work - Further refine tsc freq calibration
 * @work: workqueue task pointer
 *
 * This functions uses delayed work over a period of a
 * second to further refine the TSC freq value. Since this is
 * timer based, instead of loop based, we don't block the boot
 * process while this longer calibration is done.
 *
 * If there are any calibration anomalies (too many SMIs, etc),
 * or the refined calibration is off by 1% of the fast early
 * calibration, we throw out the new calibration and use the
 * early calibration.
 */
static void tsc_refine_calibration_work(struct work_struct *work)
{
	static u64 tsc_start = -1, ref_start;
	static int hpet;
	u64 tsc_stop, ref_stop, delta;
	unsigned long freq;

	/* Don't bother refining TSC on unstable systems */
	if (check_tsc_unstable())
		goto out;

	/*
	 * Since the work is started early in boot, we may be
	 * delayed the first time we expire. So set the workqueue
	 * again once we know timers are working.
	 */
	if (tsc_start == -1) {
		/*
		 * Only set hpet once, to avoid mixing hardware
		 * if the hpet becomes enabled later.
		 */
		hpet = is_hpet_enabled();
		schedule_delayed_work(&tsc_irqwork, HZ);
		tsc_start = tsc_read_refs(&ref_start, hpet);
		return;
	}

	tsc_stop = tsc_read_refs(&ref_stop, hpet);

	/* hpet or pmtimer available ? */
	if (ref_start == ref_stop)
		goto out;

	/* Check, whether the sampling was disturbed by an SMI */
	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
		goto out;

	delta = tsc_stop - tsc_start;
	delta *= 1000000LL;
	if (hpet)
		freq = calc_hpet_ref(delta, ref_start, ref_stop);
	else
		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);

	/* Make sure we're within 1% */
	if (abs(tsc_khz - freq) > tsc_khz/100)
		goto out;

	tsc_khz = freq;
	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
		(unsigned long)tsc_khz / 1000,
		(unsigned long)tsc_khz % 1000);

	/* Inform the TSC deadline clockevent devices about the recalibration */
	lapic_update_tsc_freq();

out:
	if (boot_cpu_has(X86_FEATURE_ART))
		art_related_clocksource = &clocksource_tsc;
	clocksource_register_khz(&clocksource_tsc, tsc_khz);
}
1313
1314
1315static int __init init_tsc_clocksource(void)
1316{
1317 if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
1318 return 0;
1319
1320 if (tsc_clocksource_reliable)
1321 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
1322
1323 if (check_tsc_unstable()) {
1324 clocksource_tsc.rating = 0;
1325 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
1326 }
1327
1328 if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
1329 clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
1330
1331
1332
1333
1334
1335 if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {
1336 if (boot_cpu_has(X86_FEATURE_ART))
1337 art_related_clocksource = &clocksource_tsc;
1338 clocksource_register_khz(&clocksource_tsc, tsc_khz);
1339 return 0;
1340 }
1341
1342 schedule_delayed_work(&tsc_irqwork, 0);
1343 return 0;
1344}
1345
1346
1347
1348
1349device_initcall(init_tsc_clocksource);
1350
void __init tsc_init(void)
{
	u64 lpj;
	int cpu;

	if (!boot_cpu_has(X86_FEATURE_TSC)) {
		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
		return;
	}

	cpu_khz = x86_platform.calibrate_cpu();
	tsc_khz = x86_platform.calibrate_tsc();

	/*
	 * Trust non-zero tsc_khz as authoritative,
	 * and use it to sanity check cpu_khz,
	 * which will be off if system timer is off.
	 */
	if (tsc_khz == 0)
		tsc_khz = cpu_khz;
	else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
		cpu_khz = tsc_khz;

	if (!tsc_khz) {
		mark_tsc_unstable("could not calculate TSC khz");
		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
		return;
	}

	pr_info("Detected %lu.%03lu MHz processor\n",
		(unsigned long)cpu_khz / 1000,
		(unsigned long)cpu_khz % 1000);

	/* Sanitize TSC ADJUST before cyc2ns gets initialized */
	tsc_store_and_check_tsc_adjust(true);

	/*
	 * Secondary CPUs do not run through tsc_init(), so set up
	 * all the scale factors for all CPUs, assuming the same
	 * speed as the bootup CPU. (cpufreq notifiers will fix this
	 * up if their speed diverges)
	 */
	for_each_possible_cpu(cpu) {
		cyc2ns_init(cpu);
		set_cyc2ns_scale(tsc_khz, cpu);
	}

	if (tsc_disabled > 0)
		return;

	/* now allow native_sched_clock() to use rdtsc */

	tsc_disabled = 0;
	static_branch_enable(&__use_tsc);

	if (!no_sched_irq_time)
		enable_sched_clock_irqtime();

	lpj = ((u64)tsc_khz * 1000);
	do_div(lpj, HZ);
	lpj_fine = lpj;

	use_tsc_delay();

	if (unsynchronized_tsc())
		mark_tsc_unstable("TSCs unsynchronized");

	check_system_tsc_reliable();

	detect_art();
}
1422
#ifdef CONFIG_SMP
/*
 * If we have a constant TSC and are using the TSC for the delay loop,
 * we can skip clock calibration if another cpu in the same socket has
 * already been calibrated. This assumes that CONSTANT_TSC applies to
 * all cpus in the socket - this should be a safe assumption.
 */
unsigned long calibrate_delay_is_known(void)
{
	int sibling, cpu = smp_processor_id();
	int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
	const struct cpumask *mask = topology_core_cpumask(cpu);

	/*
	 * Bug fix: the old guard (!tsc_disabled && !constant_tsc) only
	 * forced a real calibration when the TSC was enabled but not
	 * constant. With the TSC disabled ("notsc", tsc_disabled == 1)
	 * or undecided (-1) it would wrongly reuse a sibling's
	 * loops_per_jiffy. Require a definitely-enabled, constant TSC
	 * and a valid sibling mask before skipping calibration.
	 */
	if (tsc_disabled || !constant_tsc || !mask)
		return 0;

	/* Reuse the loops_per_jiffy of an already-calibrated core sibling. */
	sibling = cpumask_any_but(mask, cpu);
	if (sibling < nr_cpu_ids)
		return cpu_data(sibling).loops_per_jiffy;
	return 0;
}
#endif
1447