#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/timer.h>
#include <linux/acpi_pmtmr.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/clocksource.h>
#include <linux/percpu.h>
#include <linux/timex.h>
#include <linux/static_key.h>

#include <asm/hpet.h>
#include <asm/timer.h>
#include <asm/vgtod.h>
#include <asm/time.h>
#include <asm/delay.h>
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/geode.h>
#include <asm/apic.h>
#include <asm/intel-family.h>

unsigned int __read_mostly cpu_khz;
EXPORT_SYMBOL(cpu_khz);

unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);

/* TSC can become unstable, e.g. due to cpufreq or unsynchronized sockets. */
static int __read_mostly tsc_unstable;

/*
 * The TSC starts out soft-disabled (-1) because native_sched_clock() can be
 * called before tsc_init(); tsc_init() sets this to 0 once the TSC is
 * calibrated and safe to use ("notsc" sets it to 1).
 */
static int __read_mostly tsc_disabled = -1;

static DEFINE_STATIC_KEY_FALSE(__use_tsc);

int tsc_clocksource_reliable;

static u32 art_to_tsc_numerator;
static u32 art_to_tsc_denominator;
static u64 art_to_tsc_offset;
struct clocksource *art_related_clocksource;

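/*
 * The cyc2ns data is kept per CPU in a tiny two-slot ring buffer: a writer
 * publishes a new entry by advancing 'head', while readers bump a reference
 * count on the entry they use and advance 'tail' when the last reader drops
 * it. This lets cycles_2_ns() run lock-free in the sched_clock() fast path
 * while set_cyc2ns_scale() rewrites the scale factors.
 */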
struct cyc2ns {
	struct cyc2ns_data data[2];
	struct cyc2ns_data *head;
	struct cyc2ns_data *tail;
};

static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns);

struct cyc2ns_data *cyc2ns_read_begin(void)
{
	struct cyc2ns_data *head;

	preempt_disable();

	head = this_cpu_read(cyc2ns.head);
	smp_read_barrier_depends();
	head->__count++;
	barrier();

	return head;
}

void cyc2ns_read_end(struct cyc2ns_data *head)
{
	barrier();
	if (!--head->__count) {
		this_cpu_write(cyc2ns.tail, head);
	}
	preempt_enable();
}

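/*
 * Begin writing a new cyc2ns entry for @cpu: pick the slot that is not
 * currently published as head, then wait for any straggling reader to
 * release it (tail catches up) before handing it to the caller.
 */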
static struct cyc2ns_data *cyc2ns_write_begin(int cpu)
{
	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
	struct cyc2ns_data *data = c2n->data;

	if (data == c2n->head)
		data++;

	while (c2n->tail == data)
		cpu_relax();

	return data;
}

static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data)
{
	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);

	smp_wmb();

	ACCESS_ONCE(c2n->head) = data;
}

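/*
 * cycles -> nanoseconds conversion used by sched_clock():
 *
 *	ns = cycles * (10^6 / cpu_khz)
 *	   = ((cycles * cyc2ns_mul) >> cyc2ns_shift) + cyc2ns_offset
 *
 * mul/shift are computed by clocks_calc_mult_shift() in set_cyc2ns_scale()
 * so that the multiplication stays within 64 bits via mul_u64_u32_shr().
 */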
static void cyc2ns_data_init(struct cyc2ns_data *data)
{
	data->cyc2ns_mul = 0;
	data->cyc2ns_shift = 0;
	data->cyc2ns_offset = 0;
	data->__count = 0;
}

static void cyc2ns_init(int cpu)
{
	struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);

	cyc2ns_data_init(&c2n->data[0]);
	cyc2ns_data_init(&c2n->data[1]);

	c2n->head = c2n->data;
	c2n->tail = c2n->data;
}

static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
	struct cyc2ns_data *data, *tail;
	unsigned long long ns;

	preempt_disable_notrace();
	data = this_cpu_read(cyc2ns.head);
	tail = this_cpu_read(cyc2ns.tail);

	if (likely(data == tail)) {
		ns = data->cyc2ns_offset;
		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);
	} else {
		data->__count++;

		barrier();

		ns = data->cyc2ns_offset;
		ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift);

		barrier();

		if (!--data->__count)
			this_cpu_write(cyc2ns.tail, data);
	}
	preempt_enable_notrace();

	return ns;
}

static void set_cyc2ns_scale(unsigned long khz, int cpu)
{
	unsigned long long tsc_now, ns_now;
	struct cyc2ns_data *data;
	unsigned long flags;

	local_irq_save(flags);
	sched_clock_idle_sleep_event();

	if (!khz)
		goto done;

	data = cyc2ns_write_begin(cpu);

	tsc_now = rdtsc();
	ns_now = cycles_2_ns(tsc_now);

	clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz,
			       NSEC_PER_MSEC, 0);

	if (data->cyc2ns_shift == 32) {
		data->cyc2ns_shift = 31;
		data->cyc2ns_mul >>= 1;
	}

	data->cyc2ns_offset = ns_now -
		mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift);

	cyc2ns_write_end(cpu, data);

done:
	sched_clock_idle_wakeup_event(0);
	local_irq_restore(flags);
}

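/*
 * Scheduler clock: returns current time in nanoseconds. Reads the TSC on
 * the fast path; falls back to a jiffies-based estimate while the TSC is
 * (still) disabled.
 */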
u64 native_sched_clock(void)
{
	if (static_branch_likely(&__use_tsc)) {
		u64 tsc_now = rdtsc();

		return cycles_2_ns(tsc_now);
	}

	return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
}

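/*
 * Generate a sched_clock() value from a caller-supplied TSC reading.
 */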
u64 native_sched_clock_from_tsc(u64 tsc)
{
	return cycles_2_ns(tsc);
}

/*
 * We need to define a real function for sched_clock(), to override the
 * weak default version.
 */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}
#else
unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif

int check_tsc_unstable(void)
{
	return tsc_unstable;
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);

#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
	pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
	tsc_disabled = 1;
	return 1;
}
#else
/*
 * Disable flag for tsc. Takes effect by clearing the TSC cpu flag.
 */
int __init notsc_setup(char *str)
{
	setup_clear_cpu_cap(X86_FEATURE_TSC);
	return 1;
}
#endif

__setup("notsc", notsc_setup);

static int no_sched_irq_time;

static int __init tsc_setup(char *str)
{
	if (!strcmp(str, "reliable"))
		tsc_clocksource_reliable = 1;
	if (!strncmp(str, "noirqtime", 9))
		no_sched_irq_time = 1;
	return 1;
}

__setup("tsc=", tsc_setup);

#define MAX_RETRIES	5
#define SMI_TRESHOLD	50000

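/*
 * Read the TSC and a reference counter (HPET or ACPI PM timer) back to
 * back; retry when the two TSC reads are further apart than SMI_TRESHOLD
 * cycles, which indicates the read was disturbed (e.g. by an SMI).
 */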
static u64 tsc_read_refs(u64 *p, int hpet)
{
	u64 t1, t2;
	int i;

	for (i = 0; i < MAX_RETRIES; i++) {
		t1 = get_cycles();
		if (hpet)
			*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
		else
			*p = acpi_pm_read_early();
		t2 = get_cycles();
		if ((t2 - t1) < SMI_TRESHOLD)
			return t2;
	}
	return ULLONG_MAX;
}

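/*
 * Compute the TSC frequency in kHz from an HPET reference interval: the
 * caller passes deltatsc already multiplied by 1e6, and we divide by the
 * elapsed HPET time converted from femtosecond ticks to nanoseconds.
 */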
static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
{
	u64 tmp;

	if (hpet2 < hpet1)
		hpet2 += 0x100000000ULL;
	hpet2 -= hpet1;
	tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
	do_div(tmp, 1000000);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}

/*
 * Calculate the TSC frequency from a PMTimer reference.
 */
static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
{
	u64 tmp;

	if (!pm1 && !pm2)
		return ULONG_MAX;

	if (pm2 < pm1)
		pm2 += (u64)ACPI_PM_OVRRUN;
	pm2 -= pm1;
	tmp = pm2 * 1000000000LL;
	do_div(tmp, PMTMR_TICKS_PER_SEC);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}

#define CAL_MS		10
#define CAL_LATCH	(PIT_TICK_RATE / (1000 / CAL_MS))
#define CAL_PIT_LOOPS	1000

#define CAL2_MS		50
#define CAL2_LATCH	(PIT_TICK_RATE / (1000 / CAL2_MS))
#define CAL2_PIT_LOOPS	5000

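/*
 * Try to calibrate the TSC against the Programmable Interval Timer:
 * program PIT channel 2 in mode 0 for the given latch/ms interval and
 * count TSC deltas until the output pin signals expiry. The run is only
 * trusted if enough PIT loops were seen and the TSC deltas stayed SMI-free.
 */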
static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
{
	u64 tsc, t1, t2, delta;
	unsigned long tscmin, tscmax;
	int pitcnt;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Setup CTC channel 2 for mode 0 (interrupt on terminal count),
	 * binary count. Set the latch register (LSB then MSB) to begin
	 * the countdown.
	 */
	outb(0xb0, 0x43);
	outb(latch & 0xff, 0x42);
	outb(latch >> 8, 0x42);

	tsc = t1 = t2 = get_cycles();

	pitcnt = 0;
	tscmax = 0;
	tscmin = ULONG_MAX;
	while ((inb(0x61) & 0x20) == 0) {
		t2 = get_cycles();
		delta = t2 - tsc;
		tsc = t2;
		if ((unsigned long) delta < tscmin)
			tscmin = (unsigned int) delta;
		if ((unsigned long) delta > tscmax)
			tscmax = (unsigned int) delta;
		pitcnt++;
	}

	/*
	 * Sanity checks:
	 *
	 * If we were not able to read the PIT more than loopmin times,
	 * or if the maximum TSC delta is more than 10 times the minimum,
	 * the run was most likely disturbed by an SMI.
	 */
	if (pitcnt < loopmin || tscmax > 10 * tscmin)
		return ULONG_MAX;

	/* Calculate the PIT value */
	delta = t2 - t1;
	do_div(delta, ms);
	return delta;
}

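/*
 * Fast TSC calibration against PIT channel 2 running as a free-falling
 * 16-bit counter: wait for MSB transitions of the count and time them with
 * the TSC. Each MSB step corresponds to 256 PIT ticks, so after i steps the
 * elapsed time is i*256/PIT_TICK_RATE seconds; the helpers below read and
 * sanity-check the MSB.
 */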
static inline int pit_verify_msb(unsigned char val)
{
	/* Ignore LSB */
	inb(0x42);
	return inb(0x42) == val;
}

static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
	int count;
	u64 tsc = 0, prev_tsc = 0;

	for (count = 0; count < 50000; count++) {
		if (!pit_verify_msb(val))
			break;
		prev_tsc = tsc;
		tsc = get_cycles();
	}
	*deltap = get_cycles() - prev_tsc;
	*tscp = tsc;

	/*
	 * We require _some_ success, but the quality control
	 * will be based on the error terms on the TSC values.
	 */
	return count > 5;
}

/*
 * How many MSB values do we want to see? We aim for
 * a maximum error rate of 500ppm (in practice the
 * real error is much smaller), but refuse to spend
 * more than 50ms on it.
 */
#define MAX_QUICK_PIT_MS 50
#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)

static unsigned long quick_pit_calibrate(void)
{
	int i;
	u64 tsc, delta;
	unsigned long d1, d2;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Counter 2, mode 0 (one-shot), binary count.
	 *
	 * Mode 0 waits for the latch to be loaded before it starts
	 * counting, which gives a clean measurement interval from the
	 * programmed start.
	 */
	outb(0xb0, 0x43);

	/* Start at 0xffff */
	outb(0xff, 0x42);
	outb(0xff, 0x42);

	/*
	 * The PIT starts counting at the next edge, so we
	 * need to delay for a microsecond. The easiest way
	 * to do that is to just read back the 16-bit counter
	 * once from the PIT.
	 */
	pit_verify_msb(0);

	if (pit_expect_msb(0xff, &tsc, &d1)) {
		for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
			if (!pit_expect_msb(0xff-i, &delta, &d2))
				break;

			delta -= tsc;

			/*
			 * Extrapolate the error and fail fast if the
			 * error will never be below 500 ppm.
			 */
			if (i == 1 &&
			    d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11)
				return 0;

			/*
			 * Iterate until the error is less than 500 ppm.
			 */
			if (d1+d2 >= delta >> 11)
				continue;

			/*
			 * Check the PIT one more time to verify that
			 * all TSC reads were stable wrt the PIT.
			 */
			if (!pit_verify_msb(0xfe - i))
				break;
			goto success;
		}
	}
	pr_info("Fast TSC calibration failed\n");
	return 0;

success:
	/*
	 * The MSB of the PIT decremented 'i' times with an error below
	 * 500 ppm, so the elapsed time is i*256 PIT ticks:
	 *
	 *	kHz = ((t2 - t1) * PIT_TICK_RATE) / (i * 256 * 1000)
	 */
	delta *= PIT_TICK_RATE;
	do_div(delta, i*256*1000);
	pr_info("Fast TSC calibration using PIT\n");
	return delta;
}

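/*
 * native_calibrate_tsc(): determine the TSC frequency directly from the
 * CPUID 0x15 crystal-clock/TSC ratio on Intel CPUs, falling back to known
 * per-model crystal frequencies when CPUID does not report the crystal.
 */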
unsigned long native_calibrate_tsc(void)
{
	unsigned int eax_denominator, ebx_numerator, ecx_hz, edx;
	unsigned int crystal_khz;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;

	if (boot_cpu_data.cpuid_level < 0x15)
		return 0;

	eax_denominator = ebx_numerator = ecx_hz = edx = 0;

	/* CPUID 15H TSC/Crystal ratio, plus optionally Crystal Hz */
	cpuid(0x15, &eax_denominator, &ebx_numerator, &ecx_hz, &edx);

	if (ebx_numerator == 0 || eax_denominator == 0)
		return 0;

	crystal_khz = ecx_hz / 1000;

	if (crystal_khz == 0) {
		switch (boot_cpu_data.x86_model) {
		case INTEL_FAM6_SKYLAKE_MOBILE:
		case INTEL_FAM6_SKYLAKE_DESKTOP:
		case INTEL_FAM6_KABYLAKE_MOBILE:
		case INTEL_FAM6_KABYLAKE_DESKTOP:
			crystal_khz = 24000;	/* 24.0 MHz */
			break;
		case INTEL_FAM6_SKYLAKE_X:
			crystal_khz = 25000;	/* 25.0 MHz */
			break;
		case INTEL_FAM6_ATOM_GOLDMONT:
			crystal_khz = 19200;	/* 19.2 MHz */
			break;
		}
	}

	return crystal_khz * ebx_numerator / eax_denominator;
}

static unsigned long cpu_khz_from_cpuid(void)
{
	unsigned int eax_base_mhz, ebx_max_mhz, ecx_bus_mhz, edx;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 0;

	if (boot_cpu_data.cpuid_level < 0x16)
		return 0;

	eax_base_mhz = ebx_max_mhz = ecx_bus_mhz = edx = 0;

	/* CPUID 16H: Base MHz, Max MHz, Bus MHz */
	cpuid(0x16, &eax_base_mhz, &ebx_max_mhz, &ecx_bus_mhz, &edx);

	return eax_base_mhz * 1000;
}

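/*
 * native_calibrate_cpu(): calibrate the CPU (TSC) frequency. Try the cheap
 * methods first (CPUID 0x16, MSR, fast PIT); otherwise run up to three
 * slower PIT calibrations cross-checked against HPET or the ACPI PM timer.
 */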
unsigned long native_calibrate_cpu(void)
{
	u64 tsc1, tsc2, delta, ref1, ref2;
	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
	unsigned long flags, latch, ms, fast_calibrate;
	int hpet = is_hpet_enabled(), i, loopmin;

	fast_calibrate = cpu_khz_from_cpuid();
	if (fast_calibrate)
		return fast_calibrate;

	fast_calibrate = cpu_khz_from_msr();
	if (fast_calibrate)
		return fast_calibrate;

	local_irq_save(flags);
	fast_calibrate = quick_pit_calibrate();
	local_irq_restore(flags);
	if (fast_calibrate)
		return fast_calibrate;

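	/*
	 * Run the PIT calibration up to three times, each time bracketed by
	 * TSC+reference (HPET/PMTIMER) reads. Keep the minimum of each
	 * method; if PIT and reference agree within 10% we are done early,
	 * otherwise switch to the longer 50ms PIT interval for the last run.
	 */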
	latch = CAL_LATCH;
	ms = CAL_MS;
	loopmin = CAL_PIT_LOOPS;

	for (i = 0; i < 3; i++) {
		unsigned long tsc_pit_khz;

		/*
		 * Read the start value and the reference count of
		 * hpet/pmtimer when available. Then do the PIT
		 * calibration and read the end value.
		 */
		local_irq_save(flags);
		tsc1 = tsc_read_refs(&ref1, hpet);
		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
		tsc2 = tsc_read_refs(&ref2, hpet);
		local_irq_restore(flags);

		/* Pick the lowest PIT TSC calibration so far */
		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);

		/* hpet or pmtimer available ? */
		if (ref1 == ref2)
			continue;

		/* Check, whether the sampling was disturbed by an SMI */
		if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
			continue;

		tsc2 = (tsc2 - tsc1) * 1000000LL;
		if (hpet)
			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
		else
			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);

		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);

		/* Check the reference deviation */
		delta = ((u64) tsc_pit_min) * 100;
		do_div(delta, tsc_ref_min);

		/*
		 * If both calibration results are inside a 10% window
		 * then we can be sure that the calibration succeeded.
		 * We break out of the loop right away and use the
		 * reference value, as it is more precise.
		 */
		if (delta >= 90 && delta <= 110) {
			pr_info("PIT calibration matches %s. %d loops\n",
				hpet ? "HPET" : "PMTIMER", i + 1);
			return tsc_ref_min;
		}

		/*
		 * If PIT failed more than once (which happens in
		 * virtualized environments), give the virtual PC a
		 * slightly longer calibration interval so the
		 * HPET/PMTIMER result becomes precise.
		 */
		if (i == 1 && tsc_pit_min == ULONG_MAX) {
			latch = CAL2_LATCH;
			ms = CAL2_MS;
			loopmin = CAL2_PIT_LOOPS;
		}
	}

	/*
	 * Now check the results.
	 */
	if (tsc_pit_min == ULONG_MAX) {
		/* PIT gave no useful value */
		pr_warn("Unable to calibrate against PIT\n");

		/* We don't have an alternative source, disable TSC */
		if (!hpet && !ref1 && !ref2) {
			pr_notice("No reference (HPET/PMTIMER) available\n");
			return 0;
		}

		/* The alternative source failed as well, disable TSC */
		if (tsc_ref_min == ULONG_MAX) {
			pr_warn("HPET/PMTIMER calibration failed\n");
			return 0;
		}

		/* Use the alternative source */
		pr_info("using %s reference calibration\n",
			hpet ? "HPET" : "PMTIMER");

		return tsc_ref_min;
	}

	/* We don't have an alternative source, use the PIT calibration value */
	if (!hpet && !ref1 && !ref2) {
		pr_info("Using PIT calibration value\n");
		return tsc_pit_min;
	}

	/* The alternative source failed, use the PIT calibration value */
	if (tsc_ref_min == ULONG_MAX) {
		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
		return tsc_pit_min;
	}

	/*
	 * The calibration values differ too much. In doubt, we use
	 * the PIT value, but let the user know about the deviation.
	 */
	pr_warn("PIT calibration deviates from %s: %lu %lu\n",
		hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
	pr_info("Using PIT calibration value\n");
	return tsc_pit_min;
}

int recalibrate_cpu_khz(void)
{
#ifndef CONFIG_SMP
	unsigned long cpu_khz_old = cpu_khz;

	if (!boot_cpu_has(X86_FEATURE_TSC))
		return -ENODEV;

	cpu_khz = x86_platform.calibrate_cpu();
	tsc_khz = x86_platform.calibrate_tsc();
	if (tsc_khz == 0)
		tsc_khz = cpu_khz;
	else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
		cpu_khz = tsc_khz;
	cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
						    cpu_khz_old, cpu_khz);

	return 0;
#else
	return -ENODEV;
#endif
}

EXPORT_SYMBOL(recalibrate_cpu_khz);

static unsigned long long cyc2ns_suspend;

void tsc_save_sched_clock_state(void)
{
	if (!sched_clock_stable())
		return;

	cyc2ns_suspend = sched_clock();
}

/*
 * Even on processors with invariant TSC, the TSC gets reset in some ACPI
 * system sleep states, and some BIOSes reinitialize it to an arbitrary
 * (still sync'd) value during resume. To cope with this, recompute the
 * cyc2ns_offset for each cpu so that sched_clock() continues from the
 * point where it was left off during suspend.
 */
void tsc_restore_sched_clock_state(void)
{
	unsigned long long offset;
	unsigned long flags;
	int cpu;

	if (!sched_clock_stable())
		return;

	local_irq_save(flags);

	/*
	 * We're coming out of suspend, there's no concurrency yet; don't
	 * bother being nice about the RCU stuff, just write to both
	 * data fields.
	 */
	this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0);
	this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0);

	offset = cyc2ns_suspend - sched_clock();

	for_each_possible_cpu(cpu) {
		per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset;
		per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset;
	}

	local_irq_restore(flags);
}

#ifdef CONFIG_CPU_FREQ

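/*
 * Frequency scaling support: when cpufreq changes the CPU frequency and the
 * TSC is not constant-rate, rescale loops_per_jiffy and tsc_khz relative to
 * the frequency seen at the first notification.
 */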
static unsigned int ref_freq;
static unsigned long loops_per_jiffy_ref;
static unsigned long tsc_khz_ref;

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				 void *data)
{
	struct cpufreq_freqs *freq = data;
	unsigned long *lpj;

	lpj = &boot_cpu_data.loops_per_jiffy;
#ifdef CONFIG_SMP
	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
#endif

	if (!ref_freq) {
		ref_freq = freq->old;
		loops_per_jiffy_ref = *lpj;
		tsc_khz_ref = tsc_khz;
	}
	if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
		*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
			mark_tsc_unstable("cpufreq changes");

		set_cyc2ns_scale(tsc_khz, freq->cpu);
	}

	return 0;
}

static struct notifier_block time_cpufreq_notifier_block = {
	.notifier_call = time_cpufreq_notifier
};

static int __init cpufreq_register_tsc_scaling(void)
{
	if (!boot_cpu_has(X86_FEATURE_TSC))
		return 0;
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;
	cpufreq_register_notifier(&time_cpufreq_notifier_block,
				  CPUFREQ_TRANSITION_NOTIFIER);
	return 0;
}

core_initcall(cpufreq_register_tsc_scaling);

#endif /* CONFIG_CPU_FREQ */

#define ART_CPUID_LEAF (0x15)
#define ART_MIN_DENOMINATOR (1)

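/*
 * If ART (Always Running Timer) is present, cache the CPUID 0x15 ratio and
 * the TSC_ADJUST offset needed to convert ART values to TSC values.
 */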
static void detect_art(void)
{
	unsigned int unused[2];

	if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF)
		return;

	cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator,
	      &art_to_tsc_numerator, unused, unused+1);

	/* Don't enable ART in a VM, non-stop TSC required */
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR) ||
	    !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
	    art_to_tsc_denominator < ART_MIN_DENOMINATOR)
		return;

	if (rdmsrl_safe(MSR_IA32_TSC_ADJUST, &art_to_tsc_offset))
		return;

	/* Make this available once the system is up */
	setup_force_cpu_cap(X86_FEATURE_ART);
}

/* clocksource code */

static struct clocksource clocksource_tsc;

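/*
 * Clocksource read callback. rdtsc_ordered() is used so the TSC read cannot
 * be speculated ahead of earlier loads, which keeps the value consistent
 * with respect to the timekeeping code that called us.
 */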
static cycle_t read_tsc(struct clocksource *cs)
{
	return (cycle_t)rdtsc_ordered();
}

/*
 * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc().
 */
static struct clocksource clocksource_tsc = {
	.name			= "tsc",
	.rating			= 300,
	.read			= read_tsc,
	.mask			= CLOCKSOURCE_MASK(64),
	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
				  CLOCK_SOURCE_MUST_VERIFY,
	.archdata		= { .vclock_mode = VCLOCK_TSC },
};

void mark_tsc_unstable(char *reason)
{
	if (!tsc_unstable) {
		tsc_unstable = 1;
		clear_sched_clock_stable();
		disable_sched_clock_irqtime();
		pr_info("Marking TSC unstable due to %s\n", reason);
		/* Change only the rating, when not registered */
		if (clocksource_tsc.mult)
			clocksource_mark_unstable(&clocksource_tsc);
		else {
			clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
			clocksource_tsc.rating = 0;
		}
	}
}

EXPORT_SYMBOL_GPL(mark_tsc_unstable);

static void __init check_system_tsc_reliable(void)
{
#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
	if (is_geode_lx()) {
		/* RTSC counts during suspend */
#define RTSC_SUSP 0x100
		unsigned long res_low, res_high;

		rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
		/* Geode_LX - the OLPC CPU has a very reliable TSC */
		if (res_low & RTSC_SUSP)
			tsc_clocksource_reliable = 1;
	}
#endif
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
		tsc_clocksource_reliable = 1;
}

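/*
 * Decide whether the TSC can be trusted to be synchronized across CPUs and
 * used as a clocksource on this box.
 */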
int unsynchronized_tsc(void)
{
	if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
		return 1;

#ifdef CONFIG_SMP
	if (apic_is_clustered_box())
		return 1;
#endif

	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;

	if (tsc_clocksource_reliable)
		return 0;

	/*
	 * Intel systems are normally all synchronized.
	 * Exceptions must mark TSC as unstable:
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		/* assume multi socket systems are not synchronized: */
		if (num_possible_cpus() > 1)
			return 1;
	}

	return 0;
}

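/*
 * Convert an ART value to a TSC value using the numerator/denominator and
 * offset discovered by detect_art().
 */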
struct system_counterval_t convert_art_to_tsc(cycle_t art)
{
	u64 tmp, res, rem;

	rem = do_div(art, art_to_tsc_denominator);

	res = art * art_to_tsc_numerator;
	tmp = rem * art_to_tsc_numerator;

	do_div(tmp, art_to_tsc_denominator);
	res += tmp + art_to_tsc_offset;

	return (struct system_counterval_t) {.cs = art_related_clocksource,
					     .cycles = res};
}
EXPORT_SYMBOL(convert_art_to_tsc);

static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);

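/*
 * tsc_refine_calibration_work - Further refine tsc freq calibration
 *
 * Since the PIT calibration is only good to within roughly 1%, re-measure
 * the TSC against HPET or the ACPI PM timer over a longer interval (the
 * work is scheduled twice, one second apart) and, if the result is within
 * 1% of the boot value, adopt it before registering the TSC clocksource.
 */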
static void tsc_refine_calibration_work(struct work_struct *work)
{
	static u64 tsc_start = -1, ref_start;
	static int hpet;
	u64 tsc_stop, ref_stop, delta;
	unsigned long freq;

	/* Don't bother refining TSC on unstable systems */
	if (check_tsc_unstable())
		goto out;

	/*
	 * Since the work is started early in boot, we may be
	 * delayed the first time we expire. So set the workqueue
	 * again once we know timers are working.
	 */
	if (tsc_start == -1) {
		/*
		 * Only set hpet once, to avoid mixing hardware
		 * if the hpet becomes enabled later.
		 */
		hpet = is_hpet_enabled();
		schedule_delayed_work(&tsc_irqwork, HZ);
		tsc_start = tsc_read_refs(&ref_start, hpet);
		return;
	}

	tsc_stop = tsc_read_refs(&ref_stop, hpet);

	/* hpet or pmtimer available ? */
	if (ref_start == ref_stop)
		goto out;

	/* Check, whether the sampling was disturbed by an SMI */
	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
		goto out;

	delta = tsc_stop - tsc_start;
	delta *= 1000000LL;
	if (hpet)
		freq = calc_hpet_ref(delta, ref_start, ref_stop);
	else
		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);

	/* Make sure we're within 1% */
	if (abs(tsc_khz - freq) > tsc_khz/100)
		goto out;

	tsc_khz = freq;
	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
		(unsigned long)tsc_khz / 1000,
		(unsigned long)tsc_khz % 1000);

	/* Inform the TSC deadline clockevent devices about the recalibration */
	lapic_update_tsc_freq();

out:
	if (boot_cpu_has(X86_FEATURE_ART))
		art_related_clocksource = &clocksource_tsc;
	clocksource_register_khz(&clocksource_tsc, tsc_khz);
}

static int __init init_tsc_clocksource(void)
{
	if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
		return 0;

	if (tsc_clocksource_reliable)
		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;

	if (check_tsc_unstable()) {
		clocksource_tsc.rating = 0;
		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
	}

	if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
		clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;

	/*
	 * Trust the results of the earlier calibration on systems
	 * exporting a reliable TSC.
	 */
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
		clocksource_register_khz(&clocksource_tsc, tsc_khz);
		return 0;
	}

	schedule_delayed_work(&tsc_irqwork, 0);
	return 0;
}

/*
 * We use device_initcall here to ensure this runs after the hpet is fully
 * initialized, which may occur at fs_initcall time.
 */
device_initcall(init_tsc_clocksource);

void __init tsc_init(void)
{
	u64 lpj;
	int cpu;

	if (!boot_cpu_has(X86_FEATURE_TSC)) {
		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
		return;
	}

	cpu_khz = x86_platform.calibrate_cpu();
	tsc_khz = x86_platform.calibrate_tsc();

	/*
	 * Trust non-zero tsc_khz as authoritative, and use it to sanity
	 * check cpu_khz, which will be off if the system timer is off.
	 */
	if (tsc_khz == 0)
		tsc_khz = cpu_khz;
	else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
		cpu_khz = tsc_khz;

	if (!tsc_khz) {
		mark_tsc_unstable("could not calculate TSC khz");
		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
		return;
	}

	pr_info("Detected %lu.%03lu MHz processor\n",
		(unsigned long)cpu_khz / 1000,
		(unsigned long)cpu_khz % 1000);

	/*
	 * Secondary CPUs do not run through tsc_init(), so set up all the
	 * scale factors for all CPUs, assuming the same speed as the bootup
	 * CPU. (cpufreq notifiers will fix this up if their speed differs.)
	 */
	for_each_possible_cpu(cpu) {
		cyc2ns_init(cpu);
		set_cyc2ns_scale(tsc_khz, cpu);
	}

	if (tsc_disabled > 0)
		return;

	/* now allow native_sched_clock() to use rdtsc */
	tsc_disabled = 0;
	static_branch_enable(&__use_tsc);

	if (!no_sched_irq_time)
		enable_sched_clock_irqtime();

	lpj = ((u64)tsc_khz * 1000);
	do_div(lpj, HZ);
	lpj_fine = lpj;

	use_tsc_delay();

	if (unsynchronized_tsc())
		mark_tsc_unstable("TSCs unsynchronized");

	check_system_tsc_reliable();

	detect_art();
}

#ifdef CONFIG_SMP
/*
 * If we have a constant TSC and are using the TSC for the delay loop, we
 * can skip clock calibration if another cpu in the same socket has already
 * been calibrated. This assumes that CONSTANT_TSC applies to all cpus in
 * the socket.
 */
unsigned long calibrate_delay_is_known(void)
{
	int sibling, cpu = smp_processor_id();
	struct cpumask *mask = topology_core_cpumask(cpu);

	if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
		return 0;

	if (!mask)
		return 0;

	sibling = cpumask_any_but(mask, cpu);
	if (sibling < nr_cpu_ids)
		return cpu_data(sibling).loops_per_jiffy;
	return 0;
}
#endif