#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/acpi_pmtmr.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/clocksource.h>
#include <linux/percpu.h>
#include <linux/timex.h>

#include <asm/hpet.h>
#include <asm/timer.h>
#include <asm/vgtod.h>
#include <asm/time.h>
#include <asm/delay.h>
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>

unsigned int __read_mostly cpu_khz;
EXPORT_SYMBOL(cpu_khz);

unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);

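/*
 * The TSC can become unstable at runtime, for example because of cpufreq
 * frequency scaling or because the TSCs of different packages are not
 * synchronized.
 */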
static int __read_mostly tsc_unstable;

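/*
 * native_sched_clock() is called before tsc_init(), so the TSC starts out
 * soft-disabled (-1) to prevent an erroneous RDTSC on processors that have
 * no TSC at all.
 */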
static int __read_mostly tsc_disabled = -1;

int tsc_clocksource_reliable;

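/*
 * Scheduler clock - returns current time in nanoseconds.
 */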
u64 native_sched_clock(void)
{
	u64 this_offset;

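	/*
	 * Fall back to jiffies if there's no TSC available. Note that the
	 * jiffies path is also used while the TSC is merely marked unstable:
	 * unlike time of day, the scheduler clock tolerates small errors,
	 * but it has to be as fast as the platform can make it.
	 */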
	if (unlikely(tsc_disabled)) {
		/* No locking, but a rare wrong value is not a big deal: */
		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
	}

	/* Read the Time Stamp Counter: */
	rdtscll(this_offset);

	/* Return the value in nanoseconds: */
	return __cycles_2_ns(this_offset);
}

/*
 * We need to define a real function for sched_clock(), to override the
 * weak default version.
 */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}
#else
unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif

unsigned long long native_read_tsc(void)
{
	return __native_read_tsc();
}
EXPORT_SYMBOL(native_read_tsc);

int check_tsc_unstable(void)
{
	return tsc_unstable;
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);

#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
	pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
	tsc_disabled = 1;
	return 1;
}
#else
/*
 * Disable flag for the TSC. Takes effect by clearing the TSC cpu
 * capability bit in cpu/common.c.
 */
int __init notsc_setup(char *str)
{
	setup_clear_cpu_cap(X86_FEATURE_TSC);
	return 1;
}
#endif

__setup("notsc", notsc_setup);

static int no_sched_irq_time;

static int __init tsc_setup(char *str)
{
	if (!strcmp(str, "reliable"))
		tsc_clocksource_reliable = 1;
	if (!strncmp(str, "noirqtime", 9))
		no_sched_irq_time = 1;
	return 1;
}

__setup("tsc=", tsc_setup);

#define MAX_RETRIES	5
#define SMI_THRESHOLD	50000

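/*
 * Read the TSC and the reference counter (HPET or ACPI PM timer) as a
 * pair. If an SMI hits between the two reads, the pair is inconsistent:
 * retry up to MAX_RETRIES times and return ULLONG_MAX if no read pair
 * completes within SMI_THRESHOLD cycles.
 */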
static u64 tsc_read_refs(u64 *p, int hpet)
{
	u64 t1, t2;
	int i;

	for (i = 0; i < MAX_RETRIES; i++) {
		t1 = get_cycles();
		if (hpet)
			*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
		else
			*p = acpi_pm_read_early();
		t2 = get_cycles();
		if ((t2 - t1) < SMI_THRESHOLD)
			return t2;
	}
	return ULLONG_MAX;
}
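/*
 * Calculate the TSC frequency from the HPET reference: the HPET delta is
 * converted to nanoseconds via HPET_PERIOD (femtoseconds per tick), and
 * deltatsc arrives pre-scaled by 1e6, so the final division yields kHz.
 */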
static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
{
	u64 tmp;

	if (hpet2 < hpet1)
		hpet2 += 0x100000000ULL;
	hpet2 -= hpet1;
	tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
	do_div(tmp, 1000000);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}
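/*
 * Calculate the TSC frequency from the ACPI PM timer reference: the PM
 * timer runs at PMTMR_TICKS_PER_SEC, so its delta is converted to
 * nanoseconds first; dividing the pre-scaled TSC delta by that again
 * yields kHz.
 */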
static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
{
	u64 tmp;

	if (!pm1 && !pm2)
		return ULONG_MAX;

	if (pm2 < pm1)
		pm2 += (u64)ACPI_PM_OVRRUN;
	pm2 -= pm1;
	tmp = pm2 * 1000000000LL;
	do_div(tmp, PMTMR_TICKS_PER_SEC);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}

#define CAL_MS		10
#define CAL_LATCH	(PIT_TICK_RATE / (1000 / CAL_MS))
#define CAL_PIT_LOOPS	1000

#define CAL2_MS		50
#define CAL2_LATCH	(PIT_TICK_RATE / (1000 / CAL2_MS))
#define CAL2_PIT_LOOPS	5000
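/*
 * Try to calibrate the TSC against the Programmable Interrupt Timer:
 * program PIT channel 2 as a one-shot countdown of 'latch' ticks (i.e.
 * 'ms' milliseconds), poll until the countdown expires and derive the
 * TSC frequency in kHz from the TSC delta over that window.
 *
 * Return ULONG_MAX on failure to calibrate.
 */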
static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
{
	u64 tsc, t1, t2, delta;
	unsigned long tscmin, tscmax;
	int pitcnt;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Set up PIT channel 2 in mode 0 (interrupt on terminal count),
	 * binary count, and load the latch value (LSB then MSB) to start
	 * the countdown.
	 */
	outb(0xb0, 0x43);
	outb(latch & 0xff, 0x42);
	outb(latch >> 8, 0x42);

	tsc = t1 = t2 = get_cycles();

	pitcnt = 0;
	tscmax = 0;
	tscmin = ULONG_MAX;
	while ((inb(0x61) & 0x20) == 0) {
		t2 = get_cycles();
		delta = t2 - tsc;
		tsc = t2;
		if ((unsigned long) delta < tscmin)
			tscmin = (unsigned int) delta;
		if ((unsigned long) delta > tscmax)
			tscmax = (unsigned int) delta;
		pitcnt++;
	}

	/*
	 * Sanity checks:
	 *
	 * If we were not able to read the PIT more than loopmin
	 * times, then we have been hit by a massive SMI.
	 *
	 * If the maximum is 10 times larger than the minimum,
	 * then we got hit by an SMI as well.
	 */
	if (pitcnt < loopmin || tscmax > 10 * tscmin)
		return ULONG_MAX;

	/* Calculate the PIT value */
	delta = t2 - t1;
	do_div(delta, ms);
	return delta;
}
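/*
 * Helpers for the fast PIT-based calibration below. pit_verify_msb()
 * reads the current MSB of the PIT counter (the LSB is read first and
 * discarded) and checks that it still has the expected value;
 * pit_expect_msb() spins while the MSB stays at 'val', recording the TSC
 * on each successful check and reporting in '*deltap' the uncertainty
 * window of the last successful TSC read.
 *
 * The underlying assumptions are that the PIT runs at its nominal
 * ~1.193 MHz rate and that each port read costs on the order of a
 * microsecond, so the MSB cannot legitimately change more than once
 * between two consecutive reads.
 */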
static inline int pit_verify_msb(unsigned char val)
{
	/* Ignore LSB */
	inb(0x42);
	return inb(0x42) == val;
}

static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
	int count;
	u64 tsc = 0, prev_tsc = 0;

	for (count = 0; count < 50000; count++) {
		if (!pit_verify_msb(val))
			break;
		prev_tsc = tsc;
		tsc = get_cycles();
	}
	*deltap = get_cycles() - prev_tsc;
	*tscp = tsc;

	/*
	 * We require _some_ success, but the quality control
	 * will be based on the error terms on the TSC values.
	 */
	return count > 5;
}

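/*
 * Upper bound on how long the fast PIT calibration may run. The 16-bit
 * PIT counter counts down from 0xffff at ~1.193 MHz and therefore wraps
 * after roughly 55 ms, so 50 ms keeps the whole measurement inside a
 * single countdown; each observed MSB step corresponds to 256 PIT ticks
 * (about 215 us) of measurement window.
 */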
#define MAX_QUICK_PIT_MS 50
#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)

static unsigned long quick_pit_calibrate(void)
{
	int i;
	u64 tsc, delta;
	unsigned long d1, d2;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Counter 2, mode 0 (one-shot), binary count.
	 *
	 * Mode 0 is used so that the counter decrements by one and the
	 * individual counts can be observed directly.
	 */
	outb(0xb0, 0x43);

	/* Start at 0xffff */
	outb(0xff, 0x42);
	outb(0xff, 0x42);

	/*
	 * The PIT starts counting at the next edge, so we
	 * need to delay for a microsecond. The easiest way
	 * to do that is to just read back the 16-bit counter
	 * once from the PIT.
	 */
	pit_verify_msb(0);

	if (pit_expect_msb(0xff, &tsc, &d1)) {
		for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
			if (!pit_expect_msb(0xff-i, &delta, &d2))
				break;

			/*
			 * Iterate until the error is less than 500 ppm
			 */
			delta -= tsc;
			if (d1+d2 >= delta >> 11)
				continue;

			/*
			 * Check the PIT one more time to verify that
			 * all TSC reads were stable wrt the PIT.
			 *
			 * This also guarantees serialization of the
			 * last cycle read ('d2') in pit_expect_msb.
			 */
			if (!pit_verify_msb(0xfe - i))
				break;
			goto success;
		}
	}
	pr_err("Fast TSC calibration failed\n");
	return 0;

success:
	/*
	 * If we get here, we have seen the MSB of the PIT decrement
	 * 'i' times and the error has shrunk to less than 500 ppm,
	 * so the TSC reads are reliable (within that error).
	 *
	 * kHz = ticks / time-in-seconds / 1000;
	 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
	 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
	 */
	delta *= PIT_TICK_RATE;
	do_div(delta, i*256*1000);
	pr_info("Fast TSC calibration using PIT\n");
	return delta;
}

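/**
 * native_calibrate_tsc - calibrate the tsc on boot
 */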
unsigned long native_calibrate_tsc(void)
{
	u64 tsc1, tsc2, delta, ref1, ref2;
	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
	unsigned long flags, latch, ms, fast_calibrate;
	int hpet = is_hpet_enabled(), i, loopmin;

	local_irq_save(flags);
	fast_calibrate = quick_pit_calibrate();
	local_irq_restore(flags);
	if (fast_calibrate)
		return fast_calibrate;

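	/*
	 * Run the calibration loop up to three times and keep the lowest
	 * frequency value (the best estimate). Two calibration modes are
	 * combined in each pass:
	 *
	 * 1) PIT loop: program PIT channel 2 as a one-shot countdown of
	 *    'ms' milliseconds and poll it until it expires, reading the
	 *    TSC in every iteration. The min/max of the TSC deltas between
	 *    iterations is dominated by the PIT port IO time, so a max that
	 *    is much larger than the min indicates an SMI/SMM disturbance
	 *    and the result is discarded (see pit_calibrate_tsc()).
	 *
	 * 2) Reference counter: if the HPET or the ACPI PM timer is
	 *    available, read it together with the TSC before and after the
	 *    PIT loop (tsc_read_refs() already rejects reads disturbed by
	 *    an SMI) and compute an independent frequency estimate to
	 *    cross-check the PIT result.
	 */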
	latch = CAL_LATCH;
	ms = CAL_MS;
	loopmin = CAL_PIT_LOOPS;

	for (i = 0; i < 3; i++) {
		unsigned long tsc_pit_khz;

		/*
		 * Read the start value and the reference count of
		 * hpet/pmtimer when available. Then do the PIT
		 * calibration, which will take at least 'ms'
		 * milliseconds, and read the end value.
		 */
		local_irq_save(flags);
		tsc1 = tsc_read_refs(&ref1, hpet);
		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
		tsc2 = tsc_read_refs(&ref2, hpet);
		local_irq_restore(flags);

		/* Pick the lowest PIT TSC calibration so far */
		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);

		/* hpet or pmtimer available ? */
		if (ref1 == ref2)
			continue;

		/* Check whether the sampling was disturbed by an SMI */
		if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
			continue;

		tsc2 = (tsc2 - tsc1) * 1000000LL;
		if (hpet)
			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
		else
			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);

		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);

		/* Check the reference deviation */
		delta = ((u64) tsc_pit_min) * 100;
		do_div(delta, tsc_ref_min);

		/*
		 * If both calibration results are inside a 10% window
		 * then we can be sure that the calibration succeeded.
		 * We break out of the loop right away and use the
		 * reference value, as it is more precise.
		 */
		if (delta >= 90 && delta <= 110) {
			pr_info("PIT calibration matches %s. %d loops\n",
				hpet ? "HPET" : "PMTIMER", i + 1);
			return tsc_ref_min;
		}

		/*
		 * Check whether the PIT failed more than once. This
		 * happens in virtualized environments: give the
		 * virtual machine a slightly longer timeframe for the
		 * HPET/PMTIMER to make the result precise.
		 */
		if (i == 1 && tsc_pit_min == ULONG_MAX) {
			latch = CAL2_LATCH;
			ms = CAL2_MS;
			loopmin = CAL2_PIT_LOOPS;
		}
	}

	/*
	 * Now check the results.
	 */
	if (tsc_pit_min == ULONG_MAX) {
		/* The PIT gave no useful value */
		pr_warn("Unable to calibrate against PIT\n");

		/* We don't have an alternative source, disable TSC */
		if (!hpet && !ref1 && !ref2) {
			pr_notice("No reference (HPET/PMTIMER) available\n");
			return 0;
		}

		/* The alternative source failed as well, disable TSC */
		if (tsc_ref_min == ULONG_MAX) {
			pr_warn("HPET/PMTIMER calibration failed\n");
			return 0;
		}

		/* Use the alternative source */
		pr_info("using %s reference calibration\n",
			hpet ? "HPET" : "PMTIMER");

		return tsc_ref_min;
	}

	/* We don't have an alternative source, use the PIT calibration value */
	if (!hpet && !ref1 && !ref2) {
		pr_info("Using PIT calibration value\n");
		return tsc_pit_min;
	}

	/* The alternative source failed, use the PIT calibration value */
	if (tsc_ref_min == ULONG_MAX) {
		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
		return tsc_pit_min;
	}

	/*
	 * The calibration values differ too much. In doubt, prefer the
	 * PIT value, since reference timers running at the wrong speed
	 * are known to exist. At least let the user know:
	 */
	pr_warn("PIT calibration deviates from %s: %lu %lu\n",
		hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
	pr_info("Using PIT calibration value\n");
	return tsc_pit_min;
}

int recalibrate_cpu_khz(void)
{
#ifndef CONFIG_SMP
	unsigned long cpu_khz_old = cpu_khz;

	if (cpu_has_tsc) {
		tsc_khz = x86_platform.calibrate_tsc();
		cpu_khz = tsc_khz;
		cpu_data(0).loops_per_jiffy =
			cpufreq_scale(cpu_data(0).loops_per_jiffy,
					cpu_khz_old, cpu_khz);
		return 0;
	} else
		return -ENODEV;
#else
	return -ENODEV;
#endif
}

EXPORT_SYMBOL(recalibrate_cpu_khz);

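/*
 * Accelerators for sched_clock(): convert TSC cycles (64 bits) to
 * nanoseconds (64 bits).
 *
 * Basic equation:
 *	ns = cycles / (freq / ns_per_sec)
 *	   = cycles * (ns_per_sec / freq)
 *	   = cycles * (10^6 / cpu_khz)
 *
 * To avoid a division in the fast path, pre-scale by a power of two
 * (CYC2NS_SCALE_FACTOR) so the division becomes a shift:
 *	ns = (cycles * (10^6 << CYC2NS_SCALE_FACTOR) / cpu_khz)
 *		>> CYC2NS_SCALE_FACTOR
 *
 * Using a kHz divisor keeps the precomputed scale factor small enough to
 * fit in 32 bits while retaining good precision.
 */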
DEFINE_PER_CPU(unsigned long, cyc2ns);
DEFINE_PER_CPU(unsigned long long, cyc2ns_offset);

static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
	unsigned long long tsc_now, ns_now, *offset;
	unsigned long flags, *scale;

	local_irq_save(flags);
	sched_clock_idle_sleep_event();

	scale = &per_cpu(cyc2ns, cpu);
	offset = &per_cpu(cyc2ns_offset, cpu);

	rdtscll(tsc_now);
	ns_now = __cycles_2_ns(tsc_now);

	if (cpu_khz) {
		*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) / cpu_khz;
		*offset = ns_now - mult_frac(tsc_now, *scale,
					     (1UL << CYC2NS_SCALE_FACTOR));
	}

	sched_clock_idle_wakeup_event(0);
	local_irq_restore(flags);
}

static unsigned long long cyc2ns_suspend;

void tsc_save_sched_clock_state(void)
{
	if (!sched_clock_stable)
		return;

	cyc2ns_suspend = sched_clock();
}
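/*
 * Even on processors with an invariant TSC, the TSC gets reset in some
 * ACPI system sleep states, and some BIOSes reinitialize it to an
 * arbitrary (still CPU-synchronized) value during resume. To cope with
 * this, recompute cyc2ns_offset for each cpu so that sched_clock()
 * continues from the point where it left off before suspend.
 */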
void tsc_restore_sched_clock_state(void)
{
	unsigned long long offset;
	unsigned long flags;
	int cpu;

	if (!sched_clock_stable)
		return;

	local_irq_save(flags);

	__this_cpu_write(cyc2ns_offset, 0);
	offset = cyc2ns_suspend - sched_clock();

	for_each_possible_cpu(cpu)
		per_cpu(cyc2ns_offset, cpu) = offset;

	local_irq_restore(flags);
}

#ifdef CONFIG_CPU_FREQ

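/*
 * Frequency scaling support: when cpufreq changes the CPU frequency and
 * the TSC is not invariant, loops_per_jiffy and tsc_khz have to be
 * rescaled, the cyc2ns conversion has to be refreshed, and the TSC is
 * marked unstable as a clocksource.
 *
 * Note: tsc_khz is global, so all CPUs are assumed to change frequency
 * together.
 */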
static unsigned int ref_freq;
static unsigned long loops_per_jiffy_ref;
static unsigned long tsc_khz_ref;

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct cpufreq_freqs *freq = data;
	unsigned long *lpj;

	if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
		return 0;

	lpj = &boot_cpu_data.loops_per_jiffy;
#ifdef CONFIG_SMP
	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
#endif

	if (!ref_freq) {
		ref_freq = freq->old;
		loops_per_jiffy_ref = *lpj;
		tsc_khz_ref = tsc_khz;
	}
	if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
	    (val == CPUFREQ_RESUMECHANGE)) {
		*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
			mark_tsc_unstable("cpufreq changes");
	}

	set_cyc2ns_scale(tsc_khz, freq->cpu);

	return 0;
}

static struct notifier_block time_cpufreq_notifier_block = {
	.notifier_call = time_cpufreq_notifier
};

static int __init cpufreq_tsc(void)
{
	if (!cpu_has_tsc)
		return 0;
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;
	cpufreq_register_notifier(&time_cpufreq_notifier_block,
				  CPUFREQ_TRANSITION_NOTIFIER);
	return 0;
}

core_initcall(cpufreq_tsc);

#endif /* CONFIG_CPU_FREQ */

static struct clocksource clocksource_tsc;

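/*
 * Clamp the returned value to cycle_last to avoid a nasty time warp:
 * right after one CPU has updated cycle_last, another CPU whose TSC is
 * marginally behind can read a value that is slightly smaller. The
 * timekeeping core computes deltas as unsigned, so such a small negative
 * delta would otherwise show up as a huge jump forward in time.
 */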
static cycle_t read_tsc(struct clocksource *cs)
{
	cycle_t ret = (cycle_t)get_cycles();

	return ret >= clocksource_tsc.cycle_last ?
		ret : clocksource_tsc.cycle_last;
}

static void resume_tsc(struct clocksource *cs)
{
	clocksource_tsc.cycle_last = 0;
}

static struct clocksource clocksource_tsc = {
	.name		= "tsc",
	.rating		= 300,
	.read		= read_tsc,
	.resume		= resume_tsc,
	.mask		= CLOCKSOURCE_MASK(64),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS |
			  CLOCK_SOURCE_MUST_VERIFY,
#ifdef CONFIG_X86_64
	.archdata	= { .vclock_mode = VCLOCK_TSC },
#endif
};

void mark_tsc_unstable(char *reason)
{
	if (!tsc_unstable) {
		tsc_unstable = 1;
		sched_clock_stable = 0;
		disable_sched_clock_irqtime();
		pr_info("Marking TSC unstable due to %s\n", reason);
		/* Change only the rating, when not registered */
		if (clocksource_tsc.mult)
			clocksource_mark_unstable(&clocksource_tsc);
		else {
			clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
			clocksource_tsc.rating = 0;
		}
	}
}

EXPORT_SYMBOL_GPL(mark_tsc_unstable);

static void __init check_system_tsc_reliable(void)
{
#ifdef CONFIG_MGEODE_LX
	/* RTSC counts during suspend */
#define RTSC_SUSP 0x100
	unsigned long res_low, res_high;

	rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);

	/* Geode_LX - the OLPC CPU has a very reliable TSC */
	if (res_low & RTSC_SUSP)
		tsc_clocksource_reliable = 1;
#endif
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
		tsc_clocksource_reliable = 1;
}
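/*
 * Make an educated guess whether the TSC is trustworthy and synchronized
 * across all CPUs.
 */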
__cpuinit int unsynchronized_tsc(void)
{
	if (!cpu_has_tsc || tsc_unstable)
		return 1;

#ifdef CONFIG_SMP
	if (apic_is_clustered_box())
		return 1;
#endif

	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;

	if (tsc_clocksource_reliable)
		return 0;
	/*
	 * Intel systems are normally all synchronized.
	 * Exceptions must mark TSC as unstable:
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		/* Assume multi socket systems are not synchronized: */
		if (num_possible_cpus() > 1)
			return 1;
	}

	return 0;
}

static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);

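/**
 * tsc_refine_calibration_work - Further refine tsc freq calibration
 * @work: workqueue task pointer
 *
 * This function uses delayed work over a period of one second to further
 * refine the TSC frequency value. Since this is timer based, instead of
 * loop based, we don't block the boot process while this longer
 * calibration is done.
 *
 * If there are any calibration anomalies (too many SMIs, etc), or the
 * refined calibration is off by 1% of the early boot-time calibration,
 * the refined value is thrown out and the early calibration is kept.
 */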
static void tsc_refine_calibration_work(struct work_struct *work)
{
	static u64 tsc_start = -1, ref_start;
	static int hpet;
	u64 tsc_stop, ref_stop, delta;
	unsigned long freq;

	/* Don't bother refining TSC on unstable systems */
	if (check_tsc_unstable())
		goto out;

	/*
	 * Since the work is started early in boot, we may be
	 * delayed the first time we expire. So set the workqueue
	 * again once we know timers are working.
	 */
	if (tsc_start == -1) {
		/*
		 * Only set hpet once, to avoid mixing hardware
		 * if the hpet becomes enabled later.
		 */
		hpet = is_hpet_enabled();
		schedule_delayed_work(&tsc_irqwork, HZ);
		tsc_start = tsc_read_refs(&ref_start, hpet);
		return;
	}

	tsc_stop = tsc_read_refs(&ref_stop, hpet);

	/* hpet or pmtimer available ? */
	if (ref_start == ref_stop)
		goto out;

	/* Check whether the sampling was disturbed by an SMI */
	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
		goto out;

	delta = tsc_stop - tsc_start;
	delta *= 1000000LL;
	if (hpet)
		freq = calc_hpet_ref(delta, ref_start, ref_stop);
	else
		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);

	/* Make sure we're within 1% */
	if (abs(tsc_khz - freq) > tsc_khz/100)
		goto out;

	tsc_khz = freq;
	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
		(unsigned long)tsc_khz / 1000,
		(unsigned long)tsc_khz % 1000);

out:
	clocksource_register_khz(&clocksource_tsc, tsc_khz);
}

static int __init init_tsc_clocksource(void)
{
	if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
		return 0;

	if (tsc_clocksource_reliable)
		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;

	/* Lower the rating if we already know it's unstable: */
	if (check_tsc_unstable()) {
		clocksource_tsc.rating = 0;
		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
	}

	/*
	 * Trust the results of the earlier calibration on systems
	 * exporting a reliable TSC.
	 */
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
		clocksource_register_khz(&clocksource_tsc, tsc_khz);
		return 0;
	}

	schedule_delayed_work(&tsc_irqwork, 0);
	return 0;
}

/*
 * We use device_initcall here to ensure that we run after the HPET
 * is fully initialized, which may happen as late as fs_initcall time.
 */
device_initcall(init_tsc_clocksource);

void __init tsc_init(void)
{
	u64 lpj;
	int cpu;

	x86_init.timers.tsc_pre_init();

	if (!cpu_has_tsc)
		return;

	tsc_khz = x86_platform.calibrate_tsc();
	cpu_khz = tsc_khz;

	if (!tsc_khz) {
		mark_tsc_unstable("could not calculate TSC khz");
		return;
	}

	pr_info("Detected %lu.%03lu MHz processor\n",
		(unsigned long)cpu_khz / 1000,
		(unsigned long)cpu_khz % 1000);

	/*
	 * Secondary CPUs do not run through tsc_init(), so set up
	 * all the scale factors for all CPUs, assuming the same
	 * speed as the bootup CPU. (cpufreq notifiers will fix this
	 * up if their speed diverges)
	 */
	for_each_possible_cpu(cpu)
		set_cyc2ns_scale(cpu_khz, cpu);

	if (tsc_disabled > 0)
		return;

	/* Now allow native_sched_clock() to use rdtsc */
	tsc_disabled = 0;

	if (!no_sched_irq_time)
		enable_sched_clock_irqtime();

	lpj = ((u64)tsc_khz * 1000);
	do_div(lpj, HZ);
	lpj_fine = lpj;

	use_tsc_delay();

	if (unsynchronized_tsc())
		mark_tsc_unstable("TSCs unsynchronized");

	check_system_tsc_reliable();
}

#ifdef CONFIG_SMP
/*
 * If we have a constant TSC and are using the TSC for the delay loop,
 * we can skip clock calibration if another cpu in the same socket has
 * already been calibrated. This assumes that CONSTANT_TSC applies to
 * all cpus in the socket - this should be a safe assumption.
 */
unsigned long __cpuinit calibrate_delay_is_known(void)
{
	int i, cpu = smp_processor_id();

	if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
		return 0;

	for_each_online_cpu(i)
		if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
			return cpu_data(i).loops_per_jiffy;
	return 0;
}
#endif