#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/acpi_pmtmr.h>
#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/clocksource.h>
#include <linux/percpu.h>
#include <linux/timex.h>

#include <asm/hpet.h>
#include <asm/timer.h>
#include <asm/vgtod.h>
#include <asm/time.h>
#include <asm/delay.h>
#include <asm/hypervisor.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>

unsigned int __read_mostly cpu_khz;
EXPORT_SYMBOL(cpu_khz);

unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);

/*
 * TSC can be unstable due to cpufreq or due to unsynced TSCs
 */
static int __read_mostly tsc_unstable;

/*
 * native_sched_clock() is called before tsc_init(), so we must start
 * with the TSC soft-disabled to prevent erroneous rdtsc usage on
 * !cpu_has_tsc processors.
 */
static int __read_mostly tsc_disabled = -1;

int tsc_clocksource_reliable;

/*
 * Scheduler clock - returns current time in nanosec units.
 */
u64 native_sched_clock(void)
{
	u64 this_offset;

	/*
	 * Fall back to jiffies if there's no TSC available. Note that
	 * we still use the TSC if it is merely marked unstable: unlike
	 * time-of-day, the scheduler clock tolerates small errors, and
	 * it is important for it to be as fast as the platform allows.
	 */
	if (unlikely(tsc_disabled)) {
		/* No locking but a rare wrong value is not a big deal: */
		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
	}

	/* read the Time Stamp Counter: */
	rdtscll(this_offset);

	/* return the value in ns */
	return __cycles_2_ns(this_offset);
}

/*
 * We need to define a real function for sched_clock() to override the
 * weak default version.
 */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}
#else
unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif

int check_tsc_unstable(void)
{
	return tsc_unstable;
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);

#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
	pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
	tsc_disabled = 1;
	return 1;
}
#else
/*
 * Disable flag for tsc. Takes effect by clearing the TSC cpu flag
 * in cpu/common.c
 */
int __init notsc_setup(char *str)
{
	setup_clear_cpu_cap(X86_FEATURE_TSC);
	return 1;
}
#endif

__setup("notsc", notsc_setup);

static int no_sched_irq_time;

static int __init tsc_setup(char *str)
{
	if (!strcmp(str, "reliable"))
		tsc_clocksource_reliable = 1;
	if (!strncmp(str, "noirqtime", 9))
		no_sched_irq_time = 1;
	return 1;
}

__setup("tsc=", tsc_setup);

#define MAX_RETRIES	5
#define SMI_TRESHOLD	50000

/*
 * Read TSC and the reference counters. Take care of SMI disturbance.
 */
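/*
 * The two get_cycles() reads bracket the reference-counter read; if
 * they differ by more than SMI_TRESHOLD cycles, an SMI most likely
 * disturbed the readout, so the sample is retried (up to MAX_RETRIES
 * times) before giving up with ULLONG_MAX.
 */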
static u64 tsc_read_refs(u64 *p, int hpet)
{
	u64 t1, t2;
	int i;

	for (i = 0; i < MAX_RETRIES; i++) {
		t1 = get_cycles();
		if (hpet)
			*p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
		else
			*p = acpi_pm_read_early();
		t2 = get_cycles();
		if ((t2 - t1) < SMI_TRESHOLD)
			return t2;
	}
	return ULLONG_MAX;
}

/*
 * Calculate the TSC frequency from HPET reference.
 */
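/*
 * Units: HPET_PERIOD is the counter period in femtoseconds, so
 * hpet-ticks * HPET_PERIOD / 10^6 is the elapsed time in nanoseconds.
 * The caller passes deltatsc already multiplied by 10^6, so dividing
 * by the elapsed nanoseconds yields the TSC frequency in kHz.
 */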
static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
{
	u64 tmp;

	if (hpet2 < hpet1)
		hpet2 += 0x100000000ULL;
	hpet2 -= hpet1;
	tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
	do_div(tmp, 1000000);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}

/*
 * Calculate the TSC frequency from PMTimer reference.
 */
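/*
 * Units: the ACPI PM timer runs at PMTMR_TICKS_PER_SEC (3.579545 MHz),
 * so pm-ticks * 10^9 / PMTMR_TICKS_PER_SEC is the elapsed time in
 * nanoseconds; as above, deltatsc / elapsed-ns then gives kHz.
 */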
static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
{
	u64 tmp;

	if (!pm1 && !pm2)
		return ULONG_MAX;

	if (pm2 < pm1)
		pm2 += (u64)ACPI_PM_OVRRUN;
	pm2 -= pm1;
	tmp = pm2 * 1000000000LL;
	do_div(tmp, PMTMR_TICKS_PER_SEC);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}

#define CAL_MS		10
#define CAL_LATCH	(PIT_TICK_RATE / (1000 / CAL_MS))
#define CAL_PIT_LOOPS	1000

#define CAL2_MS		50
#define CAL2_LATCH	(PIT_TICK_RATE / (1000 / CAL2_MS))
#define CAL2_PIT_LOOPS	5000
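/*
 * CAL_LATCH programs the PIT for a CAL_MS (10 ms) countdown at
 * PIT_TICK_RATE (~1.193182 MHz), i.e. about 11930 PIT ticks; the
 * CAL2_* values give a longer 50 ms window, used as a retry when the
 * short run is too disturbed (e.g. in virtualized environments).
 */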

/*
 * Try to calibrate the TSC against the Programmable Interrupt Timer
 * and return the frequency of the TSC in kHz.
 *
 * Return ULONG_MAX on failure to calibrate.
 */
static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
{
	u64 tsc, t1, t2, delta;
	unsigned long tscmin, tscmax;
	int pitcnt;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Setup CTC channel 2 for mode 0 (interrupt on terminal count),
	 * binary count. Set the latch register to the requested period
	 * (LSB then MSB) to begin the countdown.
	 */
	outb(0xb0, 0x43);
	outb(latch & 0xff, 0x42);
	outb(latch >> 8, 0x42);

	tsc = t1 = t2 = get_cycles();

	/* Wait for the counter to reach zero, sampling the TSC */
	pitcnt = 0;
	tscmax = 0;
	tscmin = ULONG_MAX;
	while ((inb(0x61) & 0x20) == 0) {
		t2 = get_cycles();
		delta = t2 - tsc;
		tsc = t2;
		if ((unsigned long) delta < tscmin)
			tscmin = (unsigned int) delta;
		if ((unsigned long) delta > tscmax)
			tscmax = (unsigned int) delta;
		pitcnt++;
	}

	/*
	 * Sanity checks:
	 *
	 * If we were not able to read the PIT more than loopmin
	 * times, then we have been hit by a massive SMI.
	 *
	 * If the maximum is 10 times larger than the minimum,
	 * then we got hit by an SMI as well.
	 */
	if (pitcnt < loopmin || tscmax > 10 * tscmin)
		return ULONG_MAX;

	/* Calculate the PIT value: TSC cycles per millisecond == kHz */
	delta = t2 - t1;
	do_div(delta, ms);
	return delta;
}

/*
 * This reads the current MSB of the PIT counter, and
 * checks if we are running on sufficiently fast and
 * non-virtualized hardware.
 *
 * Our expectations are:
 *
 *  - the PIT is running at roughly 1.19MHz
 *
 *  - each IO is going to take about 1us on real hardware,
 *    but we allow it to be much faster (by a factor of 10) or
 *    _slightly_ slower (ie we allow up to a 2us read+counter
 *    update - anything else implies an unacceptably slow CPU
 *    or PIT for the fast calibration to work).
 *
 *  - with 256 PIT ticks to read the value, we have 214us to
 *    see the same MSB (and overhead like doing a single TSC
 *    read per MSB verification etc).
 *
 *  - We're doing 2 reads per loop (LSB, MSB), and we expect
 *    them each to take about a microsecond on real hardware.
 *    So we expect a count value of around 100. But we'll be
 *    generous, and accept anything over 50.
 *
 *  - if the PIT is stuck, and we see *many* more reads, we
 *    return early (and the next caller of pit_expect_msb()
 *    will then consider it a failure when it doesn't see the
 *    next expected value).
 *
 * These expectations mean that we know that we have seen the
 * transition from one expected value to another with a fairly
 * high accuracy, and we didn't miss any events. We can thus
 * use the TSC value at the transitions to calculate a pretty
 * good value for the TSC frequency.
 */
static inline int pit_verify_msb(unsigned char val)
{
	/* Ignore LSB */
	inb(0x42);
	return inb(0x42) == val;
}

static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
	int count;
	u64 tsc = 0, prev_tsc = 0;

	for (count = 0; count < 50000; count++) {
		if (!pit_verify_msb(val))
			break;
		prev_tsc = tsc;
		tsc = get_cycles();
	}
	*deltap = get_cycles() - prev_tsc;
	*tscp = tsc;

	/*
	 * We require _some_ success, but the quality control
	 * will be based on the error terms on the TSC values.
	 */
	return count > 5;
}
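/*
 * On return, *tscp is the TSC value seen just before the MSB changed
 * and *deltap bounds the uncertainty of that observation (the distance
 * between the last two TSC reads bracketing the transition); the
 * caller sums these error terms to decide when the measurement is
 * accurate enough.
 */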

/*
 * How many MSB values do we want to see? We aim for
 * a maximum error rate of 500ppm (in practice the
 * real error is much smaller), but refuse to spend
 * more than 50ms on it.
 */
#define MAX_QUICK_PIT_MS 50
#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)
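/*
 * Each iteration waits for the PIT MSB to decrement once, i.e. for 256
 * PIT ticks (~214us), so the cap works out to roughly 230 iterations
 * for the 50ms budget.
 */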

static unsigned long quick_pit_calibrate(void)
{
	int i;
	u64 tsc, delta;
	unsigned long d1, d2;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Counter 2, mode 0 (one-shot), binary count
	 *
	 * NOTE! Mode 2 decrements by two (and then the
	 * output is flipped each time, giving the same
	 * final output frequency as a decrement-by-one),
	 * so mode 0 is much better when looking at the
	 * individual counts.
	 */
	outb(0xb0, 0x43);

	/* Start at 0xffff */
	outb(0xff, 0x42);
	outb(0xff, 0x42);

	/*
	 * The PIT starts counting at the next edge, so we
	 * need to delay for a microsecond. The easiest way
	 * to do that is to just read back the 16-bit counter
	 * once from the PIT.
	 */
	pit_verify_msb(0);

	if (pit_expect_msb(0xff, &tsc, &d1)) {
		for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
			if (!pit_expect_msb(0xff-i, &delta, &d2))
				break;

			/*
			 * Iterate until the error is less than 500 ppm
			 * (d1+d2 is the measurement uncertainty, and
			 * delta >> 11 is roughly delta / 2048).
			 */
			delta -= tsc;
			if (d1+d2 >= delta >> 11)
				continue;

			/*
			 * Check the PIT one more time to verify that
			 * all TSC reads were stable wrt the PIT.
			 *
			 * This also guarantees serialization of the
			 * last cycle read ('d2') in pit_expect_msb.
			 */
			if (!pit_verify_msb(0xfe - i))
				break;
			goto success;
		}
	}
	pr_err("Fast TSC calibration failed\n");
	return 0;

success:
	/*
	 * Ok, if we get here, then we've seen the
	 * MSB of the PIT decrement 'i' times, and the
	 * error has shrunk to less than 500 ppm.
	 *
	 * As a result, we can depend on there not being
	 * any odd delays anywhere, and the TSC reads are
	 * reliable (within the error).
	 *
	 * kHz = ticks / time-in-seconds / 1000;
	 * kHz = (t2 - t1) / (I * 256 / PIT_TICK_RATE) / 1000
	 * kHz = ((t2 - t1) * PIT_TICK_RATE) / (I * 256 * 1000)
	 */
	delta *= PIT_TICK_RATE;
	do_div(delta, i*256*1000);
	pr_info("Fast TSC calibration using PIT\n");
	return delta;
}

/**
 * native_calibrate_tsc - calibrate the tsc on boot
 */
unsigned long native_calibrate_tsc(void)
{
	u64 tsc1, tsc2, delta, ref1, ref2;
	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
	unsigned long flags, latch, ms, fast_calibrate;
	int hpet = is_hpet_enabled(), i, loopmin;

	local_irq_save(flags);
	fast_calibrate = quick_pit_calibrate();
	local_irq_restore(flags);
	if (fast_calibrate)
		return fast_calibrate;

	/*
	 * Run up to 3 calibration loops to get the lowest frequency
	 * value (the best estimate). We use two different calibration
	 * modes here:
	 *
	 * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and
	 * load a timeout of 10ms (50ms on the slow retry path). We
	 * read the time right after we started the timer and wait
	 * until the PIT count down reaches zero. In each wait loop
	 * iteration we read the TSC and check the delta to the
	 * previous read. We keep track of the min and max values of
	 * that delta. The delta is mostly defined by the IO time of
	 * the PIT access, so we can detect when an SMI/SMM disturbance
	 * happened between the two reads. If the maximum time is
	 * significantly larger than the minimum time, then we discard
	 * the result and have another try.
	 *
	 * 2) Reference counter. If available we use the HPET or the
	 * PMTIMER as a reference to check the sanity of that value.
	 * We use separate TSC readouts and check inside of the
	 * reference read for an SMI/SMM disturbance. We discard
	 * disturbed values here as well. We do that around the PIT
	 * calibration delay loop as well, to avoid the influence of
	 * an SMI on the delay loop.
	 *
	 * We read the reference and the TSC in the same order to get
	 * consistent readouts.
	 */

	/* Preset PIT loop values */
	latch = CAL_LATCH;
	ms = CAL_MS;
	loopmin = CAL_PIT_LOOPS;

	for (i = 0; i < 3; i++) {
		unsigned long tsc_pit_khz;

		/*
		 * Read the start value and the reference count of
		 * hpet/pmtimer when available. Then do the PIT
		 * calibration, which takes at least 'ms' milliseconds,
		 * and read the end value.
		 */
		local_irq_save(flags);
		tsc1 = tsc_read_refs(&ref1, hpet);
		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
		tsc2 = tsc_read_refs(&ref2, hpet);
		local_irq_restore(flags);

		/* Pick the lowest PIT TSC calibration so far */
		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);

		/* hpet or pmtimer available ? */
		if (ref1 == ref2)
			continue;

		/* Check, whether the sampling was disturbed by an SMI */
		if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
			continue;

		tsc2 = (tsc2 - tsc1) * 1000000LL;
		if (hpet)
			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
		else
			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);

		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);

		/* Check the reference deviation */
		delta = ((u64) tsc_pit_min) * 100;
		do_div(delta, tsc_ref_min);

		/*
		 * If both calibration results are inside a 10% window
		 * then we can be sure that the calibration succeeded.
		 * We break out of the loop right away and use the
		 * reference value, as it is more precise.
		 */
		if (delta >= 90 && delta <= 110) {
			pr_info("PIT calibration matches %s. %d loops\n",
				hpet ? "HPET" : "PMTIMER", i + 1);
			return tsc_ref_min;
		}

		/*
		 * Check whether PIT failed more than once. This
		 * happens in virtualized environments. We need to
		 * give the virtual PC a slightly longer timeframe for
		 * the HPET/PMTIMER to make the result precise.
		 */
		if (i == 1 && tsc_pit_min == ULONG_MAX) {
			latch = CAL2_LATCH;
			ms = CAL2_MS;
			loopmin = CAL2_PIT_LOOPS;
		}
	}

	/*
	 * Now check the results.
	 */
	if (tsc_pit_min == ULONG_MAX) {
		/* PIT gave no useful value */
		pr_warn("Unable to calibrate against PIT\n");

		/* We don't have an alternative source, disable TSC */
		if (!hpet && !ref1 && !ref2) {
			pr_notice("No reference (HPET/PMTIMER) available\n");
			return 0;
		}

		/* The alternative source failed as well, disable TSC */
		if (tsc_ref_min == ULONG_MAX) {
			pr_warn("HPET/PMTIMER calibration failed\n");
			return 0;
		}

		/* Use the alternative source */
		pr_info("using %s reference calibration\n",
			hpet ? "HPET" : "PMTIMER");

		return tsc_ref_min;
	}

	/* We don't have an alternative source, use the PIT calibration value */
	if (!hpet && !ref1 && !ref2) {
		pr_info("Using PIT calibration value\n");
		return tsc_pit_min;
	}

	/* The alternative source failed, use the PIT calibration value */
	if (tsc_ref_min == ULONG_MAX) {
		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
		return tsc_pit_min;
	}

	/*
	 * The calibration values differ too much. In doubt, we use
	 * the PIT value as we know that there are PMTIMERs around
	 * running at double speed. At least we let the user know:
	 */
	pr_warn("PIT calibration deviates from %s: %lu %lu\n",
		hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
	pr_info("Using PIT calibration value\n");
	return tsc_pit_min;
}

int recalibrate_cpu_khz(void)
{
#ifndef CONFIG_SMP
	unsigned long cpu_khz_old = cpu_khz;

	if (cpu_has_tsc) {
		tsc_khz = x86_platform.calibrate_tsc();
		cpu_khz = tsc_khz;
		cpu_data(0).loops_per_jiffy =
			cpufreq_scale(cpu_data(0).loops_per_jiffy,
					cpu_khz_old, cpu_khz);
		return 0;
	} else
		return -ENODEV;
#else
	return -ENODEV;
#endif
}

EXPORT_SYMBOL(recalibrate_cpu_khz);

/*
 * Accelerators for sched_clock(): convert cycles (64 bits) to
 * nanoseconds (64 bits).
 *
 * Basic equation:
 *	ns = cycles / (freq / ns_per_sec)
 *	ns = cycles * (ns_per_sec / freq)
 *	ns = cycles * (10^9 / (cpu_khz * 10^3))
 *	ns = cycles * (10^6 / cpu_khz)
 *
 * Then we use scaling math to avoid the division:
 *	ns = cycles * (10^6 * SC / cpu_khz) / SC
 *	ns = cycles * cyc2ns_scale / SC
 *
 * And since SC is a constant power of two, we can convert the div
 * into a shift. Using a khz divisor (instead of MHz) keeps better
 * precision, since cyc2ns_scale is limited to 10^6 * 2^10, which
 * fits in 32 bits.
 */
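/*
 * Worked example (assuming CYC2NS_SCALE_FACTOR is 10, as defined in
 * asm/timer.h): for a 3 GHz CPU, cpu_khz = 3000000, so
 * cyc2ns_scale = (10^6 << 10) / 3000000 = 341, and
 * ns = (cycles * 341) >> 10, i.e. roughly cycles / 3.
 */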
DEFINE_PER_CPU(unsigned long, cyc2ns);
DEFINE_PER_CPU(unsigned long long, cyc2ns_offset);

static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
	unsigned long long tsc_now, ns_now, *offset;
	unsigned long flags, *scale;

	local_irq_save(flags);
	sched_clock_idle_sleep_event();

	scale = &per_cpu(cyc2ns, cpu);
	offset = &per_cpu(cyc2ns_offset, cpu);

	rdtscll(tsc_now);
	ns_now = __cycles_2_ns(tsc_now);

	if (cpu_khz) {
		*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) / cpu_khz;
		*offset = ns_now - mult_frac(tsc_now, *scale,
					     (1UL << CYC2NS_SCALE_FACTOR));
	}

	sched_clock_idle_wakeup_event(0);
	local_irq_restore(flags);
}

static unsigned long long cyc2ns_suspend;

void tsc_save_sched_clock_state(void)
{
	if (!sched_clock_stable)
		return;

	cyc2ns_suspend = sched_clock();
}

/*
 * Even on processors with invariant TSC, TSC gets reset in some of
 * the ACPI system sleep states. And in some systems BIOS seems to
 * reinit TSC to an arbitrary value (still sync'd across CPUs) during
 * resume from such sleep states. To cope with this, recompute the
 * cyc2ns_offset for each cpu so that sched_clock() continues from the
 * point where it was left off during suspend.
 */
void tsc_restore_sched_clock_state(void)
{
	unsigned long long offset;
	unsigned long flags;
	int cpu;

	if (!sched_clock_stable)
		return;

	local_irq_save(flags);

	__this_cpu_write(cyc2ns_offset, 0);
	offset = cyc2ns_suspend - sched_clock();

	for_each_possible_cpu(cpu)
		per_cpu(cyc2ns_offset, cpu) = offset;

	local_irq_restore(flags);
}

#ifdef CONFIG_CPU_FREQ

/*
 * Frequency scaling support: adjust the TSC-based timer when the CPU
 * frequency changes.
 *
 * NOTE: on SMP we assume all CPUs run at the same frequency.
 *
 * Should fix up last_tsc too. Currently gettimeofday in the
 * first tick after the change will be slightly wrong.
 */

static unsigned int  ref_freq;
static unsigned long loops_per_jiffy_ref;
static unsigned long tsc_khz_ref;

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				 void *data)
{
	struct cpufreq_freqs *freq = data;
	unsigned long *lpj;

	if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
		return 0;

	lpj = &boot_cpu_data.loops_per_jiffy;
#ifdef CONFIG_SMP
	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
#endif

	if (!ref_freq) {
		ref_freq = freq->old;
		loops_per_jiffy_ref = *lpj;
		tsc_khz_ref = tsc_khz;
	}
	if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
	    (val == CPUFREQ_RESUMECHANGE)) {
		*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
			mark_tsc_unstable("cpufreq changes");
	}

	set_cyc2ns_scale(tsc_khz, freq->cpu);

	return 0;
}

static struct notifier_block time_cpufreq_notifier_block = {
	.notifier_call = time_cpufreq_notifier
};

static int __init cpufreq_tsc(void)
{
	if (!cpu_has_tsc)
		return 0;
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;
	cpufreq_register_notifier(&time_cpufreq_notifier_block,
				  CPUFREQ_TRANSITION_NOTIFIER);
	return 0;
}

core_initcall(cpufreq_tsc);

#endif /* CONFIG_CPU_FREQ */

/* clocksource code */

static struct clocksource clocksource_tsc;

/*
 * We compare the TSC to the cycle_last value in the clocksource
 * structure to avoid a nasty time-warp. This can be observed in a
 * very small window right after one CPU updated cycle_last under
 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
 * is smaller than the cycle_last reference value due to a TSC which
 * is slightly behind. This delta is nowhere else observable, but in
 * that case it results in a forward time jump in the range of hours
 * due to the unsigned delta calculation of the time keeping core
 * code, which is necessary to support wrapping clocksources like pm
 * timer.
 */
static cycle_t read_tsc(struct clocksource *cs)
{
	cycle_t ret = (cycle_t)get_cycles();

	return ret >= clocksource_tsc.cycle_last ?
		ret : clocksource_tsc.cycle_last;
}

static void resume_tsc(struct clocksource *cs)
{
	clocksource_tsc.cycle_last = 0;
}

static struct clocksource clocksource_tsc = {
	.name		= "tsc",
	.rating		= 300,
	.read		= read_tsc,
	.resume		= resume_tsc,
	.mask		= CLOCKSOURCE_MASK(64),
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS |
			  CLOCK_SOURCE_MUST_VERIFY,
#ifdef CONFIG_X86_64
	.archdata	= { .vclock_mode = VCLOCK_TSC },
#endif
};
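/*
 * Note on the flags above: CLOCK_SOURCE_MUST_VERIFY keeps the TSC
 * under the clocksource watchdog, which cross-checks it against
 * another clocksource and demotes it if it drifts; the flag is cleared
 * in init_tsc_clocksource() when the TSC is known to be reliable
 * (tsc=reliable or X86_FEATURE_TSC_RELIABLE).
 */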

void mark_tsc_unstable(char *reason)
{
	if (!tsc_unstable) {
		tsc_unstable = 1;
		sched_clock_stable = 0;
		disable_sched_clock_irqtime();
		pr_info("Marking TSC unstable due to %s\n", reason);
		/* Change only the rating, when not registered */
		if (clocksource_tsc.mult)
			clocksource_mark_unstable(&clocksource_tsc);
		else {
			clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
			clocksource_tsc.rating = 0;
		}
	}
}

EXPORT_SYMBOL_GPL(mark_tsc_unstable);

static void __init check_system_tsc_reliable(void)
{
#ifdef CONFIG_MGEODE_LX
	/* RTSC counts during suspend */
#define RTSC_SUSP 0x100
	unsigned long res_low, res_high;

	rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
	/* Geode_LX - the OLPC CPU has a very reliable TSC */
	if (res_low & RTSC_SUSP)
		tsc_clocksource_reliable = 1;
#endif
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
		tsc_clocksource_reliable = 1;
}

/*
 * Make an educated guess if the TSC is trustworthy and synchronized
 * over all CPUs.
 */
__cpuinit int unsynchronized_tsc(void)
{
	if (!cpu_has_tsc || tsc_unstable)
		return 1;

#ifdef CONFIG_SMP
	if (apic_is_clustered_box())
		return 1;
#endif

	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;

	if (tsc_clocksource_reliable)
		return 0;
	/*
	 * Intel systems are normally all synchronized.
	 * Exceptions must mark TSC as unstable:
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		/* assume multi socket systems are not synchronized: */
		if (num_possible_cpus() > 1)
			return 1;
	}

	return 0;
}


static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);

/**
 * tsc_refine_calibration_work - Further refine tsc freq calibration
 * @work - ignored.
 *
 * This function uses delayed work over a period of a
 * second to further refine the TSC freq value. Since this is
 * timer based, instead of loop based, we don't block the boot
 * process while this longer calibration is done.
 *
 * If there are any calibration anomalies (too many SMIs, etc),
 * or the refined calibration is off by 1% of the early boot
 * calibration, we throw out the new calibration and use the
 * early calibration.
 */
static void tsc_refine_calibration_work(struct work_struct *work)
{
	static u64 tsc_start = -1, ref_start;
	static int hpet;
	u64 tsc_stop, ref_stop, delta;
	unsigned long freq;

	/* Don't bother refining TSC on unstable systems */
	if (check_tsc_unstable())
		goto out;

	/*
	 * Since the work is started early in boot, we may be
	 * delayed the first time we expire. So set the workqueue
	 * again once we know timers are working.
	 */
	if (tsc_start == -1) {
		/*
		 * Only set hpet once, to avoid mixing hardware
		 * if the hpet becomes enabled later.
		 */
		hpet = is_hpet_enabled();
		schedule_delayed_work(&tsc_irqwork, HZ);
		tsc_start = tsc_read_refs(&ref_start, hpet);
		return;
	}

	tsc_stop = tsc_read_refs(&ref_stop, hpet);

	/* hpet or pmtimer available ? */
	if (ref_start == ref_stop)
		goto out;

	/* Check, whether the sampling was disturbed by an SMI */
	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
		goto out;

	delta = tsc_stop - tsc_start;
	delta *= 1000000LL;
	if (hpet)
		freq = calc_hpet_ref(delta, ref_start, ref_stop);
	else
		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);

	/* Make sure we're within 1% of the early calibration */
	if (abs(tsc_khz - freq) > tsc_khz/100)
		goto out;

	tsc_khz = freq;
	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
		(unsigned long)tsc_khz / 1000,
		(unsigned long)tsc_khz % 1000);

out:
	clocksource_register_khz(&clocksource_tsc, tsc_khz);
}


static int __init init_tsc_clocksource(void)
{
	if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
		return 0;

	if (tsc_clocksource_reliable)
		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;

	/* Lower the rating if we already know the TSC is unstable: */
	if (check_tsc_unstable()) {
		clocksource_tsc.rating = 0;
		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
	}

	/*
	 * Trust the results of the earlier calibration on systems
	 * exporting a reliable TSC.
	 */
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
		clocksource_register_khz(&clocksource_tsc, tsc_khz);
		return 0;
	}

	schedule_delayed_work(&tsc_irqwork, 0);
	return 0;
}

/*
 * We use device_initcall here to ensure this runs after the hpet is
 * fully initialized, which may occur at fs_initcall time.
 */
device_initcall(init_tsc_clocksource);

void __init tsc_init(void)
{
	u64 lpj;
	int cpu;

	x86_init.timers.tsc_pre_init();

	if (!cpu_has_tsc)
		return;

	tsc_khz = x86_platform.calibrate_tsc();
	cpu_khz = tsc_khz;

	if (!tsc_khz) {
		mark_tsc_unstable("could not calculate TSC khz");
		return;
	}

	pr_info("Detected %lu.%03lu MHz processor\n",
		(unsigned long)cpu_khz / 1000,
		(unsigned long)cpu_khz % 1000);

	/*
	 * Secondary CPUs do not run through tsc_init(), so set up
	 * all the scale factors for all CPUs, assuming the same
	 * speed as the bootup CPU. (cpufreq notifiers will fix this
	 * up if their speed diverges)
	 */
	for_each_possible_cpu(cpu)
		set_cyc2ns_scale(cpu_khz, cpu);

	if (tsc_disabled > 0)
		return;

	/* now allow native_sched_clock() to use rdtsc */
	tsc_disabled = 0;

	if (!no_sched_irq_time)
		enable_sched_clock_irqtime();

	/* lpj_fine: TSC cycles per jiffy, a fine-grained preset for calibrate_delay() */
	lpj = ((u64)tsc_khz * 1000);
	do_div(lpj, HZ);
	lpj_fine = lpj;

	use_tsc_delay();

	if (unsynchronized_tsc())
		mark_tsc_unstable("TSCs unsynchronized");

	check_system_tsc_reliable();
}

#ifdef CONFIG_SMP
/*
 * If we have a constant TSC and are using the TSC for the delay loop,
 * we can skip clock calibration if another cpu in the same socket has
 * already been calibrated. This assumes that CONSTANT_TSC applies to
 * all cpus in the socket - this should be a safe assumption.
 */
unsigned long __cpuinit calibrate_delay_is_known(void)
{
	int i, cpu = smp_processor_id();

	if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
		return 0;

	for_each_online_cpu(i)
		if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
			return cpu_data(i).loops_per_jiffy;
	return 0;
}
#endif