1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2
3#include <linux/kernel.h>
4#include <linux/sched.h>
5#include <linux/init.h>
6#include <linux/module.h>
7#include <linux/timer.h>
8#include <linux/acpi_pmtmr.h>
9#include <linux/cpufreq.h>
10#include <linux/delay.h>
11#include <linux/clocksource.h>
12#include <linux/percpu.h>
13#include <linux/timex.h>
14
15#include <asm/hpet.h>
16#include <asm/timer.h>
17#include <asm/vgtod.h>
18#include <asm/time.h>
19#include <asm/delay.h>
20#include <asm/hypervisor.h>
21#include <asm/nmi.h>
22#include <asm/x86_init.h>
23
/* CPU clock frequency in kHz, as measured at boot. */
unsigned int __read_mostly cpu_khz;
EXPORT_SYMBOL(cpu_khz);

/* TSC tick rate in kHz; equals cpu_khz at boot, rescaled by cpufreq. */
unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);

/* Set (via mark_tsc_unstable) once the TSC is found unusable. */
static int __read_mostly tsc_unstable;

/*
 * TSC state: -1 until tsc_init() has run, 0 once the TSC is in use by
 * native_sched_clock(), 1 when disabled (e.g. "notsc" on a
 * CONFIG_X86_TSC kernel).
 */
static int __read_mostly tsc_disabled = -1;

/* Non-zero when the TSC needs no watchdog cross-checking. */
int tsc_clocksource_reliable;
42
43
/*
 * Scheduler clock - returns current time in nanoseconds.
 *
 * Reads the TSC and converts it to nanoseconds via the per-cpu
 * cyc2ns scale/offset.  Before calibration (tsc_disabled == -1) or
 * when the TSC was disabled (tsc_disabled == 1) it falls back to
 * jiffies, which only offers HZ granularity.
 */
u64 native_sched_clock(void)
{
	u64 this_offset;

	/* Any non-zero tsc_disabled value means "do not use the TSC". */
	if (unlikely(tsc_disabled)) {
		/* No locking; a torn jiffies_64 read is tolerated here. */
		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
	}

	/* read the Time Stamp Counter: */
	rdtscll(this_offset);

	/* return the value in ns, scaled by the per-cpu factors */
	return __cycles_2_ns(this_offset);
}
67
68
69
/*
 * sched_clock() is either a direct alias of native_sched_clock() or,
 * under paravirt, a thin wrapper so a hypervisor can supply its own
 * clock via paravirt_sched_clock().
 */
#ifdef CONFIG_PARAVIRT
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}
#else
unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif
79
/* Out-of-line, exported wrapper around the inline TSC read. */
unsigned long long native_read_tsc(void)
{
	unsigned long long tsc_value;

	tsc_value = __native_read_tsc();
	return tsc_value;
}
EXPORT_SYMBOL(native_read_tsc);
85
86int check_tsc_unstable(void)
87{
88 return tsc_unstable;
89}
90EXPORT_SYMBOL_GPL(check_tsc_unstable);
91
#ifdef CONFIG_X86_TSC
/*
 * "notsc" handler when the kernel was built to require a TSC: the
 * feature cannot be cleared, so only TSC-based timekeeping is
 * disabled.
 */
int __init notsc_setup(char *str)
{
	pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
	tsc_disabled = 1;
	return 1;
}
#else
/*
 * Without CONFIG_X86_TSC the kernel can run TSC-less entirely, so
 * "notsc" simply clears the CPU capability bit.
 */
int __init notsc_setup(char *str)
{
	setup_clear_cpu_cap(X86_FEATURE_TSC);
	return 1;
}
#endif

__setup("notsc", notsc_setup);
112
113static int no_sched_irq_time;
114
115static int __init tsc_setup(char *str)
116{
117 if (!strcmp(str, "reliable"))
118 tsc_clocksource_reliable = 1;
119 if (!strncmp(str, "noirqtime", 9))
120 no_sched_irq_time = 1;
121 return 1;
122}
123
124__setup("tsc=", tsc_setup);
125
126#define MAX_RETRIES 5
127#define SMI_TRESHOLD 50000
128
129
130
131
132static u64 tsc_read_refs(u64 *p, int hpet)
133{
134 u64 t1, t2;
135 int i;
136
137 for (i = 0; i < MAX_RETRIES; i++) {
138 t1 = get_cycles();
139 if (hpet)
140 *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
141 else
142 *p = acpi_pm_read_early();
143 t2 = get_cycles();
144 if ((t2 - t1) < SMI_TRESHOLD)
145 return t2;
146 }
147 return ULLONG_MAX;
148}
149
150
151
152
/*
 * Calculate the TSC frequency from a TSC delta and two HPET counter
 * readings taken around it.  deltatsc is pre-scaled by the caller
 * (multiplied by 1e6), so the result is in kHz.
 */
static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2)
{
	u64 tmp;

	/* The 32-bit HPET counter may have wrapped once between reads. */
	if (hpet2 < hpet1)
		hpet2 += 0x100000000ULL;
	hpet2 -= hpet1;
	/* Elapsed reference time: ticks * period (HPET_PERIOD is in fs). */
	tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
	do_div(tmp, 1000000);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}
166
167
168
169
/*
 * Calculate the TSC frequency from a TSC delta and two ACPI PM timer
 * readings taken around it.  deltatsc is pre-scaled by the caller
 * (multiplied by 1e6), so the result is in kHz.  Returns ULONG_MAX
 * when no PM timer values were captured.
 */
static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
{
	u64 tmp;

	if (!pm1 && !pm2)
		return ULONG_MAX;

	/* Handle one wrap of the PM timer (ACPI_PM_OVRRUN = its period). */
	if (pm2 < pm1)
		pm2 += (u64)ACPI_PM_OVRRUN;
	pm2 -= pm1;
	/* Convert PM timer ticks to elapsed nanoseconds. */
	tmp = pm2 * 1000000000LL;
	do_div(tmp, PMTMR_TICKS_PER_SEC);
	do_div(deltatsc, tmp);

	return (unsigned long) deltatsc;
}
186
/* First calibration round: fast, 10ms measurement. */
#define CAL_MS 10
#define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS))
#define CAL_PIT_LOOPS 1000

/* Second round: slower 50ms measurement, more tolerant of SMIs. */
#define CAL2_MS 50
#define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS))
#define CAL2_PIT_LOOPS 5000

/*
 * Calibrate the TSC against the Programmable Interval Timer:
 * program PIT channel 2 for a one-shot of 'latch' ticks (= 'ms'
 * milliseconds), spin reading the TSC until the PIT output goes
 * high, and divide the elapsed TSC cycles by 'ms' to get kHz.
 *
 * The min/max gap between successive TSC reads and the loop count
 * are used as quality checks; returns ULONG_MAX when the run looks
 * disturbed (e.g. by SMIs).
 */
static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
{
	u64 tsc, t1, t2, delta;
	unsigned long tscmin, tscmax;
	int pitcnt;

	/* Set the Gate high, disable speaker (port 0x61) */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Counter 2, mode 0 (one-shot), binary count; then load the
	 * count LSB first, MSB second (port 0x42).
	 */
	outb(0xb0, 0x43);
	outb(latch & 0xff, 0x42);
	outb(latch >> 8, 0x42);

	tsc = t1 = t2 = get_cycles();

	pitcnt = 0;
	tscmax = 0;
	tscmin = ULONG_MAX;
	/* Spin until the PIT ch2 output (port 0x61 bit 5) goes high. */
	while ((inb(0x61) & 0x20) == 0) {
		t2 = get_cycles();
		delta = t2 - tsc;
		tsc = t2;
		if ((unsigned long) delta < tscmin)
			tscmin = (unsigned int) delta;
		if ((unsigned long) delta > tscmax)
			tscmax = (unsigned int) delta;
		pitcnt++;
	}

	/*
	 * Sanity checks:
	 *
	 * If we were not able to read the PIT more than loopmin
	 * times, then we have been hit by a massive SMI.
	 *
	 * If the maximum time between two reads is more than 10x the
	 * minimum, then something stole time from the fastest loop
	 * iterations and the measurement is unreliable.
	 */
	if (pitcnt < loopmin || tscmax > 10 * tscmin)
		return ULONG_MAX;

	/* cycles elapsed over 'ms' milliseconds -> kHz */
	delta = t2 - t1;
	do_div(delta, ms);
	return delta;
}
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
/*
 * Read back the PIT channel 2 count (LSB then MSB in lobyte/hibyte
 * access mode) and check that the MSB equals 'val'.
 */
static inline int pit_verify_msb(unsigned char val)
{
	/* Ignore the LSB */
	inb(0x42);
	return inb(0x42) == val;
}
296
/*
 * Spin while the PIT count MSB stays equal to 'val', recording the
 * TSC on each pass.  On return, *tscp holds the last TSC read taken
 * while the MSB still matched, and *deltap bounds the measurement
 * uncertainty (cycles from that read until just after the MSB
 * changed).  Returns non-zero only if the MSB was observed more than
 * a handful of times, i.e. we genuinely saw that counter value.
 */
static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
{
	int count;
	u64 tsc = 0, prev_tsc = 0;

	for (count = 0; count < 50000; count++) {
		if (!pit_verify_msb(val))
			break;
		prev_tsc = tsc;
		tsc = get_cycles();
	}
	*deltap = get_cycles() - prev_tsc;
	*tscp = tsc;

	/*
	 * We require _some_ success, but the quality control
	 * will be based on the error terms on the TSC values.
	 */
	return count > 5;
}
317
318
319
320
321
322
323
/*
 * Fast TSC calibration: watch PIT channel 2 count down from 0xffff
 * and time how long each 256-tick step of the count MSB takes in TSC
 * cycles.  The d1/d2 uncertainty bounds from pit_expect_msb() let us
 * stop as soon as the accumulated error drops below ~1/2048
 * (delta >> 11), i.e. roughly 500 ppm.  Bounded to at most
 * MAX_QUICK_PIT_MS of measurement.
 */
#define MAX_QUICK_PIT_MS 50
#define MAX_QUICK_PIT_ITERATIONS (MAX_QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256)

static unsigned long quick_pit_calibrate(void)
{
	int i;
	u64 tsc, delta;
	unsigned long d1, d2;

	/* Set the Gate high, disable speaker */
	outb((inb(0x61) & ~0x02) | 0x01, 0x61);

	/*
	 * Counter 2, mode 0 (one-shot), binary count.
	 */
	outb(0xb0, 0x43);

	/* Start at 0xffff, LSB then MSB */
	outb(0xff, 0x42);
	outb(0xff, 0x42);

	/*
	 * The PIT starts counting at the next edge, so we
	 * need to delay for a microsecond. The easiest way
	 * to do that is to just read back the 16-bit counter
	 * once from the PIT.
	 */
	pit_verify_msb(0);

	if (pit_expect_msb(0xff, &tsc, &d1)) {
		for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
			if (!pit_expect_msb(0xff-i, &delta, &d2))
				break;

			/*
			 * Iterate until the error is less than 500 ppm
			 * (combined uncertainty d1+d2 below delta/2048).
			 */
			delta -= tsc;
			if (d1+d2 >= delta >> 11)
				continue;

			/*
			 * Check the PIT one more time to verify that
			 * all TSC reads were stable wrt the PIT.
			 */
			if (!pit_verify_msb(0xfe - i))
				break;
			goto success;
		}
	}
	pr_err("Fast TSC calibration failed\n");
	return 0;

success:
	/*
	 * We have seen the MSB of the PIT decrement 'i' times, so
	 * 'delta' TSC cycles correspond to i*256 PIT ticks:
	 *
	 * kHz = delta / (i * 256 / PIT_TICK_RATE) / 1000
	 *     = (delta * PIT_TICK_RATE) / (i * 256 * 1000)
	 */
	delta *= PIT_TICK_RATE;
	do_div(delta, i*256*1000);
	pr_info("Fast TSC calibration using PIT\n");
	return delta;
}
405
406
407
408
/*
 * native_calibrate_tsc - calibrate the TSC frequency, in kHz.
 *
 * First tries the fast PIT method; if that fails, runs up to three
 * rounds in which a PIT calibration is bracketed by simultaneous
 * TSC/reference (HPET or ACPI PM timer) samples.  The minimum value
 * seen per method is kept, since disturbances (SMIs, virtualization)
 * can only inflate a measured frequency.  Returns 0 when no usable
 * calibration was obtained.
 */
unsigned long native_calibrate_tsc(void)
{
	u64 tsc1, tsc2, delta, ref1, ref2;
	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
	unsigned long flags, latch, ms, fast_calibrate;
	int hpet = is_hpet_enabled(), i, loopmin;

	local_irq_save(flags);
	fast_calibrate = quick_pit_calibrate();
	local_irq_restore(flags);
	if (fast_calibrate)
		return fast_calibrate;

	/* Start with the fast 10ms PIT mode; may be switched to the
	 * slower 50ms mode below if the fast mode keeps failing. */
	latch = CAL_LATCH;
	ms = CAL_MS;
	loopmin = CAL_PIT_LOOPS;

	for (i = 0; i < 3; i++) {
		unsigned long tsc_pit_khz;

		/*
		 * With interrupts off, sample TSC+reference, run the
		 * PIT calibration, then sample TSC+reference again.
		 */
		local_irq_save(flags);
		tsc1 = tsc_read_refs(&ref1, hpet);
		tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin);
		tsc2 = tsc_read_refs(&ref2, hpet);
		local_irq_restore(flags);

		/* Keep the best (lowest) PIT result so far. */
		tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);

		/* No reference progress => no HPET/PMTIMER available. */
		if (ref1 == ref2)
			continue;

		/* SMI disturbed the TSC/reference sampling? */
		if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
			continue;

		/* Pre-scale the TSC delta for the kHz calculations. */
		tsc2 = (tsc2 - tsc1) * 1000000LL;
		if (hpet)
			tsc2 = calc_hpet_ref(tsc2, ref1, ref2);
		else
			tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2);

		tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);

		/* Compare PIT vs. reference results (in percent). */
		delta = ((u64) tsc_pit_min) * 100;
		do_div(delta, tsc_ref_min);

		/*
		 * If PIT and reference agree within 10% we trust the
		 * reference value and stop early.
		 */
		if (delta >= 90 && delta <= 110) {
			pr_info("PIT calibration matches %s. %d loops\n",
				hpet ? "HPET" : "PMTIMER", i + 1);
			return tsc_ref_min;
		}

		/*
		 * After two failed fast-mode PIT runs, switch to the
		 * slower but more SMI-tolerant 50ms mode for the last
		 * round.
		 */
		if (i == 1 && tsc_pit_min == ULONG_MAX) {
			latch = CAL2_LATCH;
			ms = CAL2_MS;
			loopmin = CAL2_PIT_LOOPS;
		}
	}

	/* PIT calibration never produced a usable value. */
	if (tsc_pit_min == ULONG_MAX) {
		pr_warn("Unable to calibrate against PIT\n");

		/* Reference values untouched => no reference hardware. */
		if (!hpet && !ref1 && !ref2) {
			pr_notice("No reference (HPET/PMTIMER) available\n");
			return 0;
		}

		/* Reference calibration failed too: give up. */
		if (tsc_ref_min == ULONG_MAX) {
			pr_warn("HPET/PMTIMER calibration failed\n");
			return 0;
		}

		/* Use the reference value as the only one available. */
		pr_info("using %s reference calibration\n",
			hpet ? "HPET" : "PMTIMER");

		return tsc_ref_min;
	}

	/* PIT worked and there was no reference hardware. */
	if (!hpet && !ref1 && !ref2) {
		pr_info("Using PIT calibration value\n");
		return tsc_pit_min;
	}

	/* PIT worked but the reference calibration failed. */
	if (tsc_ref_min == ULONG_MAX) {
		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
		return tsc_pit_min;
	}

	/*
	 * Both methods produced values but disagreed by more than
	 * 10%; prefer the PIT, but warn about the deviation.
	 */
	pr_warn("PIT calibration deviates from %s: %lu %lu\n",
		hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
	pr_info("Using PIT calibration value\n");
	return tsc_pit_min;
}
563
564int recalibrate_cpu_khz(void)
565{
566#ifndef CONFIG_SMP
567 unsigned long cpu_khz_old = cpu_khz;
568
569 if (cpu_has_tsc) {
570 tsc_khz = x86_platform.calibrate_tsc();
571 cpu_khz = tsc_khz;
572 cpu_data(0).loops_per_jiffy =
573 cpufreq_scale(cpu_data(0).loops_per_jiffy,
574 cpu_khz_old, cpu_khz);
575 return 0;
576 } else
577 return -ENODEV;
578#else
579 return -ENODEV;
580#endif
581}
582
583EXPORT_SYMBOL(recalibrate_cpu_khz);
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
/*
 * Per-cpu cycles-to-nanoseconds conversion factors used by
 * __cycles_2_ns():
 *
 *	ns = cycles * cyc2ns / 2^CYC2NS_SCALE_FACTOR + cyc2ns_offset
 *
 * cyc2ns is a fixed-point multiplier derived from cpu_khz; the offset
 * keeps sched_clock() continuous when the scale changes.
 */
DEFINE_PER_CPU(unsigned long, cyc2ns);
DEFINE_PER_CPU(unsigned long long, cyc2ns_offset);

/* Recompute a cpu's cyc2ns scale and offset for the given frequency. */
static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
	unsigned long long tsc_now, ns_now, *offset;
	unsigned long flags, *scale;

	/* Block interrupts and bracket the update with the scheduler's
	 * idle sleep/wakeup events while the clock is inconsistent. */
	local_irq_save(flags);
	sched_clock_idle_sleep_event();

	scale = &per_cpu(cyc2ns, cpu);
	offset = &per_cpu(cyc2ns_offset, cpu);

	rdtscll(tsc_now);
	ns_now = __cycles_2_ns(tsc_now);

	if (cpu_khz) {
		/* scale = 1e6 ns/ms << FACTOR / khz, rounded to nearest */
		*scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
			  cpu_khz / 2) / cpu_khz;
		/*
		 * Pick the offset so the new scale reproduces the
		 * current time, keeping sched_clock() continuous
		 * across the change.
		 */
		*offset = ns_now - mult_frac(tsc_now, *scale,
					     (1UL << CYC2NS_SCALE_FACTOR));
	}

	sched_clock_idle_wakeup_event(0);
	local_irq_restore(flags);
}
635
636static unsigned long long cyc2ns_suspend;
637
638void tsc_save_sched_clock_state(void)
639{
640 if (!sched_clock_stable)
641 return;
642
643 cyc2ns_suspend = sched_clock();
644}
645
646
647
648
649
650
651
652
653
/*
 * The TSC may have been reset across suspend, which would make
 * sched_clock() jump backwards on resume.  Recompute the per-cpu
 * cyc2ns_offset so sched_clock() continues from the value saved in
 * tsc_save_sched_clock_state().
 */
void tsc_restore_sched_clock_state(void)
{
	unsigned long long offset;
	unsigned long flags;
	int cpu;

	if (!sched_clock_stable)
		return;

	local_irq_save(flags);

	/*
	 * Zero this cpu's offset first so the sched_clock() call below
	 * returns the raw, un-offset time; the difference from the
	 * saved value is then the correction every cpu needs.
	 */
	__this_cpu_write(cyc2ns_offset, 0);
	offset = cyc2ns_suspend - sched_clock();

	for_each_possible_cpu(cpu)
		per_cpu(cyc2ns_offset, cpu) = offset;

	local_irq_restore(flags);
}
673
674#ifdef CONFIG_CPU_FREQ
675
676
677
678
679
680
681
682
683
684
685
686
/*
 * Frequency scaling support: when cpufreq changes the CPU clock,
 * rescale tsc_khz and loops_per_jiffy relative to the baseline values
 * captured at the first transition.
 */
static unsigned int ref_freq;
static unsigned long loops_per_jiffy_ref;
static unsigned long tsc_khz_ref;

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
				 void *data)
{
	struct cpufreq_freqs *freq = data;
	unsigned long *lpj;

	/* CPUs with a constant TSC need no adjustment at all. */
	if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
		return 0;

	lpj = &boot_cpu_data.loops_per_jiffy;
#ifdef CONFIG_SMP
	/* Per-cpu loops_per_jiffy unless loops are frequency-invariant. */
	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
#endif

	/* Capture the reference values on the first notification. */
	if (!ref_freq) {
		ref_freq = freq->old;
		loops_per_jiffy_ref = *lpj;
		tsc_khz_ref = tsc_khz;
	}
	/*
	 * Rescale before a frequency increase and after a decrease
	 * (and on resume), i.e. while the higher of the two speeds
	 * applies.
	 */
	if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
	    (val == CPUFREQ_RESUMECHANGE)) {
		*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
		/* A varying TSC cannot serve as a stable clocksource. */
		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
			mark_tsc_unstable("cpufreq changes");
	}

	set_cyc2ns_scale(tsc_khz, freq->cpu);

	return 0;
}

static struct notifier_block time_cpufreq_notifier_block = {
	.notifier_call = time_cpufreq_notifier
};
729
730static int __init cpufreq_tsc(void)
731{
732 if (!cpu_has_tsc)
733 return 0;
734 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
735 return 0;
736 cpufreq_register_notifier(&time_cpufreq_notifier_block,
737 CPUFREQ_TRANSITION_NOTIFIER);
738 return 0;
739}
740
741core_initcall(cpufreq_tsc);
742
743#endif
744
745
746
/* clocksource code */

static struct clocksource clocksource_tsc;

/*
 * Clocksource read callback.  The result is clamped to the last value
 * recorded in clocksource_tsc.cycle_last so that a TSC read that
 * appears to go backwards (e.g. across CPUs) cannot cause a time
 * warp.
 */
static cycle_t read_tsc(struct clocksource *cs)
{
	cycle_t ret = (cycle_t)get_cycles();

	return ret >= clocksource_tsc.cycle_last ?
		ret : clocksource_tsc.cycle_last;
}
768
769static void resume_tsc(struct clocksource *cs)
770{
771 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
772 clocksource_tsc.cycle_last = 0;
773}
774
static struct clocksource clocksource_tsc = {
	.name = "tsc",
	.rating = 300,	/* high rating; preferred when it verifies */
	.read = read_tsc,
	.resume = resume_tsc,
	.mask = CLOCKSOURCE_MASK(64),
	/* MUST_VERIFY: watchdog-checked until proven reliable. */
	.flags = CLOCK_SOURCE_IS_CONTINUOUS |
		 CLOCK_SOURCE_MUST_VERIFY,
#ifdef CONFIG_X86_64
	/* Allow the vDSO to read the TSC directly from userspace. */
	.archdata = { .vclock_mode = VCLOCK_TSC },
#endif
};
787
788void mark_tsc_unstable(char *reason)
789{
790 if (!tsc_unstable) {
791 tsc_unstable = 1;
792 sched_clock_stable = 0;
793 disable_sched_clock_irqtime();
794 pr_info("Marking TSC unstable due to %s\n", reason);
795
796 if (clocksource_tsc.mult)
797 clocksource_mark_unstable(&clocksource_tsc);
798 else {
799 clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
800 clocksource_tsc.rating = 0;
801 }
802 }
803}
804
805EXPORT_SYMBOL_GPL(mark_tsc_unstable);
806
/* Detect platforms whose TSC can be trusted without watchdog checks. */
static void __init check_system_tsc_reliable(void)
{
#ifdef CONFIG_MGEODE_LX
	/* RTSC counts during suspend */
#define RTSC_SUSP 0x100
	unsigned long res_low, res_high;

	rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);

	/* Geode_LX - the OLPC CPU has a very reliable TSC */
	if (res_low & RTSC_SUSP)
		tsc_clocksource_reliable = 1;
#endif
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
		tsc_clocksource_reliable = 1;
}
822
823
824
825
826
/*
 * Decide whether the TSCs of all CPUs can be assumed synchronized.
 * Returns 1 when they must be treated as unsynchronized.
 */
int unsynchronized_tsc(void)
{
	if (!cpu_has_tsc || tsc_unstable)
		return 1;

#ifdef CONFIG_SMP
	if (apic_is_clustered_box())
		return 1;
#endif

	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
		return 0;

	if (tsc_clocksource_reliable)
		return 0;
	/*
	 * Intel systems are normally all synchronized.
	 * Exceptions must mark TSC as unstable:
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		/* assume multi socket systems are not synchronized: */
		if (num_possible_cpus() > 1)
			return 1;
	}

	return 0;
}
854
855
static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);

/*
 * tsc_refine_calibration_work - Further refine tsc freq calibration
 * @work: workqueue item
 *
 * Uses delayed work over a period of a second to refine the TSC
 * frequency against HPET or the ACPI PM timer.  Being timer based
 * rather than loop based, it does not block the boot process.
 *
 * If there are calibration anomalies (SMIs, no reference progress) or
 * the refined value deviates by more than 1% from the early
 * calibration, the new value is thrown away and the early one kept.
 * In all cases the TSC clocksource is (re)registered at the end.
 */
static void tsc_refine_calibration_work(struct work_struct *work)
{
	static u64 tsc_start = -1, ref_start;
	static int hpet;
	u64 tsc_stop, ref_stop, delta;
	unsigned long freq;

	/* Don't bother refining TSC on unstable systems */
	if (check_tsc_unstable())
		goto out;

	/*
	 * First invocation: take the starting sample and re-arm the
	 * work for one second later (HZ jiffies).
	 */
	if (tsc_start == -1) {
		/*
		 * Only set hpet once, to avoid mixing hardware data
		 * if the hpet becomes enabled later.
		 */
		hpet = is_hpet_enabled();
		schedule_delayed_work(&tsc_irqwork, HZ);
		tsc_start = tsc_read_refs(&ref_start, hpet);
		return;
	}

	tsc_stop = tsc_read_refs(&ref_stop, hpet);

	/* No reference progress => no usable hpet/pmtimer. */
	if (ref_start == ref_stop)
		goto out;

	/* Was the sampling disturbed by an SMI? */
	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
		goto out;

	/* Pre-scale the delta for the kHz calculation helpers. */
	delta = tsc_stop - tsc_start;
	delta *= 1000000LL;
	if (hpet)
		freq = calc_hpet_ref(delta, ref_start, ref_stop);
	else
		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);

	/* Reject the refinement if it is off by more than 1%. */
	if (abs(tsc_khz - freq) > tsc_khz/100)
		goto out;

	tsc_khz = freq;
	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
		(unsigned long)tsc_khz / 1000,
		(unsigned long)tsc_khz % 1000);

out:
	clocksource_register_khz(&clocksource_tsc, tsc_khz);
}
928
929
/* Register the TSC clocksource, optionally via the refinement work. */
static int __init init_tsc_clocksource(void)
{
	if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
		return 0;

	/* A reliable TSC needs no watchdog cross-checking. */
	if (tsc_clocksource_reliable)
		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;

	/* Already known unstable: degrade before registration. */
	if (check_tsc_unstable()) {
		clocksource_tsc.rating = 0;
		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
	}

	if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
		clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;

	/*
	 * Trust the results of the earlier calibration on systems
	 * exporting a reliable TSC; register directly and skip the
	 * delayed refinement.
	 */
	if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
		clocksource_register_khz(&clocksource_tsc, tsc_khz);
		return 0;
	}

	/* Otherwise let tsc_refine_calibration_work register it. */
	schedule_delayed_work(&tsc_irqwork, 0);
	return 0;
}

/*
 * device_initcall runs late enough that the HPET (if any) has been
 * fully initialized before we sample it here.
 */
device_initcall(init_tsc_clocksource);
963
/* Boot-time TSC setup: calibrate, set conversion factors, enable use. */
void __init tsc_init(void)
{
	u64 lpj;
	int cpu;

	/* Platform hook that runs before any calibration. */
	x86_init.timers.tsc_pre_init();

	if (!cpu_has_tsc)
		return;

	tsc_khz = x86_platform.calibrate_tsc();
	cpu_khz = tsc_khz;

	if (!tsc_khz) {
		mark_tsc_unstable("could not calculate TSC khz");
		return;
	}

	pr_info("Detected %lu.%03lu MHz processor\n",
		(unsigned long)cpu_khz / 1000,
		(unsigned long)cpu_khz % 1000);

	/*
	 * Secondary CPUs do not run through tsc_init(), so set up the
	 * scale factors for all CPUs now, assuming the boot CPU's
	 * speed (cpufreq notifiers will correct any divergence later).
	 */
	for_each_possible_cpu(cpu)
		set_cyc2ns_scale(cpu_khz, cpu);

	/* "notsc" on a CONFIG_X86_TSC kernel: stop here. */
	if (tsc_disabled > 0)
		return;

	/* now allow native_sched_clock() to use rdtsc */
	tsc_disabled = 0;

	if (!no_sched_irq_time)
		enable_sched_clock_irqtime();

	/* Fine-grained loops-per-jiffy estimate from the TSC rate. */
	lpj = ((u64)tsc_khz * 1000);
	do_div(lpj, HZ);
	lpj_fine = lpj;

	use_tsc_delay();

	if (unsynchronized_tsc())
		mark_tsc_unstable("TSCs unsynchronized");

	check_system_tsc_reliable();
}
1015
#ifdef CONFIG_SMP
/*
 * With a usable, constant-rate TSC, a secondary CPU can reuse the
 * loops_per_jiffy already calibrated by another online CPU in the
 * same physical package, skipping its own delay calibration.
 * Returns 0 when calibration is still required.
 *
 * NOTE(review): when tsc_disabled != 0 the CONSTANT_TSC check is
 * skipped entirely and a sibling's value may still be returned —
 * presumably intentional since the delay loop then isn't TSC based,
 * but worth confirming against current upstream.
 */
unsigned long calibrate_delay_is_known(void)
{
	int i, cpu = smp_processor_id();

	if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
		return 0;

	/* Reuse the value from any online CPU in the same package. */
	for_each_online_cpu(i)
		if (cpu_data(i).phys_proc_id == cpu_data(cpu).phys_proc_id)
			return cpu_data(i).loops_per_jiffy;
	return 0;
}
#endif
1036