linux/arch/powerpc/kernel/time.c
   1/*
   2 * Common time routines among all ppc machines.
   3 *
   4 * Written by Cort Dougan (cort@cs.nmt.edu) to merge
   5 * Paul Mackerras' version and mine for PReP and Pmac.
   6 * MPC8xx/MBX changes by Dan Malek (dmalek@jlc.net).
   7 * Converted for 64-bit by Mike Corrigan (mikejc@us.ibm.com)
   8 *
   9 * First round of bugfixes by Gabriel Paubert (paubert@iram.es)
  10 * to make clock more stable (2.4.0-test5). The only thing
  11 * that this code assumes is that the timebases have been synchronized
  12 * by firmware on SMP and are never stopped (never do sleep
  13 * on SMP then, nap and doze are OK).
  14 * 
  15 * Speeded up do_gettimeofday by getting rid of references to
  16 * xtime (which required locks for consistency). (mikejc@us.ibm.com)
  17 *
  18 * TODO (not necessarily in this file):
  19 * - improve precision and reproducibility of timebase frequency
  20 * measurement at boot time. (for iSeries, we calibrate the timebase
  21 * against the Titan chip's clock.)
  22 * - for astronomical applications: add a new function to get
  23 * non ambiguous timestamps even around leap seconds. This needs
  24 * a new timestamp format and a good name.
  25 *
  26 * 1997-09-10  Updated NTP code according to technical memorandum Jan '96
  27 *             "A Kernel Model for Precision Timekeeping" by Dave Mills
  28 *
  29 *      This program is free software; you can redistribute it and/or
  30 *      modify it under the terms of the GNU General Public License
  31 *      as published by the Free Software Foundation; either version
  32 *      2 of the License, or (at your option) any later version.
  33 */
  34
  35#include <linux/errno.h>
  36#include <linux/module.h>
  37#include <linux/sched.h>
  38#include <linux/kernel.h>
  39#include <linux/param.h>
  40#include <linux/string.h>
  41#include <linux/mm.h>
  42#include <linux/interrupt.h>
  43#include <linux/timex.h>
  44#include <linux/kernel_stat.h>
  45#include <linux/time.h>
  46#include <linux/init.h>
  47#include <linux/profile.h>
  48#include <linux/cpu.h>
  49#include <linux/security.h>
  50#include <linux/percpu.h>
  51#include <linux/rtc.h>
  52#include <linux/jiffies.h>
  53#include <linux/posix-timers.h>
  54#include <linux/irq.h>
  55#include <linux/delay.h>
  56#include <linux/perf_event.h>
  57
  58#include <asm/io.h>
  59#include <asm/processor.h>
  60#include <asm/nvram.h>
  61#include <asm/cache.h>
  62#include <asm/machdep.h>
  63#include <asm/uaccess.h>
  64#include <asm/time.h>
  65#include <asm/prom.h>
  66#include <asm/irq.h>
  67#include <asm/div64.h>
  68#include <asm/smp.h>
  69#include <asm/vdso_datapage.h>
  70#include <asm/firmware.h>
  71#include <asm/cputime.h>
  72#ifdef CONFIG_PPC_ISERIES
  73#include <asm/iseries/it_lp_queue.h>
  74#include <asm/iseries/hv_call_xm.h>
  75#endif
  76
  77/* powerpc clocksource/clockevent code */
  78
  79#include <linux/clockchips.h>
  80#include <linux/clocksource.h>
  81
  82static cycle_t rtc_read(struct clocksource *);
  83static struct clocksource clocksource_rtc = {
  84        .name         = "rtc",
  85        .rating       = 400,
  86        .flags        = CLOCK_SOURCE_IS_CONTINUOUS,
  87        .mask         = CLOCKSOURCE_MASK(64),
  88        .shift        = 22,
  89        .mult         = 0,      /* To be filled in */
  90        .read         = rtc_read,
  91};
  92
  93static cycle_t timebase_read(struct clocksource *);
  94static struct clocksource clocksource_timebase = {
  95        .name         = "timebase",
  96        .rating       = 400,
  97        .flags        = CLOCK_SOURCE_IS_CONTINUOUS,
  98        .mask         = CLOCKSOURCE_MASK(64),
  99        .shift        = 22,
 100        .mult         = 0,      /* To be filled in */
 101        .read         = timebase_read,
 102};
 103
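/*
 * The decrementer is a 32-bit counter that raises an exception when it
 * counts down through zero, so the largest interval we can safely program
 * is 2^31 - 1 timebase ticks (see also the comment in timer_interrupt()
 * about keeping the value positive).
 */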
 104#define DECREMENTER_MAX 0x7fffffff
 105
 106static int decrementer_set_next_event(unsigned long evt,
 107                                      struct clock_event_device *dev);
 108static void decrementer_set_mode(enum clock_event_mode mode,
 109                                 struct clock_event_device *dev);
 110
 111static struct clock_event_device decrementer_clockevent = {
 112       .name           = "decrementer",
 113       .rating         = 200,
 114       .shift          = 0,     /* To be filled in */
 115       .mult           = 0,     /* To be filled in */
 116       .irq            = 0,
 117       .set_next_event = decrementer_set_next_event,
 118       .set_mode       = decrementer_set_mode,
 119       .features       = CLOCK_EVT_FEAT_ONESHOT,
 120};
 121
 122struct decrementer_clock {
 123        struct clock_event_device event;
 124        u64 next_tb;
 125};
 126
 127static DEFINE_PER_CPU(struct decrementer_clock, decrementers);
 128
 129#ifdef CONFIG_PPC_ISERIES
 130static unsigned long __initdata iSeries_recal_titan;
 131static signed long __initdata iSeries_recal_tb;
 132
 133/* Forward declaration is only needed for iSeries compiles */
 134static void __init clocksource_init(void);
 135#endif
 136
 137#define XSEC_PER_SEC (1024*1024)
 138
 139#ifdef CONFIG_PPC64
 140#define SCALE_XSEC(xsec, max)   (((xsec) * max) / XSEC_PER_SEC)
 141#else
 142/* compute ((xsec << 12) * max) >> 32 */
 143#define SCALE_XSEC(xsec, max)   mulhwu((xsec) << 12, max)
 144#endif
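/*
 * An "xsec" is 1/XSEC_PER_SEC = 1/2^20 of a second.  Both variants of
 * SCALE_XSEC() above compute roughly (xsec * max) / 2^20: the 32-bit one
 * does it as ((xsec << 12) * max) >> 32 via mulhwu(), which is the same
 * value.  For example, SCALE_XSEC(XSEC_PER_SEC / 2, 1000) is about 500.
 */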
 145
 146unsigned long tb_ticks_per_jiffy;
 147unsigned long tb_ticks_per_usec = 100; /* sane default */
 148EXPORT_SYMBOL(tb_ticks_per_usec);
 149unsigned long tb_ticks_per_sec;
 150EXPORT_SYMBOL(tb_ticks_per_sec);        /* for cputime_t conversions */
 151u64 tb_to_xs;
 152unsigned tb_to_us;
 153
 154#define TICKLEN_SCALE   NTP_SCALE_SHIFT
 155static u64 last_tick_len;       /* units are ns / 2^TICKLEN_SCALE */
 156static u64 ticklen_to_xs;       /* 0.64 fraction */
 157
 158/* If last_tick_len corresponds to about 1/HZ seconds, then
 159   last_tick_len << TICKLEN_SHIFT will be about 2^63. */
 160#define TICKLEN_SHIFT   (63 - 30 - TICKLEN_SCALE + SHIFT_HZ)
 161
 162DEFINE_SPINLOCK(rtc_lock);
 163EXPORT_SYMBOL_GPL(rtc_lock);
 164
 165static u64 tb_to_ns_scale __read_mostly;
 166static unsigned tb_to_ns_shift __read_mostly;
 167static unsigned long boot_tb __read_mostly;
 168
 169extern struct timezone sys_tz;
 170static long timezone_offset;
 171
 172unsigned long ppc_proc_freq;
 173EXPORT_SYMBOL(ppc_proc_freq);
 174unsigned long ppc_tb_freq;
 175
 176static u64 tb_last_jiffy __cacheline_aligned_in_smp;
 177static DEFINE_PER_CPU(u64, last_jiffy);
 178
 179#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 180/*
 181 * Factors for converting from cputime_t (timebase ticks) to
 182 * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds).
 183 * These are all stored as 0.64 fixed-point binary fractions.
 184 */
 185u64 __cputime_jiffies_factor;
 186EXPORT_SYMBOL(__cputime_jiffies_factor);
 187u64 __cputime_msec_factor;
 188EXPORT_SYMBOL(__cputime_msec_factor);
 189u64 __cputime_sec_factor;
 190EXPORT_SYMBOL(__cputime_sec_factor);
 191u64 __cputime_clockt_factor;
 192EXPORT_SYMBOL(__cputime_clockt_factor);
 193DEFINE_PER_CPU(unsigned long, cputime_last_delta);
 194DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta);
 195
 196cputime_t cputime_one_jiffy;
 197
 198static void calc_cputime_factors(void)
 199{
 200        struct div_result res;
 201
 202        div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
 203        __cputime_jiffies_factor = res.result_low;
 204        div128_by_32(1000, 0, tb_ticks_per_sec, &res);
 205        __cputime_msec_factor = res.result_low;
 206        div128_by_32(1, 0, tb_ticks_per_sec, &res);
 207        __cputime_sec_factor = res.result_low;
 208        div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
 209        __cputime_clockt_factor = res.result_low;
 210}
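/*
 * Each factor computed above is the low 64 bits of (units * 2^64) /
 * tb_ticks_per_sec, i.e. "units per timebase tick" as a 0.64 binary
 * fraction.  A conversion is then a single high multiply, roughly
 *
 *	jiffies = mulhdu(timebase_ticks, __cputime_jiffies_factor);
 *
 * as done by the cputime_to_*() helpers in <asm/cputime.h>.
 */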
 211
 212/*
 213 * Read the PURR on systems that have it, otherwise the timebase.
 214 */
 215static u64 read_purr(void)
 216{
 217        if (cpu_has_feature(CPU_FTR_PURR))
 218                return mfspr(SPRN_PURR);
 219        return mftb();
 220}
 221
 222/*
 223 * Read the SPURR on systems that have it, otherwise the purr
 224 */
 225static u64 read_spurr(u64 purr)
 226{
 227        /*
 228         * cpus without PURR won't have a SPURR
 229         * We already know the former when we use this, so tell gcc
 230         */
 231        if (cpu_has_feature(CPU_FTR_PURR) && cpu_has_feature(CPU_FTR_SPURR))
 232                return mfspr(SPRN_SPURR);
 233        return purr;
 234}
 235
 236/*
 237 * Account time for a transition between system, hard irq
 238 * or soft irq state.
 239 */
 240void account_system_vtime(struct task_struct *tsk)
 241{
 242        u64 now, nowscaled, delta, deltascaled, sys_time;
 243        unsigned long flags;
 244
 245        local_irq_save(flags);
 246        now = read_purr();
 247        nowscaled = read_spurr(now);
 248        delta = now - get_paca()->startpurr;
 249        deltascaled = nowscaled - get_paca()->startspurr;
 250        get_paca()->startpurr = now;
 251        get_paca()->startspurr = nowscaled;
 252        if (!in_interrupt()) {
 253                /* deltascaled includes both user and system time.
 254                 * Hence scale it based on the purr ratio to estimate
 255                 * the system time */
 256                sys_time = get_paca()->system_time;
 257                if (get_paca()->user_time)
 258                        deltascaled = deltascaled * sys_time /
 259                             (sys_time + get_paca()->user_time);
 260                delta += sys_time;
 261                get_paca()->system_time = 0;
 262        }
 263        if (in_irq() || idle_task(smp_processor_id()) != tsk)
 264                account_system_time(tsk, 0, delta, deltascaled);
 265        else
 266                account_idle_time(delta);
 267        per_cpu(cputime_last_delta, smp_processor_id()) = delta;
 268        per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
 269        local_irq_restore(flags);
 270}
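/*
 * Example of the scaling above: if the paca has accumulated three times
 * as much user time as system time since the last snapshot, only a
 * quarter of the SPURR delta is charged as scaled system time here; the
 * user portion is accounted separately in account_process_tick() below.
 */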
 271
 272/*
 273 * Transfer the user and system times accumulated in the paca
 274 * by the exception entry and exit code to the generic process
 275 * user and system time records.
 276 * Must be called with interrupts disabled.
 277 */
 278void account_process_tick(struct task_struct *tsk, int user_tick)
 279{
 280        cputime_t utime, utimescaled;
 281
 282        utime = get_paca()->user_time;
 283        get_paca()->user_time = 0;
 284        utimescaled = cputime_to_scaled(utime);
 285        account_user_time(tsk, utime, utimescaled);
 286}
 287
 288/*
 289 * Stuff for accounting stolen time.
 290 */
 291struct cpu_purr_data {
 292        int     initialized;                    /* thread is running */
 293        u64     tb;                     /* last TB value read */
 294        u64     purr;                   /* last PURR value read */
 295        u64     spurr;                  /* last SPURR value read */
 296};
 297
 298/*
 299 * Each entry in the cpu_purr_data array is manipulated only by its
 300 * "owner" cpu -- usually in the timer interrupt but also occasionally
 301 * in process context for cpu online.  As long as cpus do not touch
 302 * each others' cpu_purr_data, disabling local interrupts is
 303 * sufficient to serialize accesses.
 304 */
 305static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data);
 306
 307static void snapshot_tb_and_purr(void *data)
 308{
 309        unsigned long flags;
 310        struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data);
 311
 312        local_irq_save(flags);
 313        p->tb = get_tb_or_rtc();
 314        p->purr = mfspr(SPRN_PURR);
 315        wmb();
 316        p->initialized = 1;
 317        local_irq_restore(flags);
 318}
 319
 320/*
 321 * Called during boot when all cpus have come up.
 322 */
 323void snapshot_timebases(void)
 324{
 325        if (!cpu_has_feature(CPU_FTR_PURR))
 326                return;
 327        on_each_cpu(snapshot_tb_and_purr, NULL, 1);
 328}
 329
 330/*
 331 * Must be called with interrupts disabled.
 332 */
 333void calculate_steal_time(void)
 334{
 335        u64 tb, purr;
 336        s64 stolen;
 337        struct cpu_purr_data *pme;
 338
 339        pme = &__get_cpu_var(cpu_purr_data);
 340        if (!pme->initialized)
 341                return;         /* !CPU_FTR_PURR or early in boot */
 342        tb = mftb();
 343        purr = mfspr(SPRN_PURR);
 344        stolen = (tb - pme->tb) - (purr - pme->purr);
 345        if (stolen > 0) {
 346                if (idle_task(smp_processor_id()) != current)
 347                        account_steal_time(stolen);
 348                else
 349                        account_idle_time(stolen);
 350        }
 351        pme->tb = tb;
 352        pme->purr = purr;
 353}
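/*
 * The idea behind the calculation above: the timebase always runs, while
 * PURR only advances while this (virtual) processor is actually running.
 * So if, say, 10000 timebase ticks passed but PURR only moved by 9000,
 * roughly 1000 ticks were spent elsewhere -- stolen by the hypervisor or
 * consumed by the sibling SMT thread.
 */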
 354
 355#ifdef CONFIG_PPC_SPLPAR
 356/*
 357 * Must be called before the cpu is added to the online map when
 358 * a cpu is being brought up at runtime.
 359 */
 360static void snapshot_purr(void)
 361{
 362        struct cpu_purr_data *pme;
 363        unsigned long flags;
 364
 365        if (!cpu_has_feature(CPU_FTR_PURR))
 366                return;
 367        local_irq_save(flags);
 368        pme = &__get_cpu_var(cpu_purr_data);
 369        pme->tb = mftb();
 370        pme->purr = mfspr(SPRN_PURR);
 371        pme->initialized = 1;
 372        local_irq_restore(flags);
 373}
 374
 375#endif /* CONFIG_PPC_SPLPAR */
 376
 377#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
 378#define calc_cputime_factors()
 379#define calculate_steal_time()          do { } while (0)
 380#endif
 381
 382#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR))
 383#define snapshot_purr()                 do { } while (0)
 384#endif
 385
 386/*
 387 * Called when a cpu comes up after the system has finished booting,
 388 * i.e. as a result of a hotplug cpu action.
 389 */
 390void snapshot_timebase(void)
 391{
 392        __get_cpu_var(last_jiffy) = get_tb_or_rtc();
 393        snapshot_purr();
 394}
 395
 396void __delay(unsigned long loops)
 397{
 398        unsigned long start;
 399        int diff;
 400
 401        if (__USE_RTC()) {
 402                start = get_rtcl();
 403                do {
 404                        /* the RTCL register wraps at 1000000000 */
 405                        diff = get_rtcl() - start;
 406                        if (diff < 0)
 407                                diff += 1000000000;
 408                } while (diff < loops);
 409        } else {
 410                start = get_tbl();
 411                while (get_tbl() - start < loops)
 412                        HMT_low();
 413                HMT_medium();
 414        }
 415}
 416EXPORT_SYMBOL(__delay);
 417
 418void udelay(unsigned long usecs)
 419{
 420        __delay(tb_ticks_per_usec * usecs);
 421}
 422EXPORT_SYMBOL(udelay);
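/*
 * Note that udelay() just busy-waits on the timebase: udelay(10), for
 * instance, spins for roughly 10 * tb_ticks_per_usec timebase ticks and
 * never sleeps.
 */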
 423
 424static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
 425                               u64 new_tb_to_xs)
 426{
 427        /*
 428         * tb_update_count is used to allow the userspace gettimeofday code
 429         * to assure itself that it sees a consistent view of the tb_to_xs and
 430         * stamp_xsec variables.  It reads the tb_update_count, then reads
 431         * tb_to_xs and stamp_xsec and then reads tb_update_count again.  If
 432         * the two values of tb_update_count match and are even then the
 433         * tb_to_xs and stamp_xsec values are consistent.  If not, then it
 434         * loops back and reads them again until this criteria is met.
 435         * We expect the caller to have done the first increment of
 436         * vdso_data->tb_update_count already.
 437         */
 438        vdso_data->tb_orig_stamp = new_tb_stamp;
 439        vdso_data->stamp_xsec = new_stamp_xsec;
 440        vdso_data->tb_to_xs = new_tb_to_xs;
 441        vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
 442        vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
 443        vdso_data->stamp_xtime = xtime;
 444        smp_wmb();
 445        ++(vdso_data->tb_update_count);
 446}
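/*
 * The reader side described in the comment above therefore looks roughly
 * like this (the real reader is the gettimeofday code in the VDSO):
 *
 *	do {
 *		seq = vdso_data->tb_update_count;
 *		smp_rmb();
 *		... read tb_orig_stamp, tb_to_xs, stamp_xsec ...
 *		smp_rmb();
 *	} while (seq != vdso_data->tb_update_count || (seq & 1));
 */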
 447
 448#ifdef CONFIG_SMP
 449unsigned long profile_pc(struct pt_regs *regs)
 450{
 451        unsigned long pc = instruction_pointer(regs);
 452
 453        if (in_lock_functions(pc))
 454                return regs->link;
 455
 456        return pc;
 457}
 458EXPORT_SYMBOL(profile_pc);
 459#endif
 460
 461#ifdef CONFIG_PPC_ISERIES
 462
 463/* 
 464 * This function recalibrates the timebase based on the 49-bit time-of-day
 465 * value in the Titan chip.  The Titan is much more accurate than the value
 466 * returned by the service processor for the timebase frequency.  
 467 */
 468
 469static int __init iSeries_tb_recal(void)
 470{
 471        struct div_result divres;
 472        unsigned long titan, tb;
 473
 474        /* Make sure we only run on iSeries */
 475        if (!firmware_has_feature(FW_FEATURE_ISERIES))
 476                return -ENODEV;
 477
 478        tb = get_tb();
 479        titan = HvCallXm_loadTod();
 480        if ( iSeries_recal_titan ) {
 481                unsigned long tb_ticks = tb - iSeries_recal_tb;
 482                unsigned long titan_usec = (titan - iSeries_recal_titan) >> 12;
 483                unsigned long new_tb_ticks_per_sec   = (tb_ticks * USEC_PER_SEC)/titan_usec;
 484                unsigned long new_tb_ticks_per_jiffy =
 485                        DIV_ROUND_CLOSEST(new_tb_ticks_per_sec, HZ);
 486                long tick_diff = new_tb_ticks_per_jiffy - tb_ticks_per_jiffy;
 487                char sign = '+';                
 488                /* make sure tb_ticks_per_sec and tb_ticks_per_jiffy are consistent */
 489                new_tb_ticks_per_sec = new_tb_ticks_per_jiffy * HZ;
 490
 491                if ( tick_diff < 0 ) {
 492                        tick_diff = -tick_diff;
 493                        sign = '-';
 494                }
 495                if ( tick_diff ) {
 496                        if ( tick_diff < tb_ticks_per_jiffy/25 ) {
 497                                printk( "Titan recalibrate: new tb_ticks_per_jiffy = %lu (%c%ld)\n",
 498                                                new_tb_ticks_per_jiffy, sign, tick_diff );
 499                                tb_ticks_per_jiffy = new_tb_ticks_per_jiffy;
 500                                tb_ticks_per_sec   = new_tb_ticks_per_sec;
 501                                calc_cputime_factors();
 502                                div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres );
 503                                tb_to_xs = divres.result_low;
 504                                vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
 505                                vdso_data->tb_to_xs = tb_to_xs;
 506                                setup_cputime_one_jiffy();
 507                        }
 508                        else {
 509                                printk( "Titan recalibrate: FAILED (difference > 4 percent)\n"
 510                                        "                   new tb_ticks_per_jiffy = %lu\n"
 511                                        "                   old tb_ticks_per_jiffy = %lu\n",
 512                                        new_tb_ticks_per_jiffy, tb_ticks_per_jiffy );
 513                        }
 514                }
 515        }
 516        iSeries_recal_titan = titan;
 517        iSeries_recal_tb = tb;
 518
 519        /* Called here now that we know accurate values for the timebase */
 520        clocksource_init();
 521        return 0;
 522}
 523late_initcall(iSeries_tb_recal);
 524
 525/* Called from platform early init */
 526void __init iSeries_time_init_early(void)
 527{
 528        iSeries_recal_tb = get_tb();
 529        iSeries_recal_titan = HvCallXm_loadTod();
 530}
 531#endif /* CONFIG_PPC_ISERIES */
 532
 533#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32)
 534DEFINE_PER_CPU(u8, perf_event_pending);
 535
 536void set_perf_event_pending(void)
 537{
 538        get_cpu_var(perf_event_pending) = 1;
 539        set_dec(1);
 540        put_cpu_var(perf_event_pending);
 541}
 542
 543#define test_perf_event_pending()       __get_cpu_var(perf_event_pending)
 544#define clear_perf_event_pending()      __get_cpu_var(perf_event_pending) = 0
 545
 546#else  /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
 547
 548#define test_perf_event_pending()       0
 549#define clear_perf_event_pending()
 550
 551#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */
 552
 553/*
 554 * For iSeries shared processors, we have to let the hypervisor
 555 * set the hardware decrementer.  We set a virtual decrementer
 556 * in the lppaca and call the hypervisor if the virtual
 557 * decrementer is less than the current value in the hardware
 558 * decrementer. (almost always the new decrementer value will
 559 * be greater than the current hardware decrementer so the hypervisor
 560 * call will not be needed)
 561 */
 562
 563/*
 564 * timer_interrupt - gets called when the decrementer overflows,
 565 * with interrupts disabled.
 566 */
 567void timer_interrupt(struct pt_regs * regs)
 568{
 569        struct pt_regs *old_regs;
 570        struct decrementer_clock *decrementer =  &__get_cpu_var(decrementers);
 571        struct clock_event_device *evt = &decrementer->event;
 572        u64 now;
 573
 574        /* Ensure a positive value is written to the decrementer, or else
 575         * some CPUs will continue to take decrementer exceptions */
 576        set_dec(DECREMENTER_MAX);
 577
 578#ifdef CONFIG_PPC32
 579        if (test_perf_event_pending()) {
 580                clear_perf_event_pending();
 581                perf_event_do_pending();
 582        }
 583        if (atomic_read(&ppc_n_lost_interrupts) != 0)
 584                do_IRQ(regs);
 585#endif
 586
 587        now = get_tb_or_rtc();
 588        if (now < decrementer->next_tb) {
 589                /* not time for this event yet */
 590                now = decrementer->next_tb - now;
 591                if (now <= DECREMENTER_MAX)
 592                        set_dec((int)now);
 593                return;
 594        }
 595        old_regs = set_irq_regs(regs);
 596        irq_enter();
 597
 598        calculate_steal_time();
 599
 600#ifdef CONFIG_PPC_ISERIES
 601        if (firmware_has_feature(FW_FEATURE_ISERIES))
 602                get_lppaca()->int_dword.fields.decr_int = 0;
 603#endif
 604
 605        if (evt->event_handler)
 606                evt->event_handler(evt);
 607
 608#ifdef CONFIG_PPC_ISERIES
 609        if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending())
 610                process_hvlpevents();
 611#endif
 612
 613#ifdef CONFIG_PPC64
 614        /* collect purr register values often, for accurate calculations */
 615        if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
 616                struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
 617                cu->current_tb = mfspr(SPRN_PURR);
 618        }
 619#endif
 620
 621        irq_exit();
 622        set_irq_regs(old_regs);
 623}
 624
 625void wakeup_decrementer(void)
 626{
 627        unsigned long ticks;
 628
 629        /*
 630         * The timebase gets saved on sleep and restored on wakeup,
 631         * so all we need to do is to reset the decrementer.
 632         */
 633        ticks = tb_ticks_since(__get_cpu_var(last_jiffy));
 634        if (ticks < tb_ticks_per_jiffy)
 635                ticks = tb_ticks_per_jiffy - ticks;
 636        else
 637                ticks = 1;
 638        set_dec(ticks);
 639}
 640
 641#ifdef CONFIG_SUSPEND
 642void generic_suspend_disable_irqs(void)
 643{
 644        preempt_disable();
 645
 646        /* Disable the decrementer, so that it doesn't interfere
 647         * with suspending.
 648         */
 649
 650        set_dec(0x7fffffff);
 651        local_irq_disable();
 652        set_dec(0x7fffffff);
 653}
 654
 655void generic_suspend_enable_irqs(void)
 656{
 657        wakeup_decrementer();
 658
 659        local_irq_enable();
 660        preempt_enable();
 661}
 662
 663/* Overrides the weak version in kernel/power/main.c */
 664void arch_suspend_disable_irqs(void)
 665{
 666        if (ppc_md.suspend_disable_irqs)
 667                ppc_md.suspend_disable_irqs();
 668        generic_suspend_disable_irqs();
 669}
 670
 671/* Overrides the weak version in kernel/power/main.c */
 672void arch_suspend_enable_irqs(void)
 673{
 674        generic_suspend_enable_irqs();
 675        if (ppc_md.suspend_enable_irqs)
 676                ppc_md.suspend_enable_irqs();
 677}
 678#endif
 679
 680#ifdef CONFIG_SMP
 681void __init smp_space_timers(unsigned int max_cpus)
 682{
 683        int i;
 684        u64 previous_tb = per_cpu(last_jiffy, boot_cpuid);
 685
 686        /* always make sure tb > per_cpu(last_jiffy, cpu) for all cpus */
 687        previous_tb -= tb_ticks_per_jiffy;
 688
 689        for_each_possible_cpu(i) {
 690                if (i == boot_cpuid)
 691                        continue;
 692                per_cpu(last_jiffy, i) = previous_tb;
 693        }
 694}
 695#endif
 696
 697/*
 698 * Scheduler clock - returns current time in nanosec units.
 699 *
 700 * Note: mulhdu(a, b) (multiply high double unsigned) returns
 701 * the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b
 702 * are 64-bit unsigned numbers.
 703 */
 704unsigned long long sched_clock(void)
 705{
 706        if (__USE_RTC())
 707                return get_rtc();
 708        return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
 709}
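/*
 * tb_to_ns_scale and tb_to_ns_shift are chosen in time_init() so that
 * mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift equals
 * ticks * 1e9 / tb_ticks_per_sec to within rounding.  As a rough example,
 * with a 512 MHz timebase each tick is ~1.95ns, so tb_to_ns_scale ends up
 * holding about 1.95 * 2^(64 - tb_to_ns_shift).
 */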
 710
 711static int __init get_freq(char *name, int cells, unsigned long *val)
 712{
 713        struct device_node *cpu;
 714        const unsigned int *fp;
 715        int found = 0;
 716
 717        /* The cpu node should have timebase and clock frequency properties */
 718        cpu = of_find_node_by_type(NULL, "cpu");
 719
 720        if (cpu) {
 721                fp = of_get_property(cpu, name, NULL);
 722                if (fp) {
 723                        found = 1;
 724                        *val = of_read_ulong(fp, cells);
 725                }
 726
 727                of_node_put(cpu);
 728        }
 729
 730        return found;
 731}
 732
 733/* should become __cpuinit when secondary_cpu_time_init also is */
 734void start_cpu_decrementer(void)
 735{
 736#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
 737        /* Clear any pending timer interrupts */
 738        mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
 739
 740        /* Enable decrementer interrupt */
 741        mtspr(SPRN_TCR, TCR_DIE);
 742#endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */
 743}
 744
 745void __init generic_calibrate_decr(void)
 746{
 747        ppc_tb_freq = DEFAULT_TB_FREQ;          /* hardcoded default */
 748
 749        if (!get_freq("ibm,extended-timebase-frequency", 2, &ppc_tb_freq) &&
 750            !get_freq("timebase-frequency", 1, &ppc_tb_freq)) {
 751
 752                printk(KERN_ERR "WARNING: Estimating decrementer frequency "
 753                                "(not found)\n");
 754        }
 755
 756        ppc_proc_freq = DEFAULT_PROC_FREQ;      /* hardcoded default */
 757
 758        if (!get_freq("ibm,extended-clock-frequency", 2, &ppc_proc_freq) &&
 759            !get_freq("clock-frequency", 1, &ppc_proc_freq)) {
 760
 761                printk(KERN_ERR "WARNING: Estimating processor frequency "
 762                                "(not found)\n");
 763        }
 764}
 765
 766int update_persistent_clock(struct timespec now)
 767{
 768        struct rtc_time tm;
 769
 770        if (!ppc_md.set_rtc_time)
 771                return 0;
 772
 773        to_tm(now.tv_sec + 1 + timezone_offset, &tm);
 774        tm.tm_year -= 1900;
 775        tm.tm_mon -= 1;
 776
 777        return ppc_md.set_rtc_time(&tm);
 778}
 779
 780static void __read_persistent_clock(struct timespec *ts)
 781{
 782        struct rtc_time tm;
 783        static int first = 1;
 784
 785        ts->tv_nsec = 0;
 786        /* XXX this is a little fragile but will work okay in the short term */
 787        if (first) {
 788                first = 0;
 789                if (ppc_md.time_init)
 790                        timezone_offset = ppc_md.time_init();
 791
 792                /* get_boot_time() isn't guaranteed to be safe to call late */
 793                if (ppc_md.get_boot_time) {
 794                        ts->tv_sec = ppc_md.get_boot_time() - timezone_offset;
 795                        return;
 796                }
 797        }
 798        if (!ppc_md.get_rtc_time) {
 799                ts->tv_sec = 0;
 800                return;
 801        }
 802        ppc_md.get_rtc_time(&tm);
 803
 804        ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
 805                            tm.tm_hour, tm.tm_min, tm.tm_sec);
 806}
 807
 808void read_persistent_clock(struct timespec *ts)
 809{
 810        __read_persistent_clock(ts);
 811
 812        /* Sanitize it in case the real time clock is set before the epoch */
 813        if (ts->tv_sec < 0) {
 814                ts->tv_sec = 0;
 815                ts->tv_nsec = 0;
 816        }
 817                
 818}
 819
 820/* clocksource code */
 821static cycle_t rtc_read(struct clocksource *cs)
 822{
 823        return (cycle_t)get_rtc();
 824}
 825
 826static cycle_t timebase_read(struct clocksource *cs)
 827{
 828        return (cycle_t)get_tb();
 829}
 830
 831void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
 832{
 833        u64 t2x, stamp_xsec;
 834
 835        if (clock != &clocksource_timebase)
 836                return;
 837
 838        /* Make userspace gettimeofday spin until we're done. */
 839        ++vdso_data->tb_update_count;
 840        smp_mb();
 841
 842        /* XXX this assumes clock->shift == 22 */
 843        /* 4611686018 ~= 2^(20+64-22) / 1e9 */
 844        t2x = (u64) clock->mult * 4611686018ULL;
 845        stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC;
 846        do_div(stamp_xsec, 1000000000);
 847        stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC;
 848        update_gtod(clock->cycle_last, stamp_xsec, t2x);
 849}
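/*
 * Where the magic constant comes from: tb_to_xs is "xsec per timebase
 * tick" as a 0.64 fraction and mult / 2^22 is nanoseconds per tick, so
 * t2x = mult * 2^(20 + 64 - 22) / 1e9, and 2^62 / 1e9 ~= 4611686018,
 * matching the comment above.
 */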
 850
 851void update_vsyscall_tz(void)
 852{
 853        /* Make userspace gettimeofday spin until we're done. */
 854        ++vdso_data->tb_update_count;
 855        smp_mb();
 856        vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
 857        vdso_data->tz_dsttime = sys_tz.tz_dsttime;
 858        smp_mb();
 859        ++vdso_data->tb_update_count;
 860}
 861
 862static void __init clocksource_init(void)
 863{
 864        struct clocksource *clock;
 865
 866        if (__USE_RTC())
 867                clock = &clocksource_rtc;
 868        else
 869                clock = &clocksource_timebase;
 870
 871        clock->mult = clocksource_hz2mult(tb_ticks_per_sec, clock->shift);
 872
 873        if (clocksource_register(clock)) {
 874                printk(KERN_ERR "clocksource: %s is already registered\n",
 875                       clock->name);
 876                return;
 877        }
 878
 879        printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n",
 880               clock->name, clock->mult, clock->shift);
 881}
 882
 883static int decrementer_set_next_event(unsigned long evt,
 884                                      struct clock_event_device *dev)
 885{
 886        __get_cpu_var(decrementers).next_tb = get_tb_or_rtc() + evt;
 887        set_dec(evt);
 888        return 0;
 889}
 890
 891static void decrementer_set_mode(enum clock_event_mode mode,
 892                                 struct clock_event_device *dev)
 893{
 894        if (mode != CLOCK_EVT_MODE_ONESHOT)
 895                decrementer_set_next_event(DECREMENTER_MAX, dev);
 896}
 897
 898static void __init setup_clockevent_multiplier(unsigned long hz)
 899{
 900        u64 mult, shift = 32;
 901
 902        while (1) {
 903                mult = div_sc(hz, NSEC_PER_SEC, shift);
 904                if (mult && (mult >> 32UL) == 0UL)
 905                        break;
 906
 907                shift--;
 908        }
 909
 910        decrementer_clockevent.shift = shift;
 911        decrementer_clockevent.mult = mult;
 912}
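/*
 * The loop above picks the largest shift (at most 32) for which
 * mult = hz * 2^shift / NSEC_PER_SEC still fits in 32 bits.  The generic
 * clockevents code then converts nanoseconds to decrementer ticks as
 * roughly (ns * mult) >> shift; e.g. with a 512 MHz timebase,
 * mult / 2^shift is about 0.512 ticks per nanosecond.
 */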
 913
 914static void register_decrementer_clockevent(int cpu)
 915{
 916        struct clock_event_device *dec = &per_cpu(decrementers, cpu).event;
 917
 918        *dec = decrementer_clockevent;
 919        dec->cpumask = cpumask_of(cpu);
 920
 921        printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
 922               dec->name, dec->mult, dec->shift, cpu);
 923
 924        clockevents_register_device(dec);
 925}
 926
 927static void __init init_decrementer_clockevent(void)
 928{
 929        int cpu = smp_processor_id();
 930
 931        setup_clockevent_multiplier(ppc_tb_freq);
 932        decrementer_clockevent.max_delta_ns =
 933                clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
 934        decrementer_clockevent.min_delta_ns =
 935                clockevent_delta2ns(2, &decrementer_clockevent);
 936
 937        register_decrementer_clockevent(cpu);
 938}
 939
 940void secondary_cpu_time_init(void)
 941{
 942        /* Start the decrementer on CPUs that have manual control
 943         * such as BookE
 944         */
 945        start_cpu_decrementer();
 946
 947        /* FIXME: Should make unrelated change to move snapshot_timebase
 948         * call here! */
 949        register_decrementer_clockevent(smp_processor_id());
 950}
 951
 952/* This function is only called on the boot processor */
 953void __init time_init(void)
 954{
 955        unsigned long flags;
 956        struct div_result res;
 957        u64 scale, x;
 958        unsigned shift;
 959
 960        if (__USE_RTC()) {
 961                /* 601 processor: dec counts down by 128 every 128ns */
 962                ppc_tb_freq = 1000000000;
 963                tb_last_jiffy = get_rtcl();
 964        } else {
 965                /* Normal PowerPC with timebase register */
 966                ppc_md.calibrate_decr();
 967                printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
 968                       ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
 969                printk(KERN_DEBUG "time_init: processor frequency   = %lu.%.6lu MHz\n",
 970                       ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
 971                tb_last_jiffy = get_tb();
 972        }
 973
 974        tb_ticks_per_jiffy = ppc_tb_freq / HZ;
 975        tb_ticks_per_sec = ppc_tb_freq;
 976        tb_ticks_per_usec = ppc_tb_freq / 1000000;
 977        tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
 978        calc_cputime_factors();
 979        setup_cputime_one_jiffy();
 980
 981        /*
 982         * Calculate the length of each tick in ns.  It will not be
 983         * exactly 1e9/HZ unless ppc_tb_freq is divisible by HZ.
 984         * We compute 1e9 * tb_ticks_per_jiffy / ppc_tb_freq,
 985         * rounded up.
 986         */
 987        x = (u64) NSEC_PER_SEC * tb_ticks_per_jiffy + ppc_tb_freq - 1;
 988        do_div(x, ppc_tb_freq);
 989        tick_nsec = x;
 990        last_tick_len = x << TICKLEN_SCALE;
 991
 992        /*
 993         * Compute ticklen_to_xs, which is a factor which gets multiplied
 994         * by (last_tick_len << TICKLEN_SHIFT) to get a tb_to_xs value.
 995         * It is computed as:
 996         * ticklen_to_xs = 2^N / (tb_ticks_per_jiffy * 1e9)
 997         * where N = 64 + 20 - TICKLEN_SCALE - TICKLEN_SHIFT
 998         * which turns out to be N = 51 - SHIFT_HZ.
 999         * This gives the result as a 0.64 fixed-point fraction.
1000         * That value is reduced by an offset amounting to 1 xsec per
1001         * 2^31 timebase ticks to avoid problems with time going backwards
1002         * by 1 xsec when we do timer_recalc_offset due to losing the
1003         * fractional xsec.  That offset is equal to ppc_tb_freq/2^51
1004         * since there are 2^20 xsec in a second.
1005         */
1006        div128_by_32((1ULL << 51) - ppc_tb_freq, 0,
1007                     tb_ticks_per_jiffy << SHIFT_HZ, &res);
1008        div128_by_32(res.result_high, res.result_low, NSEC_PER_SEC, &res);
1009        ticklen_to_xs = res.result_low;
1010
1011        /* Compute tb_to_xs from tick_nsec */
1012        tb_to_xs = mulhdu(last_tick_len << TICKLEN_SHIFT, ticklen_to_xs);
1013
1014        /*
1015         * Compute scale factor for sched_clock.
1016         * The calibrate_decr() function has set tb_ticks_per_sec,
1017         * which is the timebase frequency.
1018         * We compute 1e9 * 2^64 / tb_ticks_per_sec and interpret
1019         * the 128-bit result as a 64.64 fixed-point number.
1020         * We then shift that number right until it is less than 1.0,
1021         * giving us the scale factor and shift count to use in
1022         * sched_clock().
1023         */
1024        div128_by_32(1000000000, 0, tb_ticks_per_sec, &res);
1025        scale = res.result_low;
1026        for (shift = 0; res.result_high != 0; ++shift) {
1027                scale = (scale >> 1) | (res.result_high << 63);
1028                res.result_high >>= 1;
1029        }
1030        tb_to_ns_scale = scale;
1031        tb_to_ns_shift = shift;
1032        /* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
1033        boot_tb = get_tb_or_rtc();
1034
1035        write_seqlock_irqsave(&xtime_lock, flags);
1036
1037        /* If platform provided a timezone (pmac), we correct the time */
1038        if (timezone_offset) {
1039                sys_tz.tz_minuteswest = -timezone_offset / 60;
1040                sys_tz.tz_dsttime = 0;
1041        }
1042
1043        vdso_data->tb_orig_stamp = tb_last_jiffy;
1044        vdso_data->tb_update_count = 0;
1045        vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
1046        vdso_data->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC;
1047        vdso_data->tb_to_xs = tb_to_xs;
1048
1049        write_sequnlock_irqrestore(&xtime_lock, flags);
1050
1051        /* Start the decrementer on CPUs that have manual control
1052         * such as BookE
1053         */
1054        start_cpu_decrementer();
1055
1056        /* Register the clocksource, if we're not running on iSeries */
1057        if (!firmware_has_feature(FW_FEATURE_ISERIES))
1058                clocksource_init();
1059
1060        init_decrementer_clockevent();
1061}
1062
1063
1064#define FEBRUARY        2
1065#define STARTOFTIME     1970
1066#define SECDAY          86400L
1067#define SECYR           (SECDAY * 365)
1068#define leapyear(year)          ((year) % 4 == 0 && \
1069                                 ((year) % 100 != 0 || (year) % 400 == 0))
1070#define days_in_year(a)         (leapyear(a) ? 366 : 365)
1071#define days_in_month(a)        (month_days[(a) - 1])
1072
1073static int month_days[12] = {
1074        31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
1075};
1076
1077/*
1078 * This only works for the Gregorian calendar - i.e. after 1752 (in the UK)
1079 */
1080void GregorianDay(struct rtc_time * tm)
1081{
1082        int leapsToDate;
1083        int lastYear;
1084        int day;
1085        int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
1086
1087        lastYear = tm->tm_year - 1;
1088
1089        /*
1090         * Number of leap corrections to apply up to end of last year
1091         */
1092        leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400;
1093
1094        /*
1095         * This year is a leap year if it is divisible by 4 except when it is
1096         * divisible by 100 unless it is divisible by 400
1097         *
1098         * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 was
1099         */
1100        day = tm->tm_mon > 2 && leapyear(tm->tm_year);
1101
1102        day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] +
1103                   tm->tm_mday;
1104
1105        tm->tm_wday = day % 7;
1106}
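/*
 * The day count above starts from 1 Jan of year 1 in the proleptic
 * Gregorian calendar, which happens to be a Monday, so day % 7 lines up
 * with the usual 0 = Sunday convention for tm_wday.  As a check,
 * 1 Jan 1970 works out to day 719163, and 719163 % 7 == 4, i.e. Thursday.
 */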
1107
1108void to_tm(int tim, struct rtc_time * tm)
1109{
1110        register int    i;
1111        register long   hms, day;
1112
1113        day = tim / SECDAY;
1114        hms = tim % SECDAY;
1115
1116        /* Hours, minutes, seconds are easy */
1117        tm->tm_hour = hms / 3600;
1118        tm->tm_min = (hms % 3600) / 60;
1119        tm->tm_sec = (hms % 3600) % 60;
1120
1121        /* Number of years in days */
1122        for (i = STARTOFTIME; day >= days_in_year(i); i++)
1123                day -= days_in_year(i);
1124        tm->tm_year = i;
1125
1126        /* Number of months in days left */
1127        if (leapyear(tm->tm_year))
1128                days_in_month(FEBRUARY) = 29;
1129        for (i = 1; day >= days_in_month(i); i++)
1130                day -= days_in_month(i);
1131        days_in_month(FEBRUARY) = 28;
1132        tm->tm_mon = i;
1133
1134        /* Days are what is left over (+1) from all that. */
1135        tm->tm_mday = day + 1;
1136
1137        /*
1138         * Determine the day of week
1139         */
1140        GregorianDay(tm);
1141}
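/*
 * Note that to_tm() leaves the full year (e.g. 1970) and a 1-based month
 * in the rtc_time, rather than the usual 1900-based year and 0-based
 * month; callers such as update_persistent_clock() adjust for that.
 * As a quick check, to_tm(0, &tm) yields 1970-01-01 00:00:00, tm_wday 4.
 */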
1142
1143/* Auxiliary function to compute scaling factors */
1144/* Actually the choice of a timebase running at 1/4 of the bus
1145 * frequency, giving a resolution of a few tens of nanoseconds, is quite nice.
1146 * It makes this computation very precise (27-28 bits typically) which
1147 * is optimistic considering the stability of most processor clock
1148 * oscillators and the precision with which the timebase frequency
1149 * is measured but does not harm.
1150 */
1151unsigned mulhwu_scale_factor(unsigned inscale, unsigned outscale)
1152{
1153        unsigned mlt=0, tmp, err;
1154        /* No concern for performance, it's done once: use a stupid
1155         * but safe and compact method to find the multiplier.
1156         */
1157  
1158        for (tmp = 1U<<31; tmp != 0; tmp >>= 1) {
1159                if (mulhwu(inscale, mlt|tmp) < outscale)
1160                        mlt |= tmp;
1161        }
1162  
1163        /* We might still be off by 1 for the best approximation.
1164         * A side effect of this is that if outscale is too large
1165         * the returned value will be zero.
1166         * Many corner cases have been checked and seem to work,
1167         * some might have been forgotten in the test however.
1168         */
1169  
1170        err = inscale * (mlt+1);
1171        if (err <= inscale/2)
1172                mlt++;
1173        return mlt;
1174}
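/*
 * In other words, the returned multiplier m satisfies
 * mulhwu(x, m) ~= x * outscale / inscale for x up to about inscale, so
 * tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000) (set up in
 * time_init()) converts timebase ticks to microseconds with a single
 * mulhwu().
 */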
1175
1176/*
1177 * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit
1178 * result.
1179 */
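/*
 * This is ordinary long division in base 2^32: the dividend is split into
 * four 32-bit "digits" a:b:c:d, each step divides (remainder from the
 * previous digit, next digit) by the divisor using do_div(), and the four
 * quotient digits w:x:y:z are reassembled into the 128-bit result.
 * time_init(), for instance, uses this to compute 1e9 * 2^64 /
 * tb_ticks_per_sec for sched_clock().
 */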
1180void div128_by_32(u64 dividend_high, u64 dividend_low,
1181                  unsigned divisor, struct div_result *dr)
1182{
1183        unsigned long a, b, c, d;
1184        unsigned long w, x, y, z;
1185        u64 ra, rb, rc;
1186
1187        a = dividend_high >> 32;
1188        b = dividend_high & 0xffffffff;
1189        c = dividend_low >> 32;
1190        d = dividend_low & 0xffffffff;
1191
1192        w = a / divisor;
1193        ra = ((u64)(a - (w * divisor)) << 32) + b;
1194
1195        rb = ((u64) do_div(ra, divisor) << 32) + c;
1196        x = ra;
1197
1198        rc = ((u64) do_div(rb, divisor) << 32) + d;
1199        y = rb;
1200
1201        do_div(rc, divisor);
1202        z = rc;
1203
1204        dr->result_high = ((u64)w << 32) + x;
1205        dr->result_low  = ((u64)y << 32) + z;
1206
1207}
1208
1209/* We don't need to calibrate delay, we use the CPU timebase for that */
1210void calibrate_delay(void)
1211{
1212        /* Some generic code (such as spinlock debug) uses loops_per_jiffy
1213         * as the number of __delay(1) in a jiffy, so make it so
1214         */
1215        loops_per_jiffy = tb_ticks_per_jiffy;
1216}
1217
1218static int __init rtc_init(void)
1219{
1220        struct platform_device *pdev;
1221
1222        if (!ppc_md.get_rtc_time)
1223                return -ENODEV;
1224
1225        pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
1226        if (IS_ERR(pdev))
1227                return PTR_ERR(pdev);
1228
1229        return 0;
1230}
1231
1232module_init(rtc_init);
1233