/*
 * linux/kernel/time/clocksource.c
 *
 * This file contains the functions which manage clocksource drivers.
 *
 * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * TODO WishList:
 *   o Allow clocksource drivers to be unregistered
 */

#include <linux/device.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"

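/**
 * timecounter_init - initialize a time counter
 * @tc:            Pointer to time counter which is to be initialized
 * @cc:            A cycle counter, ready to be used
 * @start_tstamp:  Arbitrary initial time stamp
 *
 * After this call the current cycle register (roughly) corresponds to
 * the initial time stamp. Every call to timecounter_read() increments
 * the time stamp counter by the number of elapsed nanoseconds.
 */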
void timecounter_init(struct timecounter *tc,
                      const struct cyclecounter *cc,
                      u64 start_tstamp)
{
        tc->cc = cc;
        tc->cycle_last = cc->read(cc);
        tc->nsec = start_tstamp;
}
EXPORT_SYMBOL_GPL(timecounter_init);

/**
 * timecounter_read_delta - get nanoseconds since last call of this function
 * @tc:         Pointer to time counter
 *
 * When the underlying cycle counter runs over, this will be handled
 * correctly as long as it does not run over more than once between
 * calls.
 *
 * The first call to this function for a new time counter initializes
 * the time tracking and returns an undefined result.
 */
static u64 timecounter_read_delta(struct timecounter *tc)
{
        cycle_t cycle_now, cycle_delta;
        u64 ns_offset;

        /* read cycle counter: */
        cycle_now = tc->cc->read(tc->cc);

        /* calculate the delta since the last timecounter_read_delta(): */
        cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;

        /* convert to nanoseconds: */
        ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta);

        /* update time stamp of timecounter_read_delta() call: */
        tc->cycle_last = cycle_now;

        return ns_offset;
}

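/**
 * timecounter_read - return nanoseconds elapsed since timecounter_init()
 *                    plus the initial time stamp
 * @tc:         Pointer to time counter
 *
 * In other words, keeps track of time since the same epoch as
 * the function which generated the initial time stamp.
 */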
u64 timecounter_read(struct timecounter *tc)
{
        u64 nsec;

        /* increment time by nanoseconds since last call */
        nsec = timecounter_read_delta(tc);
        nsec += tc->nsec;
        tc->nsec = nsec;

        return nsec;
}
EXPORT_SYMBOL_GPL(timecounter_read);

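/**
 * timecounter_cyc2time - convert a cycle counter time stamp into the same
 *                        time base as values returned by timecounter_read()
 * @tc:            Pointer to time counter
 * @cycle_tstamp:  a value returned by tc->cc->read()
 *
 * Cycle counts are converted correctly as long as @cycle_tstamp falls
 * within half of the counter's wrap-around period of tc->cycle_last,
 * in either direction.
 */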
u64 timecounter_cyc2time(struct timecounter *tc,
                         cycle_t cycle_tstamp)
{
        u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
        u64 nsec;

        /*
         * Instead of always treating cycle_tstamp as more recent
         * than tc->cycle_last, detect when it is too far in the
         * future and treat it as old time stamp instead.
         */
        if (cycle_delta > tc->cc->mask / 2) {
                cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
                nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta);
        } else {
                nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec;
        }

        return nsec;
}
EXPORT_SYMBOL_GPL(timecounter_cyc2time);
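
/*
 * Example: with a 32-bit counter (mask == 0xffffffff), a cycle_tstamp
 * whose distance from tc->cycle_last exceeds mask/2 is treated as a
 * time stamp from the recent past rather than one ~2^31 cycles in the
 * future, so its delta is subtracted from tc->nsec instead of added.
 */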

/**
 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
 * @mult:       pointer to mult variable
 * @shift:      pointer to shift variable
 * @from:       frequency to convert from
 * @to:         frequency to convert to
 * @maxsec:     guaranteed runtime conversion range in seconds
 *
 * The function evaluates the shift/mult pair for the scaled math
 * operations of clocksources and clockevents.
 *
 * @to and @from are frequency values in HZ. For clock sources @to is
 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
 * events @to is the counter frequency and @from is NSEC_PER_SEC.
 *
 * The @maxsec conversion range argument controls the time frame in
 * seconds which must be covered by the runtime conversion with the
 * calculated mult and shift factors. This guarantees that no 64bit
 * overflow happens when the input value of the conversion is
 * multiplied with the calculated mult factor. Larger ranges may
 * reduce the conversion accuracy by choosing smaller mult and shift
 * factors.
 */
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
        u64 tmp;
        u32 sft, sftacc = 32;

        /*
         * Calculate the shift factor which is limiting the conversion
         * range:
         */
        tmp = ((u64)maxsec * from) >> 32;
        while (tmp) {
                tmp >>= 1;
                sftacc--;
        }

        /*
         * Find the conversion shift/mult pair which has the best
         * accuracy and fits the maxsec conversion range:
         */
        for (sft = 32; sft > 0; sft--) {
                tmp = (u64) to << sft;
                tmp += from / 2;
                do_div(tmp, from);
                if ((tmp >> sftacc) == 0)
                        break;
        }
        *mult = tmp;
        *shift = sft;
}
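
/*
 * Worked example (illustrative values): for a hypothetical 1 MHz
 * clocksource, from = 1000000, to = NSEC_PER_SEC and, say, maxsec = 600.
 * (600 * 1000000) >> 32 == 0, so sftacc stays at 32 and mult merely has
 * to fit in 32 bits. The loop settles at sft = 22, giving
 * mult = (1000000000 << 22) / 1000000 = 4194304000, the largest pair
 * whose mult still fits in a u32. The runtime conversion then computes
 * ns = (cycles * 4194304000) >> 22, i.e. exactly cycles * 1000.
 */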

/*[Clocksource internal variables]---------
 * curr_clocksource:
 *      currently selected clocksource.
 * clocksource_list:
 *      linked list with the registered clocksources
 * clocksource_mutex:
 *      protects manipulations to curr_clocksource and the clocksource_list
 * override_name:
 *      Name of the user-specified clocksource.
 */
static struct clocksource *curr_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
static char override_name[CS_NAME_LEN];
static int finished_booting;

#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);

static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;

static int clocksource_watchdog_kthread(void *data);
static void __clocksource_change_rating(struct clocksource *cs, int rating);

/*
 * Interval: 0.5sec Threshold: 0.0625s
 */
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
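
/*
 * Every WATCHDOG_INTERVAL (0.5 s at any HZ) the watchdog timer compares
 * the nanoseconds that elapsed on each watched clocksource against
 * those that elapsed on the watchdog clocksource; a deviation beyond
 * WATCHDOG_THRESHOLD (62.5 ms) marks the watched clocksource unstable.
 */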

static void clocksource_watchdog_work(struct work_struct *work)
{
        /*
         * If kthread_run fails the next watchdog scan over the
         * watchdog_list will find the unstable clock again.
         */
        kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}

static void __clocksource_unstable(struct clocksource *cs)
{
        cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
        cs->flags |= CLOCK_SOURCE_UNSTABLE;
        if (finished_booting)
                schedule_work(&watchdog_work);
}

static void clocksource_unstable(struct clocksource *cs, int64_t delta)
{
        printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
               cs->name, delta);
        __clocksource_unstable(cs);
}

/**
 * clocksource_mark_unstable - mark clocksource unstable via watchdog
 * @cs:         clocksource to be marked unstable
 *
 * This function is called instead of clocksource_change_rating from
 * cpu hotplug code to avoid a deadlock between the clocksource mutex
 * and the cpu hotplug mutex. It defers the update of the clocksource
 * to the watchdog thread.
 */
void clocksource_mark_unstable(struct clocksource *cs)
{
        unsigned long flags;

        spin_lock_irqsave(&watchdog_lock, flags);
        if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
                if (list_empty(&cs->wd_list))
                        list_add(&cs->wd_list, &watchdog_list);
                __clocksource_unstable(cs);
        }
        spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_watchdog(unsigned long data)
{
        struct clocksource *cs;
        cycle_t csnow, wdnow, delta;
        int64_t wd_nsec, cs_nsec;
        int next_cpu, reset_pending;

        spin_lock(&watchdog_lock);
        if (!watchdog_running)
                goto out;

        reset_pending = atomic_read(&watchdog_reset_pending);

        list_for_each_entry(cs, &watchdog_list, wd_list) {

                /* Clocksource already marked unstable? */
                if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
                        if (finished_booting)
                                schedule_work(&watchdog_work);
                        continue;
                }

                local_irq_disable();
                csnow = cs->read(cs);
                wdnow = watchdog->read(watchdog);
                local_irq_enable();

                /* Clocksource initialized? */
                if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
                    atomic_read(&watchdog_reset_pending)) {
                        cs->flags |= CLOCK_SOURCE_WATCHDOG;
                        cs->wd_last = wdnow;
                        cs->cs_last = csnow;
                        continue;
                }

                delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
                wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
                                             watchdog->shift);

                delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
                cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
                cs->cs_last = csnow;
                cs->wd_last = wdnow;

                if (atomic_read(&watchdog_reset_pending))
                        continue;

                /* Check the deviation from the watchdog clocksource. */
                if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
                        clocksource_unstable(cs, cs_nsec - wd_nsec);
                        continue;
                }

                if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
                    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
                    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
                        /* Mark it valid for high-res. */
                        cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;

                        /*
                         * clocksource_done_booting() will sort it if
                         * finished_booting is not set yet.
                         */
                        if (!finished_booting)
                                continue;

                        /*
                         * If this is not the current clocksource let
                         * the watchdog thread reselect it. Due to the
                         * change to high res this clocksource might
                         * be preferred now. If it is the current
                         * clocksource let the tick code know about
                         * that change.
                         */
                        if (cs != curr_clocksource) {
                                cs->flags |= CLOCK_SOURCE_RESELECT;
                                schedule_work(&watchdog_work);
                        } else {
                                tick_clock_notify();
                        }
                }
        }

        /*
         * Only clear watchdog_reset_pending when we did a full cycle
         * through all clocksources.
         */
        if (reset_pending)
                atomic_dec(&watchdog_reset_pending);

        /*
         * Cycle through CPUs to check if the CPUs stay synchronized
         * to each other.
         */
        next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
        if (next_cpu >= nr_cpu_ids)
                next_cpu = cpumask_first(cpu_online_mask);
        watchdog_timer.expires += WATCHDOG_INTERVAL;
        add_timer_on(&watchdog_timer, next_cpu);
out:
        spin_unlock(&watchdog_lock);
}

static inline void clocksource_start_watchdog(void)
{
        if (watchdog_running || !watchdog || list_empty(&watchdog_list))
                return;
        init_timer(&watchdog_timer);
        watchdog_timer.function = clocksource_watchdog;
        watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
        add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
        watchdog_running = 1;
}

static inline void clocksource_stop_watchdog(void)
{
        if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
                return;
        del_timer(&watchdog_timer);
        watchdog_running = 0;
}

static inline void clocksource_reset_watchdog(void)
{
        struct clocksource *cs;

        list_for_each_entry(cs, &watchdog_list, wd_list)
                cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
}

static void clocksource_resume_watchdog(void)
{
        atomic_inc(&watchdog_reset_pending);
}

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
        unsigned long flags;

        spin_lock_irqsave(&watchdog_lock, flags);
        if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
                /* cs is a clocksource to be watched. */
                list_add(&cs->wd_list, &watchdog_list);
                cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
        } else {
                /* cs is a watchdog. */
                if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
                        cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
                /* Pick the best watchdog. */
                if (!watchdog || cs->rating > watchdog->rating) {
                        watchdog = cs;
                        /* Reset watchdog cycles */
                        clocksource_reset_watchdog();
                }
        }
        /* Check if the watchdog timer needs to be started. */
        clocksource_start_watchdog();
        spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
        unsigned long flags;

        spin_lock_irqsave(&watchdog_lock, flags);
        if (cs != watchdog) {
                if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
                        /* cs is a watched clocksource. */
                        list_del_init(&cs->wd_list);
                        /* Check if the watchdog timer needs to be stopped. */
                        clocksource_stop_watchdog();
                }
        }
        spin_unlock_irqrestore(&watchdog_lock, flags);
}

static int __clocksource_watchdog_kthread(void)
{
        struct clocksource *cs, *tmp;
        unsigned long flags;
        LIST_HEAD(unstable);
        int select = 0;

        spin_lock_irqsave(&watchdog_lock, flags);
        list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
                if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
                        list_del_init(&cs->wd_list);
                        list_add(&cs->wd_list, &unstable);
                        select = 1;
                }
                if (cs->flags & CLOCK_SOURCE_RESELECT) {
                        cs->flags &= ~CLOCK_SOURCE_RESELECT;
                        select = 1;
                }
        }
        /* Check if the watchdog timer needs to be stopped. */
        clocksource_stop_watchdog();
        spin_unlock_irqrestore(&watchdog_lock, flags);

        /* Needs to be done outside of watchdog lock */
        list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
                list_del_init(&cs->wd_list);
                __clocksource_change_rating(cs, 0);
        }
        return select;
}

static int clocksource_watchdog_kthread(void *data)
{
        mutex_lock(&clocksource_mutex);
        if (__clocksource_watchdog_kthread())
                clocksource_select();
        mutex_unlock(&clocksource_mutex);
        return 0;
}

static bool clocksource_is_watchdog(struct clocksource *cs)
{
        return cs == watchdog;
}

#else /* CONFIG_CLOCKSOURCE_WATCHDOG */

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
        if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
                cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}

static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }

#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */

/**
 * clocksource_suspend - suspend the clocksource(s)
 */
void clocksource_suspend(void)
{
        struct clocksource *cs;

        list_for_each_entry_reverse(cs, &clocksource_list, list)
                if (cs->suspend)
                        cs->suspend(cs);
}

/**
 * clocksource_resume - resume the clocksource(s)
 */
void clocksource_resume(void)
{
        struct clocksource *cs;

        list_for_each_entry(cs, &clocksource_list, list)
                if (cs->resume)
                        cs->resume(cs);

        clocksource_resume_watchdog();
}

/**
 * clocksource_touch_watchdog - Update watchdog
 *
 * Update the watchdog after exception contexts such as kgdb so as not
 * to incorrectly trip the watchdog. This might fail when the kernel
 * was stopped in code which holds watchdog_lock.
 */
void clocksource_touch_watchdog(void)
{
        clocksource_resume_watchdog();
}

/**
 * clocksource_max_adjustment - Returns max adjustment amount
 * @cs:         Pointer to clocksource
 */
static u32 clocksource_max_adjustment(struct clocksource *cs)
{
        u64 ret;
        /*
         * We won't try to correct for more than 11% adjustments (110,000 ppm).
         */
        ret = (u64)cs->mult * 11;
        do_div(ret, 100);
        return (u32)ret;
}

/**
 * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
 * @mult:       cycle to nanosecond multiplier
 * @shift:      cycle to nanosecond divisor (power of two)
 * @maxadj:     maximum adjustment value to mult (~11%)
 * @mask:       bitmask for two's complement subtraction of non 64 bit counters
 */
u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
{
        u64 max_nsecs, max_cycles;

        /*
         * Calculate the maximum number of cycles that we can pass to the
         * cyc2ns function without overflowing a 64-bit signed result. The
         * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
         * which is equivalent to the below.
         * max_cycles < (2^63)/(mult + maxadj)
         * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
         * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
         * max_cycles < 2^(63 - log2(mult + maxadj))
         * max_cycles < 1 << (63 - log2(mult + maxadj))
         * Please note that we add 1 to the result of the log2 to account for
         * any rounding errors, ensure the above inequality is satisfied and
         * no overflow will occur.
         */
        max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));

        /*
         * The actual maximum number of cycles we can defer the clocksource is
         * determined by the minimum of max_cycles and mask.
         * Note: Here we subtract the maxadj to make sure we don't sleep for
         * too long if there's a large negative adjustment.
         */
        max_cycles = min(max_cycles, mask);
        max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);

        return max_nsecs;
}
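
/*
 * Worked example, continuing the hypothetical 1 MHz clocksource above
 * (mult = 4194304000, shift = 22, mask = 0xffffffff): maxadj is 11% of
 * mult, i.e. 461373440, so ilog2(mult + maxadj) + 1 == 33 and
 * max_cycles = 2^30. That is smaller than the mask, and
 * (2^30 * (mult - maxadj)) >> 22 comes to roughly 9.56e11 ns, so the
 * conversion stays overflow-free for about 955 seconds worth of cycles.
 */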

/**
 * clocksource_max_deferment - Returns max time the clocksource can be deferred
 * @cs:         Pointer to clocksource
 */
static u64 clocksource_max_deferment(struct clocksource *cs)
{
        u64 max_nsecs;

        max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
                                          cs->mask);
        /*
         * To ensure that the clocksource does not wrap whilst we are idle,
         * limit the time the clocksource can be deferred by 12.5%. Please
         * note a margin of 12.5% is used because this can be computed with
         * a shift, versus say 10% which would require division.
         */
        return max_nsecs - (max_nsecs >> 3);
}
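
/*
 * Example: subtracting the 12.5% margin (max_nsecs >> 3) from the ~955 s
 * figure computed above yields ~836 s, the longest idle period after
 * which the hypothetical clocksource is still guaranteed not to have
 * wrapped.
 */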

#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET

static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
{
        struct clocksource *cs;

        if (!finished_booting || list_empty(&clocksource_list))
                return NULL;

        /*
         * We pick the clocksource with the highest rating. If oneshot
         * mode is active, we pick the highres valid clocksource with
         * the best rating.
         */
        list_for_each_entry(cs, &clocksource_list, list) {
                if (skipcur && cs == curr_clocksource)
                        continue;
                if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
                        continue;
                return cs;
        }
        return NULL;
}

static void __clocksource_select(bool skipcur)
{
        bool oneshot = tick_oneshot_mode_active();
        struct clocksource *best, *cs;

        /* Find the best suitable clocksource */
        best = clocksource_find_best(oneshot, skipcur);
        if (!best)
                return;

        /* Check for the override clocksource. */
        list_for_each_entry(cs, &clocksource_list, list) {
                if (skipcur && cs == curr_clocksource)
                        continue;
                if (strcmp(cs->name, override_name) != 0)
                        continue;
                /*
                 * Check to make sure we don't switch to a non-highres
                 * capable clocksource if the tick code is in oneshot
                 * mode (highres or nohz).
                 */
                if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
                        /* Override clocksource cannot be used. */
                        printk(KERN_WARNING "Override clocksource %s is not "
                               "HRT compatible. Cannot switch while in "
                               "HRT/NOHZ mode\n", cs->name);
                        override_name[0] = 0;
                } else
                        /* Override clocksource can be used. */
                        best = cs;
                break;
        }

        if (curr_clocksource != best && !timekeeping_notify(best)) {
                pr_info("Switched to clocksource %s\n", best->name);
                curr_clocksource = best;
        }
}

/**
 * clocksource_select - Select the best clocksource available
 *
 * Private function. Must hold clocksource_mutex when called.
 *
 * Select the clocksource with the best rating, or the clocksource
 * which is selected by userspace override.
 */
static void clocksource_select(void)
{
        return __clocksource_select(false);
}

static void clocksource_select_fallback(void)
{
        return __clocksource_select(true);
}

#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */

static inline void clocksource_select(void) { }
static inline void clocksource_select_fallback(void) { }

#endif

/*
 * clocksource_done_booting - Called near the end of core bootup
 *
 * Hack to avoid lots of clocksource churn at boot time.
 * We use fs_initcall because we want this to start before
 * device_initcall but after subsys_initcall.
 */
static int __init clocksource_done_booting(void)
{
        mutex_lock(&clocksource_mutex);
        curr_clocksource = clocksource_default_clock();
        finished_booting = 1;
        /*
         * Run the watchdog first to eliminate unstable clock sources
         */
        __clocksource_watchdog_kthread();
        clocksource_select();
        mutex_unlock(&clocksource_mutex);
        return 0;
}
fs_initcall(clocksource_done_booting);

/*
 * Enqueue the clocksource sorted by rating
 */
static void clocksource_enqueue(struct clocksource *cs)
{
        struct list_head *entry = &clocksource_list;
        struct clocksource *tmp;

        list_for_each_entry(tmp, &clocksource_list, list)
                /* Keep track of the place where to insert */
                if (tmp->rating >= cs->rating)
                        entry = &tmp->list;
        list_add(&cs->list, entry);
}

/**
 * __clocksource_updatefreq_scale - Used to update the clocksource with a new freq
 * @cs:         clocksource to be registered
 * @scale:      Scale factor multiplied against freq to get clocksource hz
 * @freq:       clocksource frequency (cycles per second) divided by scale
 *
 * This should only be called from the clocksource->enable() method.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * clocksource_updatefreq_hz() or clocksource_updatefreq_khz() helper functions.
 */
void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
        u64 sec;
        /*
         * Calculate the maximum number of seconds which we can run before
         * wrapping around. For clocksources which have a mask > 32bit
         * we need to limit the max sleep time to have a good
         * conversion precision. 10 minutes is still a reasonable
         * amount. That results in a shift value of 24 for a
         * clocksource with mask >= 40bit and f >= 4GHz. That maps to
         * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
         * margin as we do in clocksource_max_deferment().
         */
        sec = (cs->mask - (cs->mask >> 3));
        do_div(sec, freq);
        do_div(sec, scale);
        if (!sec)
                sec = 1;
        else if (sec > 600 && cs->mask > UINT_MAX)
                sec = 600;

        clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
                               NSEC_PER_SEC / scale, sec * scale);

        /*
         * Reduce mult/shift for clocksources that have a large mult, to
         * avoid overflow. Since mult may be adjusted by NTP, add an extra
         * safety margin.
         */
        cs->maxadj = clocksource_max_adjustment(cs);
        while ((cs->mult + cs->maxadj < cs->mult)
                || (cs->mult - cs->maxadj > cs->mult)) {
                cs->mult >>= 1;
                cs->shift--;
                cs->maxadj = clocksource_max_adjustment(cs);
        }

        cs->max_idle_ns = clocksource_max_deferment(cs);
}
EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
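
/*
 * Example: registering the hypothetical 1 MHz counter via
 * clocksource_register_hz(cs, 1000000) ends up here with scale = 1 and
 * freq = 1000000. For a 32-bit mask the wrap limit works out to
 * sec = (0xffffffff - (0xffffffff >> 3)) / 1000000 = 3758, which is
 * passed to clocks_calc_mult_shift() as the maxsec conversion range
 * and yields the mult = 4194304000, shift = 22 pair discussed above.
 */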

/**
 * __clocksource_register_scale - Used to install new clocksources
 * @cs:         clocksource to be registered
 * @scale:      Scale factor multiplied against freq to get clocksource hz
 * @freq:       clocksource frequency (cycles per second) divided by scale
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * clocksource_register_hz() or clocksource_register_khz() helper functions.
 */
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
        /* Initialize mult/shift and max_idle_ns */
        __clocksource_updatefreq_scale(cs, scale, freq);

        /* Add clocksource to the clocksource list */
        mutex_lock(&clocksource_mutex);
        clocksource_enqueue(cs);
        clocksource_enqueue_watchdog(cs);
        clocksource_select();
        mutex_unlock(&clocksource_mutex);
        return 0;
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);

/**
 * clocksource_register - Used to install new clocksources
 * @cs:         clocksource to be registered
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 */
int clocksource_register(struct clocksource *cs)
{
        /* calculate max adjustment for given mult/shift */
        cs->maxadj = clocksource_max_adjustment(cs);
        WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
                "Clocksource %s might overflow on 11%% adjustment\n",
                cs->name);

        /* calculate max idle time permitted for this clocksource */
        cs->max_idle_ns = clocksource_max_deferment(cs);

        mutex_lock(&clocksource_mutex);
        clocksource_enqueue(cs);
        clocksource_enqueue_watchdog(cs);
        clocksource_select();
        mutex_unlock(&clocksource_mutex);
        return 0;
}
EXPORT_SYMBOL(clocksource_register);

static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
        list_del(&cs->list);
        cs->rating = rating;
        clocksource_enqueue(cs);
}

/**
 * clocksource_change_rating - Change the rating of a registered clocksource
 * @cs:         clocksource to be changed
 * @rating:     new rating
 */
void clocksource_change_rating(struct clocksource *cs, int rating)
{
        mutex_lock(&clocksource_mutex);
        __clocksource_change_rating(cs, rating);
        clocksource_select();
        mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);

/*
 * Unbind clocksource @cs. Called with clocksource_mutex held
 */
static int clocksource_unbind(struct clocksource *cs)
{
        /*
         * I really can't convince myself to support this on hardware
         * designed by lobotomized monkeys.
         */
        if (clocksource_is_watchdog(cs))
                return -EBUSY;

        if (cs == curr_clocksource) {
                /* Select and try to install a replacement clock source */
                clocksource_select_fallback();
                if (curr_clocksource == cs)
                        return -EBUSY;
        }
        clocksource_dequeue_watchdog(cs);
        list_del_init(&cs->list);
        return 0;
}

/**
 * clocksource_unregister - remove a registered clocksource
 * @cs: clocksource to be unregistered
 */
int clocksource_unregister(struct clocksource *cs)
{
        int ret = 0;

        mutex_lock(&clocksource_mutex);
        if (!list_empty(&cs->list))
                ret = clocksource_unbind(cs);
        mutex_unlock(&clocksource_mutex);
        return ret;
}
EXPORT_SYMBOL(clocksource_unregister);

#ifdef CONFIG_SYSFS
/**
 * sysfs_show_current_clocksources - sysfs interface for current clocksource
 * @dev:        unused
 * @attr:       unused
 * @buf:        char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing current clocksource.
 */
static ssize_t
sysfs_show_current_clocksources(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        ssize_t count = 0;

        mutex_lock(&clocksource_mutex);
        count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
        mutex_unlock(&clocksource_mutex);

        return count;
}

ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
{
        size_t ret = cnt;

        /* strings from sysfs write are not 0 terminated! */
        if (!cnt || cnt >= CS_NAME_LEN)
                return -EINVAL;

        /* strip off the trailing \n: */
        if (buf[cnt-1] == '\n')
                cnt--;
        if (cnt > 0)
                memcpy(dst, buf, cnt);
        dst[cnt] = 0;
        return ret;
}

/**
 * sysfs_override_clocksource - interface for manually overriding clocksource
 * @dev:        unused
 * @attr:       unused
 * @buf:        name of override clocksource
 * @count:      length of buffer
 *
 * Takes input from sysfs interface for manually overriding the default
 * clocksource selection.
 */
static ssize_t sysfs_override_clocksource(struct device *dev,
                                          struct device_attribute *attr,
                                          const char *buf, size_t count)
{
        ssize_t ret;

        mutex_lock(&clocksource_mutex);

        ret = sysfs_get_uname(buf, override_name, count);
        if (ret >= 0)
                clocksource_select();

        mutex_unlock(&clocksource_mutex);

        return ret;
}

/**
 * sysfs_unbind_clocksource - interface for manually unbinding clocksource
 * @dev:        unused
 * @attr:       unused
 * @buf:        name of clocksource to unbind
 * @count:      length of buffer
 *
 * Takes input from sysfs interface for manually unbinding a clocksource.
 */
static ssize_t sysfs_unbind_clocksource(struct device *dev,
                                        struct device_attribute *attr,
                                        const char *buf, size_t count)
{
        struct clocksource *cs;
        char name[CS_NAME_LEN];
        ssize_t ret;

        ret = sysfs_get_uname(buf, name, count);
        if (ret < 0)
                return ret;

        ret = -ENODEV;
        mutex_lock(&clocksource_mutex);
        list_for_each_entry(cs, &clocksource_list, list) {
                if (strcmp(cs->name, name))
                        continue;
                ret = clocksource_unbind(cs);
                break;
        }
        mutex_unlock(&clocksource_mutex);

        return ret ? ret : count;
}

/**
 * sysfs_show_available_clocksources - sysfs interface for listing clocksources
 * @dev:        unused
 * @attr:       unused
 * @buf:        char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing registered clocksources
 */
static ssize_t
sysfs_show_available_clocksources(struct device *dev,
                                  struct device_attribute *attr,
                                  char *buf)
{
        struct clocksource *src;
        ssize_t count = 0;

        mutex_lock(&clocksource_mutex);
        list_for_each_entry(src, &clocksource_list, list) {
                /*
                 * Don't show non-HRES clocksource if the tick code is
                 * in one shot mode (highres=on or nohz=on)
                 */
                if (!tick_oneshot_mode_active() ||
                    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
                        count += snprintf(buf + count,
                                  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
                                  "%s ", src->name);
        }
        mutex_unlock(&clocksource_mutex);

        count += snprintf(buf + count,
                          max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");

        return count;
}

/*
 * Sysfs setup bits:
 */
static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
                   sysfs_override_clocksource);

static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);

static DEVICE_ATTR(available_clocksource, 0444,
                   sysfs_show_available_clocksources, NULL);

static struct bus_type clocksource_subsys = {
        .name = "clocksource",
        .dev_name = "clocksource",
};

static struct device device_clocksource = {
        .id     = 0,
        .bus    = &clocksource_subsys,
};

static int __init init_clocksource_sysfs(void)
{
        int error = subsys_system_register(&clocksource_subsys, NULL);

        if (!error)
                error = device_register(&device_clocksource);
        if (!error)
                error = device_create_file(
                                &device_clocksource,
                                &dev_attr_current_clocksource);
        if (!error)
                error = device_create_file(&device_clocksource,
                                           &dev_attr_unbind_clocksource);
        if (!error)
                error = device_create_file(
                                &device_clocksource,
                                &dev_attr_available_clocksource);
        return error;
}

device_initcall(init_clocksource_sysfs);
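
/*
 * The attributes registered above appear under
 * /sys/devices/system/clocksource/clocksource0/. For example:
 *
 *   cat /sys/devices/system/clocksource/clocksource0/available_clocksource
 *   echo hpet > /sys/devices/system/clocksource/clocksource0/current_clocksource
 *
 * (hpet is only an illustrative name; any registered clocksource works.)
 */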
#endif /* CONFIG_SYSFS */

/**
 * boot_override_clocksource - boot clock override
 * @str:        override name
 *
 * Takes a clocksource= boot argument and uses it
 * as the clocksource override name.
 */
static int __init boot_override_clocksource(char *str)
{
        mutex_lock(&clocksource_mutex);
        if (str)
                strlcpy(override_name, str, sizeof(override_name));
        mutex_unlock(&clocksource_mutex);
        return 1;
}

__setup("clocksource=", boot_override_clocksource);

/**
 * boot_override_clock - Compatibility layer for deprecated boot option
 * @str:        override name
 *
 * DEPRECATED! Takes a clock= boot argument and uses it
 * as the clocksource override name
 */
static int __init boot_override_clock(char *str)
{
        if (!strcmp(str, "pmtmr")) {
                printk("Warning: clock=pmtmr is deprecated. "
                        "Use clocksource=acpi_pm.\n");
                return boot_override_clocksource("acpi_pm");
        }
        printk("Warning! clock= boot option is deprecated. "
                "Use clocksource=xyz\n");
        return boot_override_clocksource(str);
}

__setup("clock=", boot_override_clock);