linux/kernel/time/clocksource.c
<<
>>
Prefs
   1/*
   2 * linux/kernel/time/clocksource.c
   3 *
   4 * This file contains the functions which manage clocksource drivers.
   5 *
   6 * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
   7 *
   8 * This program is free software; you can redistribute it and/or modify
   9 * it under the terms of the GNU General Public License as published by
  10 * the Free Software Foundation; either version 2 of the License, or
  11 * (at your option) any later version.
  12 *
  13 * This program is distributed in the hope that it will be useful,
  14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 * GNU General Public License for more details.
  17 *
  18 * You should have received a copy of the GNU General Public License
  19 * along with this program; if not, write to the Free Software
  20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21 *
  22 * TODO WishList:
  23 *   o Allow clocksource drivers to be unregistered
  24 */
  25
  26#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  27
  28#include <linux/device.h>
  29#include <linux/clocksource.h>
  30#include <linux/init.h>
  31#include <linux/module.h>
  32#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
  33#include <linux/tick.h>
  34#include <linux/kthread.h>
  35
  36#include "tick-internal.h"
  37#include "timekeeping_internal.h"
  38
  39/**
  40 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
  41 * @mult:       pointer to mult variable
  42 * @shift:      pointer to shift variable
  43 * @from:       frequency to convert from
  44 * @to:         frequency to convert to
  45 * @maxsec:     guaranteed runtime conversion range in seconds
  46 *
  47 * The function evaluates the shift/mult pair for the scaled math
  48 * operations of clocksources and clockevents.
  49 *
  50 * @to and @from are frequency values in HZ. For clock sources @to is
  51 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
  52 * event @to is the counter frequency and @from is NSEC_PER_SEC.
  53 *
  54 * The @maxsec conversion range argument controls the time frame in
  55 * seconds which must be covered by the runtime conversion with the
  56 * calculated mult and shift factors. This guarantees that no 64bit
  57 * overflow happens when the input value of the conversion is
  58 * multiplied with the calculated mult factor. Larger ranges may
  59 * reduce the conversion accuracy by chosing smaller mult and shift
  60 * factors.
  61 */
  62void
  63clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
  64{
  65        u64 tmp;
  66        u32 sft, sftacc= 32;
  67
  68        /*
  69         * Calculate the shift factor which is limiting the conversion
  70         * range:
  71         */
  72        tmp = ((u64)maxsec * from) >> 32;
  73        while (tmp) {
  74                tmp >>=1;
  75                sftacc--;
  76        }
  77
  78        /*
  79         * Find the conversion shift/mult pair which has the best
  80         * accuracy and fits the maxsec conversion range:
  81         */
  82        for (sft = 32; sft > 0; sft--) {
  83                tmp = (u64) to << sft;
  84                tmp += from / 2;
  85                do_div(tmp, from);
  86                if ((tmp >> sftacc) == 0)
  87                        break;
  88        }
  89        *mult = tmp;
  90        *shift = sft;
  91}
  92EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
  93
  94/*[Clocksource internal variables]---------
  95 * curr_clocksource:
  96 *      currently selected clocksource.
  97 * clocksource_list:
  98 *      linked list with the registered clocksources
  99 * clocksource_mutex:
 100 *      protects manipulations to curr_clocksource and the clocksource_list
 101 * override_name:
 102 *      Name of the user-specified clocksource.
 103 */
 104static struct clocksource *curr_clocksource;
 105static LIST_HEAD(clocksource_list);
 106static DEFINE_MUTEX(clocksource_mutex);
 107static char override_name[CS_NAME_LEN];
 108static int finished_booting;
 109
 110#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 111static void clocksource_watchdog_work(struct work_struct *work);
 112static void clocksource_select(void);
 113
 114static LIST_HEAD(watchdog_list);
 115static struct clocksource *watchdog;
 116static struct timer_list watchdog_timer;
 117static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
 118static DEFINE_SPINLOCK(watchdog_lock);
 119static int watchdog_running;
 120static atomic_t watchdog_reset_pending;
 121
 122static int clocksource_watchdog_kthread(void *data);
 123static void __clocksource_change_rating(struct clocksource *cs, int rating);
 124
 125/*
 126 * Interval: 0.5sec Threshold: 0.0625s
 127 */
 128#define WATCHDOG_INTERVAL (HZ >> 1)
 129#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
 130
 131static void clocksource_watchdog_work(struct work_struct *work)
 132{
 133        /*
 134         * If kthread_run fails the next watchdog scan over the
 135         * watchdog_list will find the unstable clock again.
 136         */
 137        kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
 138}
 139
 140static void __clocksource_unstable(struct clocksource *cs)
 141{
 142        cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
 143        cs->flags |= CLOCK_SOURCE_UNSTABLE;
 144
 145        if (cs->mark_unstable)
 146                cs->mark_unstable(cs);
 147
 148        if (finished_booting)
 149                schedule_work(&watchdog_work);
 150}
 151
 152/**
 153 * clocksource_mark_unstable - mark clocksource unstable via watchdog
 154 * @cs:         clocksource to be marked unstable
 155 *
 156 * This function is called instead of clocksource_change_rating from
 157 * cpu hotplug code to avoid a deadlock between the clocksource mutex
 158 * and the cpu hotplug mutex. It defers the update of the clocksource
 159 * to the watchdog thread.
 160 */
 161void clocksource_mark_unstable(struct clocksource *cs)
 162{
 163        unsigned long flags;
 164
 165        spin_lock_irqsave(&watchdog_lock, flags);
 166        if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
 167                if (list_empty(&cs->wd_list))
 168                        list_add(&cs->wd_list, &watchdog_list);
 169                __clocksource_unstable(cs);
 170        }
 171        spin_unlock_irqrestore(&watchdog_lock, flags);
 172}
 173
 174static void clocksource_watchdog(struct timer_list *unused)
 175{
 176        struct clocksource *cs;
 177        u64 csnow, wdnow, cslast, wdlast, delta;
 178        int64_t wd_nsec, cs_nsec;
 179        int next_cpu, reset_pending;
 180
 181        spin_lock(&watchdog_lock);
 182        if (!watchdog_running)
 183                goto out;
 184
 185        reset_pending = atomic_read(&watchdog_reset_pending);
 186
 187        list_for_each_entry(cs, &watchdog_list, wd_list) {
 188
 189                /* Clocksource already marked unstable? */
 190                if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
 191                        if (finished_booting)
 192                                schedule_work(&watchdog_work);
 193                        continue;
 194                }
 195
 196                local_irq_disable();
 197                csnow = cs->read(cs);
 198                wdnow = watchdog->read(watchdog);
 199                local_irq_enable();
 200
 201                /* Clocksource initialized ? */
 202                if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
 203                    atomic_read(&watchdog_reset_pending)) {
 204                        cs->flags |= CLOCK_SOURCE_WATCHDOG;
 205                        cs->wd_last = wdnow;
 206                        cs->cs_last = csnow;
 207                        continue;
 208                }
 209
 210                delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
 211                wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
 212                                             watchdog->shift);
 213
 214                delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
 215                cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
 216                wdlast = cs->wd_last; /* save these in case we print them */
 217                cslast = cs->cs_last;
 218                cs->cs_last = csnow;
 219                cs->wd_last = wdnow;
 220
 221                if (atomic_read(&watchdog_reset_pending))
 222                        continue;
 223
 224                /* Check the deviation from the watchdog clocksource. */
 225                if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
 226                        pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
 227                                smp_processor_id(), cs->name);
 228                        pr_warn("                      '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
 229                                watchdog->name, wdnow, wdlast, watchdog->mask);
 230                        pr_warn("                      '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
 231                                cs->name, csnow, cslast, cs->mask);
 232                        __clocksource_unstable(cs);
 233                        continue;
 234                }
 235
 236                if (cs == curr_clocksource && cs->tick_stable)
 237                        cs->tick_stable(cs);
 238
 239                if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
 240                    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
 241                    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
 242                        /* Mark it valid for high-res. */
 243                        cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
 244
 245                        /*
 246                         * clocksource_done_booting() will sort it if
 247                         * finished_booting is not set yet.
 248                         */
 249                        if (!finished_booting)
 250                                continue;
 251
 252                        /*
 253                         * If this is not the current clocksource let
 254                         * the watchdog thread reselect it. Due to the
 255                         * change to high res this clocksource might
 256                         * be preferred now. If it is the current
 257                         * clocksource let the tick code know about
 258                         * that change.
 259                         */
 260                        if (cs != curr_clocksource) {
 261                                cs->flags |= CLOCK_SOURCE_RESELECT;
 262                                schedule_work(&watchdog_work);
 263                        } else {
 264                                tick_clock_notify();
 265                        }
 266                }
 267        }
 268
 269        /*
 270         * We only clear the watchdog_reset_pending, when we did a
 271         * full cycle through all clocksources.
 272         */
 273        if (reset_pending)
 274                atomic_dec(&watchdog_reset_pending);
 275
 276        /*
 277         * Cycle through CPUs to check if the CPUs stay synchronized
 278         * to each other.
 279         */
 280        next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
 281        if (next_cpu >= nr_cpu_ids)
 282                next_cpu = cpumask_first(cpu_online_mask);
 283        watchdog_timer.expires += WATCHDOG_INTERVAL;
 284        add_timer_on(&watchdog_timer, next_cpu);
 285out:
 286        spin_unlock(&watchdog_lock);
 287}
 288
 289static inline void clocksource_start_watchdog(void)
 290{
 291        if (watchdog_running || !watchdog || list_empty(&watchdog_list))
 292                return;
 293        timer_setup(&watchdog_timer, clocksource_watchdog, 0);
 294        watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
 295        add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
 296        watchdog_running = 1;
 297}
 298
 299static inline void clocksource_stop_watchdog(void)
 300{
 301        if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
 302                return;
 303        del_timer(&watchdog_timer);
 304        watchdog_running = 0;
 305}
 306
 307static inline void clocksource_reset_watchdog(void)
 308{
 309        struct clocksource *cs;
 310
 311        list_for_each_entry(cs, &watchdog_list, wd_list)
 312                cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
 313}
 314
 315static void clocksource_resume_watchdog(void)
 316{
 317        atomic_inc(&watchdog_reset_pending);
 318}
 319
 320static void clocksource_enqueue_watchdog(struct clocksource *cs)
 321{
 322        unsigned long flags;
 323
 324        spin_lock_irqsave(&watchdog_lock, flags);
 325        if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
 326                /* cs is a clocksource to be watched. */
 327                list_add(&cs->wd_list, &watchdog_list);
 328                cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
 329        } else {
 330                /* cs is a watchdog. */
 331                if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
 332                        cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
 333        }
 334        spin_unlock_irqrestore(&watchdog_lock, flags);
 335}
 336
 337static void clocksource_select_watchdog(bool fallback)
 338{
 339        struct clocksource *cs, *old_wd;
 340        unsigned long flags;
 341
 342        spin_lock_irqsave(&watchdog_lock, flags);
 343        /* save current watchdog */
 344        old_wd = watchdog;
 345        if (fallback)
 346                watchdog = NULL;
 347
 348        list_for_each_entry(cs, &clocksource_list, list) {
 349                /* cs is a clocksource to be watched. */
 350                if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
 351                        continue;
 352
 353                /* Skip current if we were requested for a fallback. */
 354                if (fallback && cs == old_wd)
 355                        continue;
 356
 357                /* Pick the best watchdog. */
 358                if (!watchdog || cs->rating > watchdog->rating)
 359                        watchdog = cs;
 360        }
 361        /* If we failed to find a fallback restore the old one. */
 362        if (!watchdog)
 363                watchdog = old_wd;
 364
 365        /* If we changed the watchdog we need to reset cycles. */
 366        if (watchdog != old_wd)
 367                clocksource_reset_watchdog();
 368
 369        /* Check if the watchdog timer needs to be started. */
 370        clocksource_start_watchdog();
 371        spin_unlock_irqrestore(&watchdog_lock, flags);
 372}
 373
 374static void clocksource_dequeue_watchdog(struct clocksource *cs)
 375{
 376        unsigned long flags;
 377
 378        spin_lock_irqsave(&watchdog_lock, flags);
 379        if (cs != watchdog) {
 380                if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
 381                        /* cs is a watched clocksource. */
 382                        list_del_init(&cs->wd_list);
 383                        /* Check if the watchdog timer needs to be stopped. */
 384                        clocksource_stop_watchdog();
 385                }
 386        }
 387        spin_unlock_irqrestore(&watchdog_lock, flags);
 388}
 389
 390static int __clocksource_watchdog_kthread(void)
 391{
 392        struct clocksource *cs, *tmp;
 393        unsigned long flags;
 394        LIST_HEAD(unstable);
 395        int select = 0;
 396
 397        spin_lock_irqsave(&watchdog_lock, flags);
 398        list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
 399                if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
 400                        list_del_init(&cs->wd_list);
 401                        list_add(&cs->wd_list, &unstable);
 402                        select = 1;
 403                }
 404                if (cs->flags & CLOCK_SOURCE_RESELECT) {
 405                        cs->flags &= ~CLOCK_SOURCE_RESELECT;
 406                        select = 1;
 407                }
 408        }
 409        /* Check if the watchdog timer needs to be stopped. */
 410        clocksource_stop_watchdog();
 411        spin_unlock_irqrestore(&watchdog_lock, flags);
 412
 413        /* Needs to be done outside of watchdog lock */
 414        list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
 415                list_del_init(&cs->wd_list);
 416                __clocksource_change_rating(cs, 0);
 417        }
 418        return select;
 419}
 420
 421static int clocksource_watchdog_kthread(void *data)
 422{
 423        mutex_lock(&clocksource_mutex);
 424        if (__clocksource_watchdog_kthread())
 425                clocksource_select();
 426        mutex_unlock(&clocksource_mutex);
 427        return 0;
 428}
 429
 430static bool clocksource_is_watchdog(struct clocksource *cs)
 431{
 432        return cs == watchdog;
 433}
 434
 435#else /* CONFIG_CLOCKSOURCE_WATCHDOG */
 436
 437static void clocksource_enqueue_watchdog(struct clocksource *cs)
 438{
 439        if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
 440                cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
 441}
 442
 443static void clocksource_select_watchdog(bool fallback) { }
 444static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
 445static inline void clocksource_resume_watchdog(void) { }
 446static inline int __clocksource_watchdog_kthread(void) { return 0; }
 447static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
 448void clocksource_mark_unstable(struct clocksource *cs) { }
 449
 450#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
 451
 452/**
 453 * clocksource_suspend - suspend the clocksource(s)
 454 */
 455void clocksource_suspend(void)
 456{
 457        struct clocksource *cs;
 458
 459        list_for_each_entry_reverse(cs, &clocksource_list, list)
 460                if (cs->suspend)
 461                        cs->suspend(cs);
 462}
 463
 464/**
 465 * clocksource_resume - resume the clocksource(s)
 466 */
 467void clocksource_resume(void)
 468{
 469        struct clocksource *cs;
 470
 471        list_for_each_entry(cs, &clocksource_list, list)
 472                if (cs->resume)
 473                        cs->resume(cs);
 474
 475        clocksource_resume_watchdog();
 476}
 477
 478/**
 479 * clocksource_touch_watchdog - Update watchdog
 480 *
 481 * Update the watchdog after exception contexts such as kgdb so as not
 482 * to incorrectly trip the watchdog. This might fail when the kernel
 483 * was stopped in code which holds watchdog_lock.
 484 */
 485void clocksource_touch_watchdog(void)
 486{
 487        clocksource_resume_watchdog();
 488}
 489
 490/**
 491 * clocksource_max_adjustment- Returns max adjustment amount
 492 * @cs:         Pointer to clocksource
 493 *
 494 */
 495static u32 clocksource_max_adjustment(struct clocksource *cs)
 496{
 497        u64 ret;
 498        /*
 499         * We won't try to correct for more than 11% adjustments (110,000 ppm),
 500         */
 501        ret = (u64)cs->mult * 11;
 502        do_div(ret,100);
 503        return (u32)ret;
 504}
 505
 506/**
 507 * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
 508 * @mult:       cycle to nanosecond multiplier
 509 * @shift:      cycle to nanosecond divisor (power of two)
 510 * @maxadj:     maximum adjustment value to mult (~11%)
 511 * @mask:       bitmask for two's complement subtraction of non 64 bit counters
 512 * @max_cyc:    maximum cycle value before potential overflow (does not include
 513 *              any safety margin)
 514 *
 515 * NOTE: This function includes a safety margin of 50%, in other words, we
 516 * return half the number of nanoseconds the hardware counter can technically
 517 * cover. This is done so that we can potentially detect problems caused by
 518 * delayed timers or bad hardware, which might result in time intervals that
 519 * are larger than what the math used can handle without overflows.
 520 */
 521u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
 522{
 523        u64 max_nsecs, max_cycles;
 524
 525        /*
 526         * Calculate the maximum number of cycles that we can pass to the
 527         * cyc2ns() function without overflowing a 64-bit result.
 528         */
 529        max_cycles = ULLONG_MAX;
 530        do_div(max_cycles, mult+maxadj);
 531
 532        /*
 533         * The actual maximum number of cycles we can defer the clocksource is
 534         * determined by the minimum of max_cycles and mask.
 535         * Note: Here we subtract the maxadj to make sure we don't sleep for
 536         * too long if there's a large negative adjustment.
 537         */
 538        max_cycles = min(max_cycles, mask);
 539        max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
 540
 541        /* return the max_cycles value as well if requested */
 542        if (max_cyc)
 543                *max_cyc = max_cycles;
 544
 545        /* Return 50% of the actual maximum, so we can detect bad values */
 546        max_nsecs >>= 1;
 547
 548        return max_nsecs;
 549}
 550
 551/**
 552 * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
 553 * @cs:         Pointer to clocksource to be updated
 554 *
 555 */
 556static inline void clocksource_update_max_deferment(struct clocksource *cs)
 557{
 558        cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
 559                                                cs->maxadj, cs->mask,
 560                                                &cs->max_cycles);
 561}
 562
 563#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
 564
 565static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
 566{
 567        struct clocksource *cs;
 568
 569        if (!finished_booting || list_empty(&clocksource_list))
 570                return NULL;
 571
 572        /*
 573         * We pick the clocksource with the highest rating. If oneshot
 574         * mode is active, we pick the highres valid clocksource with
 575         * the best rating.
 576         */
 577        list_for_each_entry(cs, &clocksource_list, list) {
 578                if (skipcur && cs == curr_clocksource)
 579                        continue;
 580                if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
 581                        continue;
 582                return cs;
 583        }
 584        return NULL;
 585}
 586
 587static void __clocksource_select(bool skipcur)
 588{
 589        bool oneshot = tick_oneshot_mode_active();
 590        struct clocksource *best, *cs;
 591
 592        /* Find the best suitable clocksource */
 593        best = clocksource_find_best(oneshot, skipcur);
 594        if (!best)
 595                return;
 596
 597        /* Check for the override clocksource. */
 598        list_for_each_entry(cs, &clocksource_list, list) {
 599                if (skipcur && cs == curr_clocksource)
 600                        continue;
 601                if (strcmp(cs->name, override_name) != 0)
 602                        continue;
 603                /*
 604                 * Check to make sure we don't switch to a non-highres
 605                 * capable clocksource if the tick code is in oneshot
 606                 * mode (highres or nohz)
 607                 */
 608                if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
 609                        /* Override clocksource cannot be used. */
 610                        if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
 611                                pr_warn("Override clocksource %s is unstable and not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
 612                                        cs->name);
 613                                override_name[0] = 0;
 614                        } else {
 615                                /*
 616                                 * The override cannot be currently verified.
 617                                 * Deferring to let the watchdog check.
 618                                 */
 619                                pr_info("Override clocksource %s is not currently HRT compatible - deferring\n",
 620                                        cs->name);
 621                        }
 622                } else
 623                        /* Override clocksource can be used. */
 624                        best = cs;
 625                break;
 626        }
 627
 628        if (curr_clocksource != best && !timekeeping_notify(best)) {
 629                pr_info("Switched to clocksource %s\n", best->name);
 630                curr_clocksource = best;
 631        }
 632}
 633
 634/**
 635 * clocksource_select - Select the best clocksource available
 636 *
 637 * Private function. Must hold clocksource_mutex when called.
 638 *
 639 * Select the clocksource with the best rating, or the clocksource,
 640 * which is selected by userspace override.
 641 */
 642static void clocksource_select(void)
 643{
 644        __clocksource_select(false);
 645}
 646
 647static void clocksource_select_fallback(void)
 648{
 649        __clocksource_select(true);
 650}
 651
 652#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
 653static inline void clocksource_select(void) { }
 654static inline void clocksource_select_fallback(void) { }
 655
 656#endif
 657
 658/*
 659 * clocksource_done_booting - Called near the end of core bootup
 660 *
 661 * Hack to avoid lots of clocksource churn at boot time.
 662 * We use fs_initcall because we want this to start before
 663 * device_initcall but after subsys_initcall.
 664 */
 665static int __init clocksource_done_booting(void)
 666{
 667        mutex_lock(&clocksource_mutex);
 668        curr_clocksource = clocksource_default_clock();
 669        finished_booting = 1;
 670        /*
 671         * Run the watchdog first to eliminate unstable clock sources
 672         */
 673        __clocksource_watchdog_kthread();
 674        clocksource_select();
 675        mutex_unlock(&clocksource_mutex);
 676        return 0;
 677}
 678fs_initcall(clocksource_done_booting);
 679
 680/*
 681 * Enqueue the clocksource sorted by rating
 682 */
 683static void clocksource_enqueue(struct clocksource *cs)
 684{
 685        struct list_head *entry = &clocksource_list;
 686        struct clocksource *tmp;
 687
 688        list_for_each_entry(tmp, &clocksource_list, list) {
 689                /* Keep track of the place, where to insert */
 690                if (tmp->rating < cs->rating)
 691                        break;
 692                entry = &tmp->list;
 693        }
 694        list_add(&cs->list, entry);
 695}
 696
 697/**
 698 * __clocksource_update_freq_scale - Used update clocksource with new freq
 699 * @cs:         clocksource to be registered
 700 * @scale:      Scale factor multiplied against freq to get clocksource hz
 701 * @freq:       clocksource frequency (cycles per second) divided by scale
 702 *
 703 * This should only be called from the clocksource->enable() method.
 704 *
 705 * This *SHOULD NOT* be called directly! Please use the
 706 * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
 707 * functions.
 708 */
 709void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
 710{
 711        u64 sec;
 712
 713        /*
 714         * Default clocksources are *special* and self-define their mult/shift.
 715         * But, you're not special, so you should specify a freq value.
 716         */
 717        if (freq) {
 718                /*
 719                 * Calc the maximum number of seconds which we can run before
 720                 * wrapping around. For clocksources which have a mask > 32-bit
 721                 * we need to limit the max sleep time to have a good
 722                 * conversion precision. 10 minutes is still a reasonable
 723                 * amount. That results in a shift value of 24 for a
 724                 * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
 725                 * ~ 0.06ppm granularity for NTP.
 726                 */
 727                sec = cs->mask;
 728                do_div(sec, freq);
 729                do_div(sec, scale);
 730                if (!sec)
 731                        sec = 1;
 732                else if (sec > 600 && cs->mask > UINT_MAX)
 733                        sec = 600;
 734
 735                clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
 736                                       NSEC_PER_SEC / scale, sec * scale);
 737        }
 738        /*
 739         * Ensure clocksources that have large 'mult' values don't overflow
 740         * when adjusted.
 741         */
 742        cs->maxadj = clocksource_max_adjustment(cs);
 743        while (freq && ((cs->mult + cs->maxadj < cs->mult)
 744                || (cs->mult - cs->maxadj > cs->mult))) {
 745                cs->mult >>= 1;
 746                cs->shift--;
 747                cs->maxadj = clocksource_max_adjustment(cs);
 748        }
 749
 750        /*
 751         * Only warn for *special* clocksources that self-define
 752         * their mult/shift values and don't specify a freq.
 753         */
 754        WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
 755                "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
 756                cs->name);
 757
 758        clocksource_update_max_deferment(cs);
 759
 760        pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
 761                cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
 762}
 763EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
 764
 765/**
 766 * __clocksource_register_scale - Used to install new clocksources
 767 * @cs:         clocksource to be registered
 768 * @scale:      Scale factor multiplied against freq to get clocksource hz
 769 * @freq:       clocksource frequency (cycles per second) divided by scale
 770 *
 771 * Returns -EBUSY if registration fails, zero otherwise.
 772 *
 773 * This *SHOULD NOT* be called directly! Please use the
 774 * clocksource_register_hz() or clocksource_register_khz helper functions.
 775 */
 776int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 777{
 778
 779        /* Initialize mult/shift and max_idle_ns */
 780        __clocksource_update_freq_scale(cs, scale, freq);
 781
 782        /* Add clocksource to the clocksource list */
 783        mutex_lock(&clocksource_mutex);
 784        clocksource_enqueue(cs);
 785        clocksource_enqueue_watchdog(cs);
 786        clocksource_select();
 787        clocksource_select_watchdog(false);
 788        mutex_unlock(&clocksource_mutex);
 789        return 0;
 790}
 791EXPORT_SYMBOL_GPL(__clocksource_register_scale);
 792
 793static void __clocksource_change_rating(struct clocksource *cs, int rating)
 794{
 795        list_del(&cs->list);
 796        cs->rating = rating;
 797        clocksource_enqueue(cs);
 798}
 799
 800/**
 801 * clocksource_change_rating - Change the rating of a registered clocksource
 802 * @cs:         clocksource to be changed
 803 * @rating:     new rating
 804 */
 805void clocksource_change_rating(struct clocksource *cs, int rating)
 806{
 807        mutex_lock(&clocksource_mutex);
 808        __clocksource_change_rating(cs, rating);
 809        clocksource_select();
 810        clocksource_select_watchdog(false);
 811        mutex_unlock(&clocksource_mutex);
 812}
 813EXPORT_SYMBOL(clocksource_change_rating);
 814
 815/*
 816 * Unbind clocksource @cs. Called with clocksource_mutex held
 817 */
 818static int clocksource_unbind(struct clocksource *cs)
 819{
 820        if (clocksource_is_watchdog(cs)) {
 821                /* Select and try to install a replacement watchdog. */
 822                clocksource_select_watchdog(true);
 823                if (clocksource_is_watchdog(cs))
 824                        return -EBUSY;
 825        }
 826
 827        if (cs == curr_clocksource) {
 828                /* Select and try to install a replacement clock source */
 829                clocksource_select_fallback();
 830                if (curr_clocksource == cs)
 831                        return -EBUSY;
 832        }
 833        clocksource_dequeue_watchdog(cs);
 834        list_del_init(&cs->list);
 835        return 0;
 836}
 837
 838/**
 839 * clocksource_unregister - remove a registered clocksource
 840 * @cs: clocksource to be unregistered
 841 */
 842int clocksource_unregister(struct clocksource *cs)
 843{
 844        int ret = 0;
 845
 846        mutex_lock(&clocksource_mutex);
 847        if (!list_empty(&cs->list))
 848                ret = clocksource_unbind(cs);
 849        mutex_unlock(&clocksource_mutex);
 850        return ret;
 851}
 852EXPORT_SYMBOL(clocksource_unregister);
 853
 854#ifdef CONFIG_SYSFS
 855/**
 856 * sysfs_show_current_clocksources - sysfs interface for current clocksource
 857 * @dev:        unused
 858 * @attr:       unused
 859 * @buf:        char buffer to be filled with clocksource list
 860 *
 861 * Provides sysfs interface for listing current clocksource.
 862 */
 863static ssize_t
 864sysfs_show_current_clocksources(struct device *dev,
 865                                struct device_attribute *attr, char *buf)
 866{
 867        ssize_t count = 0;
 868
 869        mutex_lock(&clocksource_mutex);
 870        count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
 871        mutex_unlock(&clocksource_mutex);
 872
 873        return count;
 874}
 875
 876ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
 877{
 878        size_t ret = cnt;
 879
 880        /* strings from sysfs write are not 0 terminated! */
 881        if (!cnt || cnt >= CS_NAME_LEN)
 882                return -EINVAL;
 883
 884        /* strip of \n: */
 885        if (buf[cnt-1] == '\n')
 886                cnt--;
 887        if (cnt > 0)
 888                memcpy(dst, buf, cnt);
 889        dst[cnt] = 0;
 890        return ret;
 891}
 892
 893/**
 894 * sysfs_override_clocksource - interface for manually overriding clocksource
 895 * @dev:        unused
 896 * @attr:       unused
 897 * @buf:        name of override clocksource
 898 * @count:      length of buffer
 899 *
 900 * Takes input from sysfs interface for manually overriding the default
 901 * clocksource selection.
 902 */
 903static ssize_t sysfs_override_clocksource(struct device *dev,
 904                                          struct device_attribute *attr,
 905                                          const char *buf, size_t count)
 906{
 907        ssize_t ret;
 908
 909        mutex_lock(&clocksource_mutex);
 910
 911        ret = sysfs_get_uname(buf, override_name, count);
 912        if (ret >= 0)
 913                clocksource_select();
 914
 915        mutex_unlock(&clocksource_mutex);
 916
 917        return ret;
 918}
 919
 920/**
 921 * sysfs_unbind_current_clocksource - interface for manually unbinding clocksource
 922 * @dev:        unused
 923 * @attr:       unused
 924 * @buf:        unused
 925 * @count:      length of buffer
 926 *
 927 * Takes input from sysfs interface for manually unbinding a clocksource.
 928 */
 929static ssize_t sysfs_unbind_clocksource(struct device *dev,
 930                                        struct device_attribute *attr,
 931                                        const char *buf, size_t count)
 932{
 933        struct clocksource *cs;
 934        char name[CS_NAME_LEN];
 935        ssize_t ret;
 936
 937        ret = sysfs_get_uname(buf, name, count);
 938        if (ret < 0)
 939                return ret;
 940
 941        ret = -ENODEV;
 942        mutex_lock(&clocksource_mutex);
 943        list_for_each_entry(cs, &clocksource_list, list) {
 944                if (strcmp(cs->name, name))
 945                        continue;
 946                ret = clocksource_unbind(cs);
 947                break;
 948        }
 949        mutex_unlock(&clocksource_mutex);
 950
 951        return ret ? ret : count;
 952}
 953
 954/**
 955 * sysfs_show_available_clocksources - sysfs interface for listing clocksource
 956 * @dev:        unused
 957 * @attr:       unused
 958 * @buf:        char buffer to be filled with clocksource list
 959 *
 960 * Provides sysfs interface for listing registered clocksources
 961 */
 962static ssize_t
 963sysfs_show_available_clocksources(struct device *dev,
 964                                  struct device_attribute *attr,
 965                                  char *buf)
 966{
 967        struct clocksource *src;
 968        ssize_t count = 0;
 969
 970        mutex_lock(&clocksource_mutex);
 971        list_for_each_entry(src, &clocksource_list, list) {
 972                /*
 973                 * Don't show non-HRES clocksource if the tick code is
 974                 * in one shot mode (highres=on or nohz=on)
 975                 */
 976                if (!tick_oneshot_mode_active() ||
 977                    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
 978                        count += snprintf(buf + count,
 979                                  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
 980                                  "%s ", src->name);
 981        }
 982        mutex_unlock(&clocksource_mutex);
 983
 984        count += snprintf(buf + count,
 985                          max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
 986
 987        return count;
 988}
 989
 990/*
 991 * Sysfs setup bits:
 992 */
 993static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
 994                   sysfs_override_clocksource);
 995
 996static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);
 997
 998static DEVICE_ATTR(available_clocksource, 0444,
 999                   sysfs_show_available_clocksources, NULL);
1000
1001static struct bus_type clocksource_subsys = {
1002        .name = "clocksource",
1003        .dev_name = "clocksource",
1004};
1005
1006static struct device device_clocksource = {
1007        .id     = 0,
1008        .bus    = &clocksource_subsys,
1009};
1010
1011static int __init init_clocksource_sysfs(void)
1012{
1013        int error = subsys_system_register(&clocksource_subsys, NULL);
1014
1015        if (!error)
1016                error = device_register(&device_clocksource);
1017        if (!error)
1018                error = device_create_file(
1019                                &device_clocksource,
1020                                &dev_attr_current_clocksource);
1021        if (!error)
1022                error = device_create_file(&device_clocksource,
1023                                           &dev_attr_unbind_clocksource);
1024        if (!error)
1025                error = device_create_file(
1026                                &device_clocksource,
1027                                &dev_attr_available_clocksource);
1028        return error;
1029}
1030
1031device_initcall(init_clocksource_sysfs);
1032#endif /* CONFIG_SYSFS */
1033
1034/**
1035 * boot_override_clocksource - boot clock override
1036 * @str:        override name
1037 *
1038 * Takes a clocksource= boot argument and uses it
1039 * as the clocksource override name.
1040 */
1041static int __init boot_override_clocksource(char* str)
1042{
1043        mutex_lock(&clocksource_mutex);
1044        if (str)
1045                strlcpy(override_name, str, sizeof(override_name));
1046        mutex_unlock(&clocksource_mutex);
1047        return 1;
1048}
1049
1050__setup("clocksource=", boot_override_clocksource);
1051
1052/**
1053 * boot_override_clock - Compatibility layer for deprecated boot option
1054 * @str:        override name
1055 *
1056 * DEPRECATED! Takes a clock= boot argument and uses it
1057 * as the clocksource override name
1058 */
1059static int __init boot_override_clock(char* str)
1060{
1061        if (!strcmp(str, "pmtmr")) {
1062                pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
1063                return boot_override_clocksource("acpi_pm");
1064        }
1065        pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
1066        return boot_override_clocksource(str);
1067}
1068
1069__setup("clock=", boot_override_clock);
1070