linux/kernel/time/clocksource.c
/*
 * linux/kernel/time/clocksource.c
 *
 * This file contains the functions which manage clocksource drivers.
 *
 * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * TODO WishList:
 *   o Allow clocksource drivers to be unregistered
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/device.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"

/**
 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
 * @mult:	pointer to mult variable
 * @shift:	pointer to shift variable
 * @from:	frequency to convert from
 * @to:		frequency to convert to
 * @maxsec:	guaranteed runtime conversion range in seconds
 *
 * The function evaluates the shift/mult pair for the scaled math
 * operations of clocksources and clockevents.
 *
 * @to and @from are frequency values in HZ. For clock sources @to is
 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
 * events @to is the counter frequency and @from is NSEC_PER_SEC.
 *
 * The @maxsec conversion range argument controls the time frame in
 * seconds which must be covered by the runtime conversion with the
 * calculated mult and shift factors. This guarantees that no 64-bit
 * overflow happens when the input value of the conversion is
 * multiplied with the calculated mult factor. Larger ranges may
 * reduce the conversion accuracy by choosing smaller mult and shift
 * factors.
 */
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
	u64 tmp;
	u32 sft, sftacc = 32;

	/*
	 * Calculate the shift factor which is limiting the conversion
	 * range:
	 */
	tmp = ((u64)maxsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}

	/*
	 * Find the conversion shift/mult pair which has the best
	 * accuracy and fits the maxsec conversion range:
	 */
	for (sft = 32; sft > 0; sft--) {
		tmp = (u64) to << sft;
		tmp += from / 2;
		do_div(tmp, from);
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = tmp;
	*shift = sft;
}
EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
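
/*
 * A minimal usage sketch (illustrative, not part of this file): deriving
 * the mult/shift pair for a hypothetical 19.2 MHz free-running counter,
 * valid for at least 600 seconds of conversion range. Once calculated,
 * a cycle delta converts to nanoseconds the same way clocksource_cyc2ns()
 * does it:
 *
 *	u32 mult, shift;
 *
 *	clocks_calc_mult_shift(&mult, &shift, 19200000, NSEC_PER_SEC, 600);
 *	// ns = ((u64)cycles * mult) >> shift
 */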

/*[Clocksource internal variables]---------
 * curr_clocksource:
 *	currently selected clocksource.
 * suspend_clocksource:
 *	used to calculate the suspend time.
 * clocksource_list:
 *	linked list with the registered clocksources
 * clocksource_mutex:
 *	protects manipulations to curr_clocksource and the clocksource_list
 * override_name:
 *	Name of the user-specified clocksource.
 */
static struct clocksource *curr_clocksource;
static struct clocksource *suspend_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
static char override_name[CS_NAME_LEN];
static int finished_booting;
static u64 suspend_start;

#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);

static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;

static inline void clocksource_watchdog_lock(unsigned long *flags)
{
	spin_lock_irqsave(&watchdog_lock, *flags);
}

static inline void clocksource_watchdog_unlock(unsigned long *flags)
{
	spin_unlock_irqrestore(&watchdog_lock, *flags);
}

static int clocksource_watchdog_kthread(void *data);
static void __clocksource_change_rating(struct clocksource *cs, int rating);

/*
 * Interval: 0.5sec, Threshold: 0.0625s (62.5 ms)
 */
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)

static void clocksource_watchdog_work(struct work_struct *work)
{
	/*
	 * We cannot directly run clocksource_watchdog_kthread() here, because
	 * clocksource_select() calls timekeeping_notify() which uses
	 * stop_machine(). One cannot use stop_machine() from a workqueue due
	 * to lock inversions wrt CPU hotplug.
	 *
	 * Also, we only ever run this work once or twice during the lifetime
	 * of the kernel, so there is no point in creating a more permanent
	 * kthread for this.
	 *
	 * If kthread_run() fails, the next watchdog scan over the
	 * watchdog_list will find the unstable clock again.
	 */
	kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}

static void __clocksource_unstable(struct clocksource *cs)
{
	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
	cs->flags |= CLOCK_SOURCE_UNSTABLE;

	/*
	 * If the clocksource is registered, clocksource_watchdog_kthread()
	 * will re-rate and re-select.
	 */
	if (list_empty(&cs->list)) {
		cs->rating = 0;
		return;
	}

	if (cs->mark_unstable)
		cs->mark_unstable(cs);

	/* kick clocksource_watchdog_kthread() */
	if (finished_booting)
		schedule_work(&watchdog_work);
}

/**
 * clocksource_mark_unstable - mark clocksource unstable via watchdog
 * @cs:		clocksource to be marked unstable
 *
 * This function is called by the x86 TSC code to mark clocksources as unstable;
 * it defers demotion and re-selection to a kthread.
 */
void clocksource_mark_unstable(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
		if (!list_empty(&cs->list) && list_empty(&cs->wd_list))
			list_add(&cs->wd_list, &watchdog_list);
		__clocksource_unstable(cs);
	}
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_watchdog(struct timer_list *unused)
{
	struct clocksource *cs;
	u64 csnow, wdnow, cslast, wdlast, delta;
	int64_t wd_nsec, cs_nsec;
	int next_cpu, reset_pending;

	spin_lock(&watchdog_lock);
	if (!watchdog_running)
		goto out;

	reset_pending = atomic_read(&watchdog_reset_pending);

	list_for_each_entry(cs, &watchdog_list, wd_list) {

		/* Clocksource already marked unstable? */
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			if (finished_booting)
				schedule_work(&watchdog_work);
			continue;
		}

		local_irq_disable();
		csnow = cs->read(cs);
		wdnow = watchdog->read(watchdog);
		local_irq_enable();

		/* Clocksource initialized? */
		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
		    atomic_read(&watchdog_reset_pending)) {
			cs->flags |= CLOCK_SOURCE_WATCHDOG;
			cs->wd_last = wdnow;
			cs->cs_last = csnow;
			continue;
		}

		delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
		wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
					     watchdog->shift);

		delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
		cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
		wdlast = cs->wd_last; /* save these in case we print them */
		cslast = cs->cs_last;
		cs->cs_last = csnow;
		cs->wd_last = wdnow;

		if (atomic_read(&watchdog_reset_pending))
			continue;

		/* Check the deviation from the watchdog clocksource. */
		if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
			pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
				smp_processor_id(), cs->name);
			pr_warn("                      '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
				watchdog->name, wdnow, wdlast, watchdog->mask);
			pr_warn("                      '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
				cs->name, csnow, cslast, cs->mask);
			__clocksource_unstable(cs);
			continue;
		}

		if (cs == curr_clocksource && cs->tick_stable)
			cs->tick_stable(cs);

		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
			/* Mark it valid for high-res. */
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;

			/*
			 * clocksource_done_booting() will sort it if
			 * finished_booting is not set yet.
			 */
			if (!finished_booting)
				continue;

			/*
			 * If this is not the current clocksource, let
			 * the watchdog thread reselect it. Due to the
			 * change to high res this clocksource might
			 * be preferred now. If it is the current
			 * clocksource, let the tick code know about
			 * that change.
			 */
			if (cs != curr_clocksource) {
				cs->flags |= CLOCK_SOURCE_RESELECT;
				schedule_work(&watchdog_work);
			} else {
				tick_clock_notify();
			}
		}
	}

	/*
	 * Only clear watchdog_reset_pending when we have done a full
	 * cycle through all clocksources.
	 */
	if (reset_pending)
		atomic_dec(&watchdog_reset_pending);

	/*
	 * Cycle through CPUs to check if the CPUs stay synchronized
	 * to each other.
	 */
	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(cpu_online_mask);
	watchdog_timer.expires += WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, next_cpu);
out:
	spin_unlock(&watchdog_lock);
}

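/*
 * Worked example of the skew check above (illustrative numbers): with
 * WATCHDOG_INTERVAL at 0.5s, both the watched clocksource and the
 * watchdog should measure roughly 500,000,000 ns between two timer runs.
 * If the watched clocksource reports cs_nsec = 400,000,000 while the
 * watchdog reports wd_nsec = 500,000,000, the skew is 100 ms, which
 * exceeds WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4 = 62.5 ms), so the
 * clocksource is marked unstable and later demoted to rating 0.
 */
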
static inline void clocksource_start_watchdog(void)
{
	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
		return;
	timer_setup(&watchdog_timer, clocksource_watchdog, 0);
	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
	watchdog_running = 1;
}

static inline void clocksource_stop_watchdog(void)
{
	if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
		return;
	del_timer(&watchdog_timer);
	watchdog_running = 0;
}

static inline void clocksource_reset_watchdog(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &watchdog_list, wd_list)
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
}

static void clocksource_resume_watchdog(void)
{
	atomic_inc(&watchdog_reset_pending);
}

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	INIT_LIST_HEAD(&cs->wd_list);

	if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
		/* cs is a clocksource to be watched. */
		list_add(&cs->wd_list, &watchdog_list);
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
	} else {
		/* cs is a watchdog. */
		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
	}
}

static void clocksource_select_watchdog(bool fallback)
{
	struct clocksource *cs, *old_wd;
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	/* save current watchdog */
	old_wd = watchdog;
	if (fallback)
		watchdog = NULL;

	list_for_each_entry(cs, &clocksource_list, list) {
		/* cs is a clocksource to be watched. */
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
			continue;

		/* Skip current if we were requested for a fallback. */
		if (fallback && cs == old_wd)
			continue;

		/* Pick the best watchdog. */
		if (!watchdog || cs->rating > watchdog->rating)
			watchdog = cs;
	}
	/* If we failed to find a fallback, restore the old one. */
	if (!watchdog)
		watchdog = old_wd;

	/* If we changed the watchdog, we need to reset cycles. */
	if (watchdog != old_wd)
		clocksource_reset_watchdog();

	/* Check if the watchdog timer needs to be started. */
	clocksource_start_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
	if (cs != watchdog) {
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
			/* cs is a watched clocksource. */
			list_del_init(&cs->wd_list);
			/* Check if the watchdog timer needs to be stopped. */
			clocksource_stop_watchdog();
		}
	}
}

static int __clocksource_watchdog_kthread(void)
{
	struct clocksource *cs, *tmp;
	unsigned long flags;
	int select = 0;

	spin_lock_irqsave(&watchdog_lock, flags);
	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			list_del_init(&cs->wd_list);
			__clocksource_change_rating(cs, 0);
			select = 1;
		}
		if (cs->flags & CLOCK_SOURCE_RESELECT) {
			cs->flags &= ~CLOCK_SOURCE_RESELECT;
			select = 1;
		}
	}
	/* Check if the watchdog timer needs to be stopped. */
	clocksource_stop_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);

	return select;
}

static int clocksource_watchdog_kthread(void *data)
{
	mutex_lock(&clocksource_mutex);
	if (__clocksource_watchdog_kthread())
		clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}

static bool clocksource_is_watchdog(struct clocksource *cs)
{
	return cs == watchdog;
}

#else /* CONFIG_CLOCKSOURCE_WATCHDOG */

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
		cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}

static void clocksource_select_watchdog(bool fallback) { }
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }

static inline void clocksource_watchdog_lock(unsigned long *flags) { }
static inline void clocksource_watchdog_unlock(unsigned long *flags) { }

#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */

static bool clocksource_is_suspend(struct clocksource *cs)
{
	return cs == suspend_clocksource;
}

static void __clocksource_suspend_select(struct clocksource *cs)
{
	/*
	 * Skip the clocksource which will be stopped in suspend state.
	 */
	if (!(cs->flags & CLOCK_SOURCE_SUSPEND_NONSTOP))
		return;

	/*
	 * The nonstop clocksource can be selected as the suspend clocksource
	 * to calculate the suspend time, so it should not supply
	 * suspend/resume interfaces that would stop it when the system
	 * suspends.
	 */
	if (cs->suspend || cs->resume) {
		pr_warn("Nonstop clocksource %s should not supply suspend/resume interfaces\n",
			cs->name);
	}

	/* Pick the best rating. */
	if (!suspend_clocksource || cs->rating > suspend_clocksource->rating)
		suspend_clocksource = cs;
}

/**
 * clocksource_suspend_select - Select the best clocksource for suspend timing
 * @fallback:	if true, select a fallback clocksource
 */
static void clocksource_suspend_select(bool fallback)
{
	struct clocksource *cs, *old_suspend;

	old_suspend = suspend_clocksource;
	if (fallback)
		suspend_clocksource = NULL;

	list_for_each_entry(cs, &clocksource_list, list) {
		/* Skip current if we were requested for a fallback. */
		if (fallback && cs == old_suspend)
			continue;

		__clocksource_suspend_select(cs);
	}
}

/**
 * clocksource_start_suspend_timing - Start measuring the suspend timing
 * @cs:			current clocksource from timekeeping
 * @start_cycles:	current cycles from timekeeping
 *
 * This function will save the start cycle value of the suspend timer to
 * calculate the suspend time when resuming the system.
 *
 * This function is called late in the suspend process from timekeeping_suspend(),
 * which means processes are frozen, non-boot CPUs are offline and interrupts
 * are disabled. It is therefore possible to start the suspend timer without
 * taking the clocksource mutex.
 */
void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
{
	if (!suspend_clocksource)
		return;

	/*
	 * If the current clocksource is the suspend timer, we should use the
	 * tkr_mono.cycle_last value as suspend_start to avoid reading the
	 * suspend timer a second time.
	 */
	if (clocksource_is_suspend(cs)) {
		suspend_start = start_cycles;
		return;
	}

	if (suspend_clocksource->enable &&
	    suspend_clocksource->enable(suspend_clocksource)) {
		pr_warn_once("Failed to enable the non-suspend-able clocksource.\n");
		return;
	}

	suspend_start = suspend_clocksource->read(suspend_clocksource);
}

/**
 * clocksource_stop_suspend_timing - Stop measuring the suspend timing
 * @cs:		current clocksource from timekeeping
 * @cycle_now:	current cycles from timekeeping
 *
 * This function calculates the suspend time from the suspend timer.
 *
 * Returns nanoseconds since suspend started, 0 if no usable suspend clocksource.
 *
 * This function is called early in the resume process from timekeeping_resume(),
 * which means there is only one CPU, no processes are running and the
 * interrupts are disabled. It is therefore possible to stop the suspend timer
 * without taking the clocksource mutex.
 */
u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
{
	u64 now, delta, nsec = 0;

	if (!suspend_clocksource)
		return 0;

	/*
	 * If the current clocksource is the suspend timer, we should use the
	 * tkr_mono.cycle_last value from timekeeping as the current cycle to
	 * avoid reading the suspend timer a second time.
	 */
	if (clocksource_is_suspend(cs))
		now = cycle_now;
	else
		now = suspend_clocksource->read(suspend_clocksource);

	if (now > suspend_start) {
		delta = clocksource_delta(now, suspend_start,
					  suspend_clocksource->mask);
		nsec = mul_u64_u32_shr(delta, suspend_clocksource->mult,
				       suspend_clocksource->shift);
	}

	/*
	 * Disable the suspend timer to save power if the current clocksource
	 * is not the suspend timer.
	 */
	if (!clocksource_is_suspend(cs) && suspend_clocksource->disable)
		suspend_clocksource->disable(suspend_clocksource);

	return nsec;
}

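/*
 * Worked example (illustrative, assuming a hypothetical always-on
 * 32768 Hz counter was selected as suspend_clocksource): if the system
 * stays suspended for one minute, the counter advances by
 * delta = 60 * 32768 cycles, and mul_u64_u32_shr(delta, mult, shift)
 * with the mult/shift pair computed at registration yields roughly
 * 60,000,000,000 ns, which timekeeping_resume() then accounts as
 * sleep time.
 */
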
/**
 * clocksource_suspend - suspend the clocksource(s)
 */
void clocksource_suspend(void)
{
	struct clocksource *cs;

	list_for_each_entry_reverse(cs, &clocksource_list, list)
		if (cs->suspend)
			cs->suspend(cs);
}

/**
 * clocksource_resume - resume the clocksource(s)
 */
void clocksource_resume(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &clocksource_list, list)
		if (cs->resume)
			cs->resume(cs);

	clocksource_resume_watchdog();
}

/**
 * clocksource_touch_watchdog - Update watchdog
 *
 * Update the watchdog after exception contexts such as kgdb so as not
 * to incorrectly trip the watchdog. This might fail when the kernel
 * was stopped in code which holds watchdog_lock.
 */
void clocksource_touch_watchdog(void)
{
	clocksource_resume_watchdog();
}

/**
 * clocksource_max_adjustment - Returns max adjustment amount
 * @cs:		Pointer to clocksource
 */
static u32 clocksource_max_adjustment(struct clocksource *cs)
{
	u64 ret;
	/*
	 * We won't try to correct for more than 11% adjustments (110,000 ppm).
	 */
	ret = (u64)cs->mult * 11;
	do_div(ret, 100);
	return (u32)ret;
}

/**
 * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
 * @mult:	cycle to nanosecond multiplier
 * @shift:	cycle to nanosecond divisor (power of two)
 * @maxadj:	maximum adjustment value to mult (~11%)
 * @mask:	bitmask for two's complement subtraction of non-64-bit counters
 * @max_cyc:	maximum cycle value before potential overflow (does not include
 *		any safety margin)
 *
 * NOTE: This function includes a safety margin of 50%, in other words, we
 * return half the number of nanoseconds the hardware counter can technically
 * cover. This is done so that we can potentially detect problems caused by
 * delayed timers or bad hardware, which might result in time intervals that
 * are larger than what the math used can handle without overflows.
 */
u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
{
	u64 max_nsecs, max_cycles;

	/*
	 * Calculate the maximum number of cycles that we can pass to the
	 * cyc2ns() function without overflowing a 64-bit result.
	 */
	max_cycles = ULLONG_MAX;
	do_div(max_cycles, mult + maxadj);

	/*
	 * The actual maximum number of cycles we can defer the clocksource is
	 * determined by the minimum of max_cycles and mask.
	 * Note: Here we subtract the maxadj to make sure we don't sleep for
	 * too long if there's a large negative adjustment.
	 */
	max_cycles = min(max_cycles, mask);
	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);

	/* return the max_cycles value as well if requested */
	if (max_cyc)
		*max_cyc = max_cycles;

	/* Return 50% of the actual maximum, so we can detect bad values */
	max_nsecs >>= 1;

	return max_nsecs;
}

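/*
 * Worked example (illustrative): a 1 GHz counter can use mult = 2^24 and
 * shift = 24 (one cycle == one nanosecond). The 64-bit multiply in
 * cyc2ns() then overflows after about ULLONG_MAX / (mult + maxadj)
 * cycles, i.e. roughly 2^39.8 cycles or ~16 minutes at 1 GHz. After
 * converting with (mult - maxadj) and applying the 50% safety margin,
 * the reported max_nsecs comes out to a few hundred seconds of safe
 * deferment.
 */
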
/**
 * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
 * @cs:		Pointer to clocksource to be updated
 */
static inline void clocksource_update_max_deferment(struct clocksource *cs)
{
	cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
						cs->maxadj, cs->mask,
						&cs->max_cycles);
}

#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET

static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
{
	struct clocksource *cs;

	if (!finished_booting || list_empty(&clocksource_list))
		return NULL;

	/*
	 * We pick the clocksource with the highest rating. If oneshot
	 * mode is active, we pick the highres valid clocksource with
	 * the best rating.
	 */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (skipcur && cs == curr_clocksource)
			continue;
		if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			continue;
		return cs;
	}
	return NULL;
}

static void __clocksource_select(bool skipcur)
{
	bool oneshot = tick_oneshot_mode_active();
	struct clocksource *best, *cs;

	/* Find the best suitable clocksource */
	best = clocksource_find_best(oneshot, skipcur);
	if (!best)
		return;

	if (!strlen(override_name))
		goto found;

	/* Check for the override clocksource. */
	list_for_each_entry(cs, &clocksource_list, list) {
		if (skipcur && cs == curr_clocksource)
			continue;
		if (strcmp(cs->name, override_name) != 0)
			continue;
		/*
		 * Check to make sure we don't switch to a non-highres
		 * capable clocksource if the tick code is in oneshot
		 * mode (highres or nohz).
		 */
		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
			/* Override clocksource cannot be used. */
			if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
				pr_warn("Override clocksource %s is unstable and not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
					cs->name);
				override_name[0] = 0;
			} else {
				/*
				 * The override cannot currently be verified.
				 * Defer and let the watchdog check it.
				 */
				pr_info("Override clocksource %s is not currently HRT compatible - deferring\n",
					cs->name);
			}
		} else {
			/* Override clocksource can be used. */
			best = cs;
		}
		break;
	}

found:
	if (curr_clocksource != best && !timekeeping_notify(best)) {
		pr_info("Switched to clocksource %s\n", best->name);
		curr_clocksource = best;
	}
}

/**
 * clocksource_select - Select the best clocksource available
 *
 * Private function. Must hold clocksource_mutex when called.
 *
 * Select the clocksource with the best rating, or the clocksource
 * which is selected by userspace override.
 */
static void clocksource_select(void)
{
	__clocksource_select(false);
}

static void clocksource_select_fallback(void)
{
	__clocksource_select(true);
}

#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
static inline void clocksource_select(void) { }
static inline void clocksource_select_fallback(void) { }

#endif

/*
 * clocksource_done_booting - Called near the end of core bootup
 *
 * Hack to avoid lots of clocksource churn at boot time.
 * We use fs_initcall because we want this to start before
 * device_initcall but after subsys_initcall.
 */
static int __init clocksource_done_booting(void)
{
	mutex_lock(&clocksource_mutex);
	curr_clocksource = clocksource_default_clock();
	finished_booting = 1;
	/*
	 * Run the watchdog first to eliminate unstable clock sources
	 */
	__clocksource_watchdog_kthread();
	clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}
fs_initcall(clocksource_done_booting);

/*
 * Enqueue the clocksource sorted by rating
 */
static void clocksource_enqueue(struct clocksource *cs)
{
	struct list_head *entry = &clocksource_list;
	struct clocksource *tmp;

	list_for_each_entry(tmp, &clocksource_list, list) {
		/* Keep track of the place where to insert */
		if (tmp->rating < cs->rating)
			break;
		entry = &tmp->list;
	}
	list_add(&cs->list, entry);
}

/**
 * __clocksource_update_freq_scale - Used to update the clocksource with a new freq
 * @cs:		clocksource to be registered
 * @scale:	Scale factor multiplied against freq to get clocksource hz
 * @freq:	clocksource frequency (cycles per second) divided by scale
 *
 * This should only be called from the clocksource->enable() method.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
 * functions.
 */
void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
	u64 sec;

	/*
	 * Default clocksources are *special* and self-define their mult/shift.
	 * But, you're not special, so you should specify a freq value.
	 */
	if (freq) {
		/*
		 * Calc the maximum number of seconds which we can run before
		 * wrapping around. For clocksources which have a mask > 32-bit
		 * we need to limit the max sleep time to have a good
		 * conversion precision. 10 minutes is still a reasonable
		 * amount. That results in a shift value of 24 for a
		 * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
		 * ~ 0.06ppm granularity for NTP.
		 */
		sec = cs->mask;
		do_div(sec, freq);
		do_div(sec, scale);
		if (!sec)
			sec = 1;
		else if (sec > 600 && cs->mask > UINT_MAX)
			sec = 600;

		clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
				       NSEC_PER_SEC / scale, sec * scale);
	}
	/*
	 * Ensure clocksources that have large 'mult' values don't overflow
	 * when adjusted.
	 */
	cs->maxadj = clocksource_max_adjustment(cs);
	while (freq && ((cs->mult + cs->maxadj < cs->mult)
		|| (cs->mult - cs->maxadj > cs->mult))) {
		cs->mult >>= 1;
		cs->shift--;
		cs->maxadj = clocksource_max_adjustment(cs);
	}

	/*
	 * Only warn for *special* clocksources that self-define
	 * their mult/shift values and don't specify a freq.
	 */
	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
		"timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
		cs->name);

	clocksource_update_max_deferment(cs);

	pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
		cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
}
EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);

/**
 * __clocksource_register_scale - Used to install new clocksources
 * @cs:		clocksource to be registered
 * @scale:	Scale factor multiplied against freq to get clocksource hz
 * @freq:	clocksource frequency (cycles per second) divided by scale
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * clocksource_register_hz() or clocksource_register_khz() helper functions.
 */
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
	unsigned long flags;

	/* Initialize mult/shift and max_idle_ns */
	__clocksource_update_freq_scale(cs, scale, freq);

	/* Add clocksource to the clocksource list */
	mutex_lock(&clocksource_mutex);

	clocksource_watchdog_lock(&flags);
	clocksource_enqueue(cs);
	clocksource_enqueue_watchdog(cs);
	clocksource_watchdog_unlock(&flags);

	clocksource_select();
	clocksource_select_watchdog(false);
	__clocksource_suspend_select(cs);
	mutex_unlock(&clocksource_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);

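/*
 * A minimal registration sketch (hypothetical driver, not part of this
 * file; all "foo" names are made up): a driver fills in a struct
 * clocksource and calls the clocksource_register_hz() helper, which
 * wraps __clocksource_register_scale(cs, 1, hz):
 *
 *	static u64 foo_read(struct clocksource *cs)
 *	{
 *		return (u64)readl(foo_counter_base);	// hypothetical MMIO counter
 *	}
 *
 *	static struct clocksource foo_clocksource = {
 *		.name	= "foo-timer",
 *		.rating	= 300,
 *		.read	= foo_read,
 *		.mask	= CLOCKSOURCE_MASK(32),
 *		.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 *	};
 *
 *	// e.g. for a 24 MHz input clock:
 *	clocksource_register_hz(&foo_clocksource, 24000000);
 */
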
static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
	list_del(&cs->list);
	cs->rating = rating;
	clocksource_enqueue(cs);
}

/**
 * clocksource_change_rating - Change the rating of a registered clocksource
 * @cs:		clocksource to be changed
 * @rating:	new rating
 */
void clocksource_change_rating(struct clocksource *cs, int rating)
{
	unsigned long flags;

	mutex_lock(&clocksource_mutex);
	clocksource_watchdog_lock(&flags);
	__clocksource_change_rating(cs, rating);
	clocksource_watchdog_unlock(&flags);

	clocksource_select();
	clocksource_select_watchdog(false);
	clocksource_suspend_select(false);
	mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);

/*
 * Unbind clocksource @cs. Called with clocksource_mutex held.
 */
static int clocksource_unbind(struct clocksource *cs)
{
	unsigned long flags;

	if (clocksource_is_watchdog(cs)) {
		/* Select and try to install a replacement watchdog. */
		clocksource_select_watchdog(true);
		if (clocksource_is_watchdog(cs))
			return -EBUSY;
	}

	if (cs == curr_clocksource) {
		/* Select and try to install a replacement clock source */
		clocksource_select_fallback();
		if (curr_clocksource == cs)
			return -EBUSY;
	}

	if (clocksource_is_suspend(cs)) {
		/*
		 * Select and try to install a replacement suspend clocksource.
		 * If there is no replacement suspend clocksource, we will just
		 * let the clocksource go and have no suspend clocksource.
		 */
		clocksource_suspend_select(true);
	}

	clocksource_watchdog_lock(&flags);
	clocksource_dequeue_watchdog(cs);
	list_del_init(&cs->list);
	clocksource_watchdog_unlock(&flags);

	return 0;
}

/**
 * clocksource_unregister - remove a registered clocksource
 * @cs: clocksource to be unregistered
 */
int clocksource_unregister(struct clocksource *cs)
{
	int ret = 0;

	mutex_lock(&clocksource_mutex);
	if (!list_empty(&cs->list))
		ret = clocksource_unbind(cs);
	mutex_unlock(&clocksource_mutex);
	return ret;
}
EXPORT_SYMBOL(clocksource_unregister);

#ifdef CONFIG_SYSFS
/**
 * current_clocksource_show - sysfs interface for current clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	char buffer to be filled with the name of the current clocksource
 *
 * Provides sysfs interface for showing the current clocksource.
 */
static ssize_t current_clocksource_show(struct device *dev,
					struct device_attribute *attr,
					char *buf)
{
	ssize_t count = 0;

	mutex_lock(&clocksource_mutex);
	count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
	mutex_unlock(&clocksource_mutex);

	return count;
}

ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
{
	size_t ret = cnt;

	/* strings from sysfs write are not 0 terminated! */
	if (!cnt || cnt >= CS_NAME_LEN)
		return -EINVAL;

	/* strip off \n: */
	if (buf[cnt-1] == '\n')
		cnt--;
	if (cnt > 0)
		memcpy(dst, buf, cnt);
	dst[cnt] = 0;
	return ret;
}

/**
 * current_clocksource_store - interface for manually overriding clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	name of override clocksource
 * @count:	length of buffer
 *
 * Takes input from sysfs interface for manually overriding the default
 * clocksource selection.
 */
static ssize_t current_clocksource_store(struct device *dev,
					 struct device_attribute *attr,
					 const char *buf, size_t count)
{
	ssize_t ret;

	mutex_lock(&clocksource_mutex);

	ret = sysfs_get_uname(buf, override_name, count);
	if (ret >= 0)
		clocksource_select();

	mutex_unlock(&clocksource_mutex);

	return ret;
}
static DEVICE_ATTR_RW(current_clocksource);

/**
 * unbind_clocksource_store - interface for manually unbinding clocksource
 * @dev:	unused
 * @attr:	unused
 * @buf:	name of clocksource to be unbound
 * @count:	length of buffer
 *
 * Takes input from sysfs interface for manually unbinding a clocksource.
 */
static ssize_t unbind_clocksource_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
{
	struct clocksource *cs;
	char name[CS_NAME_LEN];
	ssize_t ret;

	ret = sysfs_get_uname(buf, name, count);
	if (ret < 0)
		return ret;

	ret = -ENODEV;
	mutex_lock(&clocksource_mutex);
	list_for_each_entry(cs, &clocksource_list, list) {
		if (strcmp(cs->name, name))
			continue;
		ret = clocksource_unbind(cs);
		break;
	}
	mutex_unlock(&clocksource_mutex);

	return ret ? ret : count;
}
static DEVICE_ATTR_WO(unbind_clocksource);

/**
 * available_clocksource_show - sysfs interface for listing clocksources
 * @dev:	unused
 * @attr:	unused
 * @buf:	char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing registered clocksources
 */
static ssize_t available_clocksource_show(struct device *dev,
					  struct device_attribute *attr,
					  char *buf)
{
	struct clocksource *src;
	ssize_t count = 0;

	mutex_lock(&clocksource_mutex);
	list_for_each_entry(src, &clocksource_list, list) {
		/*
		 * Don't show non-HRES clocksources if the tick code is
		 * in oneshot mode (highres=on or nohz=on)
		 */
		if (!tick_oneshot_mode_active() ||
		    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
			count += snprintf(buf + count,
				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
				  "%s ", src->name);
	}
	mutex_unlock(&clocksource_mutex);

	count += snprintf(buf + count,
			  max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");

	return count;
}
static DEVICE_ATTR_RO(available_clocksource);

static struct attribute *clocksource_attrs[] = {
	&dev_attr_current_clocksource.attr,
	&dev_attr_unbind_clocksource.attr,
	&dev_attr_available_clocksource.attr,
	NULL
};
ATTRIBUTE_GROUPS(clocksource);

static struct bus_type clocksource_subsys = {
	.name = "clocksource",
	.dev_name = "clocksource",
};

static struct device device_clocksource = {
	.id	= 0,
	.bus	= &clocksource_subsys,
	.groups	= clocksource_groups,
};

static int __init init_clocksource_sysfs(void)
{
	int error = subsys_system_register(&clocksource_subsys, NULL);

	if (!error)
		error = device_register(&device_clocksource);

	return error;
}

device_initcall(init_clocksource_sysfs);
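
/*
 * The attributes above appear under
 * /sys/devices/system/clocksource/clocksource0/. Typical usage from
 * userspace looks like (names are examples; they must match a
 * registered clocksource):
 *
 *	cat  .../available_clocksource
 *	echo hpet > .../current_clocksource
 *	echo hpet > .../unbind_clocksource
 */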
#endif /* CONFIG_SYSFS */

/**
 * boot_override_clocksource - boot clock override
 * @str:	override name
 *
 * Takes a clocksource= boot argument and uses it
 * as the clocksource override name.
 */
static int __init boot_override_clocksource(char *str)
{
	mutex_lock(&clocksource_mutex);
	if (str)
		strlcpy(override_name, str, sizeof(override_name));
	mutex_unlock(&clocksource_mutex);
	return 1;
}

__setup("clocksource=", boot_override_clocksource);
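
/*
 * Example: booting with "clocksource=acpi_pm" on the kernel command line
 * stores "acpi_pm" in override_name, and clocksource_select() will then
 * prefer the ACPI PM timer once it is registered and usable.
 */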

/**
 * boot_override_clock - Compatibility layer for deprecated boot option
 * @str:	override name
 *
 * DEPRECATED! Takes a clock= boot argument and uses it
 * as the clocksource override name.
 */
static int __init boot_override_clock(char *str)
{
	if (!strcmp(str, "pmtmr")) {
		pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
		return boot_override_clocksource("acpi_pm");
	}
	pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
	return boot_override_clocksource(str);
}

__setup("clock=", boot_override_clock);