linux/kernel/time/clocksource.c
/*
 * linux/kernel/time/clocksource.c
 *
 * This file contains the functions which manage clocksource drivers.
 *
 * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * TODO WishList:
 *   o Allow clocksource drivers to be unregistered
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/device.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"

/**
 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
 * @mult:       pointer to mult variable
 * @shift:      pointer to shift variable
 * @from:       frequency to convert from
 * @to:         frequency to convert to
 * @maxsec:     guaranteed runtime conversion range in seconds
 *
 * The function evaluates the shift/mult pair for the scaled math
 * operations of clocksources and clockevents.
 *
 * @to and @from are frequency values in HZ. For clock sources @to is
 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
 * events @to is the counter frequency and @from is NSEC_PER_SEC.
 *
 * The @maxsec conversion range argument controls the time frame in
 * seconds which must be covered by the runtime conversion with the
 * calculated mult and shift factors. This guarantees that no 64bit
 * overflow happens when the input value of the conversion is
 * multiplied with the calculated mult factor. Larger ranges may
 * reduce the conversion accuracy by choosing smaller mult and shift
 * factors.
 */
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
        u64 tmp;
        u32 sft, sftacc = 32;

        /*
         * Calculate the shift factor which is limiting the conversion
         * range:
         */
        tmp = ((u64)maxsec * from) >> 32;
        while (tmp) {
                tmp >>= 1;
                sftacc--;
        }

        /*
         * Find the conversion shift/mult pair which has the best
         * accuracy and fits the maxsec conversion range:
         */
        for (sft = 32; sft > 0; sft--) {
                tmp = (u64) to << sft;
                tmp += from / 2;
                do_div(tmp, from);
                if ((tmp >> sftacc) == 0)
                        break;
        }
        *mult = tmp;
        *shift = sft;
}
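
/*
 * Worked example (illustrative note, not part of the original source):
 * for a hypothetical 10 MHz counter converted to nanoseconds with a
 * guaranteed 600 second range, the function settles on shift = 24 and
 * mult = 100 << 24, the largest pair that fits the range:
 *
 *      u32 mult, shift;
 *
 *      clocks_calc_mult_shift(&mult, &shift, 10000000, NSEC_PER_SEC, 600);
 *      // now ns = (cycles * mult) >> shift, i.e. 100 ns per cycle
 */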

/*[Clocksource internal variables]---------
 * curr_clocksource:
 *      currently selected clocksource.
 * clocksource_list:
 *      linked list with the registered clocksources
 * clocksource_mutex:
 *      protects manipulations to curr_clocksource and the clocksource_list
 * override_name:
 *      Name of the user-specified clocksource.
 */
static struct clocksource *curr_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
static char override_name[CS_NAME_LEN];
static int finished_booting;

#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);

static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;

static int clocksource_watchdog_kthread(void *data);
static void __clocksource_change_rating(struct clocksource *cs, int rating);

/*
 * Interval: 0.5sec Threshold: 0.0625s
 * (NSEC_PER_SEC >> 4 = 62.5ms, i.e. a skew of 12.5% of one 0.5s
 * interval between a clocksource and the watchdog marks it unstable)
 */
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)

static void clocksource_watchdog_work(struct work_struct *work)
{
        /*
         * If kthread_run fails the next watchdog scan over the
         * watchdog_list will find the unstable clock again.
         */
        kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}

static void __clocksource_unstable(struct clocksource *cs)
{
        cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
        cs->flags |= CLOCK_SOURCE_UNSTABLE;
        if (finished_booting)
                schedule_work(&watchdog_work);
}

/**
 * clocksource_mark_unstable - mark clocksource unstable via watchdog
 * @cs:         clocksource to be marked unstable
 *
 * This function is called instead of clocksource_change_rating from
 * cpu hotplug code to avoid a deadlock between the clocksource mutex
 * and the cpu hotplug mutex. It defers the update of the clocksource
 * to the watchdog thread.
 */
void clocksource_mark_unstable(struct clocksource *cs)
{
        unsigned long flags;

        spin_lock_irqsave(&watchdog_lock, flags);
        if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
                if (list_empty(&cs->wd_list))
                        list_add(&cs->wd_list, &watchdog_list);
                __clocksource_unstable(cs);
        }
        spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_watchdog(unsigned long data)
{
        struct clocksource *cs;
        cycle_t csnow, wdnow, cslast, wdlast, delta;
        int64_t wd_nsec, cs_nsec;
        int next_cpu, reset_pending;

        spin_lock(&watchdog_lock);
        if (!watchdog_running)
                goto out;

        reset_pending = atomic_read(&watchdog_reset_pending);

        list_for_each_entry(cs, &watchdog_list, wd_list) {

                /* Clocksource already marked unstable? */
                if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
                        if (finished_booting)
                                schedule_work(&watchdog_work);
                        continue;
                }

                local_irq_disable();
                csnow = cs->read(cs);
                wdnow = watchdog->read(watchdog);
                local_irq_enable();

                /* Clocksource initialized ? */
                if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
                    atomic_read(&watchdog_reset_pending)) {
                        cs->flags |= CLOCK_SOURCE_WATCHDOG;
                        cs->wd_last = wdnow;
                        cs->cs_last = csnow;
                        continue;
                }

                delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
                wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
                                             watchdog->shift);

                delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
                cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
                wdlast = cs->wd_last; /* save these in case we print them */
                cslast = cs->cs_last;
                cs->cs_last = csnow;
                cs->wd_last = wdnow;

                if (atomic_read(&watchdog_reset_pending))
                        continue;

                /* Check the deviation from the watchdog clocksource. */
                if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
                        pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n",
                                cs->name);
                        pr_warn("                      '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
                                watchdog->name, wdnow, wdlast, watchdog->mask);
                        pr_warn("                      '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
                                cs->name, csnow, cslast, cs->mask);
                        __clocksource_unstable(cs);
                        continue;
                }

                if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
                    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
                    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
                        /* Mark it valid for high-res. */
                        cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;

                        /*
                         * clocksource_done_booting() will sort it if
                         * finished_booting is not set yet.
                         */
                        if (!finished_booting)
                                continue;

                        /*
                         * If this is not the current clocksource let
                         * the watchdog thread reselect it. Due to the
                         * change to high res this clocksource might
                         * be preferred now. If it is the current
                         * clocksource let the tick code know about
                         * that change.
                         */
                        if (cs != curr_clocksource) {
                                cs->flags |= CLOCK_SOURCE_RESELECT;
                                schedule_work(&watchdog_work);
                        } else {
                                tick_clock_notify();
                        }
                }
        }

        /*
         * We only clear the watchdog_reset_pending, when we did a
         * full cycle through all clocksources.
         */
        if (reset_pending)
                atomic_dec(&watchdog_reset_pending);

        /*
         * Cycle through CPUs to check if the CPUs stay synchronized
         * to each other.
         */
        next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
        if (next_cpu >= nr_cpu_ids)
                next_cpu = cpumask_first(cpu_online_mask);
        watchdog_timer.expires += WATCHDOG_INTERVAL;
        add_timer_on(&watchdog_timer, next_cpu);
out:
        spin_unlock(&watchdog_lock);
}

static inline void clocksource_start_watchdog(void)
{
        if (watchdog_running || !watchdog || list_empty(&watchdog_list))
                return;
        init_timer(&watchdog_timer);
        watchdog_timer.function = clocksource_watchdog;
        watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
        add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
        watchdog_running = 1;
}

static inline void clocksource_stop_watchdog(void)
{
        if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
                return;
        del_timer(&watchdog_timer);
        watchdog_running = 0;
}

static inline void clocksource_reset_watchdog(void)
{
        struct clocksource *cs;

        list_for_each_entry(cs, &watchdog_list, wd_list)
                cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
}

static void clocksource_resume_watchdog(void)
{
        atomic_inc(&watchdog_reset_pending);
}

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
        unsigned long flags;

        spin_lock_irqsave(&watchdog_lock, flags);
        if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
                /* cs is a clocksource to be watched. */
                list_add(&cs->wd_list, &watchdog_list);
                cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
        } else {
                /* cs is a watchdog. */
                if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
                        cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
                /* Pick the best watchdog. */
                if (!watchdog || cs->rating > watchdog->rating) {
                        watchdog = cs;
                        /* Reset watchdog cycles */
                        clocksource_reset_watchdog();
                }
        }
        /* Check if the watchdog timer needs to be started. */
        clocksource_start_watchdog();
        spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
        unsigned long flags;

        spin_lock_irqsave(&watchdog_lock, flags);
        if (cs != watchdog) {
                if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
                        /* cs is a watched clocksource. */
                        list_del_init(&cs->wd_list);
                        /* Check if the watchdog timer needs to be stopped. */
                        clocksource_stop_watchdog();
                }
        }
        spin_unlock_irqrestore(&watchdog_lock, flags);
}

static int __clocksource_watchdog_kthread(void)
{
        struct clocksource *cs, *tmp;
        unsigned long flags;
        LIST_HEAD(unstable);
        int select = 0;

        spin_lock_irqsave(&watchdog_lock, flags);
        list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
                if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
                        list_del_init(&cs->wd_list);
                        list_add(&cs->wd_list, &unstable);
                        select = 1;
                }
                if (cs->flags & CLOCK_SOURCE_RESELECT) {
                        cs->flags &= ~CLOCK_SOURCE_RESELECT;
                        select = 1;
                }
        }
        /* Check if the watchdog timer needs to be stopped. */
        clocksource_stop_watchdog();
        spin_unlock_irqrestore(&watchdog_lock, flags);

        /* Needs to be done outside of watchdog lock */
        list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
                list_del_init(&cs->wd_list);
                __clocksource_change_rating(cs, 0);
        }
        return select;
}

static int clocksource_watchdog_kthread(void *data)
{
        mutex_lock(&clocksource_mutex);
        if (__clocksource_watchdog_kthread())
                clocksource_select();
        mutex_unlock(&clocksource_mutex);
        return 0;
}

static bool clocksource_is_watchdog(struct clocksource *cs)
{
        return cs == watchdog;
}

#else /* CONFIG_CLOCKSOURCE_WATCHDOG */

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
        if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
                cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}

static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }

#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */

/**
 * clocksource_suspend - suspend the clocksource(s)
 */
void clocksource_suspend(void)
{
        struct clocksource *cs;

        list_for_each_entry_reverse(cs, &clocksource_list, list)
                if (cs->suspend)
                        cs->suspend(cs);
}

/**
 * clocksource_resume - resume the clocksource(s)
 */
void clocksource_resume(void)
{
        struct clocksource *cs;

        list_for_each_entry(cs, &clocksource_list, list)
                if (cs->resume)
                        cs->resume(cs);

        clocksource_resume_watchdog();
}

/**
 * clocksource_touch_watchdog - Update watchdog
 *
 * Update the watchdog after exception contexts such as kgdb so as not
 * to incorrectly trip the watchdog. This might fail when the kernel
 * was stopped in code which holds watchdog_lock.
 */
void clocksource_touch_watchdog(void)
{
        clocksource_resume_watchdog();
}

/**
 * clocksource_max_adjustment - Returns max adjustment amount
 * @cs:         Pointer to clocksource
 *
 */
static u32 clocksource_max_adjustment(struct clocksource *cs)
{
        u64 ret;
        /*
         * We won't try to correct for more than 11% adjustments (110,000 ppm).
         */
        ret = (u64)cs->mult * 11;
        do_div(ret, 100);
        return (u32)ret;
}
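
/*
 * Illustrative note (not part of the original source): for the example
 * mult of 100 << 24 computed above, the returned maxadj is 11 << 24
 * (0x0b000000), i.e. NTP-style frequency corrections are capped at
 * roughly +/- 11% of the nominal rate.
 */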

/**
 * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
 * @mult:       cycle to nanosecond multiplier
 * @shift:      cycle to nanosecond divisor (power of two)
 * @maxadj:     maximum adjustment value to mult (~11%)
 * @mask:       bitmask for two's complement subtraction of non 64 bit counters
 * @max_cyc:    maximum cycle value before potential overflow (does not include
 *              any safety margin)
 *
 * NOTE: This function includes a safety margin of 50%, in other words, we
 * return half the number of nanoseconds the hardware counter can technically
 * cover. This is done so that we can potentially detect problems caused by
 * delayed timers or bad hardware, which might result in time intervals that
 * are larger than what the math used can handle without overflows.
 */
u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
{
        u64 max_nsecs, max_cycles;

        /*
         * Calculate the maximum number of cycles that we can pass to the
         * cyc2ns() function without overflowing a 64-bit result.
         */
        max_cycles = ULLONG_MAX;
        do_div(max_cycles, mult + maxadj);

        /*
         * The actual maximum number of cycles we can defer the clocksource is
         * determined by the minimum of max_cycles and mask.
         * Note: Here we subtract the maxadj to make sure we don't sleep for
         * too long if there's a large negative adjustment.
         */
        max_cycles = min(max_cycles, mask);
        max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);

        /* return the max_cycles value as well if requested */
        if (max_cyc)
                *max_cyc = max_cycles;

        /* Return 50% of the actual maximum, so we can detect bad values */
        max_nsecs >>= 1;

        return max_nsecs;
}
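
/*
 * Worked example (illustrative, not part of the original source): for a
 * 32-bit counter at 10 MHz (mult = 100 << 24, shift = 24, so 100 ns per
 * cycle) with maxadj = 11 << 24, the counter mask is the limiting factor:
 *
 *      max_nsecs = (2^32 - 1) cycles * (100 - 11) ns/cycle ~= 382 s
 *
 * and after the 50% safety margin the function returns roughly 191 s,
 * which becomes the clocksource's max_idle_ns below.
 */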

/**
 * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
 * @cs:         Pointer to clocksource to be updated
 *
 */
static inline void clocksource_update_max_deferment(struct clocksource *cs)
{
        cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
                                                cs->maxadj, cs->mask,
                                                &cs->max_cycles);
}

#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET

static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
{
        struct clocksource *cs;

        if (!finished_booting || list_empty(&clocksource_list))
                return NULL;

        /*
         * We pick the clocksource with the highest rating. If oneshot
         * mode is active, we pick the highres valid clocksource with
         * the best rating.
         */
        list_for_each_entry(cs, &clocksource_list, list) {
                if (skipcur && cs == curr_clocksource)
                        continue;
                if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
                        continue;
                return cs;
        }
        return NULL;
}

static void __clocksource_select(bool skipcur)
{
        bool oneshot = tick_oneshot_mode_active();
        struct clocksource *best, *cs;

        /* Find the best suitable clocksource */
        best = clocksource_find_best(oneshot, skipcur);
        if (!best)
                return;

        /* Check for the override clocksource. */
        list_for_each_entry(cs, &clocksource_list, list) {
                if (skipcur && cs == curr_clocksource)
                        continue;
                if (strcmp(cs->name, override_name) != 0)
                        continue;
                /*
                 * Check to make sure we don't switch to a non-highres
                 * capable clocksource if the tick code is in oneshot
                 * mode (highres or nohz)
                 */
                if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
                        /* Override clocksource cannot be used. */
                        pr_warn("Override clocksource %s is not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
                                cs->name);
                        override_name[0] = 0;
                } else
                        /* Override clocksource can be used. */
                        best = cs;
                break;
        }

        if (curr_clocksource != best && !timekeeping_notify(best)) {
                pr_info("Switched to clocksource %s\n", best->name);
                curr_clocksource = best;
        }
}

/**
 * clocksource_select - Select the best clocksource available
 *
 * Private function. Must hold clocksource_mutex when called.
 *
 * Select the clocksource with the best rating, or the clocksource
 * which is selected by userspace override.
 */
static void clocksource_select(void)
{
        return __clocksource_select(false);
}

static void clocksource_select_fallback(void)
{
        return __clocksource_select(true);
}

#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */

static inline void clocksource_select(void) { }
static inline void clocksource_select_fallback(void) { }

#endif

/*
 * clocksource_done_booting - Called near the end of core bootup
 *
 * Hack to avoid lots of clocksource churn at boot time.
 * We use fs_initcall because we want this to start before
 * device_initcall but after subsys_initcall.
 */
static int __init clocksource_done_booting(void)
{
        mutex_lock(&clocksource_mutex);
        curr_clocksource = clocksource_default_clock();
        finished_booting = 1;
        /*
         * Run the watchdog first to eliminate unstable clock sources
         */
        __clocksource_watchdog_kthread();
        clocksource_select();
        mutex_unlock(&clocksource_mutex);
        return 0;
}
fs_initcall(clocksource_done_booting);

/*
 * Enqueue the clocksource sorted by rating
 */
static void clocksource_enqueue(struct clocksource *cs)
{
        struct list_head *entry = &clocksource_list;
        struct clocksource *tmp;

        list_for_each_entry(tmp, &clocksource_list, list)
                /* Keep track of the place where to insert */
                if (tmp->rating >= cs->rating)
                        entry = &tmp->list;
        list_add(&cs->list, entry);
}
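
/*
 * Note (summarized from the rating documentation in
 * include/linux/clocksource.h): 1-99 is unfit for real use, 100-199 is a
 * functional base level, 200-299 is good, 300-399 is desired, and 400-499
 * is perfect/must-use. The list head therefore always points at the
 * highest-rated registered clocksource.
 */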

/**
 * __clocksource_update_freq_scale - Used to update clocksource with new freq
 * @cs:         clocksource to be registered
 * @scale:      Scale factor multiplied against freq to get clocksource hz
 * @freq:       clocksource frequency (cycles per second) divided by scale
 *
 * This should only be called from the clocksource->enable() method.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
 * functions.
 */
void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
        u64 sec;

        /*
         * Default clocksources are *special* and self-define their mult/shift.
         * But, you're not special, so you should specify a freq value.
         */
        if (freq) {
                /*
                 * Calc the maximum number of seconds which we can run before
                 * wrapping around. For clocksources which have a mask > 32-bit
                 * we need to limit the max sleep time to have a good
                 * conversion precision. 10 minutes is still a reasonable
                 * amount. That results in a shift value of 24 for a
                 * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
                 * ~ 0.06ppm granularity for NTP.
                 */
                sec = cs->mask;
                do_div(sec, freq);
                do_div(sec, scale);
                if (!sec)
                        sec = 1;
                else if (sec > 600 && cs->mask > UINT_MAX)
                        sec = 600;

                clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
                                       NSEC_PER_SEC / scale, sec * scale);
        }
        /*
         * Ensure clocksources that have large 'mult' values don't overflow
         * when adjusted.
         */
        cs->maxadj = clocksource_max_adjustment(cs);
        while (freq && ((cs->mult + cs->maxadj < cs->mult)
                || (cs->mult - cs->maxadj > cs->mult))) {
                cs->mult >>= 1;
                cs->shift--;
                cs->maxadj = clocksource_max_adjustment(cs);
        }

        /*
         * Only warn for *special* clocksources that self-define
         * their mult/shift values and don't specify a freq.
         */
        WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
                "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
                cs->name);

        clocksource_update_max_deferment(cs);

        pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
                cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
}
EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
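
/*
 * Usage sketch (illustrative, using the __clocksource_update_freq_hz()
 * wrapper from <linux/clocksource.h>): a driver whose counter rate is only
 * known once the device is enabled can recompute mult/shift there. The
 * foo_enable() and foo_measure_rate_hz() names are hypothetical:
 *
 *      static int foo_enable(struct clocksource *cs)
 *      {
 *              __clocksource_update_freq_hz(cs, foo_measure_rate_hz());
 *              return 0;
 *      }
 */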

/**
 * __clocksource_register_scale - Used to install new clocksources
 * @cs:         clocksource to be registered
 * @scale:      Scale factor multiplied against freq to get clocksource hz
 * @freq:       clocksource frequency (cycles per second) divided by scale
 *
 * Returns -EBUSY if registration fails, zero otherwise.
 *
 * This *SHOULD NOT* be called directly! Please use the
 * clocksource_register_hz() or clocksource_register_khz() helper functions.
 */
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{

        /* Initialize mult/shift and max_idle_ns */
        __clocksource_update_freq_scale(cs, scale, freq);

        /* Add clocksource to the clocksource list */
        mutex_lock(&clocksource_mutex);
        clocksource_enqueue(cs);
        clocksource_enqueue_watchdog(cs);
        clocksource_select();
        mutex_unlock(&clocksource_mutex);
        return 0;
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);
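
/*
 * Registration sketch (illustrative only; the "foo" names and register
 * address are hypothetical). A minimal driver fills in a read callback,
 * mask and flags, then uses the clocksource_register_hz() helper:
 *
 *      static cycle_t foo_read(struct clocksource *cs)
 *      {
 *              return (cycle_t)readl(foo_counter_reg);
 *      }
 *
 *      static struct clocksource foo_cs = {
 *              .name   = "foo",
 *              .rating = 200,
 *              .read   = foo_read,
 *              .mask   = CLOCKSOURCE_MASK(32),
 *              .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 *      };
 *
 *      clocksource_register_hz(&foo_cs, 10000000);     // 10 MHz counter
 */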

static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
        list_del(&cs->list);
        cs->rating = rating;
        clocksource_enqueue(cs);
}

/**
 * clocksource_change_rating - Change the rating of a registered clocksource
 * @cs:         clocksource to be changed
 * @rating:     new rating
 */
void clocksource_change_rating(struct clocksource *cs, int rating)
{
        mutex_lock(&clocksource_mutex);
        __clocksource_change_rating(cs, rating);
        clocksource_select();
        mutex_unlock(&clocksource_mutex);
}
EXPORT_SYMBOL(clocksource_change_rating);

/*
 * Unbind clocksource @cs. Called with clocksource_mutex held
 */
static int clocksource_unbind(struct clocksource *cs)
{
        /*
         * I really can't convince myself to support this on hardware
         * designed by lobotomized monkeys.
         */
        if (clocksource_is_watchdog(cs))
                return -EBUSY;

        if (cs == curr_clocksource) {
                /* Select and try to install a replacement clock source */
                clocksource_select_fallback();
                if (curr_clocksource == cs)
                        return -EBUSY;
        }
        clocksource_dequeue_watchdog(cs);
        list_del_init(&cs->list);
        return 0;
}

/**
 * clocksource_unregister - remove a registered clocksource
 * @cs: clocksource to be unregistered
 */
int clocksource_unregister(struct clocksource *cs)
{
        int ret = 0;

        mutex_lock(&clocksource_mutex);
        if (!list_empty(&cs->list))
                ret = clocksource_unbind(cs);
        mutex_unlock(&clocksource_mutex);
        return ret;
}
EXPORT_SYMBOL(clocksource_unregister);

#ifdef CONFIG_SYSFS
/**
 * sysfs_show_current_clocksources - sysfs interface for current clocksource
 * @dev:        unused
 * @attr:       unused
 * @buf:        char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing current clocksource.
 */
static ssize_t
sysfs_show_current_clocksources(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        ssize_t count = 0;

        mutex_lock(&clocksource_mutex);
        count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
        mutex_unlock(&clocksource_mutex);

        return count;
}

ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
{
        size_t ret = cnt;

        /* strings from sysfs write are not 0 terminated! */
        if (!cnt || cnt >= CS_NAME_LEN)
                return -EINVAL;

        /* strip off \n: */
        if (buf[cnt-1] == '\n')
                cnt--;
        if (cnt > 0)
                memcpy(dst, buf, cnt);
        dst[cnt] = 0;
        return ret;
}

/**
 * sysfs_override_clocksource - interface for manually overriding clocksource
 * @dev:        unused
 * @attr:       unused
 * @buf:        name of override clocksource
 * @count:      length of buffer
 *
 * Takes input from sysfs interface for manually overriding the default
 * clocksource selection.
 */
static ssize_t sysfs_override_clocksource(struct device *dev,
                                          struct device_attribute *attr,
                                          const char *buf, size_t count)
{
        ssize_t ret;

        mutex_lock(&clocksource_mutex);

        ret = sysfs_get_uname(buf, override_name, count);
        if (ret >= 0)
                clocksource_select();

        mutex_unlock(&clocksource_mutex);

        return ret;
}

/**
 * sysfs_unbind_clocksource - interface for manually unbinding clocksource
 * @dev:        unused
 * @attr:       unused
 * @buf:        unused
 * @count:      length of buffer
 *
 * Takes input from sysfs interface for manually unbinding a clocksource.
 */
static ssize_t sysfs_unbind_clocksource(struct device *dev,
                                        struct device_attribute *attr,
                                        const char *buf, size_t count)
{
        struct clocksource *cs;
        char name[CS_NAME_LEN];
        ssize_t ret;

        ret = sysfs_get_uname(buf, name, count);
        if (ret < 0)
                return ret;

        ret = -ENODEV;
        mutex_lock(&clocksource_mutex);
        list_for_each_entry(cs, &clocksource_list, list) {
                if (strcmp(cs->name, name))
                        continue;
                ret = clocksource_unbind(cs);
                break;
        }
        mutex_unlock(&clocksource_mutex);

        return ret ? ret : count;
}

/**
 * sysfs_show_available_clocksources - sysfs interface for listing clocksource
 * @dev:        unused
 * @attr:       unused
 * @buf:        char buffer to be filled with clocksource list
 *
 * Provides sysfs interface for listing registered clocksources
 */
static ssize_t
sysfs_show_available_clocksources(struct device *dev,
                                  struct device_attribute *attr,
                                  char *buf)
{
        struct clocksource *src;
        ssize_t count = 0;

        mutex_lock(&clocksource_mutex);
        list_for_each_entry(src, &clocksource_list, list) {
                /*
                 * Don't show non-HRES clocksource if the tick code is
                 * in one shot mode (highres=on or nohz=on)
                 */
                if (!tick_oneshot_mode_active() ||
                    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
                        count += snprintf(buf + count,
                                  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
                                  "%s ", src->name);
        }
        mutex_unlock(&clocksource_mutex);

        count += snprintf(buf + count,
                          max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");

        return count;
}

/*
 * Sysfs setup bits:
 */
static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
                   sysfs_override_clocksource);

static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);

static DEVICE_ATTR(available_clocksource, 0444,
                   sysfs_show_available_clocksources, NULL);
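
/*
 * Userspace example (the paths follow from the "clocksource" subsystem
 * registered below; "hpet" is just one possible clocksource name):
 *
 *      # cat /sys/devices/system/clocksource/clocksource0/available_clocksource
 *      # echo hpet > /sys/devices/system/clocksource/clocksource0/current_clocksource
 *      # echo hpet > /sys/devices/system/clocksource/clocksource0/unbind_clocksource
 */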

static struct bus_type clocksource_subsys = {
        .name = "clocksource",
        .dev_name = "clocksource",
};

static struct device device_clocksource = {
        .id     = 0,
        .bus    = &clocksource_subsys,
};

static int __init init_clocksource_sysfs(void)
{
        int error = subsys_system_register(&clocksource_subsys, NULL);

        if (!error)
                error = device_register(&device_clocksource);
        if (!error)
                error = device_create_file(
                                &device_clocksource,
                                &dev_attr_current_clocksource);
        if (!error)
                error = device_create_file(&device_clocksource,
                                           &dev_attr_unbind_clocksource);
        if (!error)
                error = device_create_file(
                                &device_clocksource,
                                &dev_attr_available_clocksource);
        return error;
}

device_initcall(init_clocksource_sysfs);
#endif /* CONFIG_SYSFS */

/**
 * boot_override_clocksource - boot clock override
 * @str:        override name
 *
 * Takes a clocksource= boot argument and uses it
 * as the clocksource override name.
 */
static int __init boot_override_clocksource(char *str)
{
        mutex_lock(&clocksource_mutex);
        if (str)
                strlcpy(override_name, str, sizeof(override_name));
        mutex_unlock(&clocksource_mutex);
        return 1;
}

__setup("clocksource=", boot_override_clocksource);
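
/*
 * Example (kernel command line): booting with "clocksource=hpet" sets the
 * override name, so hpet is selected in preference to higher-rated sources
 * such as tsc, provided it is registered and usable in the current tick mode.
 */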

/**
 * boot_override_clock - Compatibility layer for deprecated boot option
 * @str:        override name
 *
 * DEPRECATED! Takes a clock= boot argument and uses it
 * as the clocksource override name
 */
static int __init boot_override_clock(char *str)
{
        if (!strcmp(str, "pmtmr")) {
                pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
                return boot_override_clocksource("acpi_pm");
        }
        pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
        return boot_override_clocksource(str);
}

__setup("clock=", boot_override_clock);