linux/kernel/time/timer.c
   1/*
   2 *  linux/kernel/timer.c
   3 *
   4 *  Kernel internal timers
   5 *
   6 *  Copyright (C) 1991, 1992  Linus Torvalds
   7 *
   8 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
   9 *
  10 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
  11 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
  12 *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
  13 *              serialize accesses to xtime/lost_ticks).
  14 *                              Copyright (C) 1998  Andrea Arcangeli
  15 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
  16 *  2002-05-31  Move sys_sysinfo here and make its locking sane, Robert Love
  17 *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
  18 *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
  19 *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
  20 */
  21
  22#include <linux/kernel_stat.h>
  23#include <linux/export.h>
  24#include <linux/interrupt.h>
  25#include <linux/percpu.h>
  26#include <linux/init.h>
  27#include <linux/mm.h>
  28#include <linux/swap.h>
  29#include <linux/pid_namespace.h>
  30#include <linux/notifier.h>
  31#include <linux/thread_info.h>
  32#include <linux/time.h>
  33#include <linux/jiffies.h>
  34#include <linux/posix-timers.h>
  35#include <linux/cpu.h>
  36#include <linux/syscalls.h>
  37#include <linux/delay.h>
  38#include <linux/tick.h>
  39#include <linux/kallsyms.h>
  40#include <linux/irq_work.h>
  41#include <linux/sched.h>
  42#include <linux/sched/sysctl.h>
  43#include <linux/slab.h>
  44#include <linux/compat.h>
  45
  46#include <asm/uaccess.h>
  47#include <asm/unistd.h>
  48#include <asm/div64.h>
  49#include <asm/timex.h>
  50#include <asm/io.h>
  51
  52#include "tick-internal.h"
  53
  54#define CREATE_TRACE_POINTS
  55#include <trace/events/timer.h>
  56
  57__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
  58
  59EXPORT_SYMBOL(jiffies_64);
  60
  61/*
  62 * per-CPU timer vector definitions:
  63 */
  64#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
  65#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
  66#define TVN_SIZE (1 << TVN_BITS)
  67#define TVR_SIZE (1 << TVR_BITS)
  68#define TVN_MASK (TVN_SIZE - 1)
  69#define TVR_MASK (TVR_SIZE - 1)
  70#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
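/*
 * Illustrative sizing, derived from the macros above (assuming
 * CONFIG_BASE_SMALL=0, i.e. TVR_BITS=8 and TVN_BITS=6): tv1 has 256
 * slots covering the next 256 jiffies, and each further level widens
 * the range by a factor of 64, so tv2 reaches 1<<14 jiffies, tv3 1<<20,
 * tv4 1<<26 and tv5 up to MAX_TVAL = (1<<32)-1. At HZ=1000 that is
 * roughly a quarter of a second for tv1 and about 49 days in total.
 */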
  71
  72struct tvec {
  73        struct hlist_head vec[TVN_SIZE];
  74};
  75
  76struct tvec_root {
  77        struct hlist_head vec[TVR_SIZE];
  78};
  79
  80struct tvec_base {
  81        spinlock_t lock;
  82        struct timer_list *running_timer;
  83        unsigned long timer_jiffies;
  84        unsigned long next_timer;
  85        unsigned long active_timers;
  86        unsigned long all_timers;
  87        int cpu;
  88        bool migration_enabled;
  89        bool nohz_active;
  90        struct tvec_root tv1;
  91        struct tvec tv2;
  92        struct tvec tv3;
  93        struct tvec tv4;
  94        struct tvec tv5;
  95} ____cacheline_aligned;
  96
  97
  98static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
  99
 100#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 101unsigned int sysctl_timer_migration = 1;
 102
 103void timers_update_migration(bool update_nohz)
 104{
 105        bool on = sysctl_timer_migration && tick_nohz_active;
 106        unsigned int cpu;
 107
 108        /* Avoid the loop if there is nothing to update */
 109        if (this_cpu_read(tvec_bases.migration_enabled) == on)
 110                return;
 111
 112        for_each_possible_cpu(cpu) {
 113                per_cpu(tvec_bases.migration_enabled, cpu) = on;
 114                per_cpu(hrtimer_bases.migration_enabled, cpu) = on;
 115                if (!update_nohz)
 116                        continue;
 117                per_cpu(tvec_bases.nohz_active, cpu) = true;
 118                per_cpu(hrtimer_bases.nohz_active, cpu) = true;
 119        }
 120}
 121
 122int timer_migration_handler(struct ctl_table *table, int write,
 123                            void __user *buffer, size_t *lenp,
 124                            loff_t *ppos)
 125{
 126        static DEFINE_MUTEX(mutex);
 127        int ret;
 128
 129        mutex_lock(&mutex);
 130        ret = proc_dointvec(table, write, buffer, lenp, ppos);
 131        if (!ret && write)
 132                timers_update_migration(false);
 133        mutex_unlock(&mutex);
 134        return ret;
 135}
 136
 137static inline struct tvec_base *get_target_base(struct tvec_base *base,
 138                                                int pinned)
 139{
 140        if (pinned || !base->migration_enabled)
 141                return this_cpu_ptr(&tvec_bases);
 142        return per_cpu_ptr(&tvec_bases, get_nohz_timer_target());
 143}
 144#else
 145static inline struct tvec_base *get_target_base(struct tvec_base *base,
 146                                                int pinned)
 147{
 148        return this_cpu_ptr(&tvec_bases);
 149}
 150#endif
 151
 152static unsigned long round_jiffies_common(unsigned long j, int cpu,
 153                bool force_up)
 154{
 155        int rem;
 156        unsigned long original = j;
 157
 158        /*
 159         * We don't want all cpus firing their timers at once hitting the
 160         * same lock or cachelines, so we skew each extra cpu with an extra
 161         * 3 jiffies. This 3 jiffies came originally from the mm/ code which
 162         * already did this.
 163         * The skew is done by adding 3*cpunr, then rounding, then subtracting
 164         * this extra offset again.
 165         */
 166        j += cpu * 3;
 167
 168        rem = j % HZ;
 169
 170        /*
 171         * If the target jiffy is just after a whole second (which can happen
 172         * due to delays of the timer irq, long irq-off times, etc.) then
 173         * we should round down to the whole second, not up. Use 1/4th second
 174         * as the cutoff for this rounding, as an extreme upper bound.
 175         * But never round down if @force_up is set.
 176         */
 177        if (rem < HZ/4 && !force_up) /* round down */
 178                j = j - rem;
 179        else /* round up */
 180                j = j - rem + HZ;
 181
 182        /* now that we have rounded, subtract the extra skew again */
 183        j -= cpu * 3;
 184
 185        /*
 186         * Make sure j is still in the future. Otherwise return the
 187         * unmodified value.
 188         */
 189        return time_is_after_jiffies(j) ? j : original;
 190}
 191
 192/**
 193 * __round_jiffies - function to round jiffies to a full second
 194 * @j: the time in (absolute) jiffies that should be rounded
 195 * @cpu: the processor number on which the timeout will happen
 196 *
 197 * __round_jiffies() rounds an absolute time in the future (in jiffies)
 198 * up or down to (approximately) full seconds. This is useful for timers
 199 * for which the exact time they fire does not matter too much, as long as
 200 * they fire approximately every X seconds.
 201 *
 202 * By rounding these timers to whole seconds, all such timers will fire
 203 * at the same time, rather than at various times spread out. The goal
 204 * of this is to have the CPU wake up less, which saves power.
 205 *
 206 * The exact rounding is skewed for each processor to avoid all
 207 * processors firing at the exact same time, which could lead
 208 * to lock contention or spurious cache line bouncing.
 209 *
 210 * The return value is the rounded version of the @j parameter.
 211 */
 212unsigned long __round_jiffies(unsigned long j, int cpu)
 213{
 214        return round_jiffies_common(j, cpu, false);
 215}
 216EXPORT_SYMBOL_GPL(__round_jiffies);
 217
 218/**
 219 * __round_jiffies_relative - function to round jiffies to a full second
 220 * @j: the time in (relative) jiffies that should be rounded
 221 * @cpu: the processor number on which the timeout will happen
 222 *
 223 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
 224 * up or down to (approximately) full seconds. This is useful for timers
 225 * for which the exact time they fire does not matter too much, as long as
 226 * they fire approximately every X seconds.
 227 *
 228 * By rounding these timers to whole seconds, all such timers will fire
 229 * at the same time, rather than at various times spread out. The goal
 230 * of this is to have the CPU wake up less, which saves power.
 231 *
 232 * The exact rounding is skewed for each processor to avoid all
 233 * processors firing at the exact same time, which could lead
 234 * to lock contention or spurious cache line bouncing.
 235 *
 236 * The return value is the rounded version of the @j parameter.
 237 */
 238unsigned long __round_jiffies_relative(unsigned long j, int cpu)
 239{
 240        unsigned long j0 = jiffies;
 241
 242        /* Use j0 because jiffies might change while we run */
 243        return round_jiffies_common(j + j0, cpu, false) - j0;
 244}
 245EXPORT_SYMBOL_GPL(__round_jiffies_relative);
 246
 247/**
 248 * round_jiffies - function to round jiffies to a full second
 249 * @j: the time in (absolute) jiffies that should be rounded
 250 *
 251 * round_jiffies() rounds an absolute time in the future (in jiffies)
 252 * up or down to (approximately) full seconds. This is useful for timers
 253 * for which the exact time they fire does not matter too much, as long as
 254 * they fire approximately every X seconds.
 255 *
 256 * By rounding these timers to whole seconds, all such timers will fire
 257 * at the same time, rather than at various times spread out. The goal
 258 * of this is to have the CPU wake up less, which saves power.
 259 *
 260 * The return value is the rounded version of the @j parameter.
 261 */
 262unsigned long round_jiffies(unsigned long j)
 263{
 264        return round_jiffies_common(j, raw_smp_processor_id(), false);
 265}
 266EXPORT_SYMBOL_GPL(round_jiffies);
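/*
 * Illustrative use (a sketch; "my_timer" is hypothetical): a
 * housekeeping timer that only needs to run roughly every 5 seconds
 * can let its wakeups coalesce with other such timers by rounding the
 * expiry:
 *
 *	mod_timer(&my_timer, round_jiffies(jiffies + 5 * HZ));
 */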
 267
 268/**
 269 * round_jiffies_relative - function to round jiffies to a full second
 270 * @j: the time in (relative) jiffies that should be rounded
 271 *
 272 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
 273 * up or down to (approximately) full seconds. This is useful for timers
 274 * for which the exact time they fire does not matter too much, as long as
 275 * they fire approximately every X seconds.
 276 *
 277 * By rounding these timers to whole seconds, all such timers will fire
 278 * at the same time, rather than at various times spread out. The goal
 279 * of this is to have the CPU wake up less, which saves power.
 280 *
 281 * The return value is the rounded version of the @j parameter.
 282 */
 283unsigned long round_jiffies_relative(unsigned long j)
 284{
 285        return __round_jiffies_relative(j, raw_smp_processor_id());
 286}
 287EXPORT_SYMBOL_GPL(round_jiffies_relative);
 288
 289/**
 290 * __round_jiffies_up - function to round jiffies up to a full second
 291 * @j: the time in (absolute) jiffies that should be rounded
 292 * @cpu: the processor number on which the timeout will happen
 293 *
 294 * This is the same as __round_jiffies() except that it will never
 295 * round down.  This is useful for timeouts for which the exact time
 296 * of firing does not matter too much, as long as they don't fire too
 297 * early.
 298 */
 299unsigned long __round_jiffies_up(unsigned long j, int cpu)
 300{
 301        return round_jiffies_common(j, cpu, true);
 302}
 303EXPORT_SYMBOL_GPL(__round_jiffies_up);
 304
 305/**
 306 * __round_jiffies_up_relative - function to round jiffies up to a full second
 307 * @j: the time in (relative) jiffies that should be rounded
 308 * @cpu: the processor number on which the timeout will happen
 309 *
 310 * This is the same as __round_jiffies_relative() except that it will never
 311 * round down.  This is useful for timeouts for which the exact time
 312 * of firing does not matter too much, as long as they don't fire too
 313 * early.
 314 */
 315unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
 316{
 317        unsigned long j0 = jiffies;
 318
 319        /* Use j0 because jiffies might change while we run */
 320        return round_jiffies_common(j + j0, cpu, true) - j0;
 321}
 322EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
 323
 324/**
 325 * round_jiffies_up - function to round jiffies up to a full second
 326 * @j: the time in (absolute) jiffies that should be rounded
 327 *
 328 * This is the same as round_jiffies() except that it will never
 329 * round down.  This is useful for timeouts for which the exact time
 330 * of firing does not matter too much, as long as they don't fire too
 331 * early.
 332 */
 333unsigned long round_jiffies_up(unsigned long j)
 334{
 335        return round_jiffies_common(j, raw_smp_processor_id(), true);
 336}
 337EXPORT_SYMBOL_GPL(round_jiffies_up);
 338
 339/**
 340 * round_jiffies_up_relative - function to round jiffies up to a full second
 341 * @j: the time in (relative) jiffies that should be rounded
 342 *
 343 * This is the same as round_jiffies_relative() except that it will never
 344 * round down.  This is useful for timeouts for which the exact time
 345 * of firing does not matter too much, as long as they don't fire too
 346 * early.
 347 */
 348unsigned long round_jiffies_up_relative(unsigned long j)
 349{
 350        return __round_jiffies_up_relative(j, raw_smp_processor_id());
 351}
 352EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
 353
 354/**
 355 * set_timer_slack - set the allowed slack for a timer
 356 * @timer: the timer to be modified
 357 * @slack_hz: the amount of time (in jiffies) allowed for rounding
 358 *
 359 * Set the amount of time, in jiffies, that a certain timer has
 360 * in terms of slack. By setting this value, the timer subsystem
 361 * will schedule the actual timer somewhere between
 362 * the time mod_timer() asks for, and that time plus the slack.
 363 *
 364 * By setting the slack to -1, a percentage of the delay is used
 365 * instead.
 366 */
 367void set_timer_slack(struct timer_list *timer, int slack_hz)
 368{
 369        timer->slack = slack_hz;
 370}
 371EXPORT_SYMBOL_GPL(set_timer_slack);
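/*
 * Illustrative use (a sketch; "my_timer" is hypothetical): a timer that
 * tolerates firing up to 100ms late can say so explicitly:
 *
 *	set_timer_slack(&my_timer, msecs_to_jiffies(100));
 *
 * With the default slack of -1, apply_slack() below instead allows
 * roughly 0.4% of the remaining delay (delta/256) as slack.
 */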
 372
 373static void
 374__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 375{
 376        unsigned long expires = timer->expires;
 377        unsigned long idx = expires - base->timer_jiffies;
 378        struct hlist_head *vec;
 379
 380        if (idx < TVR_SIZE) {
 381                int i = expires & TVR_MASK;
 382                vec = base->tv1.vec + i;
 383        } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
 384                int i = (expires >> TVR_BITS) & TVN_MASK;
 385                vec = base->tv2.vec + i;
 386        } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
 387                int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
 388                vec = base->tv3.vec + i;
 389        } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
 390                int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
 391                vec = base->tv4.vec + i;
 392        } else if ((signed long) idx < 0) {
 393                /*
 394                 * Can happen if you add a timer with expires == jiffies,
 395                 * or you set a timer to go off in the past
 396                 */
 397                vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
 398        } else {
 399                int i;
 400                /* If the timeout is larger than MAX_TVAL (on 64-bit
 401                 * architectures or with CONFIG_BASE_SMALL=1) then we
 402                 * use the maximum timeout.
 403                 */
 404                if (idx > MAX_TVAL) {
 405                        idx = MAX_TVAL;
 406                        expires = idx + base->timer_jiffies;
 407                }
 408                i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
 409                vec = base->tv5.vec + i;
 410        }
 411
 412        hlist_add_head(&timer->entry, vec);
 413}
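/*
 * Worked example of the bucketing above (hypothetical numbers, with
 * TVR_BITS=8 and TVN_BITS=6): if base->timer_jiffies is 1000 and
 * expires is 1300, idx is 300, which is >= TVR_SIZE (256) but below
 * 1<<14, so the timer lands in tv2 at slot (1300 >> 8) & 63 == 5.
 * Note that the slot index is taken from the expiry value itself,
 * while idx only selects the level.
 */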
 414
 415static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 416{
 417        /* Advance base->timer_jiffies if the base is empty */
 418        if (!base->all_timers++)
 419                base->timer_jiffies = jiffies;
 420
 421        __internal_add_timer(base, timer);
 422        /*
 423         * Update base->active_timers and base->next_timer
 424         */
 425        if (!(timer->flags & TIMER_DEFERRABLE)) {
 426                if (!base->active_timers++ ||
 427                    time_before(timer->expires, base->next_timer))
 428                        base->next_timer = timer->expires;
 429        }
 430
 431        /*
 432         * Check whether the other CPU is in dynticks mode and needs
 433         * to be triggered to reevaluate the timer wheel.
 434         * We are protected against the other CPU fiddling
 435         * with the timer by holding the timer base lock. This also
 436         * makes sure that a CPU on the way to stop its tick can not
 437         * evaluate the timer wheel.
 438         *
 439         * Spare the IPI for deferrable timers on idle targets though.
 440         * The next busy ticks will take care of it. Except that full dynticks
 441         * requires special care against races with idle_cpu(); let's deal
 442         * with that later.
 443         */
 444        if (base->nohz_active) {
 445                if (!(timer->flags & TIMER_DEFERRABLE) ||
 446                    tick_nohz_full_cpu(base->cpu))
 447                        wake_up_nohz_cpu(base->cpu);
 448        }
 449}
 450
 451#ifdef CONFIG_TIMER_STATS
 452void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
 453{
 454        if (timer->start_site)
 455                return;
 456
 457        timer->start_site = addr;
 458        memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 459        timer->start_pid = current->pid;
 460}
 461
 462static void timer_stats_account_timer(struct timer_list *timer)
 463{
 464        if (likely(!timer->start_site))
 465                return;
 466
 467        timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
 468                                 timer->function, timer->start_comm,
 469                                 timer->flags);
 470}
 471
 472#else
 473static void timer_stats_account_timer(struct timer_list *timer) {}
 474#endif
 475
 476#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 477
 478static struct debug_obj_descr timer_debug_descr;
 479
 480static void *timer_debug_hint(void *addr)
 481{
 482        return ((struct timer_list *) addr)->function;
 483}
 484
 485/*
 486 * fixup_init is called when:
 487 * - an active object is initialized
 488 */
 489static int timer_fixup_init(void *addr, enum debug_obj_state state)
 490{
 491        struct timer_list *timer = addr;
 492
 493        switch (state) {
 494        case ODEBUG_STATE_ACTIVE:
 495                del_timer_sync(timer);
 496                debug_object_init(timer, &timer_debug_descr);
 497                return 1;
 498        default:
 499                return 0;
 500        }
 501}
 502
 503/* Stub timer callback for improperly used timers. */
 504static void stub_timer(unsigned long data)
 505{
 506        WARN_ON(1);
 507}
 508
 509/*
 510 * fixup_activate is called when:
 511 * - an active object is activated
 512 * - an unknown object is activated (might be a statically initialized object)
 513 */
 514static int timer_fixup_activate(void *addr, enum debug_obj_state state)
 515{
 516        struct timer_list *timer = addr;
 517
 518        switch (state) {
 519
 520        case ODEBUG_STATE_NOTAVAILABLE:
 521                /*
 522                 * This is not really a fixup. The timer was
 523                 * statically initialized. We just make sure that it
 524                 * is tracked in the object tracker.
 525                 */
 526                if (timer->entry.pprev == NULL &&
 527                    timer->entry.next == TIMER_ENTRY_STATIC) {
 528                        debug_object_init(timer, &timer_debug_descr);
 529                        debug_object_activate(timer, &timer_debug_descr);
 530                        return 0;
 531                } else {
 532                        setup_timer(timer, stub_timer, 0);
 533                        return 1;
 534                }
 535                return 0;
 536
 537        case ODEBUG_STATE_ACTIVE:
 538                WARN_ON(1);
 539
 540        default:
 541                return 0;
 542        }
 543}
 544
 545/*
 546 * fixup_free is called when:
 547 * - an active object is freed
 548 */
 549static int timer_fixup_free(void *addr, enum debug_obj_state state)
 550{
 551        struct timer_list *timer = addr;
 552
 553        switch (state) {
 554        case ODEBUG_STATE_ACTIVE:
 555                del_timer_sync(timer);
 556                debug_object_free(timer, &timer_debug_descr);
 557                return 1;
 558        default:
 559                return 0;
 560        }
 561}
 562
 563/*
 564 * fixup_assert_init is called when:
 565 * - an untracked/uninit-ed object is found
 566 */
 567static int timer_fixup_assert_init(void *addr, enum debug_obj_state state)
 568{
 569        struct timer_list *timer = addr;
 570
 571        switch (state) {
 572        case ODEBUG_STATE_NOTAVAILABLE:
 573                if (timer->entry.next == TIMER_ENTRY_STATIC) {
 574                        /*
 575                         * This is not really a fixup. The timer was
 576                         * statically initialized. We just make sure that it
 577                         * is tracked in the object tracker.
 578                         */
 579                        debug_object_init(timer, &timer_debug_descr);
 580                        return 0;
 581                } else {
 582                        setup_timer(timer, stub_timer, 0);
 583                        return 1;
 584                }
 585        default:
 586                return 0;
 587        }
 588}
 589
 590static struct debug_obj_descr timer_debug_descr = {
 591        .name                   = "timer_list",
 592        .debug_hint             = timer_debug_hint,
 593        .fixup_init             = timer_fixup_init,
 594        .fixup_activate         = timer_fixup_activate,
 595        .fixup_free             = timer_fixup_free,
 596        .fixup_assert_init      = timer_fixup_assert_init,
 597};
 598
 599static inline void debug_timer_init(struct timer_list *timer)
 600{
 601        debug_object_init(timer, &timer_debug_descr);
 602}
 603
 604static inline void debug_timer_activate(struct timer_list *timer)
 605{
 606        debug_object_activate(timer, &timer_debug_descr);
 607}
 608
 609static inline void debug_timer_deactivate(struct timer_list *timer)
 610{
 611        debug_object_deactivate(timer, &timer_debug_descr);
 612}
 613
 614static inline void debug_timer_free(struct timer_list *timer)
 615{
 616        debug_object_free(timer, &timer_debug_descr);
 617}
 618
 619static inline void debug_timer_assert_init(struct timer_list *timer)
 620{
 621        debug_object_assert_init(timer, &timer_debug_descr);
 622}
 623
 624static void do_init_timer(struct timer_list *timer, unsigned int flags,
 625                          const char *name, struct lock_class_key *key);
 626
 627void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags,
 628                             const char *name, struct lock_class_key *key)
 629{
 630        debug_object_init_on_stack(timer, &timer_debug_descr);
 631        do_init_timer(timer, flags, name, key);
 632}
 633EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
 634
 635void destroy_timer_on_stack(struct timer_list *timer)
 636{
 637        debug_object_free(timer, &timer_debug_descr);
 638}
 639EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
 640
 641#else
 642static inline void debug_timer_init(struct timer_list *timer) { }
 643static inline void debug_timer_activate(struct timer_list *timer) { }
 644static inline void debug_timer_deactivate(struct timer_list *timer) { }
 645static inline void debug_timer_assert_init(struct timer_list *timer) { }
 646#endif
 647
 648static inline void debug_init(struct timer_list *timer)
 649{
 650        debug_timer_init(timer);
 651        trace_timer_init(timer);
 652}
 653
 654static inline void
 655debug_activate(struct timer_list *timer, unsigned long expires)
 656{
 657        debug_timer_activate(timer);
 658        trace_timer_start(timer, expires, timer->flags);
 659}
 660
 661static inline void debug_deactivate(struct timer_list *timer)
 662{
 663        debug_timer_deactivate(timer);
 664        trace_timer_cancel(timer);
 665}
 666
 667static inline void debug_assert_init(struct timer_list *timer)
 668{
 669        debug_timer_assert_init(timer);
 670}
 671
 672static void do_init_timer(struct timer_list *timer, unsigned int flags,
 673                          const char *name, struct lock_class_key *key)
 674{
 675        timer->entry.pprev = NULL;
 676        timer->flags = flags | raw_smp_processor_id();
 677        timer->slack = -1;
 678#ifdef CONFIG_TIMER_STATS
 679        timer->start_site = NULL;
 680        timer->start_pid = -1;
 681        memset(timer->start_comm, 0, TASK_COMM_LEN);
 682#endif
 683        lockdep_init_map(&timer->lockdep_map, name, key, 0);
 684}
 685
 686/**
 687 * init_timer_key - initialize a timer
 688 * @timer: the timer to be initialized
 689 * @flags: timer flags
 690 * @name: name of the timer
 691 * @key: lockdep class key of the fake lock used for tracking timer
 692 *       sync lock dependencies
 693 *
 694 * init_timer_key() must be done to a timer prior to calling *any* of the
 695 * other timer functions.
 696 */
 697void init_timer_key(struct timer_list *timer, unsigned int flags,
 698                    const char *name, struct lock_class_key *key)
 699{
 700        debug_init(timer);
 701        do_init_timer(timer, flags, name, key);
 702}
 703EXPORT_SYMBOL(init_timer_key);
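/*
 * Illustrative initialization (a sketch; "my_timer" and "my_timeout"
 * are hypothetical): most users reach init_timer_key() through the
 * setup_timer()/init_timer() wrappers:
 *
 *	static void my_timeout(unsigned long data) { ... }
 *	static struct timer_list my_timer;
 *
 *	setup_timer(&my_timer, my_timeout, 0);
 *	my_timer.expires = jiffies + HZ;
 *	add_timer(&my_timer);
 */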
 704
 705static inline void detach_timer(struct timer_list *timer, bool clear_pending)
 706{
 707        struct hlist_node *entry = &timer->entry;
 708
 709        debug_deactivate(timer);
 710
 711        __hlist_del(entry);
 712        if (clear_pending)
 713                entry->pprev = NULL;
 714        entry->next = LIST_POISON2;
 715}
 716
 717static inline void
 718detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
 719{
 720        detach_timer(timer, true);
 721        if (!(timer->flags & TIMER_DEFERRABLE))
 722                base->active_timers--;
 723        base->all_timers--;
 724}
 725
 726static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
 727                             bool clear_pending)
 728{
 729        if (!timer_pending(timer))
 730                return 0;
 731
 732        detach_timer(timer, clear_pending);
 733        if (!(timer->flags & TIMER_DEFERRABLE)) {
 734                base->active_timers--;
 735                if (timer->expires == base->next_timer)
 736                        base->next_timer = base->timer_jiffies;
 737        }
 738        /* If this was the last timer, advance base->timer_jiffies */
 739        if (!--base->all_timers)
 740                base->timer_jiffies = jiffies;
 741        return 1;
 742}
 743
 744/*
 745 * We are using hashed locking: holding per_cpu(tvec_bases).lock
 746 * means that all timers which are tied to this base via timer->base are
 747 * locked, and the base itself is locked too.
 748 *
 749 * So __run_timers/migrate_timers can safely modify all timers which could
 750 * be found on ->tvX lists.
 751 *
 752 * When the timer's base is locked and removed from the list, the
 753 * TIMER_MIGRATING flag is set, FIXME
 754 */
 755static struct tvec_base *lock_timer_base(struct timer_list *timer,
 756                                        unsigned long *flags)
 757        __acquires(timer->base->lock)
 758{
 759        for (;;) {
 760                u32 tf = timer->flags;
 761                struct tvec_base *base;
 762
 763                if (!(tf & TIMER_MIGRATING)) {
 764                        base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
 765                        spin_lock_irqsave(&base->lock, *flags);
 766                        if (timer->flags == tf)
 767                                return base;
 768                        spin_unlock_irqrestore(&base->lock, *flags);
 769                }
 770                cpu_relax();
 771        }
 772}
 773
 774static inline int
 775__mod_timer(struct timer_list *timer, unsigned long expires,
 776            bool pending_only, int pinned)
 777{
 778        struct tvec_base *base, *new_base;
 779        unsigned long flags;
 780        int ret = 0;
 781
 782        timer_stats_timer_set_start_info(timer);
 783        BUG_ON(!timer->function);
 784
 785        base = lock_timer_base(timer, &flags);
 786
 787        ret = detach_if_pending(timer, base, false);
 788        if (!ret && pending_only)
 789                goto out_unlock;
 790
 791        debug_activate(timer, expires);
 792
 793        new_base = get_target_base(base, pinned);
 794
 795        if (base != new_base) {
 796                /*
 797                 * We are trying to schedule the timer on the local CPU.
 798                 * However we can't change the timer's base while it is running,
 799                 * otherwise del_timer_sync() can't detect that the timer's
 800                 * handler has not finished yet. This also guarantees that
 801                 * the timer is serialized wrt itself.
 802                 */
 803                if (likely(base->running_timer != timer)) {
 804                        /* See the comment in lock_timer_base() */
 805                        timer->flags |= TIMER_MIGRATING;
 806
 807                        spin_unlock(&base->lock);
 808                        base = new_base;
 809                        spin_lock(&base->lock);
 810                        WRITE_ONCE(timer->flags,
 811                                   (timer->flags & ~TIMER_BASEMASK) | base->cpu);
 812                }
 813        }
 814
 815        timer->expires = expires;
 816        internal_add_timer(base, timer);
 817
 818out_unlock:
 819        spin_unlock_irqrestore(&base->lock, flags);
 820
 821        return ret;
 822}
 823
 824/**
 825 * mod_timer_pending - modify a pending timer's timeout
 826 * @timer: the pending timer to be modified
 827 * @expires: new timeout in jiffies
 828 *
 829 * mod_timer_pending() is the same for pending timers as mod_timer(),
 830 * but will not re-activate and modify already deleted timers.
 831 *
 832 * It is useful for unserialized use of timers.
 833 */
 834int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 835{
 836        return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
 837}
 838EXPORT_SYMBOL(mod_timer_pending);
 839
 840/*
 841 * Decide where to put the timer while taking the slack into account
 842 *
 843 * Algorithm:
 844 *   1) calculate the maximum (absolute) time
 845 *   2) calculate the highest bit where the expires and new max are different
 846 *   3) use this bit to make a mask
 847 *   4) use the bitmask to round down the maximum time, so that all last
 848 *      bits are zeros
 849 */
 850static inline
 851unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
 852{
 853        unsigned long expires_limit, mask;
 854        int bit;
 855
 856        if (timer->slack >= 0) {
 857                expires_limit = expires + timer->slack;
 858        } else {
 859                long delta = expires - jiffies;
 860
 861                if (delta < 256)
 862                        return expires;
 863
 864                expires_limit = expires + delta / 256;
 865        }
 866        mask = expires ^ expires_limit;
 867        if (mask == 0)
 868                return expires;
 869
 870        bit = find_last_bit(&mask, BITS_PER_LONG);
 871
 872        mask = (1UL << bit) - 1;
 873
 874        expires_limit = expires_limit & ~(mask);
 875
 876        return expires_limit;
 877}
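/*
 * Worked example of apply_slack() (hypothetical numbers): with the
 * default slack of -1, jiffies == 1000000 and expires == 1010000,
 * delta is 10000 and expires_limit becomes 1010039. Then
 * expires ^ expires_limit == 0x27, whose highest set bit is bit 5, so
 * the low five bits are cleared and the timer is queued for 1010016:
 * at most 16 jiffies late, but on a 32-jiffy boundary that it can
 * share with other timers rounded the same way.
 */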
 878
 879/**
 880 * mod_timer - modify a timer's timeout
 881 * @timer: the timer to be modified
 882 * @expires: new timeout in jiffies
 883 *
 884 * mod_timer() is a more efficient way to update the expire field of an
 885 * active timer (if the timer is inactive it will be activated)
 886 *
 887 * mod_timer(timer, expires) is equivalent to:
 888 *
 889 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 890 *
 891 * Note that if there are multiple unserialized concurrent users of the
 892 * same timer, then mod_timer() is the only safe way to modify the timeout,
 893 * since add_timer() cannot modify an already running timer.
 894 *
 895 * The function returns whether it has modified a pending timer or not.
 896 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 897 * active timer returns 1.)
 898 */
 899int mod_timer(struct timer_list *timer, unsigned long expires)
 900{
 901        expires = apply_slack(timer, expires);
 902
 903        /*
 904         * This is a common optimization triggered by the
 905         * networking code - if the timer is re-modified
 906         * to be the same thing then just return:
 907         */
 908        if (timer_pending(timer) && timer->expires == expires)
 909                return 1;
 910
 911        return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 912}
 913EXPORT_SYMBOL(mod_timer);
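/*
 * Illustrative use (a sketch; "my_timer", "my_poll_fn" and
 * do_the_periodic_work() are hypothetical): a common pattern is
 * re-arming a timer from its own callback to poll periodically:
 *
 *	static void my_poll_fn(unsigned long data)
 *	{
 *		do_the_periodic_work();
 *		mod_timer(&my_timer, jiffies + msecs_to_jiffies(500));
 *	}
 */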
 914
 915/**
 916 * mod_timer_pinned - modify a timer's timeout
 917 * @timer: the timer to be modified
 918 * @expires: new timeout in jiffies
 919 *
 920 * mod_timer_pinned() is a way to update the expire field of an
 921 * active timer (if the timer is inactive it will be activated)
 922 * and to ensure that the timer is scheduled on the current CPU.
 923 *
 924 * Note that this does not prevent the timer from being migrated
 925 * when the current CPU goes offline.  If this is a problem for
 926 * you, use CPU-hotplug notifiers to handle it correctly, for
 927 * example, cancelling the timer when the corresponding CPU goes
 928 * offline.
 929 *
 930 * mod_timer_pinned(timer, expires) is equivalent to:
 931 *
 932 *     del_timer(timer); timer->expires = expires; add_timer(timer);
 933 */
 934int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
 935{
 936        if (timer->expires == expires && timer_pending(timer))
 937                return 1;
 938
 939        return __mod_timer(timer, expires, false, TIMER_PINNED);
 940}
 941EXPORT_SYMBOL(mod_timer_pinned);
 942
 943/**
 944 * add_timer - start a timer
 945 * @timer: the timer to be added
 946 *
 947 * The kernel will do a ->function(->data) callback from the
 948 * timer interrupt at the ->expires point in the future. The
 949 * current time is 'jiffies'.
 950 *
 951 * The timer's ->expires, ->function (and if the handler uses it, ->data)
 952 * fields must be set prior to calling this function.
 953 *
 954 * Timers with an ->expires field in the past will be executed in the next
 955 * timer tick.
 956 */
 957void add_timer(struct timer_list *timer)
 958{
 959        BUG_ON(timer_pending(timer));
 960        mod_timer(timer, timer->expires);
 961}
 962EXPORT_SYMBOL(add_timer);
 963
 964/**
 965 * add_timer_on - start a timer on a particular CPU
 966 * @timer: the timer to be added
 967 * @cpu: the CPU to start it on
 968 *
 969 * This is not very scalable on SMP. Double adds are not possible.
 970 */
 971void add_timer_on(struct timer_list *timer, int cpu)
 972{
 973        struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
 974        unsigned long flags;
 975
 976        timer_stats_timer_set_start_info(timer);
 977        BUG_ON(timer_pending(timer) || !timer->function);
 978        spin_lock_irqsave(&base->lock, flags);
 979        timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
 980        debug_activate(timer, timer->expires);
 981        internal_add_timer(base, timer);
 982        spin_unlock_irqrestore(&base->lock, flags);
 983}
 984EXPORT_SYMBOL_GPL(add_timer_on);
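/*
 * Illustrative use (a sketch; "my_timer" is hypothetical): queueing a
 * timer that must fire on one particular CPU:
 *
 *	my_timer.expires = jiffies + HZ;
 *	add_timer_on(&my_timer, cpu);
 */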
 985
 986/**
 987 * del_timer - deactivate a timer.
 988 * @timer: the timer to be deactivated
 989 *
 990 * del_timer() deactivates a timer - this works on both active and inactive
 991 * timers.
 992 *
 993 * The function returns whether it has deactivated a pending timer or not.
 994 * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
 995 * active timer returns 1.)
 996 */
 997int del_timer(struct timer_list *timer)
 998{
 999        struct tvec_base *base;
1000        unsigned long flags;
1001        int ret = 0;
1002
1003        debug_assert_init(timer);
1004
1005        timer_stats_timer_clear_start_info(timer);
1006        if (timer_pending(timer)) {
1007                base = lock_timer_base(timer, &flags);
1008                ret = detach_if_pending(timer, base, true);
1009                spin_unlock_irqrestore(&base->lock, flags);
1010        }
1011
1012        return ret;
1013}
1014EXPORT_SYMBOL(del_timer);
1015
1016/**
1017 * try_to_del_timer_sync - Try to deactivate a timer
1018 * @timer: timer to deactivate
1019 *
1020 * This function tries to deactivate a timer. Upon successful (ret >= 0)
1021 * exit the timer is not queued and the handler is not running on any CPU.
1022 */
1023int try_to_del_timer_sync(struct timer_list *timer)
1024{
1025        struct tvec_base *base;
1026        unsigned long flags;
1027        int ret = -1;
1028
1029        debug_assert_init(timer);
1030
1031        base = lock_timer_base(timer, &flags);
1032
1033        if (base->running_timer != timer) {
1034                timer_stats_timer_clear_start_info(timer);
1035                ret = detach_if_pending(timer, base, true);
1036        }
1037        spin_unlock_irqrestore(&base->lock, flags);
1038
1039        return ret;
1040}
1041EXPORT_SYMBOL(try_to_del_timer_sync);
1042
1043#ifdef CONFIG_SMP
1044/**
1045 * del_timer_sync - deactivate a timer and wait for the handler to finish.
1046 * @timer: the timer to be deactivated
1047 *
1048 * This function only differs from del_timer() on SMP: besides deactivating
1049 * the timer it also makes sure the handler has finished executing on other
1050 * CPUs.
1051 *
1052 * Synchronization rules: Callers must prevent restarting of the timer,
1053 * otherwise this function is meaningless. It must not be called from
1054 * interrupt contexts unless the timer is an irqsafe one. The caller must
1055 * not hold locks which would prevent completion of the timer's
1056 * handler. The timer's handler must not call add_timer_on(). Upon exit the
1057 * timer is not queued and the handler is not running on any CPU.
1058 *
1059 * Note: For !irqsafe timers, you must not hold locks that are held in
1060 *   interrupt context while calling this function, even if the lock has
1061 *   nothing to do with the timer in question. Here's why:
1062 *
1063 *    CPU0                             CPU1
1064 *    ----                             ----
1065 *                                   <SOFTIRQ>
1066 *                                   call_timer_fn();
1067 *                                     base->running_timer = mytimer;
1068 *  spin_lock_irq(somelock);
1069 *                                     <IRQ>
1070 *                                        spin_lock(somelock);
1071 *  del_timer_sync(mytimer);
1072 *   while (base->running_timer == mytimer);
1073 *
1074 * Now del_timer_sync() will never return and never release somelock.
1075 * The interrupt on the other CPU is waiting to grab somelock but
1076 * it has interrupted the softirq that CPU0 is waiting to finish.
1077 *
1078 * The function returns whether it has deactivated a pending timer or not.
1079 */
1080int del_timer_sync(struct timer_list *timer)
1081{
1082#ifdef CONFIG_LOCKDEP
1083        unsigned long flags;
1084
1085        /*
1086         * If lockdep gives a backtrace here, please reference
1087         * the synchronization rules above.
1088         */
1089        local_irq_save(flags);
1090        lock_map_acquire(&timer->lockdep_map);
1091        lock_map_release(&timer->lockdep_map);
1092        local_irq_restore(flags);
1093#endif
1094        /*
1095         * don't use it in hardirq context, because it
1096         * could lead to deadlock.
1097         */
1098        WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
1099        for (;;) {
1100                int ret = try_to_del_timer_sync(timer);
1101                if (ret >= 0)
1102                        return ret;
1103                cpu_relax();
1104        }
1105}
1106EXPORT_SYMBOL(del_timer_sync);
1107#endif
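/*
 * Illustrative teardown (a sketch; "my_exit", "my_timer" and
 * free_my_resources() are hypothetical, and it is assumed nothing can
 * re-arm the timer afterwards): module unload is the classic place
 * where the _sync variant matters:
 *
 *	static void __exit my_exit(void)
 *	{
 *		del_timer_sync(&my_timer);
 *		free_my_resources();
 *	}
 */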
1108
1109static int cascade(struct tvec_base *base, struct tvec *tv, int index)
1110{
1111        /* cascade all the timers from tv up one level */
1112        struct timer_list *timer;
1113        struct hlist_node *tmp;
1114        struct hlist_head tv_list;
1115
1116        hlist_move_list(tv->vec + index, &tv_list);
1117
1118        /*
1119         * We are removing _all_ timers from the list, so we
1120         * don't have to detach them individually.
1121         */
1122        hlist_for_each_entry_safe(timer, tmp, &tv_list, entry) {
1123                /* No accounting, while moving them */
1124                __internal_add_timer(base, timer);
1125        }
1126
1127        return index;
1128}
1129
1130static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
1131                          unsigned long data)
1132{
1133        int count = preempt_count();
1134
1135#ifdef CONFIG_LOCKDEP
1136        /*
1137         * It is permissible to free the timer from inside the
1138         * function that is called from it, this we need to take into
1139         * account for lockdep too. To avoid bogus "held lock freed"
1140         * warnings as well as problems when looking into
1141         * timer->lockdep_map, make a copy and use that here.
1142         */
1143        struct lockdep_map lockdep_map;
1144
1145        lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
1146#endif
1147        /*
1148         * Couple the lock chain with the lock chain at
1149         * del_timer_sync() by acquiring the lock_map around the fn()
1150         * call here and in del_timer_sync().
1151         */
1152        lock_map_acquire(&lockdep_map);
1153
1154        trace_timer_expire_entry(timer);
1155        fn(data);
1156        trace_timer_expire_exit(timer);
1157
1158        lock_map_release(&lockdep_map);
1159
1160        if (count != preempt_count()) {
1161                WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
1162                          fn, count, preempt_count());
1163                /*
1164                 * Restore the preempt count. That gives us a decent
1165                 * chance to survive and extract information. If the
1166                 * callback kept a lock held, bad luck, but not worse
1167                 * than the BUG() we had.
1168                 */
1169                preempt_count_set(count);
1170        }
1171}
1172
1173#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
1174
1175/**
1176 * __run_timers - run all expired timers (if any) on this CPU.
1177 * @base: the timer vector to be processed.
1178 *
1179 * This function cascades all vectors and executes all expired timer
1180 * vectors.
1181 */
1182static inline void __run_timers(struct tvec_base *base)
1183{
1184        struct timer_list *timer;
1185
1186        spin_lock_irq(&base->lock);
1187
1188        while (time_after_eq(jiffies, base->timer_jiffies)) {
1189                struct hlist_head work_list;
1190                struct hlist_head *head = &work_list;
1191                int index;
1192
1193                if (!base->all_timers) {
1194                        base->timer_jiffies = jiffies;
1195                        break;
1196                }
1197
1198                index = base->timer_jiffies & TVR_MASK;
1199
1200                /*
1201                 * Cascade timers:
1202                 */
1203                if (!index &&
1204                        (!cascade(base, &base->tv2, INDEX(0))) &&
1205                                (!cascade(base, &base->tv3, INDEX(1))) &&
1206                                        !cascade(base, &base->tv4, INDEX(2)))
1207                        cascade(base, &base->tv5, INDEX(3));
1208                ++base->timer_jiffies;
1209                hlist_move_list(base->tv1.vec + index, head);
1210                while (!hlist_empty(head)) {
1211                        void (*fn)(unsigned long);
1212                        unsigned long data;
1213                        bool irqsafe;
1214
1215                        timer = hlist_entry(head->first, struct timer_list, entry);
1216                        fn = timer->function;
1217                        data = timer->data;
1218                        irqsafe = timer->flags & TIMER_IRQSAFE;
1219
1220                        timer_stats_account_timer(timer);
1221
1222                        base->running_timer = timer;
1223                        detach_expired_timer(timer, base);
1224
1225                        if (irqsafe) {
1226                                spin_unlock(&base->lock);
1227                                call_timer_fn(timer, fn, data);
1228                                spin_lock(&base->lock);
1229                        } else {
1230                                spin_unlock_irq(&base->lock);
1231                                call_timer_fn(timer, fn, data);
1232                                spin_lock_irq(&base->lock);
1233                        }
1234                }
1235        }
1236        base->running_timer = NULL;
1237        spin_unlock_irq(&base->lock);
1238}
1239
1240#ifdef CONFIG_NO_HZ_COMMON
1241/*
1242 * Find out when the next timer event is due to happen. This
1243 * is used on S/390 to stop all activity when a CPU is idle.
1244 * This function needs to be called with interrupts disabled.
1245 */
1246static unsigned long __next_timer_interrupt(struct tvec_base *base)
1247{
1248        unsigned long timer_jiffies = base->timer_jiffies;
1249        unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
1250        int index, slot, array, found = 0;
1251        struct timer_list *nte;
1252        struct tvec *varray[4];
1253
1254        /* Look for timer events in tv1. */
1255        index = slot = timer_jiffies & TVR_MASK;
1256        do {
1257                hlist_for_each_entry(nte, base->tv1.vec + slot, entry) {
1258                        if (nte->flags & TIMER_DEFERRABLE)
1259                                continue;
1260
1261                        found = 1;
1262                        expires = nte->expires;
1263                        /* Look at the cascade bucket(s)? */
1264                        if (!index || slot < index)
1265                                goto cascade;
1266                        return expires;
1267                }
1268                slot = (slot + 1) & TVR_MASK;
1269        } while (slot != index);
1270
1271cascade:
1272        /* Calculate the next cascade event */
1273        if (index)
1274                timer_jiffies += TVR_SIZE - index;
1275        timer_jiffies >>= TVR_BITS;
1276
1277        /* Check tv2-tv5. */
1278        varray[0] = &base->tv2;
1279        varray[1] = &base->tv3;
1280        varray[2] = &base->tv4;
1281        varray[3] = &base->tv5;
1282
1283        for (array = 0; array < 4; array++) {
1284                struct tvec *varp = varray[array];
1285
1286                index = slot = timer_jiffies & TVN_MASK;
1287                do {
1288                        hlist_for_each_entry(nte, varp->vec + slot, entry) {
1289                                if (nte->flags & TIMER_DEFERRABLE)
1290                                        continue;
1291
1292                                found = 1;
1293                                if (time_before(nte->expires, expires))
1294                                        expires = nte->expires;
1295                        }
1296                        /*
1297                         * Are we still searching for the first timer, or are
1298                         * we looking up the cascade buckets?
1299                         */
1300                        if (found) {
1301                                /* Look at the cascade bucket(s)? */
1302                                if (!index || slot < index)
1303                                        break;
1304                                return expires;
1305                        }
1306                        slot = (slot + 1) & TVN_MASK;
1307                } while (slot != index);
1308
1309                if (index)
1310                        timer_jiffies += TVN_SIZE - index;
1311                timer_jiffies >>= TVN_BITS;
1312        }
1313        return expires;
1314}
1315
1316/*
1317 * Check if the next hrtimer event is before the next timer wheel
1318 * event:
1319 */
1320static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
1321{
1322        u64 nextevt = hrtimer_get_next_event();
1323
1324        /*
1325         * If high resolution timers are enabled
1326         * hrtimer_get_next_event() returns KTIME_MAX.
1327         */
1328        if (expires <= nextevt)
1329                return expires;
1330
1331        /*
1332         * If the next timer is already expired, return the tick base
1333         * time so the tick is fired immediately.
1334         */
1335        if (nextevt <= basem)
1336                return basem;
1337
1338        /*
1339         * Round up to the next jiffy. High resolution timers are
1340         * off, so the hrtimers are expired in the tick and we need to
1341         * make sure that this tick really expires the timer to avoid
1342         * a ping pong of the nohz stop code.
1343         *
1344         * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3
1345         */
1346        return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
1347}
1348
1349/**
1350 * get_next_timer_interrupt - return the time (clock mono) of the next timer
1351 * @basej:      base time jiffies
1352 * @basem:      base time clock monotonic
1353 *
1354 * Returns the tick aligned clock monotonic time of the next pending
1355 * timer or KTIME_MAX if no timer is pending.
1356 */
1357u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
1358{
1359        struct tvec_base *base = this_cpu_ptr(&tvec_bases);
1360        u64 expires = KTIME_MAX;
1361        unsigned long nextevt;
1362
1363        /*
1364         * Pretend that there is no timer pending if the cpu is offline.
1365         * Possible pending timers will be migrated later to an active cpu.
1366         */
1367        if (cpu_is_offline(smp_processor_id()))
1368                return expires;
1369
1370        spin_lock(&base->lock);
1371        if (base->active_timers) {
1372                if (time_before_eq(base->next_timer, base->timer_jiffies))
1373                        base->next_timer = __next_timer_interrupt(base);
1374                nextevt = base->next_timer;
1375                if (time_before_eq(nextevt, basej))
1376                        expires = basem;
1377                else
1378                        expires = basem + (nextevt - basej) * TICK_NSEC;
1379        }
1380        spin_unlock(&base->lock);
1381
1382        return cmp_next_hrtimer_event(basem, expires);
1383}
1384#endif
1385
1386/*
1387 * Called from the timer interrupt handler to charge one tick to the current
1388 * process.  user_tick is 1 if the tick is user time, 0 for system.
1389 */
1390void update_process_times(int user_tick)
1391{
1392        struct task_struct *p = current;
1393
1394        /* Note: this timer irq context must be accounted for as well. */
1395        account_process_tick(p, user_tick);
1396        run_local_timers();
1397        rcu_check_callbacks(user_tick);
1398#ifdef CONFIG_IRQ_WORK
1399        if (in_irq())
1400                irq_work_tick();
1401#endif
1402        scheduler_tick();
1403        run_posix_cpu_timers(p);
1404}
1405
1406/*
1407 * This function runs timers and the timer-tq in bottom half context.
1408 */
1409static void run_timer_softirq(struct softirq_action *h)
1410{
1411        struct tvec_base *base = this_cpu_ptr(&tvec_bases);
1412
1413        if (time_after_eq(jiffies, base->timer_jiffies))
1414                __run_timers(base);
1415}
1416
1417/*
1418 * Called by the local, per-CPU timer interrupt on SMP.
1419 */
1420void run_local_timers(void)
1421{
1422        hrtimer_run_queues();
1423        raise_softirq(TIMER_SOFTIRQ);
1424}
1425
1426#ifdef __ARCH_WANT_SYS_ALARM
1427
1428/*
1429 * For backwards compatibility?  This can be done in libc so Alpha
1430 * and all newer ports shouldn't need it.
1431 */
1432SYSCALL_DEFINE1(alarm, unsigned int, seconds)
1433{
1434        return alarm_setitimer(seconds);
1435}
1436
1437#endif
1438
1439static void process_timeout(unsigned long __data)
1440{
1441        wake_up_process((struct task_struct *)__data);
1442}
1443
1444/**
1445 * schedule_timeout - sleep until timeout
1446 * @timeout: timeout value in jiffies
1447 *
1448 * Make the current task sleep until @timeout jiffies have
1449 * elapsed. The routine will return immediately unless
1450 * the current task state has been set (see set_current_state()).
1451 *
1452 * You can set the task state as follows -
1453 *
1454 * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
1455 * pass before the routine returns. The routine will return 0.
1456 *
1457 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1458 * delivered to the current task. In this case the remaining time
1459 * in jiffies will be returned, or 0 if the timer expired in time.
1460 *
1461 * The current task state is guaranteed to be TASK_RUNNING when this
1462 * routine returns.
1463 *
1464 * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
1465 * the CPU away without a bound on the timeout. In this case the return
1466 * value will be %MAX_SCHEDULE_TIMEOUT.
1467 *
1468 * In all cases the return value is guaranteed to be non-negative.
1469 */
1470signed long __sched schedule_timeout(signed long timeout)
1471{
1472        struct timer_list timer;
1473        unsigned long expire;
1474
1475        switch (timeout)
1476        {
1477        case MAX_SCHEDULE_TIMEOUT:
1478                /*
1479                 * These two special cases are useful to be comfortable
1480                 * in the caller. Nothing more. We could take
1481                 * MAX_SCHEDULE_TIMEOUT from one of the negative values,
1482                 * but I'd like to return a valid offset (>=0) to allow
1483                 * the caller to do everything it wants with the retval.
1484                 */
1485                schedule();
1486                goto out;
1487        default:
1488                /*
1489                 * Another bit of paranoia. Note that the retval will be
1490                 * 0 since no piece of the kernel is supposed to check
1491                 * for a negative retval of schedule_timeout() (since it
1492                 * should never happen anyway). You just have the printk()
1493                 * that will tell you if something has gone wrong and where.
1494                 */
1495                if (timeout < 0) {
1496                        printk(KERN_ERR "schedule_timeout: wrong timeout "
1497                                "value %lx\n", timeout);
1498                        dump_stack();
1499                        current->state = TASK_RUNNING;
1500                        goto out;
1501                }
1502        }
1503
1504        expire = timeout + jiffies;
1505
1506        setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
1507        __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
1508        schedule();
1509        del_singleshot_timer_sync(&timer);
1510
1511        /* Remove the timer from the object tracker */
1512        destroy_timer_on_stack(&timer);
1513
1514        timeout = expire - jiffies;
1515
1516 out:
1517        return timeout < 0 ? 0 : timeout;
1518}
1519EXPORT_SYMBOL(schedule_timeout);
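/*
 * Illustrative use (a sketch; "remaining" is hypothetical): waiting up
 * to 100ms for an event while staying interruptible:
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	remaining = schedule_timeout(msecs_to_jiffies(100));
 *
 * or simply schedule_timeout_interruptible(msecs_to_jiffies(100)),
 * which sets the task state for you (see below).
 */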
1520
1521/*
1522 * We can use __set_current_state() here because schedule_timeout() calls
1523 * schedule() unconditionally.
1524 */
1525signed long __sched schedule_timeout_interruptible(signed long timeout)
1526{
1527        __set_current_state(TASK_INTERRUPTIBLE);
1528        return schedule_timeout(timeout);
1529}
1530EXPORT_SYMBOL(schedule_timeout_interruptible);
1531
1532signed long __sched schedule_timeout_killable(signed long timeout)
1533{
1534        __set_current_state(TASK_KILLABLE);
1535        return schedule_timeout(timeout);
1536}
1537EXPORT_SYMBOL(schedule_timeout_killable);
1538
1539signed long __sched schedule_timeout_uninterruptible(signed long timeout)
1540{
1541        __set_current_state(TASK_UNINTERRUPTIBLE);
1542        return schedule_timeout(timeout);
1543}
1544EXPORT_SYMBOL(schedule_timeout_uninterruptible);
1545
1546#ifdef CONFIG_HOTPLUG_CPU
1547static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
1548{
1549        struct timer_list *timer;
1550        int cpu = new_base->cpu;
1551
1552        while (!hlist_empty(head)) {
1553                timer = hlist_entry(head->first, struct timer_list, entry);
1554                /* We ignore the accounting on the dying cpu */
1555                detach_timer(timer, false);
1556                timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
1557                internal_add_timer(new_base, timer);
1558        }
1559}
1560
1561static void migrate_timers(int cpu)
1562{
1563        struct tvec_base *old_base;
1564        struct tvec_base *new_base;
1565        int i;
1566
1567        BUG_ON(cpu_online(cpu));
1568        old_base = per_cpu_ptr(&tvec_bases, cpu);
1569        new_base = get_cpu_ptr(&tvec_bases);
1570        /*
1571         * The caller is globally serialized and nobody else
1572         * takes two locks at once, deadlock is not possible.
1573         */
1574        spin_lock_irq(&new_base->lock);
1575        spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
1576
1577        BUG_ON(old_base->running_timer);
1578
1579        for (i = 0; i < TVR_SIZE; i++)
1580                migrate_timer_list(new_base, old_base->tv1.vec + i);
1581        for (i = 0; i < TVN_SIZE; i++) {
1582                migrate_timer_list(new_base, old_base->tv2.vec + i);
1583                migrate_timer_list(new_base, old_base->tv3.vec + i);
1584                migrate_timer_list(new_base, old_base->tv4.vec + i);
1585                migrate_timer_list(new_base, old_base->tv5.vec + i);
1586        }
1587
1588        old_base->active_timers = 0;
1589        old_base->all_timers = 0;
1590
1591        spin_unlock(&old_base->lock);
1592        spin_unlock_irq(&new_base->lock);
1593        put_cpu_ptr(&tvec_bases);
1594}
1595
1596static int timer_cpu_notify(struct notifier_block *self,
1597                                unsigned long action, void *hcpu)
1598{
1599        switch (action) {
1600        case CPU_DEAD:
1601        case CPU_DEAD_FROZEN:
1602                migrate_timers((long)hcpu);
1603                break;
1604        default:
1605                break;
1606        }
1607
1608        return NOTIFY_OK;
1609}
1610
1611static inline void timer_register_cpu_notifier(void)
1612{
1613        cpu_notifier(timer_cpu_notify, 0);
1614}
1615#else
1616static inline void timer_register_cpu_notifier(void) { }
1617#endif /* CONFIG_HOTPLUG_CPU */
1618
1619static void __init init_timer_cpu(int cpu)
1620{
1621        struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
1622
1623        base->cpu = cpu;
1624        spin_lock_init(&base->lock);
1625
1626        base->timer_jiffies = jiffies;
1627        base->next_timer = base->timer_jiffies;
1628}
1629
1630static void __init init_timer_cpus(void)
1631{
1632        int cpu;
1633
1634        for_each_possible_cpu(cpu)
1635                init_timer_cpu(cpu);
1636}
1637
1638void __init init_timers(void)
1639{
1640        init_timer_cpus();
1641        init_timer_stats();
1642        timer_register_cpu_notifier();
1643        open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
1644}
1645
1646/**
1647 * msleep - sleep safely even with waitqueue interruptions
1648 * @msecs: Time in milliseconds to sleep for
1649 */
1650void msleep(unsigned int msecs)
1651{
1652        unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1653
1654        while (timeout)
1655                timeout = schedule_timeout_uninterruptible(timeout);
1656}
1657
1658EXPORT_SYMBOL(msleep);
1659
1660/**
1661 * msleep_interruptible - sleep waiting for signals
1662 * @msecs: Time in milliseconds to sleep for
1663 */
1664unsigned long msleep_interruptible(unsigned int msecs)
1665{
1666        unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1667
1668        while (timeout && !signal_pending(current))
1669                timeout = schedule_timeout_interruptible(timeout);
1670        return jiffies_to_msecs(timeout);
1671}
1672
1673EXPORT_SYMBOL(msleep_interruptible);
1674
1675static void __sched do_usleep_range(unsigned long min, unsigned long max)
1676{
1677        ktime_t kmin;
1678        unsigned long delta;
1679
1680        kmin = ktime_set(0, min * NSEC_PER_USEC);
1681        delta = (max - min) * NSEC_PER_USEC;
1682        schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
1683}
1684
1685/**
1686 * usleep_range - Drop in replacement for udelay where wakeup is flexible
1687 * @min: Minimum time in usecs to sleep
1688 * @max: Maximum time in usecs to sleep
1689 */
1690void __sched usleep_range(unsigned long min, unsigned long max)
1691{
1692        __set_current_state(TASK_UNINTERRUPTIBLE);
1693        do_usleep_range(min, max);
1694}
1695EXPORT_SYMBOL(usleep_range);
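/*
 * Illustrative rules of thumb for picking a delay primitive (see also
 * Documentation/timers/timers-howto.txt):
 *
 *	udelay(5);		atomic context, very short busy-waits
 *	usleep_range(50, 100);	sleeping context, ~10us to ~10ms
 *	msleep(20);		sleeping context, longer delays
 */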
1696