linux/kernel/time/tick-sched.c
   1/*
   2 *  linux/kernel/time/tick-sched.c
   3 *
   4 *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
   5 *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
   6 *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
   7 *
   8 *  NOHZ (no idle tick) implementation for low and high resolution timers
   9 *
  10 *  Started by: Thomas Gleixner and Ingo Molnar
  11 *
  12 *  Distribute under GPLv2.
  13 */
  14#include <linux/cpu.h>
  15#include <linux/err.h>
  16#include <linux/hrtimer.h>
  17#include <linux/interrupt.h>
  18#include <linux/kernel_stat.h>
  19#include <linux/percpu.h>
  20#include <linux/nmi.h>
  21#include <linux/profile.h>
  22#include <linux/sched/signal.h>
  23#include <linux/sched/clock.h>
  24#include <linux/sched/stat.h>
  25#include <linux/sched/nohz.h>
  26#include <linux/sched/loadavg.h>
  27#include <linux/module.h>
  28#include <linux/irq_work.h>
  29#include <linux/posix-timers.h>
  30#include <linux/context_tracking.h>
  31#include <linux/mm.h>
  32
  33#include <asm/irq_regs.h>
  34
  35#include "tick-internal.h"
  36
  37#include <trace/events/timer.h>
  38
  39/*
  40 * Per-CPU nohz control structure
  41 */
  42static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
  43
  44struct tick_sched *tick_get_tick_sched(int cpu)
  45{
  46        return &per_cpu(tick_cpu_sched, cpu);
  47}
  48
  49#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
  50/*
  51 * The time when the last jiffy update happened. Write access must hold
  52 * jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a
  53 * consistent view of jiffies and last_jiffies_update.
  54 */
  55static ktime_t last_jiffies_update;
  56
  57/*
  58 * Must be called with interrupts disabled!
  59 */
  60static void tick_do_update_jiffies64(ktime_t now)
  61{
  62        unsigned long ticks = 1;
  63        ktime_t delta, nextp;
  64
  65        /*
  66         * 64bit can do a quick check without holding jiffies lock and
  67         * without looking at the sequence count. The smp_load_acquire()
  68         * pairs with the update done later in this function.
  69         *
  70         * 32bit cannot do that because the store of tick_next_period
  71         * consists of two 32bit stores and the first store could move it
  72         * to a random point in the future.
  73         */
  74        if (IS_ENABLED(CONFIG_64BIT)) {
  75                if (ktime_before(now, smp_load_acquire(&tick_next_period)))
  76                        return;
  77        } else {
  78                unsigned int seq;
  79
  80                /*
  81                 * Avoid contention on jiffies_lock and protect the quick
  82                 * check with the sequence count.
  83                 */
  84                do {
  85                        seq = read_seqcount_begin(&jiffies_seq);
  86                        nextp = tick_next_period;
  87                } while (read_seqcount_retry(&jiffies_seq, seq));
  88
  89                if (ktime_before(now, nextp))
  90                        return;
  91        }
  92
  93        /* Quick check failed, i.e. update is required. */
  94        raw_spin_lock(&jiffies_lock);
  95        /*
  96         * Reevaluate with the lock held. Another CPU might have done the
  97         * update already.
  98         */
  99        if (ktime_before(now, tick_next_period)) {
 100                raw_spin_unlock(&jiffies_lock);
 101                return;
 102        }
 103
 104        write_seqcount_begin(&jiffies_seq);
 105
 106        delta = ktime_sub(now, tick_next_period);
 107        if (unlikely(delta >= TICK_NSEC)) {
 108                /* Slow path for long idle sleep times */
 109                s64 incr = TICK_NSEC;
 110
 111                ticks += ktime_divns(delta, incr);
 112
 113                last_jiffies_update = ktime_add_ns(last_jiffies_update,
 114                                                   incr * ticks);
 115        } else {
 116                last_jiffies_update = ktime_add_ns(last_jiffies_update,
 117                                                   TICK_NSEC);
 118        }
 119
 120        /* Advance jiffies to complete the jiffies_seq protected job */
 121        jiffies_64 += ticks;
 122
 123        /*
 124         * Keep the tick_next_period variable up to date.
 125         */
 126        nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC);
 127
 128        if (IS_ENABLED(CONFIG_64BIT)) {
 129                /*
 130                 * Pairs with smp_load_acquire() in the lockless quick
 131                 * check above and ensures that the update to jiffies_64 is
 132                 * not reordered vs. the store to tick_next_period, neither
 133                 * by the compiler nor by the CPU.
 134                 */
 135                smp_store_release(&tick_next_period, nextp);
 136        } else {
 137                /*
 138                 * A plain store is good enough on 32bit as the quick check
 139                 * above is protected by the sequence count.
 140                 */
 141                tick_next_period = nextp;
 142        }
 143
 144        /*
 145         * Release the sequence count. calc_global_load() below is not
 146         * protected by it, but jiffies_lock needs to be held to prevent
 147         * concurrent invocations.
 148         */
 149        write_seqcount_end(&jiffies_seq);
 150
 151        calc_global_load();
 152
 153        raw_spin_unlock(&jiffies_lock);
 154        update_wall_time();
 155}
 156
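/*
 * A minimal userspace sketch (illustrative only, never built as part of this
 * file) of the catch-up arithmetic in tick_do_update_jiffies64() above,
 * assuming HZ=1000, i.e. a 1,000,000 ns tick, and a wakeup 3.5 ticks after
 * tick_next_period.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const int64_t tick_nsec = 1000000;	/* assumed HZ=1000 */
	int64_t delta = 3500000;		/* now - tick_next_period */
	int64_t ticks = 1;			/* the tick that is due right now */

	if (delta >= tick_nsec)
		ticks += delta / tick_nsec;	/* + 3 fully elapsed ticks */

	/*
	 * jiffies_64 advances by 4 and last_jiffies_update moves forward by
	 * 4 ms; the remaining 0.5 tick is left for the next update.
	 */
	printf("ticks = %lld\n", (long long)ticks);
	return 0;
}
#endif
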
 157/*
 158 * Initialize and return the jiffies update.
 159 */
 160static ktime_t tick_init_jiffy_update(void)
 161{
 162        ktime_t period;
 163
 164        raw_spin_lock(&jiffies_lock);
 165        write_seqcount_begin(&jiffies_seq);
 166        /* Did we start the jiffies update yet ? */
 167        if (last_jiffies_update == 0)
 168                last_jiffies_update = tick_next_period;
 169        period = last_jiffies_update;
 170        write_seqcount_end(&jiffies_seq);
 171        raw_spin_unlock(&jiffies_lock);
 172        return period;
 173}
 174
 175static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 176{
 177        int cpu = smp_processor_id();
 178
 179#ifdef CONFIG_NO_HZ_COMMON
 180        /*
 181         * Check if the do_timer duty was dropped. We don't care about
 182         * concurrency: This happens only when the CPU in charge went
 183         * into a long sleep. If two CPUs happen to assign themselves to
 184         * this duty, then the jiffies update is still serialized by
 185         * jiffies_lock.
 186         *
 187         * If nohz_full is enabled, this should not happen because the
 188         * tick_do_timer_cpu CPU never relinquishes the duty.
 189         */
 190        if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
 191#ifdef CONFIG_NO_HZ_FULL
 192                WARN_ON(tick_nohz_full_running);
 193#endif
 194                tick_do_timer_cpu = cpu;
 195        }
 196#endif
 197
 198        /* Check if jiffies need an update */
 199        if (tick_do_timer_cpu == cpu)
 200                tick_do_update_jiffies64(now);
 201
 202        if (ts->inidle)
 203                ts->got_idle_tick = 1;
 204}
 205
 206static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 207{
 208#ifdef CONFIG_NO_HZ_COMMON
 209        /*
 210         * When we are idle and the tick is stopped, we have to touch
 211         * the watchdog as we might not schedule for a really long
 212         * time. This happens on completely idle SMP systems while
 213         * waiting on the login prompt. We also increment the "start of
 214         * idle" jiffy stamp so the idle accounting adjustment we do
 215         * when we go busy again does not account too many ticks.
 216         */
 217        if (ts->tick_stopped) {
 218                touch_softlockup_watchdog_sched();
 219                if (is_idle_task(current))
 220                        ts->idle_jiffies++;
 221                /*
 222                 * In case the current tick fired too early past its expected
 223                 * expiration, make sure we don't bypass the next clock reprogramming
 224                 * to the same deadline.
 225                 */
 226                ts->next_tick = 0;
 227        }
 228#endif
 229        update_process_times(user_mode(regs));
 230        profile_tick(CPU_PROFILING);
 231}
 232#endif
 233
 234#ifdef CONFIG_NO_HZ_FULL
 235cpumask_var_t tick_nohz_full_mask;
 236bool tick_nohz_full_running;
 237EXPORT_SYMBOL_GPL(tick_nohz_full_running);
 238static atomic_t tick_dep_mask;
 239
 240static bool check_tick_dependency(atomic_t *dep)
 241{
 242        int val = atomic_read(dep);
 243
 244        if (val & TICK_DEP_MASK_POSIX_TIMER) {
 245                trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
 246                return true;
 247        }
 248
 249        if (val & TICK_DEP_MASK_PERF_EVENTS) {
 250                trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
 251                return true;
 252        }
 253
 254        if (val & TICK_DEP_MASK_SCHED) {
 255                trace_tick_stop(0, TICK_DEP_MASK_SCHED);
 256                return true;
 257        }
 258
 259        if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
 260                trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
 261                return true;
 262        }
 263
 264        if (val & TICK_DEP_MASK_RCU) {
 265                trace_tick_stop(0, TICK_DEP_MASK_RCU);
 266                return true;
 267        }
 268
 269        return false;
 270}
 271
 272static bool can_stop_full_tick(int cpu, struct tick_sched *ts)
 273{
 274        lockdep_assert_irqs_disabled();
 275
 276        if (unlikely(!cpu_online(cpu)))
 277                return false;
 278
 279        if (check_tick_dependency(&tick_dep_mask))
 280                return false;
 281
 282        if (check_tick_dependency(&ts->tick_dep_mask))
 283                return false;
 284
 285        if (check_tick_dependency(&current->tick_dep_mask))
 286                return false;
 287
 288        if (check_tick_dependency(&current->signal->tick_dep_mask))
 289                return false;
 290
 291        return true;
 292}
 293
 294static void nohz_full_kick_func(struct irq_work *work)
 295{
 296        /* Empty, the tick restart happens on tick_nohz_irq_exit() */
 297}
 298
 299static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
 300        .func = nohz_full_kick_func,
 301        .flags = ATOMIC_INIT(IRQ_WORK_HARD_IRQ),
 302};
 303
 304/*
 305 * Kick this CPU if it's full dynticks in order to force it to
 306 * re-evaluate its dependency on the tick and restart it if necessary.
 307 * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
 308 * is NMI safe.
 309 */
 310static void tick_nohz_full_kick(void)
 311{
 312        if (!tick_nohz_full_cpu(smp_processor_id()))
 313                return;
 314
 315        irq_work_queue(this_cpu_ptr(&nohz_full_kick_work));
 316}
 317
 318/*
 319 * Kick the CPU if it's full dynticks in order to force it to
 320 * re-evaluate its dependency on the tick and restart it if necessary.
 321 */
 322void tick_nohz_full_kick_cpu(int cpu)
 323{
 324        if (!tick_nohz_full_cpu(cpu))
 325                return;
 326
 327        irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
 328}
 329
 330static void tick_nohz_kick_task(struct task_struct *tsk)
 331{
 332        int cpu;
 333
 334        /*
 335         * If the task is not running, run_posix_cpu_timers()
 336         * has nothing to elapse, so the IPI can be spared.
 337         *
 338         * activate_task()                      STORE p->tick_dep_mask
 339         *   STORE p->on_rq
 340         * __schedule() (switch to task 'p')    smp_mb() (atomic_fetch_or())
 341         *   LOCK rq->lock                      LOAD p->on_rq
 342         *   smp_mb__after_spin_lock()
 343         *   tick_nohz_task_switch()
 344         *     LOAD p->tick_dep_mask
 345         */
 346        if (!sched_task_on_rq(tsk))
 347                return;
 348
 349        /*
 350         * If the task concurrently migrates to another CPU,
 351         * we guarantee it sees the new tick dependency upon
 352         * schedule.
 353         *
 354         * set_task_cpu(p, cpu);
 355         *   STORE p->cpu = @cpu
 356         * __schedule() (switch to task 'p')
 357         *   LOCK rq->lock
 358         *   smp_mb__after_spin_lock()          STORE p->tick_dep_mask
 359         *   tick_nohz_task_switch()            smp_mb() (atomic_fetch_or())
 360         *      LOAD p->tick_dep_mask           LOAD p->cpu
 361         */
 362        cpu = task_cpu(tsk);
 363
 364        preempt_disable();
 365        if (cpu_online(cpu))
 366                tick_nohz_full_kick_cpu(cpu);
 367        preempt_enable();
 368}
 369
 370/*
 371 * Kick all full dynticks CPUs in order to force these to re-evaluate
 372 * their dependency on the tick and restart it if necessary.
 373 */
 374static void tick_nohz_full_kick_all(void)
 375{
 376        int cpu;
 377
 378        if (!tick_nohz_full_running)
 379                return;
 380
 381        preempt_disable();
 382        for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
 383                tick_nohz_full_kick_cpu(cpu);
 384        preempt_enable();
 385}
 386
 387static void tick_nohz_dep_set_all(atomic_t *dep,
 388                                  enum tick_dep_bits bit)
 389{
 390        int prev;
 391
 392        prev = atomic_fetch_or(BIT(bit), dep);
 393        if (!prev)
 394                tick_nohz_full_kick_all();
 395}
 396
 397/*
 398 * Set a global tick dependency. Used by perf events that rely on freq and
 399 * by unstable clock.
 400 */
 401void tick_nohz_dep_set(enum tick_dep_bits bit)
 402{
 403        tick_nohz_dep_set_all(&tick_dep_mask, bit);
 404}
 405
 406void tick_nohz_dep_clear(enum tick_dep_bits bit)
 407{
 408        atomic_andnot(BIT(bit), &tick_dep_mask);
 409}
 410
 411/*
 412 * Set per-CPU tick dependency. Used by scheduler and perf events in order to
 413 * manage events throttling.
 414 */
 415void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
 416{
 417        int prev;
 418        struct tick_sched *ts;
 419
 420        ts = per_cpu_ptr(&tick_cpu_sched, cpu);
 421
 422        prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
 423        if (!prev) {
 424                preempt_disable();
 425                /* Perf needs local kick that is NMI safe */
 426                if (cpu == smp_processor_id()) {
 427                        tick_nohz_full_kick();
 428                } else {
 429                        /* Remote irq work not NMI-safe */
 430                        if (!WARN_ON_ONCE(in_nmi()))
 431                                tick_nohz_full_kick_cpu(cpu);
 432                }
 433                preempt_enable();
 434        }
 435}
 436EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);
 437
 438void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
 439{
 440        struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
 441
 442        atomic_andnot(BIT(bit), &ts->tick_dep_mask);
 443}
 444EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
 445
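/*
 * A minimal sketch of how a subsystem might use the per-CPU dependency API
 * above to pin the tick on a CPU while it needs periodic processing there.
 * The my_*() helpers are hypothetical and TICK_DEP_BIT_PERF_EVENTS is only
 * an example bit; any tick_dep_bits value owned by the caller works the same
 * way. Illustrative only, not built here.
 */
#if 0
static void my_start_sampling(int cpu)
{
	/* Force the target CPU to keep (or restart) its tick. */
	tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
}

static void my_stop_sampling(int cpu)
{
	/* Allow the CPU to stop its tick again at the next opportunity. */
	tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
}
#endif
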
 446/*
 447 * Set a per-task tick dependency. RCU needs this. Posix CPU timers also
 448 * need it in order to elapse per-task timers.
 449 */
 450void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
 451{
 452        if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask))
 453                tick_nohz_kick_task(tsk);
 454}
 455EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
 456
 457void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
 458{
 459        atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
 460}
 461EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
 462
 463/*
 464 * Set a per-taskgroup tick dependency. Posix CPU timers need this in order
 465 * to elapse per-process timers.
 466 */
 467void tick_nohz_dep_set_signal(struct task_struct *tsk,
 468                              enum tick_dep_bits bit)
 469{
 470        int prev;
 471        struct signal_struct *sig = tsk->signal;
 472
 473        prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask);
 474        if (!prev) {
 475                struct task_struct *t;
 476
 477                lockdep_assert_held(&tsk->sighand->siglock);
 478                __for_each_thread(sig, t)
 479                        tick_nohz_kick_task(t);
 480        }
 481}
 482
 483void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
 484{
 485        atomic_andnot(BIT(bit), &sig->tick_dep_mask);
 486}
 487
 488/*
 489 * Re-evaluate the need for the tick as we switch the current task.
 490 * It might need the tick due to per task/process properties:
 491 * perf events, posix CPU timers, ...
 492 */
 493void __tick_nohz_task_switch(void)
 494{
 495        struct tick_sched *ts;
 496
 497        if (!tick_nohz_full_cpu(smp_processor_id()))
 498                return;
 499
 500        ts = this_cpu_ptr(&tick_cpu_sched);
 501
 502        if (ts->tick_stopped) {
 503                if (atomic_read(&current->tick_dep_mask) ||
 504                    atomic_read(&current->signal->tick_dep_mask))
 505                        tick_nohz_full_kick();
 506        }
 507}
 508
 509/* Get the boot-time nohz CPU list from the kernel parameters. */
 510void __init tick_nohz_full_setup(cpumask_var_t cpumask)
 511{
 512        alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
 513        cpumask_copy(tick_nohz_full_mask, cpumask);
 514        tick_nohz_full_running = true;
 515}
 516EXPORT_SYMBOL_GPL(tick_nohz_full_setup);
 517
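/*
 * Example of the documented boot usage (shown for illustration): booting with
 *
 *	nohz_full=1-7
 *
 * puts CPUs 1-7 into tick_nohz_full_mask, while CPU 0 stays out of the set,
 * keeps its tick and handles the timekeeping/housekeeping duty on behalf of
 * the full dynticks CPUs.
 */
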
 518static int tick_nohz_cpu_down(unsigned int cpu)
 519{
 520        /*
 521         * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
 522         * timers, workqueues, timekeeping, ...) on behalf of full dynticks
 523         * CPUs. It must remain online when nohz full is enabled.
 524         */
 525        if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
 526                return -EBUSY;
 527        return 0;
 528}
 529
 530void __init tick_nohz_init(void)
 531{
 532        int cpu, ret;
 533
 534        if (!tick_nohz_full_running)
 535                return;
 536
 537        /*
 538         * Full dynticks uses irq work to drive the tick rescheduling on safe
 539         * locking contexts. But then we need irq work to raise its own
 540         * interrupts to avoid circular dependency on the tick
 541         */
 542        if (!arch_irq_work_has_interrupt()) {
 543                pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
 544                cpumask_clear(tick_nohz_full_mask);
 545                tick_nohz_full_running = false;
 546                return;
 547        }
 548
 549        if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
 550                        !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
 551                cpu = smp_processor_id();
 552
 553                if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
 554                        pr_warn("NO_HZ: Clearing %d from nohz_full range "
 555                                "for timekeeping\n", cpu);
 556                        cpumask_clear_cpu(cpu, tick_nohz_full_mask);
 557                }
 558        }
 559
 560        for_each_cpu(cpu, tick_nohz_full_mask)
 561                context_tracking_cpu_set(cpu);
 562
 563        ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
 564                                        "kernel/nohz:predown", NULL,
 565                                        tick_nohz_cpu_down);
 566        WARN_ON(ret < 0);
 567        pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
 568                cpumask_pr_args(tick_nohz_full_mask));
 569}
 570#endif
 571
 572/*
 573 * NOHZ - aka dynamic tick functionality
 574 */
 575#ifdef CONFIG_NO_HZ_COMMON
 576/*
 577 * NO HZ enabled ?
 578 */
 579bool tick_nohz_enabled __read_mostly  = true;
 580unsigned long tick_nohz_active  __read_mostly;
 581/*
 582 * Enable / Disable tickless mode
 583 */
 584static int __init setup_tick_nohz(char *str)
 585{
 586        return (kstrtobool(str, &tick_nohz_enabled) == 0);
 587}
 588
 589__setup("nohz=", setup_tick_nohz);
 590
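/*
 * Example of the documented boot usage (shown for illustration): "nohz=off"
 * on the kernel command line makes kstrtobool() set tick_nohz_enabled to
 * false, so the periodic tick is kept even on idle CPUs; the default
 * behaviour corresponds to "nohz=on".
 */
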
 591bool tick_nohz_tick_stopped(void)
 592{
 593        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
 594
 595        return ts->tick_stopped;
 596}
 597
 598bool tick_nohz_tick_stopped_cpu(int cpu)
 599{
 600        struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
 601
 602        return ts->tick_stopped;
 603}
 604
 605/**
 606 * tick_nohz_update_jiffies - update jiffies when idle was interrupted
 607 *
 608 * Called from interrupt entry when the CPU was idle
 609 *
 610 * In case the sched_tick was stopped on this CPU, we have to check if jiffies
 611 * must be updated. Otherwise an interrupt handler could use a stale jiffy
 612 * value. We do this unconditionally on any CPU, as we don't know whether the
 613 * CPU which has the update task assigned is in a long sleep.
 614 */
 615static void tick_nohz_update_jiffies(ktime_t now)
 616{
 617        unsigned long flags;
 618
 619        __this_cpu_write(tick_cpu_sched.idle_waketime, now);
 620
 621        local_irq_save(flags);
 622        tick_do_update_jiffies64(now);
 623        local_irq_restore(flags);
 624
 625        touch_softlockup_watchdog_sched();
 626}
 627
 628/*
 629 * Updates the per-CPU time idle statistics counters
 630 */
 631static void
 632update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
 633{
 634        ktime_t delta;
 635
 636        if (ts->idle_active) {
 637                delta = ktime_sub(now, ts->idle_entrytime);
 638                if (nr_iowait_cpu(cpu) > 0)
 639                        ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
 640                else
 641                        ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
 642                ts->idle_entrytime = now;
 643        }
 644
 645        if (last_update_time)
 646                *last_update_time = ktime_to_us(now);
 647
 648}
 649
 650static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 651{
 652        update_ts_time_stats(smp_processor_id(), ts, now, NULL);
 653        ts->idle_active = 0;
 654
 655        sched_clock_idle_wakeup_event();
 656}
 657
 658static void tick_nohz_start_idle(struct tick_sched *ts)
 659{
 660        ts->idle_entrytime = ktime_get();
 661        ts->idle_active = 1;
 662        sched_clock_idle_sleep_event();
 663}
 664
 665/**
 666 * get_cpu_idle_time_us - get the total idle time of a CPU
 667 * @cpu: CPU number to query
 668 * @last_update_time: variable to store update time in. Do not update
 669 * counters if NULL.
 670 *
 671 * Return the cumulative idle time (since boot) for a given
 672 * CPU, in microseconds.
 673 *
 674 * This time is measured via accounting rather than sampling,
 675 * and is as accurate as ktime_get() is.
 676 *
 677 * This function returns -1 if NOHZ is not enabled.
 678 */
 679u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
 680{
 681        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 682        ktime_t now, idle;
 683
 684        if (!tick_nohz_active)
 685                return -1;
 686
 687        now = ktime_get();
 688        if (last_update_time) {
 689                update_ts_time_stats(cpu, ts, now, last_update_time);
 690                idle = ts->idle_sleeptime;
 691        } else {
 692                if (ts->idle_active && !nr_iowait_cpu(cpu)) {
 693                        ktime_t delta = ktime_sub(now, ts->idle_entrytime);
 694
 695                        idle = ktime_add(ts->idle_sleeptime, delta);
 696                } else {
 697                        idle = ts->idle_sleeptime;
 698                }
 699        }
 700
 701        return ktime_to_us(idle);
 702
 703}
 704EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
 705
 706/**
 707 * get_cpu_iowait_time_us - get the total iowait time of a CPU
 708 * @cpu: CPU number to query
 709 * @last_update_time: variable to store update time in. Do not update
 710 * counters if NULL.
 711 *
 712 * Return the cumulative iowait time (since boot) for a given
 713 * CPU, in microseconds.
 714 *
 715 * This time is measured via accounting rather than sampling,
 716 * and is as accurate as ktime_get() is.
 717 *
 718 * This function returns -1 if NOHZ is not enabled.
 719 */
 720u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
 721{
 722        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 723        ktime_t now, iowait;
 724
 725        if (!tick_nohz_active)
 726                return -1;
 727
 728        now = ktime_get();
 729        if (last_update_time) {
 730                update_ts_time_stats(cpu, ts, now, last_update_time);
 731                iowait = ts->iowait_sleeptime;
 732        } else {
 733                if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
 734                        ktime_t delta = ktime_sub(now, ts->idle_entrytime);
 735
 736                        iowait = ktime_add(ts->iowait_sleeptime, delta);
 737                } else {
 738                        iowait = ts->iowait_sleeptime;
 739                }
 740        }
 741
 742        return ktime_to_us(iowait);
 743}
 744EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
 745
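/*
 * A rough sketch of how a consumer (conceptually similar to a cpufreq
 * governor) might turn two readings of get_cpu_idle_time_us() into an idle
 * fraction for a sampling window. Illustrative only, not built here; the
 * my_*() name is hypothetical and div64_u64() comes from <linux/math64.h>.
 */
#if 0
static unsigned int my_idle_permille(int cpu, u64 *prev_idle_us, u64 *prev_wall_us)
{
	u64 wall_us, idle_us, d_idle, d_wall;

	/* Passing &wall_us updates the counters and returns 'now' in us. */
	idle_us = get_cpu_idle_time_us(cpu, &wall_us);
	if (idle_us == (u64)-1)
		return 0;	/* NOHZ not active, no accounting available */

	d_idle = idle_us - *prev_idle_us;
	d_wall = wall_us - *prev_wall_us;
	*prev_idle_us = idle_us;
	*prev_wall_us = wall_us;

	return d_wall ? div64_u64(1000 * d_idle, d_wall) : 0;
}
#endif
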
 746static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 747{
 748        hrtimer_cancel(&ts->sched_timer);
 749        hrtimer_set_expires(&ts->sched_timer, ts->last_tick);
 750
 751        /* Forward the time to expire in the future */
 752        hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
 753
 754        if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 755                hrtimer_start_expires(&ts->sched_timer,
 756                                      HRTIMER_MODE_ABS_PINNED_HARD);
 757        } else {
 758                tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
 759        }
 760
 761        /*
 762         * Reset to make sure next tick stop doesn't get fooled by past
 763         * cached clock deadline.
 764         */
 765        ts->next_tick = 0;
 766}
 767
 768static inline bool local_timer_softirq_pending(void)
 769{
 770        return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
 771}
 772
 773static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 774{
 775        u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
 776        unsigned long basejiff;
 777        unsigned int seq;
 778
 779        /* Read jiffies and the time when jiffies were updated last */
 780        do {
 781                seq = read_seqcount_begin(&jiffies_seq);
 782                basemono = last_jiffies_update;
 783                basejiff = jiffies;
 784        } while (read_seqcount_retry(&jiffies_seq, seq));
 785        ts->last_jiffies = basejiff;
 786        ts->timer_expires_base = basemono;
 787
 788        /*
 789         * Keep the periodic tick when RCU, architecture or irq_work
 790         * requests it.
 791         * Aside from that, check whether the local timer softirq is
 792         * pending. If so, it's a bad idea to call get_next_timer_interrupt()
 793         * because there is an already expired timer, so it will request
 794         * immediate expiry, which rearms the hardware timer with a
 795         * minimal delta which brings us back to this place
 796         * immediately. Lather, rinse and repeat...
 797         */
 798        if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
 799            irq_work_needs_cpu() || local_timer_softirq_pending()) {
 800                next_tick = basemono + TICK_NSEC;
 801        } else {
 802                /*
 803                 * Get the next pending timer. If high resolution
 804                 * timers are enabled this only takes the timer wheel
 805                 * timers into account. If high resolution timers are
 806                 * disabled this also looks at the next expiring
 807                 * hrtimer.
 808                 */
 809                next_tmr = get_next_timer_interrupt(basejiff, basemono);
 810                ts->next_timer = next_tmr;
 811                /* Take the next rcu event into account */
 812                next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
 813        }
 814
 815        /*
 816         * If the tick is due in the next period, keep it ticking or
 817         * force prod the timer.
 818         */
 819        delta = next_tick - basemono;
 820        if (delta <= (u64)TICK_NSEC) {
 821                /*
 822                 * Tell the timer code that the base is not idle, i.e. undo
 823                 * the effect of get_next_timer_interrupt():
 824                 */
 825                timer_clear_idle();
 826                /*
 827                 * We've not stopped the tick yet, and there's a timer in the
 828                 * next period, so no point in stopping it either, bail.
 829                 */
 830                if (!ts->tick_stopped) {
 831                        ts->timer_expires = 0;
 832                        goto out;
 833                }
 834        }
 835
 836        /*
 837         * If this CPU is the one which had the do_timer() duty last, we limit
 838         * the sleep time to the timekeeping max_deferment value.
 839         * Otherwise we can sleep as long as we want.
 840         */
 841        delta = timekeeping_max_deferment();
 842        if (cpu != tick_do_timer_cpu &&
 843            (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
 844                delta = KTIME_MAX;
 845
 846        /* Calculate the next expiry time */
 847        if (delta < (KTIME_MAX - basemono))
 848                expires = basemono + delta;
 849        else
 850                expires = KTIME_MAX;
 851
 852        ts->timer_expires = min_t(u64, expires, next_tick);
 853
 854out:
 855        return ts->timer_expires;
 856}
 857
 858static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 859{
 860        struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 861        u64 basemono = ts->timer_expires_base;
 862        u64 expires = ts->timer_expires;
 863        ktime_t tick = expires;
 864
 865        /* Make sure we won't be trying to stop it twice in a row. */
 866        ts->timer_expires_base = 0;
 867
 868        /*
 869         * If this CPU is the one which updates jiffies, then give up
 870         * the assignment and let it be taken by the CPU which runs
 871         * the tick timer next, which might be this CPU as well. If we
 872         * don't drop this here the jiffies might be stale and
 873         * do_timer() never invoked. Keep track of the fact that it
 874         * was the one which had the do_timer() duty last.
 875         */
 876        if (cpu == tick_do_timer_cpu) {
 877                tick_do_timer_cpu = TICK_DO_TIMER_NONE;
 878                ts->do_timer_last = 1;
 879        } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
 880                ts->do_timer_last = 0;
 881        }
 882
 883        /* Skip reprogramming the event if it hasn't changed */
 884        if (ts->tick_stopped && (expires == ts->next_tick)) {
 885                /* Sanity check: make sure clockevent is actually programmed */
 886                if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
 887                        return;
 888
 889                WARN_ON_ONCE(1);
 890                printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
 891                            basemono, ts->next_tick, dev->next_event,
 892                            hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
 893        }
 894
 895        /*
 896         * nohz_stop_sched_tick can be called several times before
 897         * the nohz_restart_sched_tick is called. This happens when
 898         * interrupts arrive which do not cause a reschedule. In the
 899         * first call we save the current tick time, so we can restart
 900         * the scheduler tick in nohz_restart_sched_tick.
 901         */
 902        if (!ts->tick_stopped) {
 903                calc_load_nohz_start();
 904                quiet_vmstat();
 905
 906                ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
 907                ts->tick_stopped = 1;
 908                trace_tick_stop(1, TICK_DEP_MASK_NONE);
 909        }
 910
 911        ts->next_tick = tick;
 912
 913        /*
 914         * If the expiration time == KTIME_MAX, then we simply stop
 915         * the tick timer.
 916         */
 917        if (unlikely(expires == KTIME_MAX)) {
 918                if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
 919                        hrtimer_cancel(&ts->sched_timer);
 920                return;
 921        }
 922
 923        if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 924                hrtimer_start(&ts->sched_timer, tick,
 925                              HRTIMER_MODE_ABS_PINNED_HARD);
 926        } else {
 927                hrtimer_set_expires(&ts->sched_timer, tick);
 928                tick_program_event(tick, 1);
 929        }
 930}
 931
 932static void tick_nohz_retain_tick(struct tick_sched *ts)
 933{
 934        ts->timer_expires_base = 0;
 935}
 936
 937#ifdef CONFIG_NO_HZ_FULL
 938static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
 939{
 940        if (tick_nohz_next_event(ts, cpu))
 941                tick_nohz_stop_tick(ts, cpu);
 942        else
 943                tick_nohz_retain_tick(ts);
 944}
 945#endif /* CONFIG_NO_HZ_FULL */
 946
 947static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 948{
 949        /* Update jiffies first */
 950        tick_do_update_jiffies64(now);
 951
 952        /*
 953         * Clear the timer idle flag, so we avoid IPIs on remote queueing and
 954         * the clock forward checks in the enqueue path:
 955         */
 956        timer_clear_idle();
 957
 958        calc_load_nohz_stop();
 959        touch_softlockup_watchdog_sched();
 960        /*
 961         * Cancel the scheduled timer and restore the tick
 962         */
 963        ts->tick_stopped  = 0;
 964        tick_nohz_restart(ts, now);
 965}
 966
 967static void __tick_nohz_full_update_tick(struct tick_sched *ts,
 968                                         ktime_t now)
 969{
 970#ifdef CONFIG_NO_HZ_FULL
 971        int cpu = smp_processor_id();
 972
 973        if (can_stop_full_tick(cpu, ts))
 974                tick_nohz_stop_sched_tick(ts, cpu);
 975        else if (ts->tick_stopped)
 976                tick_nohz_restart_sched_tick(ts, now);
 977#endif
 978}
 979
 980static void tick_nohz_full_update_tick(struct tick_sched *ts)
 981{
 982        if (!tick_nohz_full_cpu(smp_processor_id()))
 983                return;
 984
 985        if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
 986                return;
 987
 988        __tick_nohz_full_update_tick(ts, ktime_get());
 989}
 990
 991static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 992{
 993        /*
 994         * If this CPU is offline and it is the one which updates
 995         * jiffies, then give up the assignment and let it be taken by
 996         * the CPU which runs the tick timer next. If we don't drop
 997         * this here the jiffies might be stale and do_timer() never
 998         * invoked.
 999         */
1000        if (unlikely(!cpu_online(cpu))) {
1001                if (cpu == tick_do_timer_cpu)
1002                        tick_do_timer_cpu = TICK_DO_TIMER_NONE;
1003                /*
1004                 * Make sure the CPU doesn't get fooled by obsolete tick
1005                 * deadline if it comes back online later.
1006                 */
1007                ts->next_tick = 0;
1008                return false;
1009        }
1010
1011        if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
1012                return false;
1013
1014        if (need_resched())
1015                return false;
1016
1017        if (unlikely(local_softirq_pending())) {
1018                static int ratelimit;
1019
1020                if (ratelimit < 10 &&
1021                    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
1022                        pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
1023                                (unsigned int) local_softirq_pending());
1024                        ratelimit++;
1025                }
1026                return false;
1027        }
1028
1029        if (tick_nohz_full_enabled()) {
1030                /*
1031                 * Keep the tick alive to guarantee timekeeping progression
1032                 * if there are full dynticks CPUs around
1033                 */
1034                if (tick_do_timer_cpu == cpu)
1035                        return false;
1036                /*
1037                 * Boot safety: make sure the timekeeping duty has been
1038                 * assigned before entering dyntick-idle mode; until then
1039                 * tick_do_timer_cpu is TICK_DO_TIMER_BOOT.
1040                 */
1041                if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT))
1042                        return false;
1043
1044                /* Should not happen for nohz-full */
1045                if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
1046                        return false;
1047        }
1048
1049        return true;
1050}
1051
1052static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
1053{
1054        ktime_t expires;
1055        int cpu = smp_processor_id();
1056
1057        /*
1058         * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
1059         * tick timer expiration time is known already.
1060         */
1061        if (ts->timer_expires_base)
1062                expires = ts->timer_expires;
1063        else if (can_stop_idle_tick(cpu, ts))
1064                expires = tick_nohz_next_event(ts, cpu);
1065        else
1066                return;
1067
1068        ts->idle_calls++;
1069
1070        if (expires > 0LL) {
1071                int was_stopped = ts->tick_stopped;
1072
1073                tick_nohz_stop_tick(ts, cpu);
1074
1075                ts->idle_sleeps++;
1076                ts->idle_expires = expires;
1077
1078                if (!was_stopped && ts->tick_stopped) {
1079                        ts->idle_jiffies = ts->last_jiffies;
1080                        nohz_balance_enter_idle(cpu);
1081                }
1082        } else {
1083                tick_nohz_retain_tick(ts);
1084        }
1085}
1086
1087/**
1088 * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
1089 *
1090 * When the next event is more than a tick into the future, stop the idle tick
1091 */
1092void tick_nohz_idle_stop_tick(void)
1093{
1094        __tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
1095}
1096
1097void tick_nohz_idle_retain_tick(void)
1098{
1099        tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
1100        /*
1101         * Undo the effect of get_next_timer_interrupt() called from
1102         * tick_nohz_next_event().
1103         */
1104        timer_clear_idle();
1105}
1106
1107/**
1108 * tick_nohz_idle_enter - prepare for entering idle on the current CPU
1109 *
1110 * Called when we start the idle loop.
1111 */
1112void tick_nohz_idle_enter(void)
1113{
1114        struct tick_sched *ts;
1115
1116        lockdep_assert_irqs_enabled();
1117
1118        local_irq_disable();
1119
1120        ts = this_cpu_ptr(&tick_cpu_sched);
1121
1122        WARN_ON_ONCE(ts->timer_expires_base);
1123
1124        ts->inidle = 1;
1125        tick_nohz_start_idle(ts);
1126
1127        local_irq_enable();
1128}
1129
1130/**
1131 * tick_nohz_irq_exit - update next tick event from interrupt exit
1132 *
1133 * When an interrupt fires while we are idle and it doesn't cause
1134 * a reschedule, it may still add, modify or delete a timer, enqueue
1135 * an RCU callback, etc...
1136 * So we need to re-calculate and reprogram the next tick event.
1137 */
1138void tick_nohz_irq_exit(void)
1139{
1140        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1141
1142        if (ts->inidle)
1143                tick_nohz_start_idle(ts);
1144        else
1145                tick_nohz_full_update_tick(ts);
1146}
1147
1148/**
1149 * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
1150 */
1151bool tick_nohz_idle_got_tick(void)
1152{
1153        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1154
1155        if (ts->got_idle_tick) {
1156                ts->got_idle_tick = 0;
1157                return true;
1158        }
1159        return false;
1160}
1161
1162/**
1163 * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
1164 * or the tick, whichever expires first. Note that if the tick has been
1165 * stopped, it returns the next hrtimer.
1166 *
1167 * Called from power state control code with interrupts disabled
1168 */
1169ktime_t tick_nohz_get_next_hrtimer(void)
1170{
1171        return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
1172}
1173
1174/**
1175 * tick_nohz_get_sleep_length - return the expected length of the current sleep
1176 * @delta_next: duration until the next event if the tick cannot be stopped
1177 *
1178 * Called from power state control code with interrupts disabled
1179 */
1180ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
1181{
1182        struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
1183        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1184        int cpu = smp_processor_id();
1185        /*
1186         * The idle entry time is expected to be a sufficient approximation of
1187         * the current time at this point.
1188         */
1189        ktime_t now = ts->idle_entrytime;
1190        ktime_t next_event;
1191
1192        WARN_ON_ONCE(!ts->inidle);
1193
1194        *delta_next = ktime_sub(dev->next_event, now);
1195
1196        if (!can_stop_idle_tick(cpu, ts))
1197                return *delta_next;
1198
1199        next_event = tick_nohz_next_event(ts, cpu);
1200        if (!next_event)
1201                return *delta_next;
1202
1203        /*
1204         * If the next highres timer to expire is earlier than next_event, the
1205         * idle governor needs to know that.
1206         */
1207        next_event = min_t(u64, next_event,
1208                           hrtimer_next_event_without(&ts->sched_timer));
1209
1210        return ktime_sub(next_event, now);
1211}
1212
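/*
 * A sketch of how an idle governor might consume the predicted sleep length:
 * pick the deepest idle state whose target residency still fits. Everything
 * prefixed with my_ is hypothetical; real governors (e.g. the menu governor)
 * are considerably more involved. Illustrative only, not built here.
 */
#if 0
struct my_idle_state {
	s64 target_residency_ns;
};

static struct my_idle_state my_state[4];	/* hypothetical state table */
static int my_nr_states = 4;

static int my_pick_idle_state(void)
{
	ktime_t delta_next;
	s64 sleep_ns = ktime_to_ns(tick_nohz_get_sleep_length(&delta_next));
	int i, pick = 0;

	for (i = 1; i < my_nr_states; i++) {
		if (my_state[i].target_residency_ns <= sleep_ns)
			pick = i;
	}

	/* delta_next bounds the sleep in case the tick cannot be stopped. */
	return pick;
}
#endif
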
1213/**
1214 * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
1215 * for a particular CPU.
1216 *
1217 * Called from the schedutil frequency scaling governor in scheduler context.
1218 */
1219unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
1220{
1221        struct tick_sched *ts = tick_get_tick_sched(cpu);
1222
1223        return ts->idle_calls;
1224}
1225
1226/**
1227 * tick_nohz_get_idle_calls - return the current idle calls counter value
1228 *
1229 * Called from the schedutil frequency scaling governor in scheduler context.
1230 */
1231unsigned long tick_nohz_get_idle_calls(void)
1232{
1233        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1234
1235        return ts->idle_calls;
1236}
1237
1238static void tick_nohz_account_idle_time(struct tick_sched *ts,
1239                                        ktime_t now)
1240{
1241        unsigned long ticks;
1242
1243        ts->idle_exittime = now;
1244
1245        if (vtime_accounting_enabled_this_cpu())
1246                return;
1247        /*
1248         * We stopped the tick in idle. update_process_times() would miss
1249         * the time we slept, as it only accounts a single tick per call.
1250         * Enforce that this is accounted to idle!
1251         */
1252        ticks = jiffies - ts->idle_jiffies;
1253        /*
1254         * We might be one off. Do not randomly account a huge number of ticks!
1255         */
1256        if (ticks && ticks < LONG_MAX)
1257                account_idle_ticks(ticks);
1258}
1259
1260void tick_nohz_idle_restart_tick(void)
1261{
1262        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1263
1264        if (ts->tick_stopped) {
1265                ktime_t now = ktime_get();
1266                tick_nohz_restart_sched_tick(ts, now);
1267                tick_nohz_account_idle_time(ts, now);
1268        }
1269}
1270
1271static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
1272{
1273        if (tick_nohz_full_cpu(smp_processor_id()))
1274                __tick_nohz_full_update_tick(ts, now);
1275        else
1276                tick_nohz_restart_sched_tick(ts, now);
1277
1278        tick_nohz_account_idle_time(ts, now);
1279}
1280
1281/**
1282 * tick_nohz_idle_exit - restart the idle tick from the idle task
1283 *
1284 * Restart the idle tick when the CPU is woken up from idle.
1285 * This also exits the RCU extended quiescent state. The CPU
1286 * can use RCU again after this function is called.
1287 */
1288void tick_nohz_idle_exit(void)
1289{
1290        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1291        bool idle_active, tick_stopped;
1292        ktime_t now;
1293
1294        local_irq_disable();
1295
1296        WARN_ON_ONCE(!ts->inidle);
1297        WARN_ON_ONCE(ts->timer_expires_base);
1298
1299        ts->inidle = 0;
1300        idle_active = ts->idle_active;
1301        tick_stopped = ts->tick_stopped;
1302
1303        if (idle_active || tick_stopped)
1304                now = ktime_get();
1305
1306        if (idle_active)
1307                tick_nohz_stop_idle(ts, now);
1308
1309        if (tick_stopped)
1310                tick_nohz_idle_update_tick(ts, now);
1311
1312        local_irq_enable();
1313}
1314
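/*
 * A simplified sketch of the call sequence the enter/stop/retain/exit API
 * above is designed for. The real driver is the idle loop in
 * kernel/sched/idle.c; the my_*() hooks are hypothetical placeholders.
 * Illustrative only, not built here.
 */
#if 0
static bool my_want_tick_stopped(void);		/* hypothetical policy hook */
static void my_enter_sleep_state(void);		/* hypothetical sleep hook */

static void my_idle_loop(void)
{
	tick_nohz_idle_enter();			/* mark idle, start idle accounting */

	while (!need_resched()) {
		if (my_want_tick_stopped())
			tick_nohz_idle_stop_tick();
		else
			tick_nohz_idle_retain_tick();

		my_enter_sleep_state();		/* wait for an interrupt */
	}

	tick_nohz_idle_exit();			/* restart the tick, account idle time */
}
#endif
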
1315/*
1316 * The nohz low res interrupt handler
1317 */
1318static void tick_nohz_handler(struct clock_event_device *dev)
1319{
1320        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1321        struct pt_regs *regs = get_irq_regs();
1322        ktime_t now = ktime_get();
1323
1324        dev->next_event = KTIME_MAX;
1325
1326        tick_sched_do_timer(ts, now);
1327        tick_sched_handle(ts, regs);
1328
1329        /* No need to reprogram if we are running tickless  */
1330        if (unlikely(ts->tick_stopped))
1331                return;
1332
1333        hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
1334        tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
1335}
1336
1337static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
1338{
1339        if (!tick_nohz_enabled)
1340                return;
1341        ts->nohz_mode = mode;
1342        /* One update is enough */
1343        if (!test_and_set_bit(0, &tick_nohz_active))
1344                timers_update_nohz();
1345}
1346
1347/**
1348 * tick_nohz_switch_to_nohz - switch to nohz mode
1349 */
1350static void tick_nohz_switch_to_nohz(void)
1351{
1352        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1353        ktime_t next;
1354
1355        if (!tick_nohz_enabled)
1356                return;
1357
1358        if (tick_switch_to_oneshot(tick_nohz_handler))
1359                return;
1360
1361        /*
1362         * Recycle the hrtimer in ts, so we can share the
1363         * hrtimer_forward with the highres code.
1364         */
1365        hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1366        /* Get the next period */
1367        next = tick_init_jiffy_update();
1368
1369        hrtimer_set_expires(&ts->sched_timer, next);
1370        hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
1371        tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
1372        tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
1373}
1374
1375static inline void tick_nohz_irq_enter(void)
1376{
1377        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1378        ktime_t now;
1379
1380        if (!ts->idle_active && !ts->tick_stopped)
1381                return;
1382        now = ktime_get();
1383        if (ts->idle_active)
1384                tick_nohz_stop_idle(ts, now);
1385        if (ts->tick_stopped)
1386                tick_nohz_update_jiffies(now);
1387}
1388
1389#else
1390
1391static inline void tick_nohz_switch_to_nohz(void) { }
1392static inline void tick_nohz_irq_enter(void) { }
1393static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
1394
1395#endif /* CONFIG_NO_HZ_COMMON */
1396
1397/*
1398 * Called from irq_enter to notify about the possible interruption of idle()
1399 */
1400void tick_irq_enter(void)
1401{
1402        tick_check_oneshot_broadcast_this_cpu();
1403        tick_nohz_irq_enter();
1404}
1405
1406/*
1407 * High resolution timer specific code
1408 */
1409#ifdef CONFIG_HIGH_RES_TIMERS
1410/*
1411 * We rearm the timer until we get disabled by the idle code.
1412 * Called with interrupts disabled.
1413 */
1414static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
1415{
1416        struct tick_sched *ts =
1417                container_of(timer, struct tick_sched, sched_timer);
1418        struct pt_regs *regs = get_irq_regs();
1419        ktime_t now = ktime_get();
1420
1421        tick_sched_do_timer(ts, now);
1422
1423        /*
1424         * Do not call tick_sched_handle() when we are not in irq
1425         * context and have no valid regs pointer.
1426         */
1427        if (regs)
1428                tick_sched_handle(ts, regs);
1429        else
1430                ts->next_tick = 0;
1431
1432        /* No need to reprogram if we are in idle or full dynticks mode */
1433        if (unlikely(ts->tick_stopped))
1434                return HRTIMER_NORESTART;
1435
1436        hrtimer_forward(timer, now, TICK_NSEC);
1437
1438        return HRTIMER_RESTART;
1439}
1440
1441static int sched_skew_tick;
1442
1443static int __init skew_tick(char *str)
1444{
1445        get_option(&str, &sched_skew_tick);
1446
1447        return 0;
1448}
1449early_param("skew_tick", skew_tick);
1450
1451/**
1452 * tick_setup_sched_timer - setup the tick emulation timer
1453 */
1454void tick_setup_sched_timer(void)
1455{
1456        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1457        ktime_t now = ktime_get();
1458
1459        /*
1460         * Emulate tick processing via per-CPU hrtimers:
1461         */
1462        hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
1463        ts->sched_timer.function = tick_sched_timer;
1464
1465        /* Get the next period (per-CPU) */
1466        hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
1467
1468        /* Offset the tick to avert jiffies_lock contention. */
1469        if (sched_skew_tick) {
1470                u64 offset = TICK_NSEC >> 1;
1471                do_div(offset, num_possible_cpus());
1472                offset *= smp_processor_id();
1473                hrtimer_add_expires_ns(&ts->sched_timer, offset);
1474        }
1475
1476        hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
1477        hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
1478        tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
1479}
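
/*
 * Worked example of the skew offset above (illustrative; assumes HZ=250,
 * i.e. TICK_NSEC = 4,000,000 ns, and 4 possible CPUs):
 *
 *	offset  = TICK_NSEC >> 1          = 2,000,000 ns
 *	offset /= num_possible_cpus()     =   500,000 ns
 *	CPU0: 0 ns   CPU1: 500,000 ns   CPU2: 1,000,000 ns   CPU3: 1,500,000 ns
 *
 * so the per-CPU tick timers expire spread over half a tick period instead
 * of all contending on jiffies_lock at the same instant.
 */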
1480#endif /* HIGH_RES_TIMERS */
1481
1482#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
1483void tick_cancel_sched_timer(int cpu)
1484{
1485        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
1486
1487# ifdef CONFIG_HIGH_RES_TIMERS
1488        if (ts->sched_timer.base)
1489                hrtimer_cancel(&ts->sched_timer);
1490# endif
1491
1492        memset(ts, 0, sizeof(*ts));
1493}
1494#endif
1495
1496/**
1497 * Async notification about clocksource changes
1498 */
1499void tick_clock_notify(void)
1500{
1501        int cpu;
1502
1503        for_each_possible_cpu(cpu)
1504                set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
1505}
1506
1507/*
1508 * Async notification about clock event changes
1509 */
1510void tick_oneshot_notify(void)
1511{
1512        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1513
1514        set_bit(0, &ts->check_clocks);
1515}
1516
1517/**
1518 * Check if a change happened which makes oneshot possible.
1519 *
1520 * Called cyclically from the hrtimer softirq (driven by the timer
1521 * softirq). The allow_nohz argument signals that we can switch into
1522 * low-res nohz mode because high resolution timers are disabled
1523 * (either at compile time or at runtime). Called with interrupts disabled.
1524 */
1525int tick_check_oneshot_change(int allow_nohz)
1526{
1527        struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1528
1529        if (!test_and_clear_bit(0, &ts->check_clocks))
1530                return 0;
1531
1532        if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
1533                return 0;
1534
1535        if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
1536                return 0;
1537
1538        if (!allow_nohz)
1539                return 1;
1540
1541        tick_nohz_switch_to_nohz();
1542        return 0;
1543}
1544