/*
 * linux/kernel/time/tick-common.c
 *
 * This file contains the base functions to manage periodic tick
 * related events.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 *
 * This code is licensed under the GPL version 2. For details see
 * kernel-base/COPYING.
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <trace/events/power.h>

#include <asm/irq_regs.h>

#include "tick-internal.h"

/*
 * Tick devices
 */
DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
/*
 * Tick next event: keeps track of the tick time
 */
ktime_t tick_next_period;
ktime_t tick_period;

/*
 * tick_do_timer_cpu is a timer core internal variable which holds the CPU NR
 * which is responsible for calling do_timer(), i.e. the timekeeping stuff. This
 * variable has two functions:
 *
 * 1) Prevent a thundering herd issue of a gazillion of CPUs trying to grab the
 *    timekeeping lock all at once. Only the CPU which is assigned to do the
 *    update is handling it.
 *
 * 2) Hand off the duty in the NOHZ idle case by setting the value to
 *    TICK_DO_TIMER_NONE, i.e. a non-existing CPU. So the next CPU which looks
 *    at it will take over and keep timekeeping alive. The handover
 *    procedure also covers CPU hotplug.
 */
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
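
/*
 * Illustrative sketch (not part of this file): the NOHZ handover pattern
 * described above, simplified from the code in kernel/time/tick-sched.c.
 * Guarded out because it only illustrates the protocol:
 */
#if 0
        /* On entering NOHZ idle, give up the do_timer duty: */
        if (tick_do_timer_cpu == cpu)
                tick_do_timer_cpu = TICK_DO_TIMER_NONE;

        /* In the next tick handler on any CPU, adopt the orphaned duty: */
        if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
                tick_do_timer_cpu = cpu;
#endif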

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_device(int cpu)
{
        return &per_cpu(tick_cpu_device, cpu);
}

/**
 * tick_is_oneshot_available - check for a oneshot capable event device
 */
int tick_is_oneshot_available(void)
{
        struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);

        if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT))
                return 0;
        if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                return 1;
        return tick_broadcast_oneshot_available();
}

/*
 * Periodic tick
 */
static void tick_periodic(int cpu)
{
        if (tick_do_timer_cpu == cpu) {
                write_seqlock(&jiffies_lock);

                /* Keep track of the next tick event */
                tick_next_period = ktime_add(tick_next_period, tick_period);

                do_timer(1);
                write_sequnlock(&jiffies_lock);
                update_wall_time();
        }

        update_process_times(user_mode(get_irq_regs()));
        profile_tick(CPU_PROFILING);
}
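
/*
 * Illustrative sketch (not part of this file): the reader side that pairs
 * with the write_seqlock() section above, simplified from get_jiffies_64()
 * in kernel/time/jiffies.c. The same retry pattern is used below in
 * tick_setup_periodic() to sample tick_next_period consistently:
 */
#if 0
u64 get_jiffies_64(void)
{
        unsigned long seq;
        u64 ret;

        do {
                seq = read_seqbegin(&jiffies_lock);
                ret = jiffies_64;
        } while (read_seqretry(&jiffies_lock, seq));
        return ret;
}
#endif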

/*
 * Event handler for periodic ticks
 */
void tick_handle_periodic(struct clock_event_device *dev)
{
        int cpu = smp_processor_id();
        ktime_t next = dev->next_event;

        tick_periodic(cpu);

#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON)
        /*
         * The cpu might have transitioned to HIGHRES or NOHZ mode via
         * update_process_times() -> run_local_timers() ->
         * hrtimer_run_queues().
         */
        if (dev->event_handler != tick_handle_periodic)
                return;
#endif

        if (!clockevent_state_oneshot(dev))
                return;
        for (;;) {
                /*
                 * Set up the next period for devices which do not have
                 * periodic mode:
                 */
                next = ktime_add(next, tick_period);

                if (!clockevents_program_event(dev, next, false))
                        return;
                /*
                 * Have to be careful here. If we're in oneshot mode,
                 * before we call tick_periodic() in a loop, we need
                 * to be sure we're using a real hardware clocksource.
                 * Otherwise we could get trapped in an infinite
                 * loop, as tick_periodic() increments jiffies,
                 * which then will increment time, possibly causing
                 * the loop to trigger again and again.
                 */
                if (timekeeping_valid_for_hres())
                        tick_periodic(cpu);
        }
}

/*
 * Setup the device for a periodic tick
 */
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
        tick_set_periodic_handler(dev, broadcast);

        /* Broadcast setup ? */
        if (!tick_device_is_functional(dev))
                return;

        if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
            !tick_broadcast_oneshot_active()) {
                clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC);
        } else {
                unsigned long seq;
                ktime_t next;

                do {
                        seq = read_seqbegin(&jiffies_lock);
                        next = tick_next_period;
                } while (read_seqretry(&jiffies_lock, seq));

                clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);

                for (;;) {
                        if (!clockevents_program_event(dev, next, false))
                                return;
                        next = ktime_add(next, tick_period);
                }
        }
}

/*
 * Setup the tick device
 */
static void tick_setup_device(struct tick_device *td,
                              struct clock_event_device *newdev, int cpu,
                              const struct cpumask *cpumask)
{
        ktime_t next_event;
        void (*handler)(struct clock_event_device *) = NULL;

        /*
         * First device setup ?
         */
        if (!td->evtdev) {
                /*
                 * If no cpu took the do_timer update, assign it to
                 * this cpu:
                 */
                if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
                        if (!tick_nohz_full_cpu(cpu))
                                tick_do_timer_cpu = cpu;
                        else
                                tick_do_timer_cpu = TICK_DO_TIMER_NONE;
                        tick_next_period = ktime_get();
                        tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
                }

                /*
                 * Startup in periodic mode first.
                 */
                td->mode = TICKDEV_MODE_PERIODIC;
        } else {
                handler = td->evtdev->event_handler;
                next_event = td->evtdev->next_event;
                td->evtdev->event_handler = clockevents_handle_noop;
        }

        td->evtdev = newdev;

        /*
         * When the device is not per cpu, pin the interrupt to the
         * current cpu:
         */
        if (!cpumask_equal(newdev->cpumask, cpumask))
                irq_set_affinity(newdev->irq, cpumask);

        /*
         * When global broadcasting is active, check if the current
         * device is registered as a placeholder for broadcast mode.
         * This allows us to handle this x86 misfeature in a generic
         * way. This function also returns !=0 when we keep the
         * current active broadcast state for this CPU.
         */
        if (tick_device_uses_broadcast(newdev, cpu))
                return;

        if (td->mode == TICKDEV_MODE_PERIODIC)
                tick_setup_periodic(newdev, 0);
        else
                tick_setup_oneshot(newdev, handler, next_event);
}

void tick_install_replacement(struct clock_event_device *newdev)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        int cpu = smp_processor_id();

        clockevents_exchange_device(td->evtdev, newdev);
        tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
        if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
                tick_oneshot_notify();
}

static bool tick_check_percpu(struct clock_event_device *curdev,
                              struct clock_event_device *newdev, int cpu)
{
        if (!cpumask_test_cpu(cpu, newdev->cpumask))
                return false;
        if (cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
                return true;
        /* Check if irq affinity can be set */
        if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq))
                return false;
        /* Prefer an existing cpu local device */
        if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
                return false;
        return true;
}

static bool tick_check_preferred(struct clock_event_device *curdev,
                                 struct clock_event_device *newdev)
{
        /* Prefer oneshot capable device */
        if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) {
                if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT))
                        return false;
                if (tick_oneshot_mode_active())
                        return false;
        }

        /*
         * Use the higher rated one, but prefer a CPU local device with a lower
         * rating than a non-CPU local device
         */
        return !curdev ||
               newdev->rating > curdev->rating ||
               !cpumask_equal(curdev->cpumask, newdev->cpumask);
}
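
/*
 * Illustrative example (not part of this file) of the preference rules
 * above, with hypothetical names and ratings: the CPU local device wins
 * even though its rating is lower, because the cpumasks differ (last
 * clause above). The reverse replacement, a global device ousting a CPU
 * local one, is already rejected earlier in tick_check_percpu():
 */
#if 0
static struct clock_event_device global_dev = {         /* hypothetical */
        .name           = "global-timer",
        .features       = CLOCK_EVT_FEAT_ONESHOT,
        .rating         = 300,
        .cpumask        = cpu_possible_mask,
};

static struct clock_event_device local_dev = {          /* hypothetical */
        .name           = "percpu-timer",
        .features       = CLOCK_EVT_FEAT_ONESHOT,
        .rating         = 100,  /* lower, but CPU local: still preferred */
        /* .cpumask set to cpumask_of(cpu) at registration time */
};
#endif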

/*
 * Check whether the new device is a better fit than curdev. curdev
 * can be NULL !
 */
bool tick_check_replacement(struct clock_event_device *curdev,
                            struct clock_event_device *newdev)
{
        if (!tick_check_percpu(curdev, newdev, smp_processor_id()))
                return false;

        return tick_check_preferred(curdev, newdev);
}

/*
 * Check whether the newly registered device should be used. Called with
 * clockevents_lock held and interrupts disabled.
 */
void tick_check_new_device(struct clock_event_device *newdev)
{
        struct clock_event_device *curdev;
        struct tick_device *td;
        int cpu;

        cpu = smp_processor_id();
        td = &per_cpu(tick_cpu_device, cpu);
        curdev = td->evtdev;

        /* cpu local device ? */
        if (!tick_check_percpu(curdev, newdev, cpu))
                goto out_bc;

        /* Preference decision */
        if (!tick_check_preferred(curdev, newdev))
                goto out_bc;

        if (!try_module_get(newdev->owner))
                return;

        /*
         * Replace the possibly existing device by the new
         * device. If the current device is the broadcast device, do
         * not give it back to the clockevents layer !
         */
        if (tick_is_broadcast_device(curdev)) {
                clockevents_shutdown(curdev);
                curdev = NULL;
        }
        clockevents_exchange_device(curdev, newdev);
        tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
        if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
                tick_oneshot_notify();
        return;

out_bc:
        /*
         * Can the new device be used as a broadcast device ?
         */
        tick_install_broadcast_device(newdev);
}
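
/*
 * Illustrative sketch (not part of this file): how tick_check_new_device()
 * is typically reached. A timer driver fills in a struct clock_event_device
 * and registers it; clockevents_register_device() then invokes the check
 * above with clockevents_lock held. All names and values below are
 * hypothetical:
 */
#if 0
static struct clock_event_device foo_timer_clockevent = {
        .name           = "foo-timer",
        .features       = CLOCK_EVT_FEAT_ONESHOT,
        .rating         = 300,
        .set_next_event = foo_timer_set_next_event,
};

/* Computes mult/shift from the frequency, then registers the device: */
clockevents_config_and_register(&foo_timer_clockevent, 1000000 /* Hz */,
                                0xf, 0x7fffffff);
#endif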

/**
 * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
 * @state:      The target state (enter/exit)
 *
 * The system enters/leaves a state where affected clock event devices
 * might stop. Returns 0 on success, -EBUSY if the cpu is used to
 * broadcast wakeups.
 *
 * Called with interrupts disabled, so clockevents_lock is not
 * required here because the local clock event device cannot go away
 * under us.
 */
int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

        if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP))
                return 0;

        return __tick_broadcast_oneshot_control(state);
}
EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
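
/*
 * Illustrative sketch (not part of this file): an idle driver uses the
 * function above via the tick_broadcast_enter()/tick_broadcast_exit()
 * wrappers from <linux/tick.h> around a power state that stops the local
 * timer (CLOCK_EVT_FEAT_C3STOP). enter_deep_idle_state() is hypothetical:
 */
#if 0
        if (tick_broadcast_enter()) {
                /*
                 * -EBUSY: this CPU provides the broadcast wakeups and must
                 * keep its timer; fall back to a shallower idle state.
                 */
                return -EBUSY;
        }
        enter_deep_idle_state();        /* hypothetical deep C-state entry */
        tick_broadcast_exit();
#endif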

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Transfer the do_timer job away from a dying cpu.
 *
 * Called with interrupts disabled. No locking required. If
 * tick_do_timer_cpu is owned by this cpu, nothing can change it.
 */
void tick_handover_do_timer(void)
{
        if (tick_do_timer_cpu == smp_processor_id()) {
                int cpu = cpumask_first(cpu_online_mask);

                tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
                        TICK_DO_TIMER_NONE;
        }
}

/*
 * Shutdown an event device on a given cpu:
 *
 * This is called on a live CPU when another CPU is dead, so we cannot
 * access the dead CPU's hardware device itself.
 * We just set the mode and remove it from the lists.
 */
void tick_shutdown(unsigned int cpu)
{
        struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
        struct clock_event_device *dev = td->evtdev;

        td->mode = TICKDEV_MODE_PERIODIC;
        if (dev) {
                /*
                 * Prevent the clock events layer from trying to call
                 * the set mode function!
                 */
                clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
                clockevents_exchange_device(dev, NULL);
                dev->event_handler = clockevents_handle_noop;
                td->evtdev = NULL;
        }
}
#endif

/**
 * tick_suspend_local - Suspend the local tick device
 *
 * Called from the local cpu for freeze with interrupts disabled.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_suspend_local(void)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

        clockevents_shutdown(td->evtdev);
}

/**
 * tick_resume_local - Resume the local tick device
 *
 * Called from the local CPU for unfreeze or XEN resume magic.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_resume_local(void)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        bool broadcast = tick_resume_check_broadcast();

        clockevents_tick_resume(td->evtdev);
        if (!broadcast) {
                if (td->mode == TICKDEV_MODE_PERIODIC)
                        tick_setup_periodic(td->evtdev, 0);
                else
                        tick_resume_oneshot();
        }
}

/**
 * tick_suspend - Suspend the tick and the broadcast device
 *
 * Called from syscore_suspend() via timekeeping_suspend() with only one
 * CPU online and interrupts disabled, or from tick_unfreeze() under
 * tick_freeze_lock.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_suspend(void)
{
        tick_suspend_local();
        tick_suspend_broadcast();
}

/**
 * tick_resume - Resume the tick and the broadcast device
 *
 * Called from syscore_resume() via timekeeping_resume() with only one
 * CPU online and interrupts disabled.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_resume(void)
{
        tick_resume_broadcast();
        tick_resume_local();
}

#ifdef CONFIG_SUSPEND
static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
static unsigned int tick_freeze_depth;

/**
 * tick_freeze - Suspend the local tick and (possibly) timekeeping.
 *
 * Check if this is the last online CPU executing the function and if so,
 * suspend timekeeping.  Otherwise suspend the local tick.
 *
 * Call with interrupts disabled.  Must be balanced with %tick_unfreeze().
 * Interrupts must not be enabled before the subsequent %tick_unfreeze().
 */
void tick_freeze(void)
{
        raw_spin_lock(&tick_freeze_lock);

        tick_freeze_depth++;
        if (tick_freeze_depth == num_online_cpus()) {
                trace_suspend_resume(TPS("timekeeping_freeze"),
                                     smp_processor_id(), true);
                timekeeping_suspend();
        } else {
                tick_suspend_local();
        }

        raw_spin_unlock(&tick_freeze_lock);
}

/**
 * tick_unfreeze - Resume the local tick and (possibly) timekeeping.
 *
 * Check if this is the first CPU executing the function and if so, resume
 * timekeeping.  Otherwise resume the local tick.
 *
 * Call with interrupts disabled.  Must be balanced with %tick_freeze().
 * Interrupts must not be enabled after the preceding %tick_freeze().
 */
void tick_unfreeze(void)
{
        raw_spin_lock(&tick_freeze_lock);

        if (tick_freeze_depth == num_online_cpus()) {
                timekeeping_resume();
                trace_suspend_resume(TPS("timekeeping_freeze"),
                                     smp_processor_id(), false);
        } else {
                tick_resume_local();
        }

        tick_freeze_depth--;

        raw_spin_unlock(&tick_freeze_lock);
}
#endif /* CONFIG_SUSPEND */
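
/*
 * Illustrative sketch (not part of this file): on the suspend-to-idle path
 * each CPU brackets its idle entry with the pair above, roughly following
 * cpuidle_enter_freeze() in drivers/cpuidle/cpuidle.c. The last CPU to call
 * tick_freeze() suspends timekeeping; the first to call tick_unfreeze()
 * resumes it. enter_freeze_state() is hypothetical:
 */
#if 0
        tick_freeze();
        /* Interrupts stay disabled until tick_unfreeze() has run. */
        enter_freeze_state();   /* hypothetical deepest idle state */
        tick_unfreeze();
#endif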

/**
 * tick_init - initialize the tick control
 */
void __init tick_init(void)
{
        tick_broadcast_init();
        tick_nohz_init();
}