linux/kernel/time/tick-common.c
// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains the base functions to manage periodic tick
 * related events.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <trace/events/power.h>

#include <asm/irq_regs.h>

#include "tick-internal.h"

/*
 * Tick devices
 */
DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
/*
 * Tick next event: keeps track of the next periodic tick expiry.
 * tick_period is the length of one tick period (NSEC_PER_SEC / HZ).
 */
ktime_t tick_next_period;
ktime_t tick_period;

/*
 * tick_do_timer_cpu is a timer core internal variable which holds the CPU NR
 * which is responsible for calling do_timer(), i.e. the timekeeping stuff. This
 * variable has two functions:
 *
 * 1) Prevent a thundering herd issue of a gazillion CPUs trying to grab the
 *    timekeeping lock all at once. Only the CPU which is assigned to do the
 *    update is handling it.
 *
 * 2) Hand off the duty in the NOHZ idle case by setting the value to
 *    TICK_DO_TIMER_NONE, i.e. a non-existing CPU. So the next CPU which looks
 *    at it will take over and keep timekeeping alive. The handover procedure
 *    also covers CPU hotplug.
 */
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;

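/*
 * A sketch, not code from this file: in the NOHZ idle case the next CPU
 * whose tick fires notices the TICK_DO_TIMER_NONE value and takes the
 * duty over along these lines (see tick_sched_do_timer() in
 * tick-sched.c):
 *
 *	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
 *		tick_do_timer_cpu = cpu;
 */
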
/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_device(int cpu)
{
	return &per_cpu(tick_cpu_device, cpu);
}

/**
 * tick_is_oneshot_available - check for a oneshot capable event device
 */
int tick_is_oneshot_available(void)
{
	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);

	if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return 0;
	/* A device which keeps ticking in deep C-states is sufficient */
	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
		return 1;
	/* The device stops in deep C-states: a broadcast device must exist */
	return tick_broadcast_oneshot_available();
}

/*
 * Periodic tick
 */
static void tick_periodic(int cpu)
{
	if (tick_do_timer_cpu == cpu) {
		write_seqlock(&jiffies_lock);

		/* Keep track of the next tick event */
		tick_next_period = ktime_add(tick_next_period, tick_period);

		do_timer(1);
		write_sequnlock(&jiffies_lock);
		update_wall_time();
	}

	update_process_times(user_mode(get_irq_regs()));
	profile_tick(CPU_PROFILING);
}

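/*
 * The jiffies_lock write section in tick_periodic() pairs with seqlock
 * readers such as the read_seqbegin()/read_seqretry() loop in
 * tick_setup_periodic() below and get_jiffies_64().
 */
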
/*
 * Event handler for periodic ticks
 */
void tick_handle_periodic(struct clock_event_device *dev)
{
	int cpu = smp_processor_id();
	ktime_t next = dev->next_event;

	tick_periodic(cpu);

#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON)
	/*
	 * The cpu might have transitioned to HIGHRES or NOHZ mode via
	 * update_process_times() -> run_local_timers() ->
	 * hrtimer_run_queues().
	 */
	if (dev->event_handler != tick_handle_periodic)
		return;
#endif

	if (!clockevent_state_oneshot(dev))
		return;
	for (;;) {
		/*
		 * Set up the next period for devices which do not have
		 * periodic mode:
		 */
		next = ktime_add(next, tick_period);

		/* A zero return means the event was programmed successfully */
		if (!clockevents_program_event(dev, next, false))
			return;
		/*
		 * Have to be careful here. If we're in oneshot mode,
		 * before we call tick_periodic() in a loop, we need
		 * to be sure we're using a real hardware clocksource.
		 * Otherwise we could get trapped in an infinite loop,
		 * as tick_periodic() increments jiffies, which then
		 * will increment time, possibly causing the loop to
		 * trigger again and again.
		 */
		if (timekeeping_valid_for_hres())
			tick_periodic(cpu);
	}
}

/*
 * Setup the device for a periodic tick
 */
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
	/* @broadcast selects tick_handle_periodic{,_broadcast}() as handler */
	tick_set_periodic_handler(dev, broadcast);

	/* Broadcast setup ? */
	if (!tick_device_is_functional(dev))
		return;

	if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
	    !tick_broadcast_oneshot_active()) {
		clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC);
	} else {
		unsigned long seq;
		ktime_t next;

		do {
			seq = read_seqbegin(&jiffies_lock);
			next = tick_next_period;
		} while (read_seqretry(&jiffies_lock, seq));

		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);

		/* Advance until the event can be programmed in the future */
		for (;;) {
			if (!clockevents_program_event(dev, next, false))
				return;
			next = ktime_add(next, tick_period);
		}
	}
}

/*
 * Setup the tick device
 */
static void tick_setup_device(struct tick_device *td,
			      struct clock_event_device *newdev, int cpu,
			      const struct cpumask *cpumask)
{
	void (*handler)(struct clock_event_device *) = NULL;
	ktime_t next_event = 0;

	/*
	 * First device setup ?
	 */
	if (!td->evtdev) {
		/*
		 * If no cpu took the do_timer update, assign it to
		 * this cpu:
		 */
		if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
			if (!tick_nohz_full_cpu(cpu))
				tick_do_timer_cpu = cpu;
			else
				tick_do_timer_cpu = TICK_DO_TIMER_NONE;
			tick_next_period = ktime_get();
			/* e.g. HZ=250 yields a 4,000,000 ns (4 ms) period */
			tick_period = NSEC_PER_SEC / HZ;
		}

		/*
		 * Startup in periodic mode first.
		 */
		td->mode = TICKDEV_MODE_PERIODIC;
	} else {
		handler = td->evtdev->event_handler;
		next_event = td->evtdev->next_event;
		td->evtdev->event_handler = clockevents_handle_noop;
	}

	td->evtdev = newdev;

	/*
	 * When the device is not per cpu, pin the interrupt to the
	 * current cpu:
	 */
	if (!cpumask_equal(newdev->cpumask, cpumask))
		irq_set_affinity(newdev->irq, cpumask);

	/*
	 * When global broadcasting is active, check if the current
	 * device is registered as a placeholder for broadcast mode.
	 * This allows us to handle this x86 misfeature in a generic
	 * way. This function also returns !=0 when we keep the
	 * current active broadcast state for this CPU.
	 */
	if (tick_device_uses_broadcast(newdev, cpu))
		return;

	if (td->mode == TICKDEV_MODE_PERIODIC)
		tick_setup_periodic(newdev, 0);
	else
		tick_setup_oneshot(newdev, handler, next_event);
}

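/**
 * tick_install_replacement - Install a replacement per-CPU tick device
 * @newdev:	The replacement clock event device
 *
 * Exchanges the tick device of the current CPU for @newdev and sets
 * @newdev up in the mode (periodic/oneshot) the old device was
 * operating in.
 */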
void tick_install_replacement(struct clock_event_device *newdev)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	int cpu = smp_processor_id();

	clockevents_exchange_device(td->evtdev, newdev);
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify();
}

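/*
 * Check whether @newdev can serve as the tick device of @cpu: it must
 * be able to deliver interrupts to @cpu, either because it is strictly
 * CPU-local or because its interrupt affinity can be pointed at @cpu.
 * An already installed CPU-local device wins over a non-local newcomer.
 */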
static bool tick_check_percpu(struct clock_event_device *curdev,
			      struct clock_event_device *newdev, int cpu)
{
	if (!cpumask_test_cpu(cpu, newdev->cpumask))
		return false;
	if (cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
		return true;
	/* Check if irq affinity can be set */
	if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq))
		return false;
	/* Prefer an existing cpu local device */
	if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
		return false;
	return true;
}

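/*
 * Decide whether @newdev is preferable to @curdev: oneshot capability
 * is considered first, then the rating, with a CPU-local device
 * preferred over a non-local one even if its rating is lower.
 */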
static bool tick_check_preferred(struct clock_event_device *curdev,
				 struct clock_event_device *newdev)
{
	/* Prefer oneshot capable device */
	if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) {
		if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT))
			return false;
		if (tick_oneshot_mode_active())
			return false;
	}

	/*
	 * Use the higher rated one, but prefer a CPU local device with a lower
	 * rating than a non-CPU local device
	 */
	return !curdev ||
	       newdev->rating > curdev->rating ||
	       !cpumask_equal(curdev->cpumask, newdev->cpumask);
}

/*
 * Check whether the new device is a better fit than curdev. curdev
 * can be NULL !
 */
bool tick_check_replacement(struct clock_event_device *curdev,
			    struct clock_event_device *newdev)
{
	if (!tick_check_percpu(curdev, newdev, smp_processor_id()))
		return false;

	return tick_check_preferred(curdev, newdev);
}

/*
 * Check if the newly registered device should be used. Called with
 * clockevents_lock held and interrupts disabled.
 */
void tick_check_new_device(struct clock_event_device *newdev)
{
	struct clock_event_device *curdev;
	struct tick_device *td;
	int cpu;

	cpu = smp_processor_id();
	td = &per_cpu(tick_cpu_device, cpu);
	curdev = td->evtdev;

	/* cpu local device ? */
	if (!tick_check_percpu(curdev, newdev, cpu))
		goto out_bc;

	/* Preference decision */
	if (!tick_check_preferred(curdev, newdev))
		goto out_bc;

	if (!try_module_get(newdev->owner))
		return;

	/*
	 * Replace the existing device, if any, with the new device. If
	 * the current device is the broadcast device, do not give it
	 * back to the clockevents layer!
	 */
	if (tick_is_broadcast_device(curdev)) {
		clockevents_shutdown(curdev);
		curdev = NULL;
	}
	clockevents_exchange_device(curdev, newdev);
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify();
	return;

out_bc:
	/*
	 * Can the new device be used as a broadcast device ?
	 */
	tick_install_broadcast_device(newdev);
}

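/*
 * Illustrative sketch, not part of this file: a clockevent driver ends
 * up in tick_check_new_device() by registering a per-CPU device with
 * the clockevents core. All names below are made up for the example:
 *
 *	static DEFINE_PER_CPU(struct clock_event_device, my_evt);
 *
 *	static void my_timer_starting_cpu(unsigned int cpu)
 *	{
 *		struct clock_event_device *evt = per_cpu_ptr(&my_evt, cpu);
 *
 *		evt->name		= "my-timer";
 *		evt->features		= CLOCK_EVT_FEAT_ONESHOT;
 *		evt->rating		= 300;
 *		evt->cpumask		= cpumask_of(cpu);
 *		evt->set_next_event	= my_set_next_event;
 *		clockevents_config_and_register(evt, MY_TIMER_FREQ,
 *						0xf, 0x7fffffff);
 *	}
 *
 * clockevents_register_device(), called via
 * clockevents_config_and_register(), invokes tick_check_new_device()
 * with clockevents_lock held and interrupts disabled.
 */
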
/**
 * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
 * @state:	The target state (enter/exit)
 *
 * The system enters/leaves a state where affected devices might stop.
 * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
 *
 * Called with interrupts disabled, so clockevents_lock is not
 * required here because the local clock event device cannot go away
 * under us.
 */
int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

	if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP))
		return 0;

	return __tick_broadcast_oneshot_control(state);
}
EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);

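/*
 * A sketch of the expected idle-path usage, via the
 * tick_broadcast_enter()/tick_broadcast_exit() wrappers from
 * <linux/tick.h>; enter_deep_idle_state() is a made-up placeholder:
 *
 *	if (!tick_broadcast_enter()) {
 *		enter_deep_idle_state();
 *		tick_broadcast_exit();
 *	}
 *
 * A -EBUSY return from tick_broadcast_enter() means this CPU is the
 * one broadcasting wakeups and must not stop its tick device.
 */
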
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Transfer the do_timer job away from a dying cpu.
 *
 * Called with interrupts disabled. No locking required. If
 * tick_do_timer_cpu is owned by this cpu, nothing can change it.
 */
void tick_handover_do_timer(void)
{
	if (tick_do_timer_cpu == smp_processor_id()) {
		/* cpumask_first() returns >= nr_cpu_ids for an empty mask */
		int cpu = cpumask_first(cpu_online_mask);

		tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
			TICK_DO_TIMER_NONE;
	}
}

/*
 * Shutdown an event device on a given cpu:
 *
 * This is called on a live CPU, when a CPU is dead. So we cannot
 * access the hardware device itself.
 * We just set the mode and remove it from the lists.
 */
void tick_shutdown(unsigned int cpu)
{
	struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
	struct clock_event_device *dev = td->evtdev;

	td->mode = TICKDEV_MODE_PERIODIC;
	if (dev) {
		/*
		 * Prevent the clock events layer from trying to call
		 * the set mode function!
		 */
		clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
		clockevents_exchange_device(dev, NULL);
		dev->event_handler = clockevents_handle_noop;
		td->evtdev = NULL;
	}
}
#endif

/**
 * tick_suspend_local - Suspend the local tick device
 *
 * Called from the local CPU for freeze with interrupts disabled.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_suspend_local(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

	clockevents_shutdown(td->evtdev);
}

/**
 * tick_resume_local - Resume the local tick device
 *
 * Called from the local CPU for unfreeze or XEN resume magic.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_resume_local(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	bool broadcast = tick_resume_check_broadcast();

	clockevents_tick_resume(td->evtdev);
	if (!broadcast) {
		if (td->mode == TICKDEV_MODE_PERIODIC)
			tick_setup_periodic(td->evtdev, 0);
		else
			tick_resume_oneshot();
	}
}

/**
 * tick_suspend - Suspend the tick and the broadcast device
 *
 * Called from syscore_suspend() via timekeeping_suspend() with only one
 * CPU online and interrupts disabled, or from tick_freeze() under
 * tick_freeze_lock.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_suspend(void)
{
	tick_suspend_local();
	tick_suspend_broadcast();
}

/**
 * tick_resume - Resume the tick and the broadcast device
 *
 * Called from syscore_resume() via timekeeping_resume() with only one
 * CPU online and interrupts disabled.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_resume(void)
{
	tick_resume_broadcast();
	tick_resume_local();
}

#ifdef CONFIG_SUSPEND
static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
static unsigned int tick_freeze_depth;

/**
 * tick_freeze - Suspend the local tick and (possibly) timekeeping.
 *
 * Check if this is the last online CPU executing the function and if so,
 * suspend timekeeping.  Otherwise suspend the local tick.
 *
 * Call with interrupts disabled.  Must be balanced with tick_unfreeze().
 * Interrupts must not be enabled before the subsequent tick_unfreeze().
 */
void tick_freeze(void)
{
	raw_spin_lock(&tick_freeze_lock);

	tick_freeze_depth++;
	if (tick_freeze_depth == num_online_cpus()) {
		trace_suspend_resume(TPS("timekeeping_freeze"),
				     smp_processor_id(), true);
		system_state = SYSTEM_SUSPEND;
		timekeeping_suspend();
	} else {
		tick_suspend_local();
	}

	raw_spin_unlock(&tick_freeze_lock);
}

/**
 * tick_unfreeze - Resume the local tick and (possibly) timekeeping.
 *
 * Check if this is the first CPU executing the function and if so, resume
 * timekeeping.  Otherwise resume the local tick.
 *
 * Call with interrupts disabled.  Must be balanced with tick_freeze().
 * Interrupts must not be enabled after the preceding tick_freeze().
 */
void tick_unfreeze(void)
{
	raw_spin_lock(&tick_freeze_lock);

	if (tick_freeze_depth == num_online_cpus()) {
		timekeeping_resume();
		system_state = SYSTEM_RUNNING;
		trace_suspend_resume(TPS("timekeeping_freeze"),
				     smp_processor_id(), false);
	} else {
		tick_resume_local();
	}

	tick_freeze_depth--;

	raw_spin_unlock(&tick_freeze_lock);
}
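
/*
 * Sketch of the expected pairing: the suspend-to-idle path in cpuidle
 * (enter_s2idle_proper()) brackets the platform idle state with these
 * calls on each CPU:
 *
 *	tick_freeze();
 *	...enter the platform idle state...
 *	tick_unfreeze();
 */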
#endif /* CONFIG_SUSPEND */

/**
 * tick_init - initialize the tick control
 */
void __init tick_init(void)
{
	tick_broadcast_init();
	tick_nohz_init();
}