linux/kernel/time/tick-broadcast.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * This file contains functions which emulate a local clock-event
   4 * device via a broadcast event source.
   5 *
   6 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
   7 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
   8 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
   9 */
  10#include <linux/cpu.h>
  11#include <linux/err.h>
  12#include <linux/hrtimer.h>
  13#include <linux/interrupt.h>
  14#include <linux/percpu.h>
  15#include <linux/profile.h>
  16#include <linux/sched.h>
  17#include <linux/smp.h>
  18#include <linux/module.h>
  19
  20#include "tick-internal.h"
  21
  22/*
  23 * Broadcast support for broken x86 hardware, where the local apic
  24 * timer stops in C3 state.
  25 */
  26
  27static struct tick_device tick_broadcast_device;
  28static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
  29static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
  30static cpumask_var_t tmpmask __cpumask_var_read_mostly;
  31static int tick_broadcast_forced;
  32
  33static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
  34
  35#ifdef CONFIG_TICK_ONESHOT
  36static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);
  37
  38static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
  39static void tick_broadcast_clear_oneshot(int cpu);
  40static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
  41# ifdef CONFIG_HOTPLUG_CPU
  42static void tick_broadcast_oneshot_offline(unsigned int cpu);
  43# endif
  44#else
  45static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
  46static inline void tick_broadcast_clear_oneshot(int cpu) { }
  47static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
  48# ifdef CONFIG_HOTPLUG_CPU
  49static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
  50# endif
  51#endif
  52
  53/*
  54 * Debugging: see timer_list.c
  55 */
  56struct tick_device *tick_get_broadcast_device(void)
  57{
  58        return &tick_broadcast_device;
  59}
  60
  61struct cpumask *tick_get_broadcast_mask(void)
  62{
  63        return tick_broadcast_mask;
  64}
  65
  66static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);
  67
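     /*
      * Debugging: read-only accessor for the per-cpu oneshot wakeup device.
      * Returns NULL when no wakeup device is registered for @cpu.
      */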
  68const struct clock_event_device *tick_get_wakeup_device(int cpu)
  69{
  70        return tick_get_oneshot_wakeup_device(cpu);
  71}
  72
  73/*
  74 * Start the device in periodic mode
  75 */
  76static void tick_broadcast_start_periodic(struct clock_event_device *bc)
  77{
  78        if (bc)
  79                tick_setup_periodic(bc, 1);
  80}
  81
  82/*
   83 * Check if the device can be utilized as a broadcast device:
  84 */
  85static bool tick_check_broadcast_device(struct clock_event_device *curdev,
  86                                        struct clock_event_device *newdev)
  87{
  88        if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
  89            (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
  90            (newdev->features & CLOCK_EVT_FEAT_C3STOP))
  91                return false;
  92
  93        if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
  94            !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
  95                return false;
  96
  97        return !curdev || newdev->rating > curdev->rating;
  98}
  99
 100#ifdef CONFIG_TICK_ONESHOT
 101static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
 102{
 103        return per_cpu(tick_oneshot_wakeup_device, cpu);
 104}
 105
 106static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
 107{
 108        /*
 109         * If we woke up early and the tick was reprogrammed in the
 110         * meantime then this may be spurious but harmless.
 111         */
 112        tick_receive_broadcast();
 113}
 114
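     /*
      * Conditionally install @newdev as the per-cpu oneshot wakeup device
      * for @cpu. The device must be per-CPU, oneshot capable and not
      * affected by deep idle states. A NULL @newdev removes the current
      * wakeup device.
      */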
 115static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
 116                                           int cpu)
 117{
 118        struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);
 119
 120        if (!newdev)
 121                goto set_device;
 122
 123        if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
 124            (newdev->features & CLOCK_EVT_FEAT_C3STOP))
  125                return false;
 126
 127        if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
 128            !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
 129                return false;
 130
 131        if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
 132                return false;
 133
 134        if (curdev && newdev->rating <= curdev->rating)
 135                return false;
 136
 137        if (!try_module_get(newdev->owner))
 138                return false;
 139
 140        newdev->event_handler = tick_oneshot_wakeup_handler;
 141set_device:
 142        clockevents_exchange_device(curdev, newdev);
 143        per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
 144        return true;
 145}
 146#else
 147static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
 148{
 149        return NULL;
 150}
 151
 152static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
 153                                           int cpu)
 154{
 155        return false;
 156}
 157#endif
 158
 159/*
 160 * Conditionally install/replace broadcast device
 161 */
 162void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
 163{
 164        struct clock_event_device *cur = tick_broadcast_device.evtdev;
 165
 166        if (tick_set_oneshot_wakeup_device(dev, cpu))
 167                return;
 168
 169        if (!tick_check_broadcast_device(cur, dev))
 170                return;
 171
 172        if (!try_module_get(dev->owner))
 173                return;
 174
 175        clockevents_exchange_device(cur, dev);
 176        if (cur)
 177                cur->event_handler = clockevents_handle_noop;
 178        tick_broadcast_device.evtdev = dev;
 179        if (!cpumask_empty(tick_broadcast_mask))
 180                tick_broadcast_start_periodic(dev);
 181
 182        if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
 183                return;
 184
 185        /*
 186         * If the system already runs in oneshot mode, switch the newly
 187         * registered broadcast device to oneshot mode explicitly.
 188         */
 189        if (tick_broadcast_oneshot_active()) {
 190                tick_broadcast_switch_to_oneshot();
 191                return;
 192        }
 193
 194        /*
 195         * Inform all cpus about this. We might be in a situation
 196         * where we did not switch to oneshot mode because the per cpu
 197         * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
 198         * of a oneshot capable broadcast device. Without that
  199 * notification the system stays stuck in periodic mode
 200         * forever.
 201         */
 202        tick_clock_notify();
 203}
 204
 205/*
  206 * Check if the device is the broadcast device
 207 */
 208int tick_is_broadcast_device(struct clock_event_device *dev)
 209{
 210        return (dev && tick_broadcast_device.evtdev == dev);
 211}
 212
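     /*
      * Update the frequency of the broadcast clockevent device. Returns
      * -ENODEV when @dev is not the current broadcast device.
      */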
 213int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
 214{
 215        int ret = -ENODEV;
 216
 217        if (tick_is_broadcast_device(dev)) {
 218                raw_spin_lock(&tick_broadcast_lock);
 219                ret = __clockevents_update_freq(dev, freq);
 220                raw_spin_unlock(&tick_broadcast_lock);
 221        }
 222        return ret;
 223}
 224
 225
 226static void err_broadcast(const struct cpumask *mask)
 227{
 228        pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
 229}
 230
 231static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
 232{
 233        if (!dev->broadcast)
 234                dev->broadcast = tick_broadcast;
 235        if (!dev->broadcast) {
 236                pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
 237                             dev->name);
 238                dev->broadcast = err_broadcast;
 239        }
 240}
 241
 242/*
  243 * Check if the device is dysfunctional and a placeholder which
 244 * needs to be handled by the broadcast device.
 245 */
 246int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 247{
 248        struct clock_event_device *bc = tick_broadcast_device.evtdev;
 249        unsigned long flags;
 250        int ret = 0;
 251
 252        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 253
 254        /*
 255         * Devices might be registered with both periodic and oneshot
  256         * mode disabled. This signals that the device needs to be
 257         * operated from the broadcast device and is a placeholder for
 258         * the cpu local device.
 259         */
 260        if (!tick_device_is_functional(dev)) {
 261                dev->event_handler = tick_handle_periodic;
 262                tick_device_setup_broadcast_func(dev);
 263                cpumask_set_cpu(cpu, tick_broadcast_mask);
 264                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
 265                        tick_broadcast_start_periodic(bc);
 266                else
 267                        tick_broadcast_setup_oneshot(bc);
 268                ret = 1;
 269        } else {
 270                /*
 271                 * Clear the broadcast bit for this cpu if the
 272                 * device is not power state affected.
 273                 */
 274                if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
 275                        cpumask_clear_cpu(cpu, tick_broadcast_mask);
 276                else
 277                        tick_device_setup_broadcast_func(dev);
 278
 279                /*
  280                 * Clear the broadcast bit if the CPU is not in the
  281                 * periodic 'broadcast on' state.
 282                 */
 283                if (!cpumask_test_cpu(cpu, tick_broadcast_on))
 284                        cpumask_clear_cpu(cpu, tick_broadcast_mask);
 285
 286                switch (tick_broadcast_device.mode) {
 287                case TICKDEV_MODE_ONESHOT:
 288                        /*
 289                         * If the system is in oneshot mode we can
 290                         * unconditionally clear the oneshot mask bit,
 291                         * because the CPU is running and therefore
 292                         * not in an idle state which causes the power
 293                         * state affected device to stop. Let the
 294                         * caller initialize the device.
 295                         */
 296                        tick_broadcast_clear_oneshot(cpu);
 297                        ret = 0;
 298                        break;
 299
 300                case TICKDEV_MODE_PERIODIC:
 301                        /*
 302                         * If the system is in periodic mode, check
 303                         * whether the broadcast device can be
 304                         * switched off now.
 305                         */
 306                        if (cpumask_empty(tick_broadcast_mask) && bc)
 307                                clockevents_shutdown(bc);
 308                        /*
 309                         * If we kept the cpu in the broadcast mask,
 310                         * tell the caller to leave the per cpu device
 311                         * in shutdown state. The periodic interrupt
 312                         * is delivered by the broadcast device, if
 313                         * the broadcast device exists and is not
 314                         * hrtimer based.
 315                         */
 316                        if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
 317                                ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
 318                        break;
 319                default:
 320                        break;
 321                }
 322        }
 323        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 324        return ret;
 325}
 326
 327int tick_receive_broadcast(void)
 328{
 329        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 330        struct clock_event_device *evt = td->evtdev;
 331
 332        if (!evt)
 333                return -ENODEV;
 334
 335        if (!evt->event_handler)
 336                return -EINVAL;
 337
 338        evt->event_handler(evt);
 339        return 0;
 340}
 341
 342/*
  343 * Broadcast the event to the cpus which are set in the mask (the mask is mangled).
 344 */
 345static bool tick_do_broadcast(struct cpumask *mask)
 346{
 347        int cpu = smp_processor_id();
 348        struct tick_device *td;
 349        bool local = false;
 350
 351        /*
  352         * Check if the current cpu is in the mask
 353         */
 354        if (cpumask_test_cpu(cpu, mask)) {
 355                struct clock_event_device *bc = tick_broadcast_device.evtdev;
 356
 357                cpumask_clear_cpu(cpu, mask);
 358                /*
 359                 * We only run the local handler, if the broadcast
 360                 * device is not hrtimer based. Otherwise we run into
 361                 * a hrtimer recursion.
 362                 *
 363                 * local timer_interrupt()
 364                 *   local_handler()
 365                 *     expire_hrtimers()
 366                 *       bc_handler()
 367                 *         local_handler()
 368                 *           expire_hrtimers()
 369                 */
 370                local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
 371        }
 372
 373        if (!cpumask_empty(mask)) {
 374                /*
 375                 * It might be necessary to actually check whether the devices
  376                 * have different broadcast functions. For now, just use that
  377                 * of the first device. This works as long as we have this
  378                 * misfeature only on x86 (lapic).
 379                 */
 380                td = &per_cpu(tick_cpu_device, cpumask_first(mask));
 381                td->evtdev->broadcast(mask);
 382        }
 383        return local;
 384}
 385
 386/*
 387 * Periodic broadcast:
 388 * - invoke the broadcast handlers
 389 */
 390static bool tick_do_periodic_broadcast(void)
 391{
 392        cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
 393        return tick_do_broadcast(tmpmask);
 394}
 395
 396/*
 397 * Event handler for periodic broadcast ticks
 398 */
 399static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
 400{
 401        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 402        bool bc_local;
 403
 404        raw_spin_lock(&tick_broadcast_lock);
 405
 406        /* Handle spurious interrupts gracefully */
 407        if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
 408                raw_spin_unlock(&tick_broadcast_lock);
 409                return;
 410        }
 411
 412        bc_local = tick_do_periodic_broadcast();
 413
 414        if (clockevent_state_oneshot(dev)) {
 415                ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC);
 416
 417                clockevents_program_event(dev, next, true);
 418        }
 419        raw_spin_unlock(&tick_broadcast_lock);
 420
 421        /*
 422         * We run the handler of the local cpu after dropping
 423         * tick_broadcast_lock because the handler might deadlock when
 424         * trying to switch to oneshot mode.
 425         */
 426        if (bc_local)
 427                td->evtdev->event_handler(td->evtdev);
 428}
 429
 430/**
 431 * tick_broadcast_control - Enable/disable or force broadcast mode
 432 * @mode:       The selected broadcast mode
 433 *
 434 * Called when the system enters a state where affected tick devices
 435 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 436 */
 437void tick_broadcast_control(enum tick_broadcast_mode mode)
 438{
 439        struct clock_event_device *bc, *dev;
 440        struct tick_device *td;
 441        int cpu, bc_stopped;
 442        unsigned long flags;
 443
 444        /* Protects also the local clockevent device. */
 445        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 446        td = this_cpu_ptr(&tick_cpu_device);
 447        dev = td->evtdev;
 448
 449        /*
  450         * Is the device not affected by the power state?
 451         */
 452        if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
 453                goto out;
 454
 455        if (!tick_device_is_functional(dev))
 456                goto out;
 457
 458        cpu = smp_processor_id();
 459        bc = tick_broadcast_device.evtdev;
 460        bc_stopped = cpumask_empty(tick_broadcast_mask);
 461
 462        switch (mode) {
 463        case TICK_BROADCAST_FORCE:
 464                tick_broadcast_forced = 1;
 465                fallthrough;
 466        case TICK_BROADCAST_ON:
 467                cpumask_set_cpu(cpu, tick_broadcast_on);
 468                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
 469                        /*
 470                         * Only shutdown the cpu local device, if:
 471                         *
 472                         * - the broadcast device exists
 473                         * - the broadcast device is not a hrtimer based one
 474                         * - the broadcast device is in periodic mode to
 475                         *   avoid a hiccup during switch to oneshot mode
 476                         */
 477                        if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
 478                            tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
 479                                clockevents_shutdown(dev);
 480                }
 481                break;
 482
 483        case TICK_BROADCAST_OFF:
 484                if (tick_broadcast_forced)
 485                        break;
 486                cpumask_clear_cpu(cpu, tick_broadcast_on);
 487                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
 488                        if (tick_broadcast_device.mode ==
 489                            TICKDEV_MODE_PERIODIC)
 490                                tick_setup_periodic(dev, 0);
 491                }
 492                break;
 493        }
 494
 495        if (bc) {
 496                if (cpumask_empty(tick_broadcast_mask)) {
 497                        if (!bc_stopped)
 498                                clockevents_shutdown(bc);
 499                } else if (bc_stopped) {
 500                        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
 501                                tick_broadcast_start_periodic(bc);
 502                        else
 503                                tick_broadcast_setup_oneshot(bc);
 504                }
 505        }
 506out:
 507        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 508}
 509EXPORT_SYMBOL_GPL(tick_broadcast_control);
 510
 511/*
 512 * Set the periodic handler depending on broadcast on/off
 513 */
 514void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
 515{
 516        if (!broadcast)
 517                dev->event_handler = tick_handle_periodic;
 518        else
 519                dev->event_handler = tick_handle_periodic_broadcast;
 520}
 521
 522#ifdef CONFIG_HOTPLUG_CPU
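     /*
      * Shut down the broadcast device when the last CPU which needed the
      * periodic broadcast went away. Called with tick_broadcast_lock held.
      */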
 523static void tick_shutdown_broadcast(void)
 524{
 525        struct clock_event_device *bc = tick_broadcast_device.evtdev;
 526
 527        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
 528                if (bc && cpumask_empty(tick_broadcast_mask))
 529                        clockevents_shutdown(bc);
 530        }
 531}
 532
 533/*
 534 * Remove a CPU from broadcasting
 535 */
 536void tick_broadcast_offline(unsigned int cpu)
 537{
 538        raw_spin_lock(&tick_broadcast_lock);
 539        cpumask_clear_cpu(cpu, tick_broadcast_mask);
 540        cpumask_clear_cpu(cpu, tick_broadcast_on);
 541        tick_broadcast_oneshot_offline(cpu);
 542        tick_shutdown_broadcast();
 543        raw_spin_unlock(&tick_broadcast_lock);
 544}
 545
 546#endif
 547
 548void tick_suspend_broadcast(void)
 549{
 550        struct clock_event_device *bc;
 551        unsigned long flags;
 552
 553        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 554
 555        bc = tick_broadcast_device.evtdev;
 556        if (bc)
 557                clockevents_shutdown(bc);
 558
 559        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 560}
 561
 562/*
 563 * This is called from tick_resume_local() on a resuming CPU. That's
 564 * called from the core resume function, tick_unfreeze() and the magic XEN
 565 * resume hackery.
 566 *
 567 * In none of these cases the broadcast device mode can change and the
 568 * bit of the resuming CPU in the broadcast mask is safe as well.
 569 */
 570bool tick_resume_check_broadcast(void)
 571{
 572        if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
 573                return false;
 574        else
 575                return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
 576}
 577
 578void tick_resume_broadcast(void)
 579{
 580        struct clock_event_device *bc;
 581        unsigned long flags;
 582
 583        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 584
 585        bc = tick_broadcast_device.evtdev;
 586
 587        if (bc) {
 588                clockevents_tick_resume(bc);
 589
 590                switch (tick_broadcast_device.mode) {
 591                case TICKDEV_MODE_PERIODIC:
 592                        if (!cpumask_empty(tick_broadcast_mask))
 593                                tick_broadcast_start_periodic(bc);
 594                        break;
 595                case TICKDEV_MODE_ONESHOT:
 596                        if (!cpumask_empty(tick_broadcast_mask))
 597                                tick_resume_broadcast_oneshot(bc);
 598                        break;
 599                }
 600        }
 601        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 602}
 603
 604#ifdef CONFIG_TICK_ONESHOT
 605
 606static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
 607static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
 608static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;
 609
 610/*
 611 * Exposed for debugging: see timer_list.c
 612 */
 613struct cpumask *tick_get_broadcast_oneshot_mask(void)
 614{
 615        return tick_broadcast_oneshot_mask;
 616}
 617
 618/*
 619 * Called before going idle with interrupts disabled. Checks whether a
  620 * broadcast event from another CPU is about to happen. We detected
 621 * that in tick_broadcast_oneshot_control(). The callsite can use this
 622 * to avoid a deep idle transition as we are about to get the
 623 * broadcast IPI right away.
 624 */
 625int tick_check_broadcast_expired(void)
 626{
 627        return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
 628}
 629
 630/*
 631 * Set broadcast interrupt affinity
 632 */
 633static void tick_broadcast_set_affinity(struct clock_event_device *bc,
 634                                        const struct cpumask *cpumask)
 635{
 636        if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
 637                return;
 638
 639        if (cpumask_equal(bc->cpumask, cpumask))
 640                return;
 641
 642        bc->cpumask = cpumask;
 643        irq_set_affinity(bc->irq, bc->cpumask);
 644}
 645
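     /*
      * Program the broadcast device for the next expiry @expires and steer
      * its interrupt towards the CPU which owns that expiry.
      */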
 646static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
 647                                     ktime_t expires)
 648{
 649        if (!clockevent_state_oneshot(bc))
 650                clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
 651
 652        clockevents_program_event(bc, expires, 1);
 653        tick_broadcast_set_affinity(bc, cpumask_of(cpu));
 654}
 655
 656static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
 657{
 658        clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
 659}
 660
 661/*
 662 * Called from irq_enter() when idle was interrupted to reenable the
 663 * per cpu device.
 664 */
 665void tick_check_oneshot_broadcast_this_cpu(void)
 666{
 667        if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
 668                struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 669
 670                /*
 671                 * We might be in the middle of switching over from
 672                 * periodic to oneshot. If the CPU has not yet
 673                 * switched over, leave the device alone.
 674                 */
 675                if (td->mode == TICKDEV_MODE_ONESHOT) {
 676                        clockevents_switch_state(td->evtdev,
 677                                              CLOCK_EVT_STATE_ONESHOT);
 678                }
 679        }
 680}
 681
 682/*
 683 * Handle oneshot mode broadcasting
 684 */
 685static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 686{
 687        struct tick_device *td;
 688        ktime_t now, next_event;
 689        int cpu, next_cpu = 0;
 690        bool bc_local;
 691
 692        raw_spin_lock(&tick_broadcast_lock);
 693        dev->next_event = KTIME_MAX;
 694        next_event = KTIME_MAX;
 695        cpumask_clear(tmpmask);
 696        now = ktime_get();
 697        /* Find all expired events */
 698        for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
 699                /*
  700                 * Required for !SMP because for_each_cpu() unconditionally
  701                 * reports CPU0 as set on UP kernels.
 702                 */
 703                if (!IS_ENABLED(CONFIG_SMP) &&
 704                    cpumask_empty(tick_broadcast_oneshot_mask))
 705                        break;
 706
 707                td = &per_cpu(tick_cpu_device, cpu);
 708                if (td->evtdev->next_event <= now) {
 709                        cpumask_set_cpu(cpu, tmpmask);
 710                        /*
 711                         * Mark the remote cpu in the pending mask, so
 712                         * it can avoid reprogramming the cpu local
 713                         * timer in tick_broadcast_oneshot_control().
 714                         */
 715                        cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
 716                } else if (td->evtdev->next_event < next_event) {
 717                        next_event = td->evtdev->next_event;
 718                        next_cpu = cpu;
 719                }
 720        }
 721
 722        /*
 723         * Remove the current cpu from the pending mask. The event is
  724         * delivered immediately in tick_do_broadcast()!
 725         */
 726        cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
 727
 728        /* Take care of enforced broadcast requests */
 729        cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
 730        cpumask_clear(tick_broadcast_force_mask);
 731
 732        /*
 733         * Sanity check. Catch the case where we try to broadcast to
 734         * offline cpus.
 735         */
 736        if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
 737                cpumask_and(tmpmask, tmpmask, cpu_online_mask);
 738
 739        /*
 740         * Wakeup the cpus which have an expired event.
 741         */
 742        bc_local = tick_do_broadcast(tmpmask);
 743
 744        /*
  745         * Two reasons to reprogram:
 746         *
 747         * - The global event did not expire any CPU local
 748         * events. This happens in dyntick mode, as the maximum PIT
 749         * delta is quite small.
 750         *
 751         * - There are pending events on sleeping CPUs which were not
 752         * in the event mask
 753         */
 754        if (next_event != KTIME_MAX)
 755                tick_broadcast_set_event(dev, next_cpu, next_event);
 756
 757        raw_spin_unlock(&tick_broadcast_lock);
 758
 759        if (bc_local) {
 760                td = this_cpu_ptr(&tick_cpu_device);
 761                td->evtdev->event_handler(td->evtdev);
 762        }
 763}
 764
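     /*
      * Returns -EBUSY when the hrtimer based broadcast device is armed and
      * bound to @cpu, zero otherwise.
      */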
 765static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
 766{
 767        if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
 768                return 0;
 769        if (bc->next_event == KTIME_MAX)
 770                return 0;
 771        return bc->bound_on == cpu ? -EBUSY : 0;
 772}
 773
 774static void broadcast_shutdown_local(struct clock_event_device *bc,
 775                                     struct clock_event_device *dev)
 776{
 777        /*
 778         * For hrtimer based broadcasting we cannot shutdown the cpu
 779         * local device if our own event is the first one to expire or
 780         * if we own the broadcast timer.
 781         */
 782        if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
 783                if (broadcast_needs_cpu(bc, smp_processor_id()))
 784                        return;
 785                if (dev->next_event < bc->next_event)
 786                        return;
 787        }
 788        clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 789}
 790
 791static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
 792                                             struct tick_device *td,
 793                                             int cpu)
 794{
 795        struct clock_event_device *bc, *dev = td->evtdev;
 796        int ret = 0;
 797        ktime_t now;
 798
 799        raw_spin_lock(&tick_broadcast_lock);
 800        bc = tick_broadcast_device.evtdev;
 801
 802        if (state == TICK_BROADCAST_ENTER) {
 803                /*
 804                 * If the current CPU owns the hrtimer broadcast
 805                 * mechanism, it cannot go deep idle and we do not add
 806                 * the CPU to the broadcast mask. We don't have to go
 807                 * through the EXIT path as the local timer is not
 808                 * shutdown.
 809                 */
 810                ret = broadcast_needs_cpu(bc, cpu);
 811                if (ret)
 812                        goto out;
 813
 814                /*
 815                 * If the broadcast device is in periodic mode, we
 816                 * return.
 817                 */
 818                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
 819                        /* If it is a hrtimer based broadcast, return busy */
 820                        if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
 821                                ret = -EBUSY;
 822                        goto out;
 823                }
 824
 825                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
 826                        WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
 827
 828                        /* Conditionally shut down the local timer. */
 829                        broadcast_shutdown_local(bc, dev);
 830
 831                        /*
 832                         * We only reprogram the broadcast timer if we
  833                         * did not mark ourselves in the force mask and
 834                         * if the cpu local event is earlier than the
 835                         * broadcast event. If the current CPU is in
 836                         * the force mask, then we are going to be
 837                         * woken by the IPI right away; we return
 838                         * busy, so the CPU does not try to go deep
 839                         * idle.
 840                         */
 841                        if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
 842                                ret = -EBUSY;
 843                        } else if (dev->next_event < bc->next_event) {
 844                                tick_broadcast_set_event(bc, cpu, dev->next_event);
 845                                /*
 846                                 * In case of hrtimer broadcasts the
 847                                 * programming might have moved the
 848                                 * timer to this cpu. If yes, remove
 849                                 * us from the broadcast mask and
 850                                 * return busy.
 851                                 */
 852                                ret = broadcast_needs_cpu(bc, cpu);
 853                                if (ret) {
 854                                        cpumask_clear_cpu(cpu,
 855                                                tick_broadcast_oneshot_mask);
 856                                }
 857                        }
 858                }
 859        } else {
 860                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
 861                        clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
 862                        /*
 863                         * The cpu which was handling the broadcast
 864                         * timer marked this cpu in the broadcast
 865                         * pending mask and fired the broadcast
 866                         * IPI. So we are going to handle the expired
 867                         * event anyway via the broadcast IPI
 868                         * handler. No need to reprogram the timer
 869                         * with an already expired event.
 870                         */
 871                        if (cpumask_test_and_clear_cpu(cpu,
 872                                       tick_broadcast_pending_mask))
 873                                goto out;
 874
 875                        /*
 876                         * Bail out if there is no next event.
 877                         */
 878                        if (dev->next_event == KTIME_MAX)
 879                                goto out;
 880                        /*
 881                         * If the pending bit is not set, then we are
 882                         * either the CPU handling the broadcast
 883                         * interrupt or we got woken by something else.
 884                         *
 885                         * We are no longer in the broadcast mask, so
 886                         * if the cpu local expiry time is already
 887                         * reached, we would reprogram the cpu local
 888                         * timer with an already expired event.
 889                         *
 890                         * This can lead to a ping-pong when we return
 891                         * to idle and therefore rearm the broadcast
 892                         * timer before the cpu local timer was able
 893                         * to fire. This happens because the forced
 894                         * reprogramming makes sure that the event
 895                         * will happen in the future and depending on
 896                         * the min_delta setting this might be far
 897                         * enough out that the ping-pong starts.
 898                         *
 899                         * If the cpu local next_event has expired
 900                         * then we know that the broadcast timer
 901                         * next_event has expired as well and
 902                         * broadcast is about to be handled. So we
 903                         * avoid reprogramming and enforce that the
 904                         * broadcast handler, which did not run yet,
 905                         * will invoke the cpu local handler.
 906                         *
 907                         * We cannot call the handler directly from
 908                         * here, because we might be in a NOHZ phase
 909                         * and we did not go through the irq_enter()
 910                         * nohz fixups.
 911                         */
 912                        now = ktime_get();
 913                        if (dev->next_event <= now) {
 914                                cpumask_set_cpu(cpu, tick_broadcast_force_mask);
 915                                goto out;
 916                        }
 917                        /*
 918                         * We got woken by something else. Reprogram
 919                         * the cpu local timer device.
 920                         */
 921                        tick_program_event(dev->next_event, 1);
 922                }
 923        }
 924out:
 925        raw_spin_unlock(&tick_broadcast_lock);
 926        return ret;
 927}
 928
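     /*
      * Idle handling via the per-cpu oneshot wakeup device: on ENTER the
      * CPU local tick device is stopped and the wakeup device is programmed
      * for the next event; on EXIT success is only reported when the wakeup
      * device is actually in oneshot state. A non-zero return value makes
      * the caller fall back to the broadcast device.
      */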
 929static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
 930                                       struct tick_device *td,
 931                                       int cpu)
 932{
 933        struct clock_event_device *dev, *wd;
 934
 935        dev = td->evtdev;
 936        if (td->mode != TICKDEV_MODE_ONESHOT)
 937                return -EINVAL;
 938
 939        wd = tick_get_oneshot_wakeup_device(cpu);
 940        if (!wd)
 941                return -ENODEV;
 942
 943        switch (state) {
 944        case TICK_BROADCAST_ENTER:
 945                clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
 946                clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
 947                clockevents_program_event(wd, dev->next_event, 1);
 948                break;
 949        case TICK_BROADCAST_EXIT:
 950                /* We may have transitioned to oneshot mode while idle */
 951                if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
 952                        return -ENODEV;
 953        }
 954
 955        return 0;
 956}
 957
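     /*
      * Idle entry/exit hook for oneshot mode: try the per-cpu wakeup device
      * first, then fall back to the broadcast device. Returns -EBUSY if
      * neither is available, so the caller does not enter deep idle.
      */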
 958int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 959{
 960        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 961        int cpu = smp_processor_id();
 962
 963        if (!tick_oneshot_wakeup_control(state, td, cpu))
 964                return 0;
 965
 966        if (tick_broadcast_device.evtdev)
 967                return ___tick_broadcast_oneshot_control(state, td, cpu);
 968
 969        /*
 970         * If there is no broadcast or wakeup device, tell the caller not
 971         * to go into deep idle.
 972         */
 973        return -EBUSY;
 974}
 975
 976/*
  977 * Reset the oneshot broadcast state for a cpu
 978 *
 979 * Called with tick_broadcast_lock held
 980 */
 981static void tick_broadcast_clear_oneshot(int cpu)
 982{
 983        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 984        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
 985}
 986
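     /*
      * Set the next_event of all per-cpu tick devices in @mask to @expires.
      */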
 987static void tick_broadcast_init_next_event(struct cpumask *mask,
 988                                           ktime_t expires)
 989{
 990        struct tick_device *td;
 991        int cpu;
 992
 993        for_each_cpu(cpu, mask) {
 994                td = &per_cpu(tick_cpu_device, cpu);
 995                if (td->evtdev)
 996                        td->evtdev->next_event = expires;
 997        }
 998}
 999
1000static inline ktime_t tick_get_next_period(void)
1001{
1002        ktime_t next;
1003
1004        /*
 1005         * Protect against concurrent updates (store/load tearing on
1006         * 32bit). It does not matter if the time is already in the
1007         * past. The broadcast device which is about to be programmed will
1008         * fire in any case.
1009         */
1010        raw_spin_lock(&jiffies_lock);
1011        next = tick_next_period;
1012        raw_spin_unlock(&jiffies_lock);
1013        return next;
1014}
1015
1016/**
1017 * tick_broadcast_setup_oneshot - setup the broadcast device
1018 */
1019static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
1020{
1021        int cpu = smp_processor_id();
1022
1023        if (!bc)
1024                return;
1025
 1026        /* Set it up only once! */
1027        if (bc->event_handler != tick_handle_oneshot_broadcast) {
1028                int was_periodic = clockevent_state_periodic(bc);
1029
1030                bc->event_handler = tick_handle_oneshot_broadcast;
1031
1032                /*
1033                 * We must be careful here. There might be other CPUs
1034                 * waiting for periodic broadcast. We need to set the
1035                 * oneshot_mask bits for those and program the
1036                 * broadcast device to fire.
1037                 */
1038                cpumask_copy(tmpmask, tick_broadcast_mask);
1039                cpumask_clear_cpu(cpu, tmpmask);
1040                cpumask_or(tick_broadcast_oneshot_mask,
1041                           tick_broadcast_oneshot_mask, tmpmask);
1042
1043                if (was_periodic && !cpumask_empty(tmpmask)) {
1044                        ktime_t nextevt = tick_get_next_period();
1045
1046                        clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
1047                        tick_broadcast_init_next_event(tmpmask, nextevt);
1048                        tick_broadcast_set_event(bc, cpu, nextevt);
1049                } else
1050                        bc->next_event = KTIME_MAX;
1051        } else {
1052                /*
1053                 * The first cpu which switches to oneshot mode sets
1054                 * the bit for all other cpus which are in the general
1055                 * (periodic) broadcast mask. So the bit is set and
1056                 * would prevent the first broadcast enter after this
 1057                 * from programming the bc device.
1058                 */
1059                tick_broadcast_clear_oneshot(cpu);
1060        }
1061}
1062
1063/*
1064 * Select oneshot operating mode for the broadcast device
1065 */
1066void tick_broadcast_switch_to_oneshot(void)
1067{
1068        struct clock_event_device *bc;
1069        unsigned long flags;
1070
1071        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
1072
1073        tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
1074        bc = tick_broadcast_device.evtdev;
1075        if (bc)
1076                tick_broadcast_setup_oneshot(bc);
1077
1078        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
1079}
1080
1081#ifdef CONFIG_HOTPLUG_CPU
1082void hotplug_cpu__broadcast_tick_pull(int deadcpu)
1083{
1084        struct clock_event_device *bc;
1085        unsigned long flags;
1086
1087        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
1088        bc = tick_broadcast_device.evtdev;
1089
1090        if (bc && broadcast_needs_cpu(bc, deadcpu)) {
1091                /* This moves the broadcast assignment to this CPU: */
1092                clockevents_program_event(bc, bc->next_event, 1);
1093        }
1094        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
1095}
1096
1097/*
1098 * Remove a dying CPU from broadcasting
1099 */
1100static void tick_broadcast_oneshot_offline(unsigned int cpu)
1101{
1102        if (tick_get_oneshot_wakeup_device(cpu))
1103                tick_set_oneshot_wakeup_device(NULL, cpu);
1104
1105        /*
1106         * Clear the broadcast masks for the dead cpu, but do not stop
1107         * the broadcast device!
1108         */
1109        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
1110        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
1111        cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
1112}
1113#endif
1114
1115/*
 1116 * Check whether the broadcast device is in oneshot mode
1117 */
1118int tick_broadcast_oneshot_active(void)
1119{
1120        return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
1121}
1122
1123/*
1124 * Check whether the broadcast device supports oneshot.
1125 */
1126bool tick_broadcast_oneshot_available(void)
1127{
1128        struct clock_event_device *bc = tick_broadcast_device.evtdev;
1129
1130        return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
1131}
1132
1133#else
1134int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
1135{
1136        struct clock_event_device *bc = tick_broadcast_device.evtdev;
1137
1138        if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
1139                return -EBUSY;
1140
1141        return 0;
1142}
1143#endif
1144
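     /*
      * Allocate the cpumasks used by the broadcast code at early boot.
      */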
1145void __init tick_broadcast_init(void)
1146{
1147        zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
1148        zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
1149        zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
1150#ifdef CONFIG_TICK_ONESHOT
1151        zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
1152        zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
1153        zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
1154#endif
1155}
1156