linux/kernel/time/tick-broadcast.c
   1/*
   2 * linux/kernel/time/tick-broadcast.c
   3 *
   4 * This file contains functions which emulate a local clock-event
   5 * device via a broadcast event source.
   6 *
   7 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
   8 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
   9 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
  10 *
   11 * This code is licensed under the GPL version 2. For details see
  12 * kernel-base/COPYING.
  13 */
  14#include <linux/cpu.h>
  15#include <linux/err.h>
  16#include <linux/hrtimer.h>
  17#include <linux/interrupt.h>
  18#include <linux/percpu.h>
  19#include <linux/profile.h>
  20#include <linux/sched.h>
  21#include <linux/smp.h>
  22#include <linux/module.h>
  23
  24#include "tick-internal.h"
  25
  26/*
  27 * Broadcast support for broken x86 hardware, where the local apic
  28 * timer stops in C3 state.
  29 */
  30
  31static struct tick_device tick_broadcast_device;
  32static cpumask_var_t tick_broadcast_mask;
  33static cpumask_var_t tick_broadcast_on;
  34static cpumask_var_t tmpmask;
  35static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
  36static int tick_broadcast_forced;
  37
  38#ifdef CONFIG_TICK_ONESHOT
  39static void tick_broadcast_clear_oneshot(int cpu);
  40static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
  41#else
  42static inline void tick_broadcast_clear_oneshot(int cpu) { }
  43static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
  44#endif
  45
  46/*
  47 * Debugging: see timer_list.c
  48 */
  49struct tick_device *tick_get_broadcast_device(void)
  50{
  51        return &tick_broadcast_device;
  52}
  53
  54struct cpumask *tick_get_broadcast_mask(void)
  55{
  56        return tick_broadcast_mask;
  57}
  58
  59/*
  60 * Start the device in periodic mode
  61 */
  62static void tick_broadcast_start_periodic(struct clock_event_device *bc)
  63{
  64        if (bc)
  65                tick_setup_periodic(bc, 1);
  66}
  67
  68/*
   69 * Check if the device can be utilized as the broadcast device:
  70 */
  71static bool tick_check_broadcast_device(struct clock_event_device *curdev,
  72                                        struct clock_event_device *newdev)
  73{
  74        if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
  75            (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
  76            (newdev->features & CLOCK_EVT_FEAT_C3STOP))
  77                return false;
  78
  79        if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
  80            !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
  81                return false;
  82
  83        return !curdev || newdev->rating > curdev->rating;
  84}
  85
  86/*
  87 * Conditionally install/replace broadcast device
  88 */
  89void tick_install_broadcast_device(struct clock_event_device *dev)
  90{
  91        struct clock_event_device *cur = tick_broadcast_device.evtdev;
  92
  93        if (!tick_check_broadcast_device(cur, dev))
  94                return;
  95
  96        if (!try_module_get(dev->owner))
  97                return;
  98
  99        clockevents_exchange_device(cur, dev);
 100        if (cur)
 101                cur->event_handler = clockevents_handle_noop;
 102        tick_broadcast_device.evtdev = dev;
 103        if (!cpumask_empty(tick_broadcast_mask))
 104                tick_broadcast_start_periodic(dev);
 105        /*
 106         * Inform all cpus about this. We might be in a situation
 107         * where we did not switch to oneshot mode because the per cpu
 108         * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
 109         * of a oneshot capable broadcast device. Without that
  110         * notification the system stays stuck in periodic mode
 111         * forever.
 112         */
 113        if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
 114                tick_clock_notify();
 115}
 116
 117/*
  118 * Check if the device is the broadcast device
 119 */
 120int tick_is_broadcast_device(struct clock_event_device *dev)
 121{
 122        return (dev && tick_broadcast_device.evtdev == dev);
 123}
 124
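     /*
      * Update the frequency of the broadcast device. Returns -ENODEV when
      * @dev is not the current broadcast device.
      */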
 125int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
 126{
 127        int ret = -ENODEV;
 128
 129        if (tick_is_broadcast_device(dev)) {
 130                raw_spin_lock(&tick_broadcast_lock);
 131                ret = __clockevents_update_freq(dev, freq);
 132                raw_spin_unlock(&tick_broadcast_lock);
 133        }
 134        return ret;
 135}
 136
 137
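     /*
      * Fallback broadcast function: complain once instead of kicking the
      * CPUs when no real broadcast function is available.
      */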
 138static void err_broadcast(const struct cpumask *mask)
 139{
 140        pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
 141}
 142
 143static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
 144{
 145        if (!dev->broadcast)
 146                dev->broadcast = tick_broadcast;
 147        if (!dev->broadcast) {
 148                pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
 149                             dev->name);
 150                dev->broadcast = err_broadcast;
 151        }
 152}
 153
 154/*
  155 * Check if the device is dysfunctional and a placeholder which
  156 * needs to be handled by the broadcast device.
 157 */
 158int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 159{
 160        struct clock_event_device *bc = tick_broadcast_device.evtdev;
 161        unsigned long flags;
 162        int ret = 0;
 163
 164        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 165
 166        /*
 167         * Devices might be registered with both periodic and oneshot
  168         * mode disabled. This signals that the device needs to be
 169         * operated from the broadcast device and is a placeholder for
 170         * the cpu local device.
 171         */
 172        if (!tick_device_is_functional(dev)) {
 173                dev->event_handler = tick_handle_periodic;
 174                tick_device_setup_broadcast_func(dev);
 175                cpumask_set_cpu(cpu, tick_broadcast_mask);
 176                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
 177                        tick_broadcast_start_periodic(bc);
 178                else
 179                        tick_broadcast_setup_oneshot(bc);
 180                ret = 1;
 181        } else {
 182                /*
 183                 * Clear the broadcast bit for this cpu if the
 184                 * device is not power state affected.
 185                 */
 186                if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
 187                        cpumask_clear_cpu(cpu, tick_broadcast_mask);
 188                else
 189                        tick_device_setup_broadcast_func(dev);
 190
 191                /*
 192                 * Clear the broadcast bit if the CPU is not in
 193                 * periodic broadcast on state.
 194                 */
 195                if (!cpumask_test_cpu(cpu, tick_broadcast_on))
 196                        cpumask_clear_cpu(cpu, tick_broadcast_mask);
 197
 198                switch (tick_broadcast_device.mode) {
 199                case TICKDEV_MODE_ONESHOT:
 200                        /*
 201                         * If the system is in oneshot mode we can
 202                         * unconditionally clear the oneshot mask bit,
 203                         * because the CPU is running and therefore
 204                         * not in an idle state which causes the power
 205                         * state affected device to stop. Let the
 206                         * caller initialize the device.
 207                         */
 208                        tick_broadcast_clear_oneshot(cpu);
 209                        ret = 0;
 210                        break;
 211
 212                case TICKDEV_MODE_PERIODIC:
 213                        /*
 214                         * If the system is in periodic mode, check
 215                         * whether the broadcast device can be
 216                         * switched off now.
 217                         */
 218                        if (cpumask_empty(tick_broadcast_mask) && bc)
 219                                clockevents_shutdown(bc);
 220                        /*
 221                         * If we kept the cpu in the broadcast mask,
 222                         * tell the caller to leave the per cpu device
 223                         * in shutdown state. The periodic interrupt
 224                         * is delivered by the broadcast device, if
 225                         * the broadcast device exists and is not
 226                         * hrtimer based.
 227                         */
 228                        if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
 229                                ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
 230                        break;
 231                default:
 232                        break;
 233                }
 234        }
 235        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 236        return ret;
 237}
 238
 239#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
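     /*
      * Called on the target CPU from the architecture's broadcast IPI
      * handler: run the cpu local event handler on behalf of the stopped
      * per cpu timer.
      */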
 240int tick_receive_broadcast(void)
 241{
 242        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 243        struct clock_event_device *evt = td->evtdev;
 244
 245        if (!evt)
 246                return -ENODEV;
 247
 248        if (!evt->event_handler)
 249                return -EINVAL;
 250
 251        evt->event_handler(evt);
 252        return 0;
 253}
 254#endif
 255
 256/*
  257 * Broadcast the event to the cpus which are set in the mask (the mask gets mangled).
 258 */
 259static bool tick_do_broadcast(struct cpumask *mask)
 260{
 261        int cpu = smp_processor_id();
 262        struct tick_device *td;
 263        bool local = false;
 264
 265        /*
  266         * Check if the current cpu is in the mask
 267         */
 268        if (cpumask_test_cpu(cpu, mask)) {
 269                struct clock_event_device *bc = tick_broadcast_device.evtdev;
 270
 271                cpumask_clear_cpu(cpu, mask);
 272                /*
 273                 * We only run the local handler, if the broadcast
 274                 * device is not hrtimer based. Otherwise we run into
 275                 * a hrtimer recursion.
 276                 *
 277                 * local timer_interrupt()
 278                 *   local_handler()
 279                 *     expire_hrtimers()
 280                 *       bc_handler()
 281                 *         local_handler()
 282                 *           expire_hrtimers()
 283                 */
 284                local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
 285        }
 286
 287        if (!cpumask_empty(mask)) {
 288                /*
 289                 * It might be necessary to actually check whether the devices
  290                 * have different broadcast functions. For now, just use that
  291                 * of the first device. This works as long as we have this
  292                 * misfeature only on x86 (lapic).
 293                 */
 294                td = &per_cpu(tick_cpu_device, cpumask_first(mask));
 295                td->evtdev->broadcast(mask);
 296        }
 297        return local;
 298}
 299
 300/*
 301 * Periodic broadcast:
 302 * - invoke the broadcast handlers
 303 */
 304static bool tick_do_periodic_broadcast(void)
 305{
 306        cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
 307        return tick_do_broadcast(tmpmask);
 308}
 309
 310/*
 311 * Event handler for periodic broadcast ticks
 312 */
 313static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
 314{
 315        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 316        bool bc_local;
 317
 318        raw_spin_lock(&tick_broadcast_lock);
 319
 320        /* Handle spurious interrupts gracefully */
 321        if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
 322                raw_spin_unlock(&tick_broadcast_lock);
 323                return;
 324        }
 325
 326        bc_local = tick_do_periodic_broadcast();
 327
 328        if (clockevent_state_oneshot(dev)) {
 329                ktime_t next = ktime_add(dev->next_event, tick_period);
 330
 331                clockevents_program_event(dev, next, true);
 332        }
 333        raw_spin_unlock(&tick_broadcast_lock);
 334
 335        /*
 336         * We run the handler of the local cpu after dropping
 337         * tick_broadcast_lock because the handler might deadlock when
 338         * trying to switch to oneshot mode.
 339         */
 340        if (bc_local)
 341                td->evtdev->event_handler(td->evtdev);
 342}
 343
 344/**
 345 * tick_broadcast_control - Enable/disable or force broadcast mode
 346 * @mode:       The selected broadcast mode
 347 *
 348 * Called when the system enters a state where affected tick devices
 349 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 350 *
 351 * Called with interrupts disabled, so clockevents_lock is not
 352 * required here because the local clock event device cannot go away
 353 * under us.
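      *
      * A hedged usage sketch (not lifted from a particular driver): a cpuidle
      * or local timer driver whose per cpu timer stops in deep power states
      * would, on each affected CPU and with interrupts disabled, do roughly
      *
      *   tick_broadcast_control(TICK_BROADCAST_ON);    /* rely on broadcast */
      *   ...
      *   tick_broadcast_control(TICK_BROADCAST_OFF);   /* local timer is safe again */
      *
      * TICK_BROADCAST_FORCE acts like ON but cannot be undone.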
 354 */
 355void tick_broadcast_control(enum tick_broadcast_mode mode)
 356{
 357        struct clock_event_device *bc, *dev;
 358        struct tick_device *td;
 359        int cpu, bc_stopped;
 360
 361        td = this_cpu_ptr(&tick_cpu_device);
 362        dev = td->evtdev;
 363
 364        /*
  365         * Is the device not affected by the power state?
 366         */
 367        if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
 368                return;
 369
 370        if (!tick_device_is_functional(dev))
 371                return;
 372
 373        raw_spin_lock(&tick_broadcast_lock);
 374        cpu = smp_processor_id();
 375        bc = tick_broadcast_device.evtdev;
 376        bc_stopped = cpumask_empty(tick_broadcast_mask);
 377
 378        switch (mode) {
 379        case TICK_BROADCAST_FORCE:
 380                tick_broadcast_forced = 1;
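                     /* Fall through - TICK_BROADCAST_FORCE implies TICK_BROADCAST_ON */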
 381        case TICK_BROADCAST_ON:
 382                cpumask_set_cpu(cpu, tick_broadcast_on);
 383                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
 384                        /*
 385                         * Only shutdown the cpu local device, if:
 386                         *
 387                         * - the broadcast device exists
 388                         * - the broadcast device is not a hrtimer based one
 389                         * - the broadcast device is in periodic mode to
 390                         *   avoid a hickup during switch to oneshot mode
 391                         */
 392                        if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
 393                            tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
 394                                clockevents_shutdown(dev);
 395                }
 396                break;
 397
 398        case TICK_BROADCAST_OFF:
 399                if (tick_broadcast_forced)
 400                        break;
 401                cpumask_clear_cpu(cpu, tick_broadcast_on);
 402                if (!tick_device_is_functional(dev))
 403                        break;
 404                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
 405                        if (tick_broadcast_device.mode ==
 406                            TICKDEV_MODE_PERIODIC)
 407                                tick_setup_periodic(dev, 0);
 408                }
 409                break;
 410        }
 411
 412        if (bc) {
 413                if (cpumask_empty(tick_broadcast_mask)) {
 414                        if (!bc_stopped)
 415                                clockevents_shutdown(bc);
 416                } else if (bc_stopped) {
 417                        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
 418                                tick_broadcast_start_periodic(bc);
 419                        else
 420                                tick_broadcast_setup_oneshot(bc);
 421                }
 422        }
 423        raw_spin_unlock(&tick_broadcast_lock);
 424}
 425EXPORT_SYMBOL_GPL(tick_broadcast_control);
 426
 427/*
 428 * Set the periodic handler depending on broadcast on/off
 429 */
 430void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
 431{
 432        if (!broadcast)
 433                dev->event_handler = tick_handle_periodic;
 434        else
 435                dev->event_handler = tick_handle_periodic_broadcast;
 436}
 437
 438#ifdef CONFIG_HOTPLUG_CPU
 439/*
 440 * Remove a CPU from broadcasting
 441 */
 442void tick_shutdown_broadcast(unsigned int cpu)
 443{
 444        struct clock_event_device *bc;
 445        unsigned long flags;
 446
 447        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 448
 449        bc = tick_broadcast_device.evtdev;
 450        cpumask_clear_cpu(cpu, tick_broadcast_mask);
 451        cpumask_clear_cpu(cpu, tick_broadcast_on);
 452
 453        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
 454                if (bc && cpumask_empty(tick_broadcast_mask))
 455                        clockevents_shutdown(bc);
 456        }
 457
 458        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 459}
 460#endif
 461
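     /*
      * Shut the broadcast device down across suspend. tick_resume_broadcast()
      * brings it back on resume.
      */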
 462void tick_suspend_broadcast(void)
 463{
 464        struct clock_event_device *bc;
 465        unsigned long flags;
 466
 467        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 468
 469        bc = tick_broadcast_device.evtdev;
 470        if (bc)
 471                clockevents_shutdown(bc);
 472
 473        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 474}
 475
 476/*
 477 * This is called from tick_resume_local() on a resuming CPU. That's
 478 * called from the core resume function, tick_unfreeze() and the magic XEN
 479 * resume hackery.
 480 *
  481 * In none of these cases can the broadcast device mode change, and the
 482 * bit of the resuming CPU in the broadcast mask is safe as well.
 483 */
 484bool tick_resume_check_broadcast(void)
 485{
 486        if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
 487                return false;
 488        else
 489                return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
 490}
 491
 492void tick_resume_broadcast(void)
 493{
 494        struct clock_event_device *bc;
 495        unsigned long flags;
 496
 497        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 498
 499        bc = tick_broadcast_device.evtdev;
 500
 501        if (bc) {
 502                clockevents_tick_resume(bc);
 503
 504                switch (tick_broadcast_device.mode) {
 505                case TICKDEV_MODE_PERIODIC:
 506                        if (!cpumask_empty(tick_broadcast_mask))
 507                                tick_broadcast_start_periodic(bc);
 508                        break;
 509                case TICKDEV_MODE_ONESHOT:
 510                        if (!cpumask_empty(tick_broadcast_mask))
 511                                tick_resume_broadcast_oneshot(bc);
 512                        break;
 513                }
 514        }
 515        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 516}
 517
 518#ifdef CONFIG_TICK_ONESHOT
 519
 520static cpumask_var_t tick_broadcast_oneshot_mask;
 521static cpumask_var_t tick_broadcast_pending_mask;
 522static cpumask_var_t tick_broadcast_force_mask;
 523
 524/*
 525 * Exposed for debugging: see timer_list.c
 526 */
 527struct cpumask *tick_get_broadcast_oneshot_mask(void)
 528{
 529        return tick_broadcast_oneshot_mask;
 530}
 531
 532/*
 533 * Called before going idle with interrupts disabled. Checks whether a
  534 * broadcast event from another CPU is about to happen. We detected
 535 * that in tick_broadcast_oneshot_control(). The callsite can use this
 536 * to avoid a deep idle transition as we are about to get the
 537 * broadcast IPI right away.
 538 */
 539int tick_check_broadcast_expired(void)
 540{
 541        return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
 542}
 543
 544/*
 545 * Set broadcast interrupt affinity
 546 */
 547static void tick_broadcast_set_affinity(struct clock_event_device *bc,
 548                                        const struct cpumask *cpumask)
 549{
 550        if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
 551                return;
 552
 553        if (cpumask_equal(bc->cpumask, cpumask))
 554                return;
 555
 556        bc->cpumask = cpumask;
 557        irq_set_affinity(bc->irq, bc->cpumask);
 558}
 559
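     /*
      * Program the broadcast device to expire at @expires and, if the device
      * supports it (CLOCK_EVT_FEAT_DYNIRQ), steer its interrupt to @cpu.
      */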
 560static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
 561                                     ktime_t expires)
 562{
 563        if (!clockevent_state_oneshot(bc))
 564                clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
 565
 566        clockevents_program_event(bc, expires, 1);
 567        tick_broadcast_set_affinity(bc, cpumask_of(cpu));
 568}
 569
 570static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
 571{
 572        clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
 573}
 574
 575/*
 576 * Called from irq_enter() when idle was interrupted to reenable the
 577 * per cpu device.
 578 */
 579void tick_check_oneshot_broadcast_this_cpu(void)
 580{
 581        if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
 582                struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 583
 584                /*
 585                 * We might be in the middle of switching over from
 586                 * periodic to oneshot. If the CPU has not yet
 587                 * switched over, leave the device alone.
 588                 */
 589                if (td->mode == TICKDEV_MODE_ONESHOT) {
 590                        clockevents_switch_state(td->evtdev,
 591                                              CLOCK_EVT_STATE_ONESHOT);
 592                }
 593        }
 594}
 595
 596/*
 597 * Handle oneshot mode broadcasting
 598 */
 599static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 600{
 601        struct tick_device *td;
 602        ktime_t now, next_event;
 603        int cpu, next_cpu = 0;
 604        bool bc_local;
 605
 606        raw_spin_lock(&tick_broadcast_lock);
 607        dev->next_event.tv64 = KTIME_MAX;
 608        next_event.tv64 = KTIME_MAX;
 609        cpumask_clear(tmpmask);
 610        now = ktime_get();
 611        /* Find all expired events */
 612        for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
 613                td = &per_cpu(tick_cpu_device, cpu);
 614                if (td->evtdev->next_event.tv64 <= now.tv64) {
 615                        cpumask_set_cpu(cpu, tmpmask);
 616                        /*
 617                         * Mark the remote cpu in the pending mask, so
 618                         * it can avoid reprogramming the cpu local
 619                         * timer in tick_broadcast_oneshot_control().
 620                         */
 621                        cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
 622                } else if (td->evtdev->next_event.tv64 < next_event.tv64) {
 623                        next_event.tv64 = td->evtdev->next_event.tv64;
 624                        next_cpu = cpu;
 625                }
 626        }
 627
 628        /*
 629         * Remove the current cpu from the pending mask. The event is
  630         * delivered immediately in tick_do_broadcast()!
 631         */
 632        cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
 633
 634        /* Take care of enforced broadcast requests */
 635        cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
 636        cpumask_clear(tick_broadcast_force_mask);
 637
 638        /*
 639         * Sanity check. Catch the case where we try to broadcast to
 640         * offline cpus.
 641         */
 642        if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
 643                cpumask_and(tmpmask, tmpmask, cpu_online_mask);
 644
 645        /*
  646         * Wake up the cpus which have an expired event.
 647         */
 648        bc_local = tick_do_broadcast(tmpmask);
 649
 650        /*
  651         * Two reasons to reprogram:
 652         *
 653         * - The global event did not expire any CPU local
 654         * events. This happens in dyntick mode, as the maximum PIT
 655         * delta is quite small.
 656         *
 657         * - There are pending events on sleeping CPUs which were not
  658         * in the event mask.
 659         */
 660        if (next_event.tv64 != KTIME_MAX)
 661                tick_broadcast_set_event(dev, next_cpu, next_event);
 662
 663        raw_spin_unlock(&tick_broadcast_lock);
 664
 665        if (bc_local) {
 666                td = this_cpu_ptr(&tick_cpu_device);
 667                td->evtdev->event_handler(td->evtdev);
 668        }
 669}
 670
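     /*
      * A hrtimer based broadcast "device" is a hrtimer bound to one CPU. That
      * CPU must not go deep idle while an event is armed, so report -EBUSY
      * for it; all other cases return 0.
      */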
 671static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
 672{
 673        if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
 674                return 0;
 675        if (bc->next_event.tv64 == KTIME_MAX)
 676                return 0;
 677        return bc->bound_on == cpu ? -EBUSY : 0;
 678}
 679
 680static void broadcast_shutdown_local(struct clock_event_device *bc,
 681                                     struct clock_event_device *dev)
 682{
 683        /*
  684         * For hrtimer based broadcasting we cannot shut down the cpu
 685         * local device if our own event is the first one to expire or
 686         * if we own the broadcast timer.
 687         */
 688        if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
 689                if (broadcast_needs_cpu(bc, smp_processor_id()))
 690                        return;
 691                if (dev->next_event.tv64 < bc->next_event.tv64)
 692                        return;
 693        }
 694        clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 695}
 696
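     /*
      * Core of the oneshot broadcast idle handshake. Called with interrupts
      * disabled (the lock is taken without irqsave), normally via the
      * tick_broadcast_oneshot_control() wrapper:
      *
      *   __tick_broadcast_oneshot_control(TICK_BROADCAST_ENTER);  /* before deep idle */
      *   ...
      *   __tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);   /* after wakeup */
      *
      * Returns -EBUSY when the CPU may not enter deep idle, e.g. because it
      * owns the hrtimer based broadcast or the broadcast IPI is imminent.
      */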
 697int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 698{
 699        struct clock_event_device *bc, *dev;
 700        int cpu, ret = 0;
 701        ktime_t now;
 702
 703        /*
 704         * If there is no broadcast device, tell the caller not to go
 705         * into deep idle.
 706         */
 707        if (!tick_broadcast_device.evtdev)
 708                return -EBUSY;
 709
 710        dev = this_cpu_ptr(&tick_cpu_device)->evtdev;
 711
 712        raw_spin_lock(&tick_broadcast_lock);
 713        bc = tick_broadcast_device.evtdev;
 714        cpu = smp_processor_id();
 715
 716        if (state == TICK_BROADCAST_ENTER) {
 717                /*
 718                 * If the current CPU owns the hrtimer broadcast
 719                 * mechanism, it cannot go deep idle and we do not add
 720                 * the CPU to the broadcast mask. We don't have to go
 721                 * through the EXIT path as the local timer is not
  722                 * shut down.
 723                 */
 724                ret = broadcast_needs_cpu(bc, cpu);
 725                if (ret)
 726                        goto out;
 727
 728                /*
 729                 * If the broadcast device is in periodic mode, we
 730                 * return.
 731                 */
 732                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
 733                        /* If it is a hrtimer based broadcast, return busy */
 734                        if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
 735                                ret = -EBUSY;
 736                        goto out;
 737                }
 738
 739                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
 740                        WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
 741
 742                        /* Conditionally shut down the local timer. */
 743                        broadcast_shutdown_local(bc, dev);
 744
 745                        /*
 746                         * We only reprogram the broadcast timer if we
  747                         * did not mark ourselves in the force mask and
 748                         * if the cpu local event is earlier than the
 749                         * broadcast event. If the current CPU is in
 750                         * the force mask, then we are going to be
 751                         * woken by the IPI right away; we return
 752                         * busy, so the CPU does not try to go deep
 753                         * idle.
 754                         */
 755                        if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
 756                                ret = -EBUSY;
 757                        } else if (dev->next_event.tv64 < bc->next_event.tv64) {
 758                                tick_broadcast_set_event(bc, cpu, dev->next_event);
 759                                /*
 760                                 * In case of hrtimer broadcasts the
 761                                 * programming might have moved the
 762                                 * timer to this cpu. If yes, remove
 763                                 * us from the broadcast mask and
 764                                 * return busy.
 765                                 */
 766                                ret = broadcast_needs_cpu(bc, cpu);
 767                                if (ret) {
 768                                        cpumask_clear_cpu(cpu,
 769                                                tick_broadcast_oneshot_mask);
 770                                }
 771                        }
 772                }
 773        } else {
 774                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
 775                        clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
 776                        /*
 777                         * The cpu which was handling the broadcast
 778                         * timer marked this cpu in the broadcast
 779                         * pending mask and fired the broadcast
 780                         * IPI. So we are going to handle the expired
 781                         * event anyway via the broadcast IPI
 782                         * handler. No need to reprogram the timer
 783                         * with an already expired event.
 784                         */
 785                        if (cpumask_test_and_clear_cpu(cpu,
 786                                       tick_broadcast_pending_mask))
 787                                goto out;
 788
 789                        /*
 790                         * Bail out if there is no next event.
 791                         */
 792                        if (dev->next_event.tv64 == KTIME_MAX)
 793                                goto out;
 794                        /*
 795                         * If the pending bit is not set, then we are
 796                         * either the CPU handling the broadcast
 797                         * interrupt or we got woken by something else.
 798                         *
  799                         * We are no longer in the broadcast mask, so
 800                         * if the cpu local expiry time is already
 801                         * reached, we would reprogram the cpu local
 802                         * timer with an already expired event.
 803                         *
 804                         * This can lead to a ping-pong when we return
  805                         * to idle and therefore rearm the broadcast
 806                         * timer before the cpu local timer was able
 807                         * to fire. This happens because the forced
 808                         * reprogramming makes sure that the event
 809                         * will happen in the future and depending on
 810                         * the min_delta setting this might be far
 811                         * enough out that the ping-pong starts.
 812                         *
 813                         * If the cpu local next_event has expired
 814                         * then we know that the broadcast timer
 815                         * next_event has expired as well and
 816                         * broadcast is about to be handled. So we
 817                         * avoid reprogramming and enforce that the
 818                         * broadcast handler, which did not run yet,
 819                         * will invoke the cpu local handler.
 820                         *
 821                         * We cannot call the handler directly from
 822                         * here, because we might be in a NOHZ phase
 823                         * and we did not go through the irq_enter()
 824                         * nohz fixups.
 825                         */
 826                        now = ktime_get();
 827                        if (dev->next_event.tv64 <= now.tv64) {
 828                                cpumask_set_cpu(cpu, tick_broadcast_force_mask);
 829                                goto out;
 830                        }
 831                        /*
 832                         * We got woken by something else. Reprogram
 833                         * the cpu local timer device.
 834                         */
 835                        tick_program_event(dev->next_event, 1);
 836                }
 837        }
 838out:
 839        raw_spin_unlock(&tick_broadcast_lock);
 840        return ret;
 841}
 842
 843/*
  844 * Reset the oneshot broadcast for a cpu
 845 *
 846 * Called with tick_broadcast_lock held
 847 */
 848static void tick_broadcast_clear_oneshot(int cpu)
 849{
 850        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 851        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
 852}
 853
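     /*
      * Pretend that all per cpu devices in @mask expire at @expires, so the
      * oneshot broadcast handler picks them up at that time.
      */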
 854static void tick_broadcast_init_next_event(struct cpumask *mask,
 855                                           ktime_t expires)
 856{
 857        struct tick_device *td;
 858        int cpu;
 859
 860        for_each_cpu(cpu, mask) {
 861                td = &per_cpu(tick_cpu_device, cpu);
 862                if (td->evtdev)
 863                        td->evtdev->next_event = expires;
 864        }
 865}
 866
 867/**
 868 * tick_broadcast_setup_oneshot - setup the broadcast device
 869 */
 870void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 871{
 872        int cpu = smp_processor_id();
 873
  874        /* Set it up only once! */
 875        if (bc->event_handler != tick_handle_oneshot_broadcast) {
 876                int was_periodic = clockevent_state_periodic(bc);
 877
 878                bc->event_handler = tick_handle_oneshot_broadcast;
 879
 880                /*
 881                 * We must be careful here. There might be other CPUs
 882                 * waiting for periodic broadcast. We need to set the
 883                 * oneshot_mask bits for those and program the
 884                 * broadcast device to fire.
 885                 */
 886                cpumask_copy(tmpmask, tick_broadcast_mask);
 887                cpumask_clear_cpu(cpu, tmpmask);
 888                cpumask_or(tick_broadcast_oneshot_mask,
 889                           tick_broadcast_oneshot_mask, tmpmask);
 890
 891                if (was_periodic && !cpumask_empty(tmpmask)) {
 892                        clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
 893                        tick_broadcast_init_next_event(tmpmask,
 894                                                       tick_next_period);
 895                        tick_broadcast_set_event(bc, cpu, tick_next_period);
 896                } else
 897                        bc->next_event.tv64 = KTIME_MAX;
 898        } else {
 899                /*
 900                 * The first cpu which switches to oneshot mode sets
 901                 * the bit for all other cpus which are in the general
 902                 * (periodic) broadcast mask. So the bit is set and
 903                 * would prevent the first broadcast enter after this
  904                 * from programming the bc device.
 905                 */
 906                tick_broadcast_clear_oneshot(cpu);
 907        }
 908}
 909
 910/*
 911 * Select oneshot operating mode for the broadcast device
 912 */
 913void tick_broadcast_switch_to_oneshot(void)
 914{
 915        struct clock_event_device *bc;
 916        unsigned long flags;
 917
 918        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 919
 920        tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
 921        bc = tick_broadcast_device.evtdev;
 922        if (bc)
 923                tick_broadcast_setup_oneshot(bc);
 924
 925        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 926}
 927
 928#ifdef CONFIG_HOTPLUG_CPU
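     /*
      * If the hrtimer based broadcast is bound to the outgoing CPU, reprogram
      * it from here so the broadcast duty is pulled over to this CPU.
      */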
 929void hotplug_cpu__broadcast_tick_pull(int deadcpu)
 930{
 931        struct clock_event_device *bc;
 932        unsigned long flags;
 933
 934        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 935        bc = tick_broadcast_device.evtdev;
 936
 937        if (bc && broadcast_needs_cpu(bc, deadcpu)) {
 938                /* This moves the broadcast assignment to this CPU: */
 939                clockevents_program_event(bc, bc->next_event, 1);
 940        }
 941        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 942}
 943
 944/*
 945 * Remove a dead CPU from broadcasting
 946 */
 947void tick_shutdown_broadcast_oneshot(unsigned int cpu)
 948{
 949        unsigned long flags;
 950
 951        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 952
 953        /*
 954         * Clear the broadcast masks for the dead cpu, but do not stop
 955         * the broadcast device!
 956         */
 957        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 958        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
 959        cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
 960
 961        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 962}
 963#endif
 964
 965/*
  966 * Check whether the broadcast device is in oneshot mode
 967 */
 968int tick_broadcast_oneshot_active(void)
 969{
 970        return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
 971}
 972
 973/*
 974 * Check whether the broadcast device supports oneshot.
 975 */
 976bool tick_broadcast_oneshot_available(void)
 977{
 978        struct clock_event_device *bc = tick_broadcast_device.evtdev;
 979
 980        return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
 981}
 982
 983#else
 984int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 985{
 986        struct clock_event_device *bc = tick_broadcast_device.evtdev;
 987
 988        if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
 989                return -EBUSY;
 990
 991        return 0;
 992}
 993#endif
 994
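     /* Allocate the broadcast cpumasks early during boot (no sleeping, hence GFP_NOWAIT). */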
 995void __init tick_broadcast_init(void)
 996{
 997        zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
 998        zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
 999        zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
1000#ifdef CONFIG_TICK_ONESHOT
1001        zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
1002        zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
1003        zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
1004#endif
1005}
1006