linux/kernel/time/tick-broadcast.c
/*
 * linux/kernel/time/tick-broadcast.c
 *
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 *
 * This code is licensed under the GPL version 2. For details see
 * kernel-base/COPYING.
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

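/*
 * Overview: per cpu devices which stop in deep power states
 * (CLOCK_EVT_FEAT_C3STOP) are tracked in tick_broadcast_mask (periodic
 * mode) and tick_broadcast_oneshot_mask (oneshot mode, while the CPU is
 * idle). The broadcast device below keeps running in those states; when
 * it fires, tick_do_broadcast() uses the broadcast() callback of a per
 * cpu device to IPI the affected CPUs, which then run their local event
 * handlers as if their own timer had expired.
 */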
static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask;
static cpumask_var_t tick_broadcast_on;
static cpumask_var_t tmpmask;
static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
static int tick_broadcast_force;

#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_clear_oneshot(int cpu);
#else
static inline void tick_broadcast_clear_oneshot(int cpu) { }
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
        return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
        return tick_broadcast_mask;
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
        if (bc)
                tick_setup_periodic(bc, 1);
}

/*
 * Check if the device can be utilized as the broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
                                        struct clock_event_device *newdev)
{
        if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
            (newdev->features & CLOCK_EVT_FEAT_C3STOP))
                return false;

        if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
            !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
                return false;

        return !curdev || newdev->rating > curdev->rating;
}

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev)
{
        struct clock_event_device *cur = tick_broadcast_device.evtdev;

        if (!tick_check_broadcast_device(cur, dev))
                return;

        if (!try_module_get(dev->owner))
                return;

        clockevents_exchange_device(cur, dev);
        if (cur)
                cur->event_handler = clockevents_handle_noop;
        tick_broadcast_device.evtdev = dev;
        if (!cpumask_empty(tick_broadcast_mask))
                tick_broadcast_start_periodic(dev);
        /*
         * Inform all cpus about this. We might be in a situation
         * where we did not switch to oneshot mode because the per cpu
         * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
         * of a oneshot capable broadcast device. Without that
         * notification the system stays stuck in periodic mode
         * forever.
         */
        if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
                tick_clock_notify();
}

/*
 * Check if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
        return (dev && tick_broadcast_device.evtdev == dev);
}

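/*
 * Update the frequency of the broadcast device. Returns -ENODEV when
 * @dev is not the installed broadcast device.
 */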
int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
        int ret = -ENODEV;

        if (tick_is_broadcast_device(dev)) {
                raw_spin_lock(&tick_broadcast_lock);
                ret = __clockevents_update_freq(dev, freq);
                raw_spin_unlock(&tick_broadcast_lock);
        }
        return ret;
}


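/*
 * Fallback broadcast function: complain once if it is ever used, as it
 * means timer ticks could not be delivered to the CPUs in @mask.
 */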
static void err_broadcast(const struct cpumask *mask)
{
        pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

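/*
 * Install a broadcast function for @dev. tick_broadcast() is the
 * architecture provided broadcast IPI; note that it may be defined as
 * NULL (see tick-internal.h) when the architecture has no such IPI, in
 * which case err_broadcast() is used as a last resort.
 */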
static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
        if (!dev->broadcast)
                dev->broadcast = tick_broadcast;
        if (!dev->broadcast) {
                pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
                             dev->name);
                dev->broadcast = err_broadcast;
        }
}

/*
 * Check if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;
        unsigned long flags;
        int ret;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Devices might be registered with both periodic and oneshot
         * mode disabled. This signals that the device needs to be
         * operated from the broadcast device and is a placeholder for
         * the cpu local device.
         */
        if (!tick_device_is_functional(dev)) {
                dev->event_handler = tick_handle_periodic;
                tick_device_setup_broadcast_func(dev);
                cpumask_set_cpu(cpu, tick_broadcast_mask);
                tick_broadcast_start_periodic(bc);
                ret = 1;
        } else {
                /*
                 * Clear the broadcast bit for this cpu if the
                 * device is not power state affected.
                 */
                if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                        cpumask_clear_cpu(cpu, tick_broadcast_mask);
                else
                        tick_device_setup_broadcast_func(dev);

                /*
                 * Clear the broadcast bit if the CPU is not in
                 * periodic broadcast on state.
                 */
                if (!cpumask_test_cpu(cpu, tick_broadcast_on))
                        cpumask_clear_cpu(cpu, tick_broadcast_mask);

                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_ONESHOT:
                        /*
                         * If the system is in oneshot mode we can
                         * unconditionally clear the oneshot mask bit,
                         * because the CPU is running and therefore
                         * not in an idle state which causes the power
                         * state affected device to stop. Let the
                         * caller initialize the device.
                         */
                        tick_broadcast_clear_oneshot(cpu);
                        ret = 0;
                        break;

                case TICKDEV_MODE_PERIODIC:
                        /*
                         * If the system is in periodic mode, check
                         * whether the broadcast device can be
                         * switched off now.
                         */
                        if (cpumask_empty(tick_broadcast_mask) && bc)
                                clockevents_shutdown(bc);
                        /*
                         * If we kept the cpu in the broadcast mask,
                         * tell the caller to leave the per cpu device
                         * in shutdown state. The periodic interrupt
                         * is delivered by the broadcast device.
                         */
                        ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
                        break;
                default:
                        /* Nothing to do */
                        ret = 0;
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
        return ret;
}

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
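/*
 * Called from the broadcast IPI handler on a target CPU: run the CPU
 * local event handler as if the local device had fired.
 */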
int tick_receive_broadcast(void)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        struct clock_event_device *evt = td->evtdev;

        if (!evt)
                return -ENODEV;

        if (!evt->event_handler)
                return -EINVAL;

        evt->event_handler(evt);
        return 0;
}
#endif

/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 */
static bool tick_do_broadcast(struct cpumask *mask)
{
        int cpu = smp_processor_id();
        struct tick_device *td;
        bool local = false;

        /*
         * Check if the current cpu is in the mask
         */
        if (cpumask_test_cpu(cpu, mask)) {
                cpumask_clear_cpu(cpu, mask);
                local = true;
        }

        if (!cpumask_empty(mask)) {
                /*
                 * It might be necessary to actually check whether the devices
                 * have different broadcast functions. For now, just use the
                 * one of the first device. This works as long as we have this
                 * misfeature only on x86 (lapic).
                 */
                td = &per_cpu(tick_cpu_device, cpumask_first(mask));
                td->evtdev->broadcast(mask);
        }
        return local;
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static bool tick_do_periodic_broadcast(void)
{
        cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
        return tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        bool bc_local;

        raw_spin_lock(&tick_broadcast_lock);
        bc_local = tick_do_periodic_broadcast();

        if (dev->mode == CLOCK_EVT_MODE_ONESHOT) {
                ktime_t next = ktime_add(dev->next_event, tick_period);

                clockevents_program_event(dev, next, true);
        }
        raw_spin_unlock(&tick_broadcast_lock);

        /*
         * We run the handler of the local cpu after dropping
         * tick_broadcast_lock because the handler might deadlock when
         * trying to switch to oneshot mode.
         */
        if (bc_local)
                td->evtdev->event_handler(td->evtdev);
}

/*
 * Powerstate information: The system enters/leaves a state where
 * affected devices might stop
 */
static void tick_do_broadcast_on_off(unsigned long *reason)
{
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        unsigned long flags;
        int cpu, bc_stopped;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        cpu = smp_processor_id();
        td = &per_cpu(tick_cpu_device, cpu);
        dev = td->evtdev;
        bc = tick_broadcast_device.evtdev;

        /*
         * Is the device not affected by the powerstate ?
         */
        if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
                goto out;

        if (!tick_device_is_functional(dev))
                goto out;

        bc_stopped = cpumask_empty(tick_broadcast_mask);

        switch (*reason) {
        case CLOCK_EVT_NOTIFY_BROADCAST_ON:
        case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
                cpumask_set_cpu(cpu, tick_broadcast_on);
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                clockevents_shutdown(dev);
                }
                if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
                        tick_broadcast_force = 1;
                break;
        case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
                if (tick_broadcast_force)
                        break;
                cpumask_clear_cpu(cpu, tick_broadcast_on);
                if (!tick_device_is_functional(dev))
                        break;
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                tick_setup_periodic(dev, 0);
                }
                break;
        }

        if (cpumask_empty(tick_broadcast_mask)) {
                if (!bc_stopped)
                        clockevents_shutdown(bc);
        } else if (bc_stopped) {
                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                        tick_broadcast_start_periodic(bc);
                else
                        tick_broadcast_setup_oneshot(bc);
        }
out:
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Powerstate information: The system enters/leaves a state where
 * affected devices might stop.
 */
void tick_broadcast_on_off(unsigned long reason, int *oncpu)
{
        if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
                printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
                       "offline CPU #%d\n", *oncpu);
        else
                tick_do_broadcast_on_off(&reason);
}

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
        if (!broadcast)
                dev->event_handler = tick_handle_periodic;
        else
                dev->event_handler = tick_handle_periodic_broadcast;
}

/*
 * Remove a CPU from broadcasting
 */
void tick_shutdown_broadcast(unsigned int *cpup)
{
        struct clock_event_device *bc;
        unsigned long flags;
        unsigned int cpu = *cpup;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        cpumask_clear_cpu(cpu, tick_broadcast_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_on);

        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
                if (bc && cpumask_empty(tick_broadcast_mask))
                        clockevents_shutdown(bc);
        }

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

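/*
 * Shut the broadcast device down on system suspend. It is restarted by
 * tick_resume_broadcast() on resume.
 */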
void tick_suspend_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        if (bc)
                clockevents_shutdown(bc);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

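/*
 * Resume the broadcast device on system resume. Returns non-zero when
 * the tick of the calling CPU is currently provided by the broadcast
 * device, so callers can skip restarting the cpu local device.
 */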
int tick_resume_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;
        int broadcast = 0;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;

        if (bc) {
                clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);

                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_PERIODIC:
                        if (!cpumask_empty(tick_broadcast_mask))
                                tick_broadcast_start_periodic(bc);
                        broadcast = cpumask_test_cpu(smp_processor_id(),
                                                     tick_broadcast_mask);
                        break;
                case TICKDEV_MODE_ONESHOT:
                        if (!cpumask_empty(tick_broadcast_mask))
                                broadcast = tick_resume_broadcast_oneshot(bc);
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);

        return broadcast;
}


#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask;
static cpumask_var_t tick_broadcast_pending_mask;
static cpumask_var_t tick_broadcast_force_mask;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
        return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
int tick_check_broadcast_expired(void)
{
        return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
                                        const struct cpumask *cpumask)
{
        if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
                return;

        if (cpumask_equal(bc->cpumask, cpumask))
                return;

        bc->cpumask = cpumask;
        irq_set_affinity(bc->irq, bc->cpumask);
}

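/*
 * Program the broadcast device to fire at @expires on behalf of @cpu
 * and, if the device supports it, steer its interrupt to that CPU.
 */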
static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
                                    ktime_t expires, int force)
{
        int ret;

        if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
                clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);

        ret = clockevents_program_event(bc, expires, force);
        if (!ret)
                tick_broadcast_set_affinity(bc, cpumask_of(cpu));
        return ret;
}

int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
        clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
        return 0;
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
        if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
                struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

                /*
                 * We might be in the middle of switching over from
                 * periodic to oneshot. If the CPU has not yet
                 * switched over, leave the device alone.
                 */
                if (td->mode == TICKDEV_MODE_ONESHOT) {
                        clockevents_set_mode(td->evtdev,
                                             CLOCK_EVT_MODE_ONESHOT);
                }
        }
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
        struct tick_device *td;
        ktime_t now, next_event;
        int cpu, next_cpu = 0;

        raw_spin_lock(&tick_broadcast_lock);
again:
        dev->next_event.tv64 = KTIME_MAX;
        next_event.tv64 = KTIME_MAX;
        cpumask_clear(tmpmask);
        now = ktime_get();
        /* Find all expired events */
        for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev->next_event.tv64 <= now.tv64) {
                        cpumask_set_cpu(cpu, tmpmask);
                        /*
                         * Mark the remote cpu in the pending mask, so
                         * it can avoid reprogramming the cpu local
                         * timer in tick_broadcast_oneshot_control().
                         */
                        cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
                } else if (td->evtdev->next_event.tv64 < next_event.tv64) {
                        next_event.tv64 = td->evtdev->next_event.tv64;
                        next_cpu = cpu;
                }
        }

        /*
         * Remove the current cpu from the pending mask. The event is
         * delivered immediately in tick_do_broadcast() !
         */
        cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

        /* Take care of enforced broadcast requests */
        cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
        cpumask_clear(tick_broadcast_force_mask);

        /*
         * Sanity check. Catch the case where we try to broadcast to
         * offline cpus.
         */
        if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
                cpumask_and(tmpmask, tmpmask, cpu_online_mask);

        /*
         * Wakeup the cpus which have an expired event and handle the
         * broadcast event of the local cpu.
         */
        if (tick_do_broadcast(tmpmask)) {
                td = this_cpu_ptr(&tick_cpu_device);
                td->evtdev->event_handler(td->evtdev);
        }

        /*
         * Two reasons for reprogram:
         *
         * - The global event did not expire any CPU local
         * events. This happens in dyntick mode, as the maximum PIT
         * delta is quite small.
         *
         * - There are pending events on sleeping CPUs which were not
         * in the event mask
         */
        if (next_event.tv64 != KTIME_MAX) {
                /*
                 * Rearm the broadcast device. If event expired,
                 * repeat the above
                 */
                if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
                        goto again;
        }
        raw_spin_unlock(&tick_broadcast_lock);
}

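/*
 * For hrtimer based broadcast devices: returns -EBUSY when @cpu is the
 * CPU the broadcast hrtimer is bound to and an event is pending, as
 * that CPU must stay out of deep idle to serve the broadcast.
 */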
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
        if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
                return 0;
        if (bc->next_event.tv64 == KTIME_MAX)
                return 0;
        return bc->bound_on == cpu ? -EBUSY : 0;
}

static void broadcast_shutdown_local(struct clock_event_device *bc,
                                     struct clock_event_device *dev)
{
        /*
         * For hrtimer based broadcasting we cannot shutdown the cpu
         * local device if our own event is the first one to expire or
         * if we own the broadcast timer.
         */
        if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
                if (broadcast_needs_cpu(bc, smp_processor_id()))
                        return;
                if (dev->next_event.tv64 < bc->next_event.tv64)
                        return;
        }
        clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
}

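/*
 * Called in the hotplug path after @deadcpu has gone down: if the dead
 * CPU owned the hrtimer based broadcast device, reprogramming it here
 * migrates the broadcast duty to the calling CPU.
 */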
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
        bc = tick_broadcast_device.evtdev;

        if (bc && broadcast_needs_cpu(bc, deadcpu)) {
                /* This moves the broadcast assignment to this CPU: */
                clockevents_program_event(bc, bc->next_event, 1);
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Powerstate information: The system enters/leaves a state where
 * affected devices might stop.
 * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
 */
int tick_broadcast_oneshot_control(unsigned long reason)
{
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        unsigned long flags;
        ktime_t now;
        int cpu, ret = 0;

        /*
         * Periodic mode does not care about the enter/exit of power
         * states
         */
        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                return 0;

        /*
         * We are called with preemption disabled from the depth of the
         * idle code, so we can't be moved away.
         */
        cpu = smp_processor_id();
        td = &per_cpu(tick_cpu_device, cpu);
        dev = td->evtdev;

        if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                return 0;

        bc = tick_broadcast_device.evtdev;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
        if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
                        broadcast_shutdown_local(bc, dev);
                        /*
                         * We only reprogram the broadcast timer if we
                         * did not mark ourself in the force mask and
                         * if the cpu local event is earlier than the
                         * broadcast event. If the current CPU is in
                         * the force mask, then we are going to be
                         * woken by the IPI right away.
                         */
                        if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
                            dev->next_event.tv64 < bc->next_event.tv64)
                                tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
                }
                /*
                 * If the current CPU owns the hrtimer broadcast
                 * mechanism, it cannot go deep idle and we remove the
                 * CPU from the broadcast mask. We don't have to go
                 * through the EXIT path as the local timer is not
                 * shutdown.
                 */
                ret = broadcast_needs_cpu(bc, cpu);
                if (ret)
                        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        } else {
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
                        /*
                         * The cpu which was handling the broadcast
                         * timer marked this cpu in the broadcast
                         * pending mask and fired the broadcast
                         * IPI. So we are going to handle the expired
                         * event anyway via the broadcast IPI
                         * handler. No need to reprogram the timer
                         * with an already expired event.
                         */
                        if (cpumask_test_and_clear_cpu(cpu,
                                       tick_broadcast_pending_mask))
                                goto out;

                        /*
                         * Bail out if there is no next event.
                         */
                        if (dev->next_event.tv64 == KTIME_MAX)
                                goto out;
                        /*
                         * If the pending bit is not set, then we are
                         * either the CPU handling the broadcast
                         * interrupt or we got woken by something else.
                         *
                         * We are no longer in the broadcast mask, so
                         * if the cpu local expiry time is already
                         * reached, we would reprogram the cpu local
                         * timer with an already expired event.
                         *
                         * This can lead to a ping-pong when we return
                         * to idle and therefore rearm the broadcast
                         * timer before the cpu local timer was able
                         * to fire. This happens because the forced
                         * reprogramming makes sure that the event
                         * will happen in the future and depending on
                         * the min_delta setting this might be far
                         * enough out that the ping-pong starts.
                         *
                         * If the cpu local next_event has expired
                         * then we know that the broadcast timer
                         * next_event has expired as well and
                         * broadcast is about to be handled. So we
                         * avoid reprogramming and enforce that the
                         * broadcast handler, which did not run yet,
                         * will invoke the cpu local handler.
                         *
                         * We cannot call the handler directly from
                         * here, because we might be in a NOHZ phase
                         * and we did not go through the irq_enter()
                         * nohz fixups.
                         */
                        now = ktime_get();
                        if (dev->next_event.tv64 <= now.tv64) {
                                cpumask_set_cpu(cpu, tick_broadcast_force_mask);
                                goto out;
                        }
                        /*
                         * We got woken by something else. Reprogram
                         * the cpu local timer device.
                         */
                        tick_program_event(dev->next_event, 1);
                }
        }
out:
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
        return ret;
}

/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

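/*
 * Set the next_event of all per cpu devices in @mask to @expires, so
 * the switch from periodic to oneshot broadcast starts from a known
 * expiry time.
 */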
static void tick_broadcast_init_next_event(struct cpumask *mask,
                                           ktime_t expires)
{
        struct tick_device *td;
        int cpu;

        for_each_cpu(cpu, mask) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev)
                        td->evtdev->next_event = expires;
        }
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 */
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
        int cpu = smp_processor_id();

        /* Set it up only once ! */
        if (bc->event_handler != tick_handle_oneshot_broadcast) {
                int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;

                bc->event_handler = tick_handle_oneshot_broadcast;

                /*
                 * We must be careful here. There might be other CPUs
                 * waiting for periodic broadcast. We need to set the
                 * oneshot_mask bits for those and program the
                 * broadcast device to fire.
                 */
                cpumask_copy(tmpmask, tick_broadcast_mask);
                cpumask_clear_cpu(cpu, tmpmask);
                cpumask_or(tick_broadcast_oneshot_mask,
                           tick_broadcast_oneshot_mask, tmpmask);

                if (was_periodic && !cpumask_empty(tmpmask)) {
                        clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
                        tick_broadcast_init_next_event(tmpmask,
                                                       tick_next_period);
                        tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
                } else
                        bc->next_event.tv64 = KTIME_MAX;
        } else {
                /*
                 * The first cpu which switches to oneshot mode sets
                 * the bit for all other cpus which are in the general
                 * (periodic) broadcast mask. So the bit is set and
                 * would prevent the first broadcast enter after this
                 * from programming the bc device.
                 */
                tick_broadcast_clear_oneshot(cpu);
        }
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
        bc = tick_broadcast_device.evtdev;
        if (bc)
                tick_broadcast_setup_oneshot(bc);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}


/*
 * Remove a dead CPU from broadcasting
 */
void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
{
        unsigned long flags;
        unsigned int cpu = *cpup;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Clear the broadcast masks for the dead cpu, but do not stop
         * the broadcast device!
         */
        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_force_mask);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Check whether the broadcast device is in oneshot mode
 */
int tick_broadcast_oneshot_active(void)
{
        return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;

        return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#endif

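/*
 * Allocate the cpumasks used by the broadcast code. Called early from
 * tick_init(), before any broadcast device can be installed.
 */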
void __init tick_broadcast_init(void)
{
        zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
        zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
        zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}