linux/kernel/time/tick-broadcast.c
/*
 * linux/kernel/time/tick-broadcast.c
 *
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 *
 * This code is licensed under the GPL version 2. For details see
 * kernel-base/COPYING.
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask;
static cpumask_var_t tick_broadcast_on;
static cpumask_var_t tmpmask;
static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
static int tick_broadcast_force;

#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_clear_oneshot(int cpu);
#else
static inline void tick_broadcast_clear_oneshot(int cpu) { }
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
        return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
        return tick_broadcast_mask;
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
        if (bc)
                tick_setup_periodic(bc, 1);
}

/*
 * Check if the device can be utilized as broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
                                        struct clock_event_device *newdev)
{
        if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
            (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
            (newdev->features & CLOCK_EVT_FEAT_C3STOP))
                return false;

        if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
            !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
                return false;

        return !curdev || newdev->rating > curdev->rating;
}

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev)
{
        struct clock_event_device *cur = tick_broadcast_device.evtdev;

        if (!tick_check_broadcast_device(cur, dev))
                return;

        if (!try_module_get(dev->owner))
                return;

        clockevents_exchange_device(cur, dev);
        if (cur)
                cur->event_handler = clockevents_handle_noop;
        tick_broadcast_device.evtdev = dev;
        if (!cpumask_empty(tick_broadcast_mask))
                tick_broadcast_start_periodic(dev);
        /*
         * Inform all cpus about this. We might be in a situation
         * where we did not switch to oneshot mode because the per cpu
         * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
         * of a oneshot capable broadcast device. Without that
         * notification the system stays stuck in periodic mode
         * forever.
         */
        if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
                tick_clock_notify();
}

/*
 * Check if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
        return (dev && tick_broadcast_device.evtdev == dev);
}

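/*
 * Update the frequency of the broadcast device. Takes tick_broadcast_lock
 * to serialize against the broadcast handlers. Returns the result of
 * __clockevents_update_freq(), or -ENODEV if @dev is not the current
 * broadcast device.
 */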
int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
        int ret = -ENODEV;

        if (tick_is_broadcast_device(dev)) {
                raw_spin_lock(&tick_broadcast_lock);
                ret = __clockevents_update_freq(dev, freq);
                raw_spin_unlock(&tick_broadcast_lock);
        }
        return ret;
}


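/*
 * Last resort broadcast function, used when no real broadcast mechanism is
 * available. It only warns once; the CPUs waiting for a broadcast wakeup
 * may miss their ticks.
 */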
static void err_broadcast(const struct cpumask *mask)
{
        pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

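/*
 * Install a broadcast function for @dev. Falls back to the architecture
 * provided tick_broadcast() helper; if that is not available either, use
 * err_broadcast() which merely warns.
 */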
static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
        if (!dev->broadcast)
                dev->broadcast = tick_broadcast;
        if (!dev->broadcast) {
                pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
                             dev->name);
                dev->broadcast = err_broadcast;
        }
}

/*
 * Check if the device is dysfunctional and a placeholder which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;
        unsigned long flags;
        int ret;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Devices might be registered with both periodic and oneshot
         * mode disabled. This signals that the device needs to be
         * operated from the broadcast device and is a placeholder for
         * the cpu local device.
         */
        if (!tick_device_is_functional(dev)) {
                dev->event_handler = tick_handle_periodic;
                tick_device_setup_broadcast_func(dev);
                cpumask_set_cpu(cpu, tick_broadcast_mask);
                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                        tick_broadcast_start_periodic(bc);
                else
                        tick_broadcast_setup_oneshot(bc);
                ret = 1;
        } else {
                /*
                 * Clear the broadcast bit for this cpu if the
                 * device is not power state affected.
                 */
                if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                        cpumask_clear_cpu(cpu, tick_broadcast_mask);
                else
                        tick_device_setup_broadcast_func(dev);

                /*
                 * Clear the broadcast bit if the CPU is not in
                 * periodic broadcast on state.
                 */
                if (!cpumask_test_cpu(cpu, tick_broadcast_on))
                        cpumask_clear_cpu(cpu, tick_broadcast_mask);

                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_ONESHOT:
                        /*
                         * If the system is in oneshot mode we can
                         * unconditionally clear the oneshot mask bit,
                         * because the CPU is running and therefore
                         * not in an idle state which causes the power
                         * state affected device to stop. Let the
                         * caller initialize the device.
                         */
                        tick_broadcast_clear_oneshot(cpu);
                        ret = 0;
                        break;

                case TICKDEV_MODE_PERIODIC:
                        /*
                         * If the system is in periodic mode, check
                         * whether the broadcast device can be
                         * switched off now.
                         */
                        if (cpumask_empty(tick_broadcast_mask) && bc)
                                clockevents_shutdown(bc);
                        /*
                         * If we kept the cpu in the broadcast mask,
                         * tell the caller to leave the per cpu device
                         * in shutdown state. The periodic interrupt
                         * is delivered by the broadcast device.
                         */
                        ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
                        break;
                default:
                        /* Nothing to do */
                        ret = 0;
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
        return ret;
}

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
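/*
 * Called by the architecture specific broadcast IPI handler on the target
 * CPU to deliver the tick via the CPU local event handler.
 */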
int tick_receive_broadcast(void)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        struct clock_event_device *evt = td->evtdev;

        if (!evt)
                return -ENODEV;

        if (!evt->event_handler)
                return -EINVAL;

        evt->event_handler(evt);
        return 0;
}
#endif

/*
 * Broadcast the event to the cpus which are set in the mask (the mask
 * gets mangled in the process).
 */
static void tick_do_broadcast(struct cpumask *mask)
{
        int cpu = smp_processor_id();
        struct tick_device *td;

        /*
         * Check if the current cpu is in the mask
         */
        if (cpumask_test_cpu(cpu, mask)) {
                cpumask_clear_cpu(cpu, mask);
                td = &per_cpu(tick_cpu_device, cpu);
                td->evtdev->event_handler(td->evtdev);
        }

        if (!cpumask_empty(mask)) {
                /*
                 * It might be necessary to actually check whether the devices
                 * have different broadcast functions. For now, just use the
                 * one of the first device. This works as long as we have this
                 * misfeature only on x86 (lapic)
                 */
                td = &per_cpu(tick_cpu_device, cpumask_first(mask));
                td->evtdev->broadcast(mask);
        }
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static void tick_do_periodic_broadcast(void)
{
        cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
        tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
        ktime_t next;

        raw_spin_lock(&tick_broadcast_lock);

        tick_do_periodic_broadcast();

        /*
         * The device is in periodic mode. No reprogramming necessary:
         */
        if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
                goto unlock;

        /*
         * Set up the next period for devices which do not have
         * periodic mode. We read dev->next_event first and add to it
         * when the event already expired. clockevents_program_event()
         * sets dev->next_event only when the event is really
         * programmed to the device.
         */
        for (next = dev->next_event; ;) {
                next = ktime_add(next, tick_period);

                if (!clockevents_program_event(dev, next, false))
                        goto unlock;
                tick_do_periodic_broadcast();
        }
unlock:
        raw_spin_unlock(&tick_broadcast_lock);
}

/*
 * Powerstate information: The system enters/leaves a state where
 * affected devices might stop
 */
static void tick_do_broadcast_on_off(unsigned long *reason)
{
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        unsigned long flags;
        int cpu, bc_stopped;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        cpu = smp_processor_id();
        td = &per_cpu(tick_cpu_device, cpu);
        dev = td->evtdev;
        bc = tick_broadcast_device.evtdev;

        /*
         * Is the device not affected by the powerstate?
         */
        if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
                goto out;

        if (!tick_device_is_functional(dev))
                goto out;

        bc_stopped = cpumask_empty(tick_broadcast_mask);

        switch (*reason) {
        case CLOCK_EVT_NOTIFY_BROADCAST_ON:
        case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
                cpumask_set_cpu(cpu, tick_broadcast_on);
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                clockevents_shutdown(dev);
                }
                if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
                        tick_broadcast_force = 1;
                break;
        case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
                if (tick_broadcast_force)
                        break;
                cpumask_clear_cpu(cpu, tick_broadcast_on);
                if (!tick_device_is_functional(dev))
                        break;
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                tick_setup_periodic(dev, 0);
                }
                break;
        }

        if (cpumask_empty(tick_broadcast_mask)) {
                if (!bc_stopped)
                        clockevents_shutdown(bc);
        } else if (bc_stopped) {
                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                        tick_broadcast_start_periodic(bc);
                else
                        tick_broadcast_setup_oneshot(bc);
        }
out:
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Powerstate information: The system enters/leaves a state where
 * affected devices might stop.
 */
void tick_broadcast_on_off(unsigned long reason, int *oncpu)
{
        if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
                printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
                       "offline CPU #%d\n", *oncpu);
        else
                tick_do_broadcast_on_off(&reason);
}

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
        if (!broadcast)
                dev->event_handler = tick_handle_periodic;
        else
                dev->event_handler = tick_handle_periodic_broadcast;
}

/*
 * Remove a CPU from broadcasting
 */
void tick_shutdown_broadcast(unsigned int *cpup)
{
        struct clock_event_device *bc;
        unsigned long flags;
        unsigned int cpu = *cpup;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        cpumask_clear_cpu(cpu, tick_broadcast_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_on);

        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
                if (bc && cpumask_empty(tick_broadcast_mask))
                        clockevents_shutdown(bc);
        }

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

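/*
 * Shut down the broadcast device on system suspend. The broadcast cpumasks
 * are left untouched so that resume can restore the previous state.
 */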
void tick_suspend_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        if (bc)
                clockevents_shutdown(bc);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

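/*
 * Restore the broadcast device on system resume. Returns nonzero when the
 * tick of this CPU is delivered by the broadcast device, i.e. the caller
 * should not restart the CPU local device.
 */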
int tick_resume_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;
        int broadcast = 0;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;

        if (bc) {
                clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);

                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_PERIODIC:
                        if (!cpumask_empty(tick_broadcast_mask))
                                tick_broadcast_start_periodic(bc);
                        broadcast = cpumask_test_cpu(smp_processor_id(),
                                                     tick_broadcast_mask);
                        break;
                case TICKDEV_MODE_ONESHOT:
                        if (!cpumask_empty(tick_broadcast_mask))
                                broadcast = tick_resume_broadcast_oneshot(bc);
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);

        return broadcast;
}


#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask;
static cpumask_var_t tick_broadcast_pending_mask;
static cpumask_var_t tick_broadcast_force_mask;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
        return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
int tick_check_broadcast_expired(void)
{
        return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
                                        const struct cpumask *cpumask)
{
        if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
                return;

        if (cpumask_equal(bc->cpumask, cpumask))
                return;

        bc->cpumask = cpumask;
        irq_set_affinity(bc->irq, bc->cpumask);
}

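/*
 * Program the broadcast device for the next event on behalf of @cpu and,
 * if the device supports it (CLOCK_EVT_FEAT_DYNIRQ), move the broadcast
 * interrupt affinity to that CPU.
 */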
static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
                                    ktime_t expires, int force)
{
        int ret;

        if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
                clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);

        ret = clockevents_program_event(bc, expires, force);
        if (!ret)
                tick_broadcast_set_affinity(bc, cpumask_of(cpu));
        return ret;
}

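/*
 * Switch the broadcast device back to oneshot mode after resume.
 */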
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
        clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
        return 0;
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
        if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
                struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

                /*
                 * We might be in the middle of switching over from
                 * periodic to oneshot. If the CPU has not yet
                 * switched over, leave the device alone.
                 */
                if (td->mode == TICKDEV_MODE_ONESHOT) {
                        clockevents_set_mode(td->evtdev,
                                             CLOCK_EVT_MODE_ONESHOT);
                }
        }
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
        struct tick_device *td;
        ktime_t now, next_event;
        int cpu, next_cpu = 0;

        raw_spin_lock(&tick_broadcast_lock);
again:
        dev->next_event.tv64 = KTIME_MAX;
        next_event.tv64 = KTIME_MAX;
        cpumask_clear(tmpmask);
        now = ktime_get();
        /* Find all expired events */
        for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev->next_event.tv64 <= now.tv64) {
                        cpumask_set_cpu(cpu, tmpmask);
                        /*
                         * Mark the remote cpu in the pending mask, so
                         * it can avoid reprogramming the cpu local
                         * timer in tick_broadcast_oneshot_control().
                         */
                        cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
                } else if (td->evtdev->next_event.tv64 < next_event.tv64) {
                        next_event.tv64 = td->evtdev->next_event.tv64;
                        next_cpu = cpu;
                }
        }

        /*
         * Remove the current cpu from the pending mask. The event is
         * delivered immediately in tick_do_broadcast()!
         */
        cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

        /* Take care of enforced broadcast requests */
        cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
        cpumask_clear(tick_broadcast_force_mask);

        /*
         * Sanity check. Catch the case where we try to broadcast to
         * offline cpus.
         */
        if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
                cpumask_and(tmpmask, tmpmask, cpu_online_mask);

        /*
         * Wakeup the cpus which have an expired event.
         */
        tick_do_broadcast(tmpmask);

        /*
         * Two reasons to reprogram:
         *
         * - The global event did not expire any CPU local
         * events. This happens in dyntick mode, as the maximum PIT
         * delta is quite small.
         *
         * - There are pending events on sleeping CPUs which were not
         * in the event mask
         */
        if (next_event.tv64 != KTIME_MAX) {
                /*
                 * Rearm the broadcast device. If event expired,
                 * repeat the above
                 */
                if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
                        goto again;
        }
        raw_spin_unlock(&tick_broadcast_lock);
}

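/*
 * For an hrtimer based broadcast device, check whether @cpu is the CPU the
 * broadcast hrtimer is bound to while an event is armed. Returns -EBUSY in
 * that case, because this CPU must not enter a deep idle state.
 */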
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
        if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
                return 0;
        if (bc->next_event.tv64 == KTIME_MAX)
                return 0;
        return bc->bound_on == cpu ? -EBUSY : 0;
}

static void broadcast_shutdown_local(struct clock_event_device *bc,
                                     struct clock_event_device *dev)
{
        /*
         * For hrtimer based broadcasting we cannot shut down the cpu
         * local device if our own event is the first one to expire or
         * if we own the broadcast timer.
         */
        if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
                if (broadcast_needs_cpu(bc, smp_processor_id()))
                        return;
                if (dev->next_event.tv64 < bc->next_event.tv64)
                        return;
        }
        clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
}

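/*
 * Called when @deadcpu goes offline. If the hrtimer based broadcast device
 * was bound to that CPU, reprogram it so the broadcast duty moves to the
 * CPU running this code.
 */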
static void broadcast_move_bc(int deadcpu)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;

        if (!bc || !broadcast_needs_cpu(bc, deadcpu))
                return;
        /* This moves the broadcast assignment to this cpu */
        clockevents_program_event(bc, bc->next_event, 1);
}

/*
 * Powerstate information: The system enters/leaves a state where
 * affected devices might stop.
 * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
 */
int tick_broadcast_oneshot_control(unsigned long reason)
{
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        unsigned long flags;
        ktime_t now;
        int cpu, ret = 0;

        /*
         * Periodic mode does not care about the enter/exit of power
         * states
         */
        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                return 0;

        /*
         * We are called with preemption disabled from the depth of the
         * idle code, so we can't be moved away.
         */
        cpu = smp_processor_id();
        td = &per_cpu(tick_cpu_device, cpu);
        dev = td->evtdev;

        if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                return 0;

        bc = tick_broadcast_device.evtdev;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
        if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
                        broadcast_shutdown_local(bc, dev);
                        /*
                         * We only reprogram the broadcast timer if we
                         * did not mark ourselves in the force mask and
                         * if the cpu local event is earlier than the
                         * broadcast event. If the current CPU is in
                         * the force mask, then we are going to be
                         * woken by the IPI right away.
                         */
                        if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
                            dev->next_event.tv64 < bc->next_event.tv64)
                                tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
                }
                /*
                 * If the current CPU owns the hrtimer broadcast
                 * mechanism, it cannot go deep idle and we remove the
                 * CPU from the broadcast mask. We don't have to go
                 * through the EXIT path as the local timer is not
                 * shut down.
                 */
                ret = broadcast_needs_cpu(bc, cpu);
                if (ret)
                        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        } else {
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
                        /*
                         * The cpu which was handling the broadcast
                         * timer marked this cpu in the broadcast
                         * pending mask and fired the broadcast
                         * IPI. So we are going to handle the expired
                         * event anyway via the broadcast IPI
                         * handler. No need to reprogram the timer
                         * with an already expired event.
                         */
                        if (cpumask_test_and_clear_cpu(cpu,
                                       tick_broadcast_pending_mask))
                                goto out;

                        /*
                         * Bail out if there is no next event.
                         */
                        if (dev->next_event.tv64 == KTIME_MAX)
                                goto out;
                        /*
                         * If the pending bit is not set, then we are
                         * either the CPU handling the broadcast
                         * interrupt or we got woken by something else.
                         *
                         * We are no longer in the broadcast mask, so
                         * if the cpu local expiry time is already
                         * reached, we would reprogram the cpu local
                         * timer with an already expired event.
                         *
                         * This can lead to a ping-pong when we return
                         * to idle and therefore rearm the broadcast
                         * timer before the cpu local timer was able
                         * to fire. This happens because the forced
                         * reprogramming makes sure that the event
                         * will happen in the future and depending on
                         * the min_delta setting this might be far
                         * enough out that the ping-pong starts.
                         *
                         * If the cpu local next_event has expired
                         * then we know that the broadcast timer
                         * next_event has expired as well and
                         * broadcast is about to be handled. So we
                         * avoid reprogramming and enforce that the
                         * broadcast handler, which did not run yet,
                         * will invoke the cpu local handler.
                         *
                         * We cannot call the handler directly from
                         * here, because we might be in a NOHZ phase
                         * and we did not go through the irq_enter()
                         * nohz fixups.
                         */
                        now = ktime_get();
                        if (dev->next_event.tv64 <= now.tv64) {
                                cpumask_set_cpu(cpu, tick_broadcast_force_mask);
                                goto out;
                        }
                        /*
                         * We got woken by something else. Reprogram
                         * the cpu local timer device.
                         */
                        tick_program_event(dev->next_event, 1);
                }
        }
out:
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
        return ret;
}

/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

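/*
 * Set the next_event of all CPU local devices in @mask to @expires, so the
 * oneshot broadcast handler sees a consistent expiry time after the switch
 * from periodic mode.
 */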
static void tick_broadcast_init_next_event(struct cpumask *mask,
                                           ktime_t expires)
{
        struct tick_device *td;
        int cpu;

        for_each_cpu(cpu, mask) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev)
                        td->evtdev->next_event = expires;
        }
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 */
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
        int cpu = smp_processor_id();

        /* Set it up only once! */
        if (bc->event_handler != tick_handle_oneshot_broadcast) {
                int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;

                bc->event_handler = tick_handle_oneshot_broadcast;

                /*
                 * We must be careful here. There might be other CPUs
                 * waiting for periodic broadcast. We need to set the
                 * oneshot_mask bits for those and program the
                 * broadcast device to fire.
                 */
                cpumask_copy(tmpmask, tick_broadcast_mask);
                cpumask_clear_cpu(cpu, tmpmask);
                cpumask_or(tick_broadcast_oneshot_mask,
                           tick_broadcast_oneshot_mask, tmpmask);

                if (was_periodic && !cpumask_empty(tmpmask)) {
                        clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
                        tick_broadcast_init_next_event(tmpmask,
                                                       tick_next_period);
                        tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
                } else
                        bc->next_event.tv64 = KTIME_MAX;
        } else {
                /*
                 * The first cpu which switches to oneshot mode sets
                 * the bit for all other cpus which are in the general
                 * (periodic) broadcast mask. So the bit is set and
                 * would prevent the first broadcast enter after this
                 * from programming the bc device.
                 */
                tick_broadcast_clear_oneshot(cpu);
        }
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
        bc = tick_broadcast_device.evtdev;
        if (bc)
                tick_broadcast_setup_oneshot(bc);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}


/*
 * Remove a dead CPU from broadcasting
 */
void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
{
        unsigned long flags;
        unsigned int cpu = *cpup;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Clear the broadcast masks for the dead cpu, but do not stop
         * the broadcast device!
         */
        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_force_mask);

        broadcast_move_bc(cpu);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Check whether the broadcast device is in oneshot mode
 */
int tick_broadcast_oneshot_active(void)
{
        return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;

        return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#endif

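/*
 * Allocate the cpumasks used by the broadcast code. Called from tick_init()
 * early during boot, hence the GFP_NOWAIT allocations.
 */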
void __init tick_broadcast_init(void)
{
        zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
        zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
        zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}