linux/kernel/softirq.c
   1/*
   2 *      linux/kernel/softirq.c
   3 *
   4 *      Copyright (C) 1992 Linus Torvalds
   5 *
   6 *      Distribute under GPLv2.
   7 *
   8 *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
   9 *
  10 *      Remote softirq infrastructure is by Jens Axboe.
  11 */
  12
  13#include <linux/module.h>
  14#include <linux/kernel_stat.h>
  15#include <linux/interrupt.h>
  16#include <linux/init.h>
  17#include <linux/mm.h>
  18#include <linux/notifier.h>
  19#include <linux/percpu.h>
  20#include <linux/cpu.h>
  21#include <linux/freezer.h>
  22#include <linux/kthread.h>
  23#include <linux/rcupdate.h>
  24#include <linux/ftrace.h>
  25#include <linux/smp.h>
  26#include <linux/tick.h>
  27
  28#define CREATE_TRACE_POINTS
  29#include <trace/events/irq.h>
  30
  31#include <asm/irq.h>
  32/*
  33   - No shared variables, all the data are CPU local.
  34   - If a softirq needs serialization, let it serialize itself
   35     by its own spinlocks (see the sketch after this comment).
  36   - Even if softirq is serialized, only local cpu is marked for
  37     execution. Hence, we get something sort of weak cpu binding.
   38     Though it is still not clear whether it will result in better
   39     locality or not.
  40
  41   Examples:
  42   - NET RX softirq. It is multithreaded and does not require
  43     any global serialization.
  44   - NET TX softirq. It kicks software netdevice queues, hence
  45     it is logically serialized per device, but this serialization
  46     is invisible to common code.
  47   - Tasklets: serialized wrt itself.
  48 */
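/*
 * A concrete sketch of the "serialize yourself" rule above; the names
 * my_lock, my_work_list and my_softirq_action() are hypothetical:
 *
 *      static DEFINE_SPINLOCK(my_lock);
 *      static LIST_HEAD(my_work_list);
 *
 *      static void my_softirq_action(struct softirq_action *h)
 *      {
 *              LIST_HEAD(local);
 *
 *              spin_lock(&my_lock);
 *              list_splice_init(&my_work_list, &local);
 *              spin_unlock(&my_lock);
 *
 *              ... process the entries now on "local" ...
 *      }
 *
 * If my_lock is also taken from process context, that path must use
 * spin_lock_bh() so a softirq cannot deadlock against it on this CPU.
 */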
  49
  50#ifndef __ARCH_IRQ_STAT
  51irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
  52EXPORT_SYMBOL(irq_stat);
  53#endif
  54
  55static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
  56
  57static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
  58
  59char *softirq_to_name[NR_SOFTIRQS] = {
  60        "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
  61        "TASKLET", "SCHED", "HRTIMER",  "RCU"
  62};
  63
  64/*
   65 * We cannot loop indefinitely here to avoid userspace starvation,
   66 * but we also don't want to introduce a worst case 1/HZ latency
   67 * to the pending events, so let the scheduler balance
   68 * the softirq load for us.
  69 */
  70void wakeup_softirqd(void)
  71{
  72        /* Interrupts are disabled: no need to stop preemption */
  73        struct task_struct *tsk = __get_cpu_var(ksoftirqd);
  74
  75        if (tsk && tsk->state != TASK_RUNNING)
  76                wake_up_process(tsk);
  77}
  78
  79/*
  80 * This one is for softirq.c-internal use,
  81 * where hardirqs are disabled legitimately:
  82 */
  83#ifdef CONFIG_TRACE_IRQFLAGS
  84static void __local_bh_disable(unsigned long ip)
  85{
  86        unsigned long flags;
  87
  88        WARN_ON_ONCE(in_irq());
  89
  90        raw_local_irq_save(flags);
  91        /*
  92         * The preempt tracer hooks into add_preempt_count and will break
  93         * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
  94         * is set and before current->softirq_enabled is cleared.
  95         * We must manually increment preempt_count here and manually
  96         * call the trace_preempt_off later.
  97         */
  98        preempt_count() += SOFTIRQ_OFFSET;
  99        /*
 100         * Were softirqs turned off above:
 101         */
 102        if (softirq_count() == SOFTIRQ_OFFSET)
 103                trace_softirqs_off(ip);
 104        raw_local_irq_restore(flags);
 105
 106        if (preempt_count() == SOFTIRQ_OFFSET)
 107                trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
 108}
 109#else /* !CONFIG_TRACE_IRQFLAGS */
 110static inline void __local_bh_disable(unsigned long ip)
 111{
 112        add_preempt_count(SOFTIRQ_OFFSET);
 113        barrier();
 114}
 115#endif /* CONFIG_TRACE_IRQFLAGS */
 116
 117void local_bh_disable(void)
 118{
 119        __local_bh_disable((unsigned long)__builtin_return_address(0));
 120}
 121
 122EXPORT_SYMBOL(local_bh_disable);
 123
 124/*
 125 * Special-case - softirqs can safely be enabled in
 126 * cond_resched_softirq(), or by __do_softirq(),
 127 * without processing still-pending softirqs:
 128 */
 129void _local_bh_enable(void)
 130{
 131        WARN_ON_ONCE(in_irq());
 132        WARN_ON_ONCE(!irqs_disabled());
 133
 134        if (softirq_count() == SOFTIRQ_OFFSET)
 135                trace_softirqs_on((unsigned long)__builtin_return_address(0));
 136        sub_preempt_count(SOFTIRQ_OFFSET);
 137}
 138
 139EXPORT_SYMBOL(_local_bh_enable);
 140
 141static inline void _local_bh_enable_ip(unsigned long ip)
 142{
 143        WARN_ON_ONCE(in_irq() || irqs_disabled());
 144#ifdef CONFIG_TRACE_IRQFLAGS
 145        local_irq_disable();
 146#endif
 147        /*
 148         * Are softirqs going to be turned on now:
 149         */
 150        if (softirq_count() == SOFTIRQ_OFFSET)
 151                trace_softirqs_on(ip);
 152        /*
 153         * Keep preemption disabled until we are done with
 154         * softirq processing:
 155         */
 156        sub_preempt_count(SOFTIRQ_OFFSET - 1);
 157
 158        if (unlikely(!in_interrupt() && local_softirq_pending()))
 159                do_softirq();
 160
 161        dec_preempt_count();
 162#ifdef CONFIG_TRACE_IRQFLAGS
 163        local_irq_enable();
 164#endif
 165        preempt_check_resched();
 166}
 167
 168void local_bh_enable(void)
 169{
 170        _local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 171}
 172EXPORT_SYMBOL(local_bh_enable);
 173
 174void local_bh_enable_ip(unsigned long ip)
 175{
 176        _local_bh_enable_ip(ip);
 177}
 178EXPORT_SYMBOL(local_bh_enable_ip);
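/*
 * A minimal usage sketch for the local_bh_*() API above; the function
 * name update_stats_shared_with_tasklet() is hypothetical:
 *
 *      local_bh_disable();
 *      update_stats_shared_with_tasklet();
 *      local_bh_enable();
 *
 * Between the two calls no softirq (and hence no tasklet) can run on
 * this CPU, so process context can safely touch data it shares with
 * them.  local_bh_enable() then runs any softirqs that became pending
 * in the meantime, unless we are already in interrupt context.
 */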
 179
 180/*
 181 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 182 * and we fall back to softirqd after that.
 183 *
 184 * This number has been established via experimentation.
  185 * The two things to balance are latency and fairness -
 186 * we want to handle softirqs as soon as possible, but they
 187 * should not be able to lock up the box.
 188 */
 189#define MAX_SOFTIRQ_RESTART 10
 190
 191asmlinkage void __do_softirq(void)
 192{
 193        struct softirq_action *h;
 194        __u32 pending;
 195        int max_restart = MAX_SOFTIRQ_RESTART;
 196        int cpu;
 197
 198        pending = local_softirq_pending();
 199        account_system_vtime(current);
 200
 201        __local_bh_disable((unsigned long)__builtin_return_address(0));
 202        lockdep_softirq_enter();
 203
 204        cpu = smp_processor_id();
 205restart:
 206        /* Reset the pending bitmask before enabling irqs */
 207        set_softirq_pending(0);
 208
 209        local_irq_enable();
 210
 211        h = softirq_vec;
 212
 213        do {
 214                if (pending & 1) {
 215                        int prev_count = preempt_count();
 216                        kstat_incr_softirqs_this_cpu(h - softirq_vec);
 217
 218                        trace_softirq_entry(h, softirq_vec);
 219                        h->action(h);
 220                        trace_softirq_exit(h, softirq_vec);
 221                        if (unlikely(prev_count != preempt_count())) {
  222                                printk(KERN_ERR "huh, entered softirq %td %s %p "
 223                                       "with preempt_count %08x,"
 224                                       " exited with %08x?\n", h - softirq_vec,
 225                                       softirq_to_name[h - softirq_vec],
 226                                       h->action, prev_count, preempt_count());
 227                                preempt_count() = prev_count;
 228                        }
 229
 230                        rcu_bh_qs(cpu);
 231                }
 232                h++;
 233                pending >>= 1;
 234        } while (pending);
 235
 236        local_irq_disable();
 237
 238        pending = local_softirq_pending();
 239        if (pending && --max_restart)
 240                goto restart;
 241
 242        if (pending)
 243                wakeup_softirqd();
 244
 245        lockdep_softirq_exit();
 246
 247        account_system_vtime(current);
 248        _local_bh_enable();
 249}
 250
 251#ifndef __ARCH_HAS_DO_SOFTIRQ
 252
 253asmlinkage void do_softirq(void)
 254{
 255        __u32 pending;
 256        unsigned long flags;
 257
 258        if (in_interrupt())
 259                return;
 260
 261        local_irq_save(flags);
 262
 263        pending = local_softirq_pending();
 264
 265        if (pending)
 266                __do_softirq();
 267
 268        local_irq_restore(flags);
 269}
 270
 271#endif
 272
 273/*
 274 * Enter an interrupt context.
 275 */
 276void irq_enter(void)
 277{
 278        int cpu = smp_processor_id();
 279
 280        rcu_irq_enter();
 281        if (idle_cpu(cpu) && !in_interrupt()) {
 282                __irq_enter();
 283                tick_check_idle(cpu);
 284        } else
 285                __irq_enter();
 286}
 287
 288#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
 289# define invoke_softirq()       __do_softirq()
 290#else
 291# define invoke_softirq()       do_softirq()
 292#endif
 293
 294/*
 295 * Exit an interrupt context. Process softirqs if needed and possible:
 296 */
 297void irq_exit(void)
 298{
 299        account_system_vtime(current);
 300        trace_hardirq_exit();
 301        sub_preempt_count(IRQ_EXIT_OFFSET);
 302        if (!in_interrupt() && local_softirq_pending())
 303                invoke_softirq();
 304
 305#ifdef CONFIG_NO_HZ
 306        /* Make sure that timer wheel updates are propagated */
 307        rcu_irq_exit();
 308        if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
 309                tick_nohz_stop_sched_tick(0);
 310#endif
 311        preempt_enable_no_resched();
 312}
 313
 314/*
 315 * This function must run with irqs disabled!
 316 */
 317inline void raise_softirq_irqoff(unsigned int nr)
 318{
 319        __raise_softirq_irqoff(nr);
 320
 321        /*
 322         * If we're in an interrupt or softirq, we're done
 323         * (this also catches softirq-disabled code). We will
 324         * actually run the softirq once we return from
 325         * the irq or softirq.
 326         *
 327         * Otherwise we wake up ksoftirqd to make sure we
 328         * schedule the softirq soon.
 329         */
 330        if (!in_interrupt())
 331                wakeup_softirqd();
 332}
 333
 334void raise_softirq(unsigned int nr)
 335{
 336        unsigned long flags;
 337
 338        local_irq_save(flags);
 339        raise_softirq_irqoff(nr);
 340        local_irq_restore(flags);
 341}
 342
 343void open_softirq(int nr, void (*action)(struct softirq_action *))
 344{
 345        softirq_vec[nr].action = action;
 346}
 347
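/*
 * A sketch of how the two calls above are wired together by a softirq
 * user.  MY_SOFTIRQ and my_softirq_action() are hypothetical; real
 * softirq numbers are the fixed enum values (HI_SOFTIRQ ... RCU_SOFTIRQ)
 * from <linux/interrupt.h>:
 *
 *      At boot (typically from an __init function):
 *              open_softirq(MY_SOFTIRQ, my_softirq_action);
 *
 *      When there is work to hand off (e.g. from an interrupt handler):
 *              raise_softirq(MY_SOFTIRQ);
 *      or, with interrupts already disabled:
 *              raise_softirq_irqoff(MY_SOFTIRQ);
 *
 * The action runs the next time __do_softirq() is entered on the CPU
 * that raised it, or in ksoftirqd if the softirq load gets too high.
 */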
 348/*
 349 * Tasklets
 350 */
 351struct tasklet_head
 352{
 353        struct tasklet_struct *head;
 354        struct tasklet_struct **tail;
 355};
 356
 357static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
 358static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
 359
 360void __tasklet_schedule(struct tasklet_struct *t)
 361{
 362        unsigned long flags;
 363
 364        local_irq_save(flags);
 365        t->next = NULL;
 366        *__get_cpu_var(tasklet_vec).tail = t;
 367        __get_cpu_var(tasklet_vec).tail = &(t->next);
 368        raise_softirq_irqoff(TASKLET_SOFTIRQ);
 369        local_irq_restore(flags);
 370}
 371
 372EXPORT_SYMBOL(__tasklet_schedule);
 373
 374void __tasklet_hi_schedule(struct tasklet_struct *t)
 375{
 376        unsigned long flags;
 377
 378        local_irq_save(flags);
 379        t->next = NULL;
 380        *__get_cpu_var(tasklet_hi_vec).tail = t;
 381        __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
 382        raise_softirq_irqoff(HI_SOFTIRQ);
 383        local_irq_restore(flags);
 384}
 385
 386EXPORT_SYMBOL(__tasklet_hi_schedule);
 387
 388void __tasklet_hi_schedule_first(struct tasklet_struct *t)
 389{
 390        BUG_ON(!irqs_disabled());
 391
 392        t->next = __get_cpu_var(tasklet_hi_vec).head;
 393        __get_cpu_var(tasklet_hi_vec).head = t;
 394        __raise_softirq_irqoff(HI_SOFTIRQ);
 395}
 396
 397EXPORT_SYMBOL(__tasklet_hi_schedule_first);
 398
 399static void tasklet_action(struct softirq_action *a)
 400{
 401        struct tasklet_struct *list;
 402
 403        local_irq_disable();
 404        list = __get_cpu_var(tasklet_vec).head;
 405        __get_cpu_var(tasklet_vec).head = NULL;
 406        __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
 407        local_irq_enable();
 408
 409        while (list) {
 410                struct tasklet_struct *t = list;
 411
 412                list = list->next;
 413
 414                if (tasklet_trylock(t)) {
 415                        if (!atomic_read(&t->count)) {
 416                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
 417                                        BUG();
 418                                t->func(t->data);
 419                                tasklet_unlock(t);
 420                                continue;
 421                        }
 422                        tasklet_unlock(t);
 423                }
 424
 425                local_irq_disable();
 426                t->next = NULL;
 427                *__get_cpu_var(tasklet_vec).tail = t;
 428                __get_cpu_var(tasklet_vec).tail = &(t->next);
 429                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
 430                local_irq_enable();
 431        }
 432}
 433
 434static void tasklet_hi_action(struct softirq_action *a)
 435{
 436        struct tasklet_struct *list;
 437
 438        local_irq_disable();
 439        list = __get_cpu_var(tasklet_hi_vec).head;
 440        __get_cpu_var(tasklet_hi_vec).head = NULL;
 441        __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
 442        local_irq_enable();
 443
 444        while (list) {
 445                struct tasklet_struct *t = list;
 446
 447                list = list->next;
 448
 449                if (tasklet_trylock(t)) {
 450                        if (!atomic_read(&t->count)) {
 451                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
 452                                        BUG();
 453                                t->func(t->data);
 454                                tasklet_unlock(t);
 455                                continue;
 456                        }
 457                        tasklet_unlock(t);
 458                }
 459
 460                local_irq_disable();
 461                t->next = NULL;
 462                *__get_cpu_var(tasklet_hi_vec).tail = t;
 463                __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
 464                __raise_softirq_irqoff(HI_SOFTIRQ);
 465                local_irq_enable();
 466        }
 467}
 468
 469
 470void tasklet_init(struct tasklet_struct *t,
 471                  void (*func)(unsigned long), unsigned long data)
 472{
 473        t->next = NULL;
 474        t->state = 0;
 475        atomic_set(&t->count, 0);
 476        t->func = func;
 477        t->data = data;
 478}
 479
 480EXPORT_SYMBOL(tasklet_init);
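/*
 * A minimal usage sketch for the tasklet API; struct my_dev and
 * my_tasklet_fn() are hypothetical:
 *
 *      static void my_tasklet_fn(unsigned long data)
 *      {
 *              struct my_dev *dev = (struct my_dev *)data;
 *              ... runs later, in softirq context ...
 *      }
 *
 *      tasklet_init(&dev->tasklet, my_tasklet_fn, (unsigned long)dev);
 *
 *      and then, typically from the device's interrupt handler:
 *              tasklet_schedule(&dev->tasklet);
 *
 * A statically allocated tasklet can use DECLARE_TASKLET() instead of
 * tasklet_init().
 */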
 481
 482void tasklet_kill(struct tasklet_struct *t)
 483{
 484        if (in_interrupt())
 485                printk("Attempt to kill tasklet from interrupt\n");
 486
 487        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
 488                do {
 489                        yield();
 490                } while (test_bit(TASKLET_STATE_SCHED, &t->state));
 491        }
 492        tasklet_unlock_wait(t);
 493        clear_bit(TASKLET_STATE_SCHED, &t->state);
 494}
 495
 496EXPORT_SYMBOL(tasklet_kill);
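/*
 * A teardown sketch (hypothetical device structure): make sure nothing
 * can schedule the tasklet again, then kill it, then free its memory:
 *
 *      free_irq(dev->irq, dev);
 *      tasklet_kill(&dev->tasklet);
 *      kfree(dev);
 *
 * tasklet_kill() may yield and wait, so it must be called from process
 * context -- hence the complaint above when it is called from interrupt.
 */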
 497
 498/*
 499 * tasklet_hrtimer
 500 */
 501
 502/*
 503 * The trampoline is called when the hrtimer expires. If this is
 504 * called from the hrtimer interrupt then we schedule the tasklet as
 505 * the timer callback function expects to run in softirq context. If
 506 * it's called in softirq context anyway (i.e. high resolution timers
 507 * disabled) then the hrtimer callback is called right away.
 508 */
 509static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
 510{
 511        struct tasklet_hrtimer *ttimer =
 512                container_of(timer, struct tasklet_hrtimer, timer);
 513
 514        if (hrtimer_is_hres_active(timer)) {
 515                tasklet_hi_schedule(&ttimer->tasklet);
 516                return HRTIMER_NORESTART;
 517        }
 518        return ttimer->function(timer);
 519}
 520
 521/*
 522 * Helper function which calls the hrtimer callback from
 523 * tasklet/softirq context
 524 */
 525static void __tasklet_hrtimer_trampoline(unsigned long data)
 526{
 527        struct tasklet_hrtimer *ttimer = (void *)data;
 528        enum hrtimer_restart restart;
 529
 530        restart = ttimer->function(&ttimer->timer);
 531        if (restart != HRTIMER_NORESTART)
 532                hrtimer_restart(&ttimer->timer);
 533}
 534
 535/**
 536 * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
 537 * @ttimer:      tasklet_hrtimer which is initialized
  538 * @function:    hrtimer callback function which gets called from softirq context
 539 * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
 540 * @mode:        hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
 541 */
 542void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
 543                          enum hrtimer_restart (*function)(struct hrtimer *),
 544                          clockid_t which_clock, enum hrtimer_mode mode)
 545{
 546        hrtimer_init(&ttimer->timer, which_clock, mode);
 547        ttimer->timer.function = __hrtimer_tasklet_trampoline;
 548        tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
 549                     (unsigned long)ttimer);
 550        ttimer->function = function;
 551}
 552EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
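/*
 * An illustrative sketch for the tasklet_hrtimer combo; struct my_dev,
 * its "thr" member and my_timeout() are hypothetical.  The callback ends
 * up running in softirq context whether or not high resolution timers
 * are active:
 *
 *      static enum hrtimer_restart my_timeout(struct hrtimer *timer)
 *      {
 *              struct my_dev *dev =
 *                      container_of(timer, struct my_dev, thr.timer);
 *              ...
 *              return HRTIMER_NORESTART;
 *      }
 *
 *      tasklet_hrtimer_init(&dev->thr, my_timeout,
 *                           CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *      tasklet_hrtimer_start(&dev->thr,
 *                            ktime_set(0, 10 * NSEC_PER_MSEC),
 *                            HRTIMER_MODE_REL);
 *
 * (tasklet_hrtimer_start() is the companion starter declared next to
 * struct tasklet_hrtimer in <linux/interrupt.h>.)
 */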
 553
 554/*
 555 * Remote softirq bits
 556 */
 557
 558DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
 559EXPORT_PER_CPU_SYMBOL(softirq_work_list);
 560
 561static void __local_trigger(struct call_single_data *cp, int softirq)
 562{
 563        struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
 564
 565        list_add_tail(&cp->list, head);
 566
 567        /* Trigger the softirq only if the list was previously empty.  */
 568        if (head->next == &cp->list)
 569                raise_softirq_irqoff(softirq);
 570}
 571
 572#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
 573static void remote_softirq_receive(void *data)
 574{
 575        struct call_single_data *cp = data;
 576        unsigned long flags;
 577        int softirq;
 578
 579        softirq = cp->priv;
 580
 581        local_irq_save(flags);
 582        __local_trigger(cp, softirq);
 583        local_irq_restore(flags);
 584}
 585
 586static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
 587{
 588        if (cpu_online(cpu)) {
 589                cp->func = remote_softirq_receive;
 590                cp->info = cp;
 591                cp->flags = 0;
 592                cp->priv = softirq;
 593
 594                __smp_call_function_single(cpu, cp, 0);
 595                return 0;
 596        }
 597        return 1;
 598}
 599#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
 600static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
 601{
 602        return 1;
 603}
 604#endif
 605
 606/**
 607 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 608 * @cp: private SMP call function data area
 609 * @cpu: the remote cpu
 610 * @this_cpu: the currently executing cpu
 611 * @softirq: the softirq for the work
 612 *
 613 * Attempt to schedule softirq work on a remote cpu.  If this cannot be
 614 * done, the work is instead queued up on the local cpu.
 615 *
 616 * Interrupts must be disabled.
 617 */
 618void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
 619{
 620        if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
 621                __local_trigger(cp, softirq);
 622}
 623EXPORT_SYMBOL(__send_remote_softirq);
 624
 625/**
 626 * send_remote_softirq - try to schedule softirq work on a remote cpu
 627 * @cp: private SMP call function data area
 628 * @cpu: the remote cpu
 629 * @softirq: the softirq for the work
 630 *
 631 * Like __send_remote_softirq except that disabling interrupts and
 632 * computing the current cpu is done for the caller.
 633 */
 634void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
 635{
 636        unsigned long flags;
 637        int this_cpu;
 638
 639        local_irq_save(flags);
 640        this_cpu = smp_processor_id();
 641        __send_remote_softirq(cp, cpu, this_cpu, softirq);
 642        local_irq_restore(flags);
 643}
 644EXPORT_SYMBOL(send_remote_softirq);
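/*
 * A usage sketch for the remote softirq helpers above; struct my_request
 * and target_cpu are hypothetical.  The caller embeds a call_single_data
 * in its unit of work and picks a destination CPU and softirq number:
 *
 *      struct my_request {
 *              struct call_single_data csd;
 *              ...
 *      };
 *
 *      send_remote_softirq(&req->csd, target_cpu, BLOCK_SOFTIRQ);
 *
 * The request must stay allocated until the softirq has run.  On the
 * destination CPU the entry is queued on softirq_work_list[BLOCK_SOFTIRQ]
 * and the softirq is raised there; if target_cpu is offline or is the
 * local CPU, the work is queued and raised locally instead.
 */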
 645
 646static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
 647                                               unsigned long action, void *hcpu)
 648{
 649        /*
 650         * If a CPU goes away, splice its entries to the current CPU
 651         * and trigger a run of the softirq
 652         */
 653        if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
 654                int cpu = (unsigned long) hcpu;
 655                int i;
 656
 657                local_irq_disable();
 658                for (i = 0; i < NR_SOFTIRQS; i++) {
 659                        struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
 660                        struct list_head *local_head;
 661
 662                        if (list_empty(head))
 663                                continue;
 664
 665                        local_head = &__get_cpu_var(softirq_work_list[i]);
 666                        list_splice_init(head, local_head);
 667                        raise_softirq_irqoff(i);
 668                }
 669                local_irq_enable();
 670        }
 671
 672        return NOTIFY_OK;
 673}
 674
 675static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
 676        .notifier_call  = remote_softirq_cpu_notify,
 677};
 678
 679void __init softirq_init(void)
 680{
 681        int cpu;
 682
 683        for_each_possible_cpu(cpu) {
 684                int i;
 685
 686                per_cpu(tasklet_vec, cpu).tail =
 687                        &per_cpu(tasklet_vec, cpu).head;
 688                per_cpu(tasklet_hi_vec, cpu).tail =
 689                        &per_cpu(tasklet_hi_vec, cpu).head;
 690                for (i = 0; i < NR_SOFTIRQS; i++)
 691                        INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
 692        }
 693
 694        register_hotcpu_notifier(&remote_softirq_cpu_notifier);
 695
 696        open_softirq(TASKLET_SOFTIRQ, tasklet_action);
 697        open_softirq(HI_SOFTIRQ, tasklet_hi_action);
 698}
 699
 700static int ksoftirqd(void * __bind_cpu)
 701{
 702        set_current_state(TASK_INTERRUPTIBLE);
 703
 704        while (!kthread_should_stop()) {
 705                preempt_disable();
 706                if (!local_softirq_pending()) {
 707                        preempt_enable_no_resched();
 708                        schedule();
 709                        preempt_disable();
 710                }
 711
 712                __set_current_state(TASK_RUNNING);
 713
 714                while (local_softirq_pending()) {
  715                        /* Preempt disable stops the cpu from going
  716                           offline.  If already offline, we'll be on
  717                           the wrong CPU: don't process */
 718                        if (cpu_is_offline((long)__bind_cpu))
 719                                goto wait_to_die;
 720                        do_softirq();
 721                        preempt_enable_no_resched();
 722                        cond_resched();
 723                        preempt_disable();
 724                        rcu_sched_qs((long)__bind_cpu);
 725                }
 726                preempt_enable();
 727                set_current_state(TASK_INTERRUPTIBLE);
 728        }
 729        __set_current_state(TASK_RUNNING);
 730        return 0;
 731
 732wait_to_die:
 733        preempt_enable();
 734        /* Wait for kthread_stop */
 735        set_current_state(TASK_INTERRUPTIBLE);
 736        while (!kthread_should_stop()) {
 737                schedule();
 738                set_current_state(TASK_INTERRUPTIBLE);
 739        }
 740        __set_current_state(TASK_RUNNING);
 741        return 0;
 742}
 743
 744#ifdef CONFIG_HOTPLUG_CPU
 745/*
 746 * tasklet_kill_immediate is called to remove a tasklet which can already be
 747 * scheduled for execution on @cpu.
 748 *
 749 * Unlike tasklet_kill, this function removes the tasklet
 750 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 751 *
 752 * When this function is called, @cpu must be in the CPU_DEAD state.
 753 */
 754void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
 755{
 756        struct tasklet_struct **i;
 757
 758        BUG_ON(cpu_online(cpu));
 759        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
 760
 761        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
 762                return;
 763
 764        /* CPU is dead, so no lock needed. */
 765        for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
 766                if (*i == t) {
 767                        *i = t->next;
 768                        /* If this was the tail element, move the tail ptr */
 769                        if (*i == NULL)
 770                                per_cpu(tasklet_vec, cpu).tail = i;
 771                        return;
 772                }
 773        }
 774        BUG();
 775}
 776
 777static void takeover_tasklets(unsigned int cpu)
 778{
 779        /* CPU is dead, so no lock needed. */
 780        local_irq_disable();
 781
 782        /* Find end, append list for that CPU. */
 783        if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
 784                *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
 785                __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
 786                per_cpu(tasklet_vec, cpu).head = NULL;
 787                per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
 788        }
 789        raise_softirq_irqoff(TASKLET_SOFTIRQ);
 790
 791        if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
 792                *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
 793                __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
 794                per_cpu(tasklet_hi_vec, cpu).head = NULL;
 795                per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
 796        }
 797        raise_softirq_irqoff(HI_SOFTIRQ);
 798
 799        local_irq_enable();
 800}
 801#endif /* CONFIG_HOTPLUG_CPU */
 802
 803static int __cpuinit cpu_callback(struct notifier_block *nfb,
 804                                  unsigned long action,
 805                                  void *hcpu)
 806{
 807        int hotcpu = (unsigned long)hcpu;
 808        struct task_struct *p;
 809
 810        switch (action) {
 811        case CPU_UP_PREPARE:
 812        case CPU_UP_PREPARE_FROZEN:
 813                p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
 814                if (IS_ERR(p)) {
  815                        printk(KERN_ERR "ksoftirqd for %i failed\n", hotcpu);
 816                        return NOTIFY_BAD;
 817                }
 818                kthread_bind(p, hotcpu);
 819                per_cpu(ksoftirqd, hotcpu) = p;
 820                break;
 821        case CPU_ONLINE:
 822        case CPU_ONLINE_FROZEN:
 823                wake_up_process(per_cpu(ksoftirqd, hotcpu));
 824                break;
 825#ifdef CONFIG_HOTPLUG_CPU
 826        case CPU_UP_CANCELED:
 827        case CPU_UP_CANCELED_FROZEN:
 828                if (!per_cpu(ksoftirqd, hotcpu))
 829                        break;
 830                /* Unbind so it can run.  Fall thru. */
 831                kthread_bind(per_cpu(ksoftirqd, hotcpu),
 832                             cpumask_any(cpu_online_mask));
 833        case CPU_DEAD:
 834        case CPU_DEAD_FROZEN: {
 835                struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 836
 837                p = per_cpu(ksoftirqd, hotcpu);
 838                per_cpu(ksoftirqd, hotcpu) = NULL;
 839                sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
 840                kthread_stop(p);
 841                takeover_tasklets(hotcpu);
 842                break;
 843        }
 844#endif /* CONFIG_HOTPLUG_CPU */
 845        }
 846        return NOTIFY_OK;
 847}
 848
 849static struct notifier_block __cpuinitdata cpu_nfb = {
 850        .notifier_call = cpu_callback
 851};
 852
 853static __init int spawn_ksoftirqd(void)
 854{
 855        void *cpu = (void *)(long)smp_processor_id();
 856        int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 857
 858        BUG_ON(err == NOTIFY_BAD);
 859        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
 860        register_cpu_notifier(&cpu_nfb);
 861        return 0;
 862}
 863early_initcall(spawn_ksoftirqd);
 864
 865#ifdef CONFIG_SMP
 866/*
 867 * Call a function on all processors
 868 */
 869int on_each_cpu(void (*func) (void *info), void *info, int wait)
 870{
 871        int ret = 0;
 872
 873        preempt_disable();
 874        ret = smp_call_function(func, info, wait);
 875        local_irq_disable();
 876        func(info);
 877        local_irq_enable();
 878        preempt_enable();
 879        return ret;
 880}
 881EXPORT_SYMBOL(on_each_cpu);
 882#endif
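/*
 * A minimal usage sketch for on_each_cpu(); flush_local_counters() is
 * hypothetical.  The callback runs on every online CPU (including the
 * caller's) with interrupts disabled, so it must not sleep:
 *
 *      static void flush_local_counters(void *info)
 *      {
 *              ...
 *      }
 *
 *      on_each_cpu(flush_local_counters, NULL, 1);
 *
 * The final argument makes the call wait until all CPUs have finished.
 */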
 883
 884/*
 885 * [ These __weak aliases are kept in a separate compilation unit, so that
 886 *   GCC does not inline them incorrectly. ]
 887 */
 888
 889int __init __weak early_irq_init(void)
 890{
 891        return 0;
 892}
 893
 894int __init __weak arch_probe_nr_irqs(void)
 895{
 896        return 0;
 897}
 898
 899int __init __weak arch_early_irq_init(void)
 900{
 901        return 0;
 902}
 903
 904int __weak arch_init_chip_data(struct irq_desc *desc, int node)
 905{
 906        return 0;
 907}
 908