/*
 *      linux/kernel/softirq.c
 *
 *      Copyright (C) 1992 Linus Torvalds
 *
 *      Distribute under GPLv2.
 *
 *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 *
 *      Remote softirq infrastructure is by Jens Axboe.
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/tick.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>

#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only the local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether this results in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: each tasklet is serialized with respect to itself.
 */
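
/*
 * Illustration of the rules above: a minimal softirq handler that keeps
 * all of its state CPU-local, so it needs no global serialization.  This
 * is only a sketch with made-up names (example_work, example_handle_one);
 * it is not part of this file's API.
 *
 *      static DEFINE_PER_CPU(struct list_head, example_work);
 *
 *      static void example_action(struct softirq_action *h)
 *      {
 *              struct list_head *list = &__get_cpu_var(example_work);
 *
 *              // Runs on the CPU that raised the softirq; only this
 *              // CPU's list is touched, so no spinlock is required
 *              // (producers running in hard irq context would still
 *              // have to be excluded with local_irq_disable()).
 *              while (!list_empty(list))
 *                      example_handle_one(list);       // hypothetical helper
 *      }
 */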

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

char *softirq_to_name[NR_SOFTIRQS] = {
        "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
        "TASKLET", "SCHED", "HRTIMER", "RCU"
};

/*
 * We cannot loop here indefinitely without starving userspace,
 * but we also don't want to introduce a worst case 1/HZ latency
 * for the pending events, so we let the scheduler balance
 * the softirq load for us.
 */
static void wakeup_softirqd(void)
{
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __this_cpu_read(ksoftirqd);

        if (tsk && tsk->state != TASK_RUNNING)
                wake_up_process(tsk);
}

/*
 * preempt_count and SOFTIRQ_OFFSET usage:
 * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
 *   softirq processing.
 * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable or local_bh_enable.
 * This lets us distinguish between whether we are currently processing
 * softirq and whether we just have bh disabled.
 */
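
/*
 * Worked example of the accounting described above.  This is only a
 * sketch; the exact bit layout lives in <linux/hardirq.h> and may differ
 * by configuration.  Assuming SOFTIRQ_OFFSET == 0x100, and therefore
 * SOFTIRQ_DISABLE_OFFSET == 0x200:
 *
 *      local_bh_disable();     // softirq_count() += 0x200
 *      ...
 *      // in_serving_softirq() tests softirq_count() & SOFTIRQ_OFFSET,
 *      // which is still 0 here: bh is merely disabled, we are not
 *      // inside a softirq handler.
 *      local_bh_enable();      // softirq_count() -= 0x200
 *
 * __do_softirq() instead adds only SOFTIRQ_OFFSET, so while a handler
 * runs the low softirq bit is set and in_serving_softirq() is true.
 */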

/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
        unsigned long flags;

        WARN_ON_ONCE(in_irq());

        raw_local_irq_save(flags);
        /*
         * The preempt tracer hooks into add_preempt_count and will break
         * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
         * is set and before current->softirq_enabled is cleared.
         * We must manually increment preempt_count here and manually
         * call the trace_preempt_off later.
         */
        preempt_count() += cnt;
        /*
         * Were softirqs turned off above:
         */
        if (softirq_count() == cnt)
                trace_softirqs_off(ip);
        raw_local_irq_restore(flags);

        if (preempt_count() == cnt)
                trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
        add_preempt_count(cnt);
        barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */

void local_bh_disable(void)
{
        __local_bh_disable((unsigned long)__builtin_return_address(0),
                                SOFTIRQ_DISABLE_OFFSET);
}

EXPORT_SYMBOL(local_bh_disable);

static void __local_bh_enable(unsigned int cnt)
{
        WARN_ON_ONCE(in_irq());
        WARN_ON_ONCE(!irqs_disabled());

        if (softirq_count() == cnt)
                trace_softirqs_on((unsigned long)__builtin_return_address(0));
        sub_preempt_count(cnt);
}

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
        __local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);

static inline void _local_bh_enable_ip(unsigned long ip)
{
        WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_disable();
#endif
        /*
         * Are softirqs going to be turned on now:
         */
        if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
                trace_softirqs_on(ip);
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_enable();
#endif
        preempt_check_resched();
}

void local_bh_enable(void)
{
        _local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_enable);

void local_bh_enable_ip(unsigned long ip)
{
        _local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
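
/*
 * Typical use of the bh-disable API exported above: protecting data that
 * is shared between process context and a softirq/tasklet handler on the
 * same CPU.  A minimal sketch with hypothetical names (example_lock,
 * example_list, item):
 *
 *      // process context
 *      local_bh_disable();             // the handler cannot run here
 *      spin_lock(&example_lock);       // still needed if other CPUs race
 *      list_add(&item->node, &example_list);
 *      spin_unlock(&example_lock);
 *      local_bh_enable();              // runs pending softirqs, if any
 *
 * spin_lock_bh() combines the two steps; the open-coded form is shown only
 * to make the pairing with local_bh_enable() explicit.
 */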

/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to ksoftirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
        struct softirq_action *h;
        __u32 pending;
        int max_restart = MAX_SOFTIRQ_RESTART;
        int cpu;

        pending = local_softirq_pending();
        account_system_vtime(current);

        __local_bh_disable((unsigned long)__builtin_return_address(0),
                                SOFTIRQ_OFFSET);
        lockdep_softirq_enter();

        cpu = smp_processor_id();
restart:
        /* Reset the pending bitmask before enabling irqs */
        set_softirq_pending(0);

        local_irq_enable();

        h = softirq_vec;

        do {
                if (pending & 1) {
                        unsigned int vec_nr = h - softirq_vec;
                        int prev_count = preempt_count();

                        kstat_incr_softirqs_this_cpu(vec_nr);

                        trace_softirq_entry(vec_nr);
                        h->action(h);
                        trace_softirq_exit(vec_nr);
                        if (unlikely(prev_count != preempt_count())) {
                                printk(KERN_ERR "huh, entered softirq %u %s %p "
                                       "with preempt_count %08x, "
                                       "exited with %08x?\n", vec_nr,
                                       softirq_to_name[vec_nr], h->action,
                                       prev_count, preempt_count());
                                preempt_count() = prev_count;
                        }

                        rcu_bh_qs(cpu);
                }
                h++;
                pending >>= 1;
        } while (pending);

        local_irq_disable();

        pending = local_softirq_pending();
        if (pending && --max_restart)
                goto restart;

        if (pending)
                wakeup_softirqd();

        lockdep_softirq_exit();

        account_system_vtime(current);
        __local_bh_enable(SOFTIRQ_OFFSET);
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
        __u32 pending;
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);

        pending = local_softirq_pending();

        if (pending)
                __do_softirq();

        local_irq_restore(flags);
}

#endif

/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
        int cpu = smp_processor_id();

        rcu_irq_enter();
        if (idle_cpu(cpu) && !in_interrupt()) {
                /*
                 * Prevent raise_softirq from needlessly waking up ksoftirqd
                 * here, as softirq will be serviced on return from interrupt.
                 */
                local_bh_disable();
                tick_check_idle(cpu);
                _local_bh_enable();
        }

        __irq_enter();
}

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
static inline void invoke_softirq(void)
{
        if (!force_irqthreads)
                __do_softirq();
        else {
                __local_bh_disable((unsigned long)__builtin_return_address(0),
                                SOFTIRQ_OFFSET);
                wakeup_softirqd();
                __local_bh_enable(SOFTIRQ_OFFSET);
        }
}
#else
static inline void invoke_softirq(void)
{
        if (!force_irqthreads)
                do_softirq();
        else {
                __local_bh_disable((unsigned long)__builtin_return_address(0),
                                SOFTIRQ_OFFSET);
                wakeup_softirqd();
                __local_bh_enable(SOFTIRQ_OFFSET);
        }
}
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
        account_system_vtime(current);
        trace_hardirq_exit();
        sub_preempt_count(IRQ_EXIT_OFFSET);
        if (!in_interrupt() && local_softirq_pending())
                invoke_softirq();

        rcu_irq_exit();
#ifdef CONFIG_NO_HZ
        /* Make sure that timer wheel updates are propagated */
        if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
                tick_nohz_stop_sched_tick(0);
#endif
        preempt_enable_no_resched();
}

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         *
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        if (!in_interrupt())
                wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
        unsigned long flags;

        local_irq_save(flags);
        raise_softirq_irqoff(nr);
        local_irq_restore(flags);
}

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
        softirq_vec[nr].action = action;
}
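
/*
 * Usage sketch for the two calls above.  The softirq vector is fixed at
 * compile time (see the softirq numbers in <linux/interrupt.h>), so only
 * core subsystems register handlers; drivers normally use tasklets
 * instead.  The names example_softirq_action / EXAMPLE_SOFTIRQ below are
 * hypothetical:
 *
 *      static void example_softirq_action(struct softirq_action *h)
 *      {
 *              // drain this CPU's pending work
 *      }
 *
 *      // boot-time setup:
 *      open_softirq(EXAMPLE_SOFTIRQ, example_softirq_action);
 *
 *      // from an interrupt handler (or, with irqs already disabled,
 *      // raise_softirq_irqoff()); the handler then runs on this CPU
 *      // on irq_exit() or in ksoftirqd:
 *      raise_softirq(EXAMPLE_SOFTIRQ);
 */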

/*
 * Tasklets
 */
struct tasklet_head
{
        struct tasklet_struct *head;
        struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

void __tasklet_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = NULL;
        *__this_cpu_read(tasklet_vec.tail) = t;
        __this_cpu_write(tasklet_vec.tail, &(t->next));
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = NULL;
        *__this_cpu_read(tasklet_hi_vec.tail) = t;
        __this_cpu_write(tasklet_hi_vec.tail,  &(t->next));
        raise_softirq_irqoff(HI_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

void __tasklet_hi_schedule_first(struct tasklet_struct *t)
{
        BUG_ON(!irqs_disabled());

        t->next = __this_cpu_read(tasklet_hi_vec.head);
        __this_cpu_write(tasklet_hi_vec.head, t);
        __raise_softirq_irqoff(HI_SOFTIRQ);
}

EXPORT_SYMBOL(__tasklet_hi_schedule_first);

static void tasklet_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __this_cpu_read(tasklet_vec.head);
        __this_cpu_write(tasklet_vec.head, NULL);
        __this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = NULL;
                *__this_cpu_read(tasklet_vec.tail) = t;
                __this_cpu_write(tasklet_vec.tail, &(t->next));
                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
                local_irq_enable();
        }
}

static void tasklet_hi_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __this_cpu_read(tasklet_hi_vec.head);
        __this_cpu_write(tasklet_hi_vec.head, NULL);
        __this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = NULL;
                *__this_cpu_read(tasklet_hi_vec.tail) = t;
                __this_cpu_write(tasklet_hi_vec.tail, &(t->next));
                __raise_softirq_irqoff(HI_SOFTIRQ);
                local_irq_enable();
        }
}


void tasklet_init(struct tasklet_struct *t,
                  void (*func)(unsigned long), unsigned long data)
{
        t->next = NULL;
        t->state = 0;
        atomic_set(&t->count, 0);
        t->func = func;
        t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
        if (in_interrupt())
                printk("Attempt to kill tasklet from interrupt\n");

        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
                do {
                        yield();
                } while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        tasklet_unlock_wait(t);
        clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
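
/*
 * Typical tasklet lifecycle built on the primitives above.  A sketch with
 * hypothetical names (struct my_dev, my_tasklet_func); real drivers
 * usually embed the tasklet_struct in their device structure:
 *
 *      static void my_tasklet_func(unsigned long data)
 *      {
 *              struct my_dev *dev = (struct my_dev *)data;
 *              // bottom-half work: runs in softirq context on the CPU
 *              // that scheduled it, and never concurrently with itself
 *      }
 *
 *      tasklet_init(&dev->tasklet, my_tasklet_func, (unsigned long)dev);
 *      ...
 *      tasklet_schedule(&dev->tasklet);        // e.g. from the irq handler
 *      ...
 *      tasklet_kill(&dev->tasklet);            // on teardown, process context
 */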

/*
 * tasklet_hrtimer
 */

/*
 * The trampoline is called when the hrtimer expires. It schedules a tasklet
 * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
 * hrtimer callback, but from softirq context.
 */
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
        struct tasklet_hrtimer *ttimer =
                container_of(timer, struct tasklet_hrtimer, timer);

        tasklet_hi_schedule(&ttimer->tasklet);
        return HRTIMER_NORESTART;
}

/*
 * Helper function which calls the hrtimer callback from
 * tasklet/softirq context
 */
static void __tasklet_hrtimer_trampoline(unsigned long data)
{
        struct tasklet_hrtimer *ttimer = (void *)data;
        enum hrtimer_restart restart;

        restart = ttimer->function(&ttimer->timer);
        if (restart != HRTIMER_NORESTART)
                hrtimer_restart(&ttimer->timer);
}

/**
 * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
 * @ttimer:      tasklet_hrtimer which is initialized
 * @function:    hrtimer callback function which gets called from softirq context
 * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
 * @mode:        hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
 */
void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
                          enum hrtimer_restart (*function)(struct hrtimer *),
                          clockid_t which_clock, enum hrtimer_mode mode)
{
        hrtimer_init(&ttimer->timer, which_clock, mode);
        ttimer->timer.function = __hrtimer_tasklet_trampoline;
        tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
                     (unsigned long)ttimer);
        ttimer->function = function;
}
EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
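
/*
 * Usage sketch for the combo initialized above, with hypothetical names
 * (my_ttimer, my_softirq_callback); tasklet_hrtimer_start() is the
 * companion helper declared in <linux/interrupt.h>:
 *
 *      static enum hrtimer_restart my_softirq_callback(struct hrtimer *t)
 *      {
 *              // runs from HI_SOFTIRQ context instead of hard irq context
 *              return HRTIMER_NORESTART;
 *      }
 *
 *      tasklet_hrtimer_init(&my_ttimer, my_softirq_callback,
 *                           CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *      tasklet_hrtimer_start(&my_ttimer, ktime_set(0, 10 * NSEC_PER_MSEC),
 *                            HRTIMER_MODE_REL);
 */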

/*
 * Remote softirq bits
 */

DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
EXPORT_PER_CPU_SYMBOL(softirq_work_list);

static void __local_trigger(struct call_single_data *cp, int softirq)
{
        struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);

        list_add_tail(&cp->list, head);

        /* Trigger the softirq only if the list was previously empty.  */
        if (head->next == &cp->list)
                raise_softirq_irqoff(softirq);
}

#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
static void remote_softirq_receive(void *data)
{
        struct call_single_data *cp = data;
        unsigned long flags;
        int softirq;

        softirq = cp->priv;

        local_irq_save(flags);
        __local_trigger(cp, softirq);
        local_irq_restore(flags);
}

static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
        if (cpu_online(cpu)) {
                cp->func = remote_softirq_receive;
                cp->info = cp;
                cp->flags = 0;
                cp->priv = softirq;

                __smp_call_function_single(cpu, cp, 0);
                return 0;
        }
        return 1;
}
#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
        return 1;
}
#endif

/**
 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @this_cpu: the currently executing cpu
 * @softirq: the softirq for the work
 *
 * Attempt to schedule softirq work on a remote cpu.  If this cannot be
 * done, the work is instead queued up on the local cpu.
 *
 * Interrupts must be disabled.
 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
        if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
                __local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);

/**
 * send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @softirq: the softirq for the work
 *
 * Like __send_remote_softirq except that disabling interrupts and
 * computing the current cpu is done for the caller.
 */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
        unsigned long flags;
        int this_cpu;

        local_irq_save(flags);
        this_cpu = smp_processor_id();
        __send_remote_softirq(cp, cpu, this_cpu, softirq);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(send_remote_softirq);
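
/*
 * Usage sketch for the remote softirq helpers above, with hypothetical
 * names (struct my_item, MY_SOFTIRQ, target_cpu).  The caller owns the
 * call_single_data until the chosen softirq handler has pulled the entry
 * back off this CPU's softirq_work_list[MY_SOFTIRQ] list:
 *
 *      struct my_item {
 *              struct call_single_data csd;
 *              // payload describing the work
 *      };
 *
 *      // queue completion work on the cpu that originated the request;
 *      // falls back to the local cpu if the target is offline:
 *      send_remote_softirq(&item->csd, target_cpu, MY_SOFTIRQ);
 *
 *      // the handler registered via open_softirq(MY_SOFTIRQ, ...) then
 *      // walks __get_cpu_var(softirq_work_list[MY_SOFTIRQ]) and uses
 *      // container_of() to recover each struct my_item.
 */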

static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
                                               unsigned long action, void *hcpu)
{
        /*
         * If a CPU goes away, splice its entries to the current CPU
         * and trigger a run of the softirq
         */
        if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
                int cpu = (unsigned long) hcpu;
                int i;

                local_irq_disable();
                for (i = 0; i < NR_SOFTIRQS; i++) {
                        struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
                        struct list_head *local_head;

                        if (list_empty(head))
                                continue;

                        local_head = &__get_cpu_var(softirq_work_list[i]);
                        list_splice_init(head, local_head);
                        raise_softirq_irqoff(i);
                }
                local_irq_enable();
        }

        return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
        .notifier_call  = remote_softirq_cpu_notify,
};

void __init softirq_init(void)
{
        int cpu;

        for_each_possible_cpu(cpu) {
                int i;

                per_cpu(tasklet_vec, cpu).tail =
                        &per_cpu(tasklet_vec, cpu).head;
                per_cpu(tasklet_hi_vec, cpu).tail =
                        &per_cpu(tasklet_hi_vec, cpu).head;
                for (i = 0; i < NR_SOFTIRQS; i++)
                        INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
        }

        register_hotcpu_notifier(&remote_softirq_cpu_notifier);

        open_softirq(TASKLET_SOFTIRQ, tasklet_action);
        open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

static int run_ksoftirqd(void * __bind_cpu)
{
        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop()) {
                preempt_disable();
                if (!local_softirq_pending()) {
                        preempt_enable_no_resched();
                        schedule();
                        preempt_disable();
                }

                __set_current_state(TASK_RUNNING);

                while (local_softirq_pending()) {
                        /* Preempt disable stops cpu going offline.
                           If already offline, we'll be on wrong CPU:
                           don't process */
                        if (cpu_is_offline((long)__bind_cpu))
                                goto wait_to_die;
                        local_irq_disable();
                        if (local_softirq_pending())
                                __do_softirq();
                        local_irq_enable();
                        preempt_enable_no_resched();
                        cond_resched();
                        preempt_disable();
                        rcu_note_context_switch((long)__bind_cpu);
                }
                preempt_enable();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;

wait_to_die:
        preempt_enable();
        /* Wait for kthread_stop */
        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
        struct tasklet_struct **i;

        BUG_ON(cpu_online(cpu));
        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
                return;

        /* CPU is dead, so no lock needed. */
        for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
                if (*i == t) {
                        *i = t->next;
                        /* If this was the tail element, move the tail ptr */
                        if (*i == NULL)
                                per_cpu(tasklet_vec, cpu).tail = i;
                        return;
                }
        }
        BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
        /* CPU is dead, so no lock needed. */
        local_irq_disable();

        /* Find end, append list for that CPU. */
        if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
                *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
                this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
                per_cpu(tasklet_vec, cpu).head = NULL;
                per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
        }
        raise_softirq_irqoff(TASKLET_SOFTIRQ);

        if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
                *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
                __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
                per_cpu(tasklet_hi_vec, cpu).head = NULL;
                per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
        }
        raise_softirq_irqoff(HI_SOFTIRQ);

        local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
{
        int hotcpu = (unsigned long)hcpu;
        struct task_struct *p;

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                p = kthread_create_on_node(run_ksoftirqd,
                                           hcpu,
                                           cpu_to_node(hotcpu),
                                           "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return notifier_from_errno(PTR_ERR(p));
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
                if (!per_cpu(ksoftirqd, hotcpu))
                        break;
                /* Unbind so it can run.  Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu),
                             cpumask_any(cpu_online_mask));
        case CPU_DEAD:
        case CPU_DEAD_FROZEN: {
                static const struct sched_param param = {
                        .sched_priority = MAX_RT_PRIO-1
                };

                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
                sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
        }
#endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpu_nfb = {
        .notifier_call = cpu_callback
};

static __init int spawn_ksoftirqd(void)
{
        void *cpu = (void *)(long)smp_processor_id();
        int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

        BUG_ON(err != NOTIFY_OK);
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
        return 0;
}
early_initcall(spawn_ksoftirqd);

/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
        return 0;
}

#ifdef CONFIG_GENERIC_HARDIRQS
int __init __weak arch_probe_nr_irqs(void)
{
        return NR_IRQS_LEGACY;
}

int __init __weak arch_early_irq_init(void)
{
        return 0;
}
#endif