linux/kernel/softirq.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *      linux/kernel/softirq.c
   4 *
   5 *      Copyright (C) 1992 Linus Torvalds
   6 *
   7 *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
   8 */
   9
  10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  11
  12#include <linux/export.h>
  13#include <linux/kernel_stat.h>
  14#include <linux/interrupt.h>
  15#include <linux/init.h>
  16#include <linux/local_lock.h>
  17#include <linux/mm.h>
  18#include <linux/notifier.h>
  19#include <linux/percpu.h>
  20#include <linux/cpu.h>
  21#include <linux/freezer.h>
  22#include <linux/kthread.h>
  23#include <linux/rcupdate.h>
  24#include <linux/ftrace.h>
  25#include <linux/smp.h>
  26#include <linux/smpboot.h>
  27#include <linux/tick.h>
  28#include <linux/irq.h>
  29#include <linux/wait_bit.h>
  30
  31#include <asm/softirq_stack.h>
  32
  33#define CREATE_TRACE_POINTS
  34#include <trace/events/irq.h>
  35
  36/*
  37   - No shared variables, all the data are CPU local.
  38   - If a softirq needs serialization, let it serialize itself
  39     by its own spinlocks.
   40   - Even if a softirq is serialized, only the local CPU is marked for
   41     execution. Hence, we get a sort of weak CPU binding, though it is
   42     still not clear whether this will result in better locality
   43     or not.
  44
  45   Examples:
  46   - NET RX softirq. It is multithreaded and does not require
  47     any global serialization.
  48   - NET TX softirq. It kicks software netdevice queues, hence
  49     it is logically serialized per device, but this serialization
  50     is invisible to common code.
   51   - Tasklets: serialized with respect to themselves.
  52 */
  53
  54#ifndef __ARCH_IRQ_STAT
  55DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
  56EXPORT_PER_CPU_SYMBOL(irq_stat);
  57#endif
  58
  59static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
  60
  61DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
  62
  63const char * const softirq_to_name[NR_SOFTIRQS] = {
  64        "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
  65        "TASKLET", "SCHED", "HRTIMER", "RCU"
  66};
  67
  68/*
  69 * we cannot loop indefinitely here to avoid userspace starvation,
  70 * but we also don't want to introduce a worst case 1/HZ latency
   71 * to the pending events, so let the scheduler balance
  72 * the softirq load for us.
  73 */
  74static void wakeup_softirqd(void)
  75{
  76        /* Interrupts are disabled: no need to stop preemption */
  77        struct task_struct *tsk = __this_cpu_read(ksoftirqd);
  78
  79        if (tsk)
  80                wake_up_process(tsk);
  81}
  82
  83/*
  84 * If ksoftirqd is scheduled, we do not want to process pending softirqs
  85 * right now. Let ksoftirqd handle this at its own rate, to get fairness,
  86 * unless we're doing some of the synchronous softirqs.
  87 */
  88#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ))
  89static bool ksoftirqd_running(unsigned long pending)
  90{
  91        struct task_struct *tsk = __this_cpu_read(ksoftirqd);
  92
  93        if (pending & SOFTIRQ_NOW_MASK)
  94                return false;
  95        return tsk && task_is_running(tsk) && !__kthread_should_park(tsk);
  96}
  97
  98#ifdef CONFIG_TRACE_IRQFLAGS
  99DEFINE_PER_CPU(int, hardirqs_enabled);
 100DEFINE_PER_CPU(int, hardirq_context);
 101EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled);
 102EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context);
 103#endif
 104
 105/*
 106 * SOFTIRQ_OFFSET usage:
 107 *
 108 * On !RT kernels 'count' is the preempt counter, on RT kernels this applies
  109 * to a per CPU counter and to task::softirq_disable_cnt.
 110 *
 111 * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq
 112 *   processing.
 113 *
 114 * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 115 *   on local_bh_disable or local_bh_enable.
 116 *
 117 * This lets us distinguish between whether we are currently processing
 118 * softirq and whether we just have bh disabled.
 119 */
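
/*
 * Illustrative sketch, not part of the upstream file: how the two offsets
 * described above show up through the <linux/preempt.h> helpers when called
 * from plain task context.  The helper name is made up; it assumes the
 * usual layout where local_bh_disable() adds SOFTIRQ_DISABLE_OFFSET and
 * only softirq processing itself adds SOFTIRQ_OFFSET.
 */
static inline void softirq_offset_example(void)
{
        local_bh_disable();             /* count += SOFTIRQ_DISABLE_OFFSET */
        WARN_ON(!in_softirq());         /* BH-disabled counts as "in softirq" */
        WARN_ON(in_serving_softirq());  /* ...but we are not serving one */
        local_bh_enable();
}
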
 120#ifdef CONFIG_PREEMPT_RT
 121
 122/*
  123 * RT accounts for BH disabled sections in task::softirq_disable_cnt and
 124 * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a
 125 * softirq disabled section to be preempted.
 126 *
 127 * The per task counter is used for softirq_count(), in_softirq() and
  128 * in_serving_softirq() because these counts are only valid when the task
 129 * holding softirq_ctrl::lock is running.
 130 *
  131 * The per CPU counter prevents pointless wakeups of ksoftirqd in case
 132 * the task which is in a softirq disabled section is preempted or blocks.
 133 */
 134struct softirq_ctrl {
 135        local_lock_t    lock;
 136        int             cnt;
 137};
 138
 139static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
 140        .lock   = INIT_LOCAL_LOCK(softirq_ctrl.lock),
 141};
 142
 143/**
  144 * local_bh_blocked() - Check from the idle task whether BH processing is blocked
 145 *
  146 * Returns false if the per CPU softirq_ctrl::cnt is 0, otherwise true.
 147 *
 148 * This is invoked from the idle task to guard against false positive
 149 * softirq pending warnings, which would happen when the task which holds
 150 * softirq_ctrl::lock was the only running task on the CPU and blocks on
 151 * some other lock.
 152 */
 153bool local_bh_blocked(void)
 154{
 155        return __this_cpu_read(softirq_ctrl.cnt) != 0;
 156}
 157
 158void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 159{
 160        unsigned long flags;
 161        int newcnt;
 162
 163        WARN_ON_ONCE(in_hardirq());
 164
 165        /* First entry of a task into a BH disabled section? */
 166        if (!current->softirq_disable_cnt) {
 167                if (preemptible()) {
 168                        local_lock(&softirq_ctrl.lock);
 169                        /* Required to meet the RCU bottomhalf requirements. */
 170                        rcu_read_lock();
 171                } else {
 172                        DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
 173                }
 174        }
 175
 176        /*
 177         * Track the per CPU softirq disabled state. On RT this is per CPU
 178         * state to allow preemption of bottom half disabled sections.
 179         */
 180        newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
 181        /*
 182         * Reflect the result in the task state to prevent recursion on the
 183         * local lock and to make softirq_count() & al work.
 184         */
 185        current->softirq_disable_cnt = newcnt;
 186
 187        if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
 188                raw_local_irq_save(flags);
 189                lockdep_softirqs_off(ip);
 190                raw_local_irq_restore(flags);
 191        }
 192}
 193EXPORT_SYMBOL(__local_bh_disable_ip);
 194
 195static void __local_bh_enable(unsigned int cnt, bool unlock)
 196{
 197        unsigned long flags;
 198        int newcnt;
 199
 200        DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
 201                            this_cpu_read(softirq_ctrl.cnt));
 202
 203        if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
 204                raw_local_irq_save(flags);
 205                lockdep_softirqs_on(_RET_IP_);
 206                raw_local_irq_restore(flags);
 207        }
 208
 209        newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
 210        current->softirq_disable_cnt = newcnt;
 211
 212        if (!newcnt && unlock) {
 213                rcu_read_unlock();
 214                local_unlock(&softirq_ctrl.lock);
 215        }
 216}
 217
 218void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 219{
 220        bool preempt_on = preemptible();
 221        unsigned long flags;
 222        u32 pending;
 223        int curcnt;
 224
 225        WARN_ON_ONCE(in_irq());
 226        lockdep_assert_irqs_enabled();
 227
 228        local_irq_save(flags);
 229        curcnt = __this_cpu_read(softirq_ctrl.cnt);
 230
 231        /*
 232         * If this is not reenabling soft interrupts, no point in trying to
 233         * run pending ones.
 234         */
 235        if (curcnt != cnt)
 236                goto out;
 237
 238        pending = local_softirq_pending();
 239        if (!pending || ksoftirqd_running(pending))
 240                goto out;
 241
 242        /*
 243         * If this was called from non preemptible context, wake up the
 244         * softirq daemon.
 245         */
 246        if (!preempt_on) {
 247                wakeup_softirqd();
 248                goto out;
 249        }
 250
 251        /*
 252         * Adjust softirq count to SOFTIRQ_OFFSET which makes
 253         * in_serving_softirq() become true.
 254         */
 255        cnt = SOFTIRQ_OFFSET;
 256        __local_bh_enable(cnt, false);
 257        __do_softirq();
 258
 259out:
 260        __local_bh_enable(cnt, preempt_on);
 261        local_irq_restore(flags);
 262}
 263EXPORT_SYMBOL(__local_bh_enable_ip);
 264
 265/*
  266 * Invoked from run_ksoftirqd() outside of the interrupt disabled section
 267 * to acquire the per CPU local lock for reentrancy protection.
 268 */
 269static inline void ksoftirqd_run_begin(void)
 270{
 271        __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
 272        local_irq_disable();
 273}
 274
 275/* Counterpart to ksoftirqd_run_begin() */
 276static inline void ksoftirqd_run_end(void)
 277{
 278        __local_bh_enable(SOFTIRQ_OFFSET, true);
 279        WARN_ON_ONCE(in_interrupt());
 280        local_irq_enable();
 281}
 282
 283static inline void softirq_handle_begin(void) { }
 284static inline void softirq_handle_end(void) { }
 285
 286static inline bool should_wake_ksoftirqd(void)
 287{
 288        return !this_cpu_read(softirq_ctrl.cnt);
 289}
 290
 291static inline void invoke_softirq(void)
 292{
 293        if (should_wake_ksoftirqd())
 294                wakeup_softirqd();
 295}
 296
 297#else /* CONFIG_PREEMPT_RT */
 298
 299/*
 300 * This one is for softirq.c-internal use, where hardirqs are disabled
 301 * legitimately:
 302 */
 303#ifdef CONFIG_TRACE_IRQFLAGS
 304void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 305{
 306        unsigned long flags;
 307
 308        WARN_ON_ONCE(in_irq());
 309
 310        raw_local_irq_save(flags);
 311        /*
 312         * The preempt tracer hooks into preempt_count_add and will break
 313         * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
 314         * is set and before current->softirq_enabled is cleared.
 315         * We must manually increment preempt_count here and manually
 316         * call the trace_preempt_off later.
 317         */
 318        __preempt_count_add(cnt);
 319        /*
 320         * Were softirqs turned off above:
 321         */
 322        if (softirq_count() == (cnt & SOFTIRQ_MASK))
 323                lockdep_softirqs_off(ip);
 324        raw_local_irq_restore(flags);
 325
 326        if (preempt_count() == cnt) {
 327#ifdef CONFIG_DEBUG_PREEMPT
 328                current->preempt_disable_ip = get_lock_parent_ip();
 329#endif
 330                trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
 331        }
 332}
 333EXPORT_SYMBOL(__local_bh_disable_ip);
 334#endif /* CONFIG_TRACE_IRQFLAGS */
 335
 336static void __local_bh_enable(unsigned int cnt)
 337{
 338        lockdep_assert_irqs_disabled();
 339
 340        if (preempt_count() == cnt)
 341                trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
 342
 343        if (softirq_count() == (cnt & SOFTIRQ_MASK))
 344                lockdep_softirqs_on(_RET_IP_);
 345
 346        __preempt_count_sub(cnt);
 347}
 348
 349/*
 350 * Special-case - softirqs can safely be enabled by __do_softirq(),
 351 * without processing still-pending softirqs:
 352 */
 353void _local_bh_enable(void)
 354{
 355        WARN_ON_ONCE(in_irq());
 356        __local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
 357}
 358EXPORT_SYMBOL(_local_bh_enable);
 359
 360void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 361{
 362        WARN_ON_ONCE(in_irq());
 363        lockdep_assert_irqs_enabled();
 364#ifdef CONFIG_TRACE_IRQFLAGS
 365        local_irq_disable();
 366#endif
 367        /*
 368         * Are softirqs going to be turned on now:
 369         */
 370        if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
 371                lockdep_softirqs_on(ip);
 372        /*
 373         * Keep preemption disabled until we are done with
 374         * softirq processing:
 375         */
 376        __preempt_count_sub(cnt - 1);
 377
 378        if (unlikely(!in_interrupt() && local_softirq_pending())) {
 379                /*
  380                 * Run softirqs if any are pending, and do it on its own stack
 381                 * as we may be calling this deep in a task call stack already.
 382                 */
 383                do_softirq();
 384        }
 385
 386        preempt_count_dec();
 387#ifdef CONFIG_TRACE_IRQFLAGS
 388        local_irq_enable();
 389#endif
 390        preempt_check_resched();
 391}
 392EXPORT_SYMBOL(__local_bh_enable_ip);
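
/*
 * Illustrative sketch, not part of the upstream file: the usage pattern the
 * semantics above are built for.  Data shared with a softirq handler on the
 * same CPU can be protected by disabling bottom halves; pending softirqs may
 * then be processed on the way out of local_bh_enable().  example_stats and
 * example_count_event() are made-up names.
 */
static DEFINE_PER_CPU(unsigned long, example_stats);

static void example_count_event(void)
{
        local_bh_disable();             /* no softirq runs on this CPU now */
        this_cpu_inc(example_stats);
        local_bh_enable();              /* may run pending softirqs here */
}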
 393
 394static inline void softirq_handle_begin(void)
 395{
 396        __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
 397}
 398
 399static inline void softirq_handle_end(void)
 400{
 401        __local_bh_enable(SOFTIRQ_OFFSET);
 402        WARN_ON_ONCE(in_interrupt());
 403}
 404
 405static inline void ksoftirqd_run_begin(void)
 406{
 407        local_irq_disable();
 408}
 409
 410static inline void ksoftirqd_run_end(void)
 411{
 412        local_irq_enable();
 413}
 414
 415static inline bool should_wake_ksoftirqd(void)
 416{
 417        return true;
 418}
 419
 420static inline void invoke_softirq(void)
 421{
 422        if (ksoftirqd_running(local_softirq_pending()))
 423                return;
 424
 425        if (!force_irqthreads || !__this_cpu_read(ksoftirqd)) {
 426#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
 427                /*
 428                 * We can safely execute softirq on the current stack if
 429                 * it is the irq stack, because it should be near empty
 430                 * at this stage.
 431                 */
 432                __do_softirq();
 433#else
 434                /*
 435                 * Otherwise, irq_exit() is called on the task stack that can
 436                 * be potentially deep already. So call softirq in its own stack
  437                 * to prevent any overrun.
 438                 */
 439                do_softirq_own_stack();
 440#endif
 441        } else {
 442                wakeup_softirqd();
 443        }
 444}
 445
 446asmlinkage __visible void do_softirq(void)
 447{
 448        __u32 pending;
 449        unsigned long flags;
 450
 451        if (in_interrupt())
 452                return;
 453
 454        local_irq_save(flags);
 455
 456        pending = local_softirq_pending();
 457
 458        if (pending && !ksoftirqd_running(pending))
 459                do_softirq_own_stack();
 460
 461        local_irq_restore(flags);
 462}
 463
 464#endif /* !CONFIG_PREEMPT_RT */
 465
 466/*
 467 * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
 468 * but break the loop if need_resched() is set or after 2 ms.
 469 * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
 470 * certain cases, such as stop_machine(), jiffies may cease to
 471 * increment and so we need the MAX_SOFTIRQ_RESTART limit as
 472 * well to make sure we eventually return from this method.
 473 *
 474 * These limits have been established via experimentation.
 475 * The two things to balance is latency against fairness -
 476 * we want to handle softirqs as soon as possible, but they
 477 * should not be able to lock up the box.
 478 */
 479#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
 480#define MAX_SOFTIRQ_RESTART 10
 481
 482#ifdef CONFIG_TRACE_IRQFLAGS
 483/*
 484 * When we run softirqs from irq_exit() and thus on the hardirq stack we need
 485 * to keep the lockdep irq context tracking as tight as possible in order to
  486 * not mis-qualify lock contexts and miss possible deadlocks.
 487 */
 488
 489static inline bool lockdep_softirq_start(void)
 490{
 491        bool in_hardirq = false;
 492
 493        if (lockdep_hardirq_context()) {
 494                in_hardirq = true;
 495                lockdep_hardirq_exit();
 496        }
 497
 498        lockdep_softirq_enter();
 499
 500        return in_hardirq;
 501}
 502
 503static inline void lockdep_softirq_end(bool in_hardirq)
 504{
 505        lockdep_softirq_exit();
 506
 507        if (in_hardirq)
 508                lockdep_hardirq_enter();
 509}
 510#else
 511static inline bool lockdep_softirq_start(void) { return false; }
 512static inline void lockdep_softirq_end(bool in_hardirq) { }
 513#endif
 514
 515asmlinkage __visible void __softirq_entry __do_softirq(void)
 516{
 517        unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
 518        unsigned long old_flags = current->flags;
 519        int max_restart = MAX_SOFTIRQ_RESTART;
 520        struct softirq_action *h;
 521        bool in_hardirq;
 522        __u32 pending;
 523        int softirq_bit;
 524
 525        /*
 526         * Mask out PF_MEMALLOC as the current task context is borrowed for the
  527         * softirq. A softirq handler, such as network RX, might set PF_MEMALLOC
 528         * again if the socket is related to swapping.
 529         */
 530        current->flags &= ~PF_MEMALLOC;
 531
 532        pending = local_softirq_pending();
 533
 534        softirq_handle_begin();
 535        in_hardirq = lockdep_softirq_start();
 536        account_softirq_enter(current);
 537
 538restart:
 539        /* Reset the pending bitmask before enabling irqs */
 540        set_softirq_pending(0);
 541
 542        local_irq_enable();
 543
 544        h = softirq_vec;
 545
 546        while ((softirq_bit = ffs(pending))) {
 547                unsigned int vec_nr;
 548                int prev_count;
 549
 550                h += softirq_bit - 1;
 551
 552                vec_nr = h - softirq_vec;
 553                prev_count = preempt_count();
 554
 555                kstat_incr_softirqs_this_cpu(vec_nr);
 556
 557                trace_softirq_entry(vec_nr);
 558                h->action(h);
 559                trace_softirq_exit(vec_nr);
 560                if (unlikely(prev_count != preempt_count())) {
 561                        pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
 562                               vec_nr, softirq_to_name[vec_nr], h->action,
 563                               prev_count, preempt_count());
 564                        preempt_count_set(prev_count);
 565                }
 566                h++;
 567                pending >>= softirq_bit;
 568        }
 569
 570        if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
 571            __this_cpu_read(ksoftirqd) == current)
 572                rcu_softirq_qs();
 573
 574        local_irq_disable();
 575
 576        pending = local_softirq_pending();
 577        if (pending) {
 578                if (time_before(jiffies, end) && !need_resched() &&
 579                    --max_restart)
 580                        goto restart;
 581
 582                wakeup_softirqd();
 583        }
 584
 585        account_softirq_exit(current);
 586        lockdep_softirq_end(in_hardirq);
 587        softirq_handle_end();
 588        current_restore_flags(old_flags, PF_MEMALLOC);
 589}
 590
 591/**
 592 * irq_enter_rcu - Enter an interrupt context with RCU watching
 593 */
 594void irq_enter_rcu(void)
 595{
 596        __irq_enter_raw();
 597
 598        if (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET))
 599                tick_irq_enter();
 600
 601        account_hardirq_enter(current);
 602}
 603
 604/**
 605 * irq_enter - Enter an interrupt context including RCU update
 606 */
 607void irq_enter(void)
 608{
 609        rcu_irq_enter();
 610        irq_enter_rcu();
 611}
 612
 613static inline void tick_irq_exit(void)
 614{
 615#ifdef CONFIG_NO_HZ_COMMON
 616        int cpu = smp_processor_id();
 617
 618        /* Make sure that timer wheel updates are propagated */
 619        if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
 620                if (!in_irq())
 621                        tick_nohz_irq_exit();
 622        }
 623#endif
 624}
 625
 626static inline void __irq_exit_rcu(void)
 627{
 628#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
 629        local_irq_disable();
 630#else
 631        lockdep_assert_irqs_disabled();
 632#endif
 633        account_hardirq_exit(current);
 634        preempt_count_sub(HARDIRQ_OFFSET);
 635        if (!in_interrupt() && local_softirq_pending())
 636                invoke_softirq();
 637
 638        tick_irq_exit();
 639}
 640
 641/**
 642 * irq_exit_rcu() - Exit an interrupt context without updating RCU
 643 *
 644 * Also processes softirqs if needed and possible.
 645 */
 646void irq_exit_rcu(void)
 647{
 648        __irq_exit_rcu();
 649         /* must be last! */
 650        lockdep_hardirq_exit();
 651}
 652
 653/**
 654 * irq_exit - Exit an interrupt context, update RCU and lockdep
 655 *
 656 * Also processes softirqs if needed and possible.
 657 */
 658void irq_exit(void)
 659{
 660        __irq_exit_rcu();
 661        rcu_irq_exit();
 662         /* must be last! */
 663        lockdep_hardirq_exit();
 664}
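
/*
 * Illustrative sketch, not part of the upstream file: the usual shape of an
 * architecture's interrupt demultiplex path, which brackets the handler with
 * irq_enter()/irq_exit() so that invoke_softirq() gets a chance to run
 * pending softirqs on the way out.  example_demux_irq() and example_hwirq
 * are made-up names.
 */
static unsigned int example_hwirq;

static void example_demux_irq(void)
{
        irq_enter();                            /* hardirq accounting, tick, RCU */
        generic_handle_irq(example_hwirq);      /* run the registered handlers */
        irq_exit();                             /* may invoke_softirq() here */
}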
 665
 666/*
 667 * This function must run with irqs disabled!
 668 */
 669inline void raise_softirq_irqoff(unsigned int nr)
 670{
 671        __raise_softirq_irqoff(nr);
 672
 673        /*
 674         * If we're in an interrupt or softirq, we're done
 675         * (this also catches softirq-disabled code). We will
 676         * actually run the softirq once we return from
 677         * the irq or softirq.
 678         *
 679         * Otherwise we wake up ksoftirqd to make sure we
 680         * schedule the softirq soon.
 681         */
 682        if (!in_interrupt() && should_wake_ksoftirqd())
 683                wakeup_softirqd();
 684}
 685
 686void raise_softirq(unsigned int nr)
 687{
 688        unsigned long flags;
 689
 690        local_irq_save(flags);
 691        raise_softirq_irqoff(nr);
 692        local_irq_restore(flags);
 693}
 694
 695void __raise_softirq_irqoff(unsigned int nr)
 696{
 697        lockdep_assert_irqs_disabled();
 698        trace_softirq_raise(nr);
 699        or_softirq_pending(1UL << nr);
 700}
 701
 702void open_softirq(int nr, void (*action)(struct softirq_action *))
 703{
 704        softirq_vec[nr].action = action;
 705}
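
/*
 * Illustrative sketch, not part of the upstream file: how a softirq vector
 * is registered and raised.  The vector number must be one of the fixed
 * NR_SOFTIRQS entries from <linux/interrupt.h>, which is why new code is
 * normally pointed at tasklets, workqueues or threaded interrupts instead.
 * example_softirq_action() and example_softirq_setup() are made-up names.
 */
static void example_softirq_action(struct softirq_action *h)
{
        /* Runs in softirq context with hardirqs enabled. */
}

static void example_softirq_setup(unsigned int nr)
{
        open_softirq(nr, example_softirq_action);
        raise_softirq(nr);              /* mark it pending on this CPU */
}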
 706
 707/*
 708 * Tasklets
 709 */
 710struct tasklet_head {
 711        struct tasklet_struct *head;
 712        struct tasklet_struct **tail;
 713};
 714
 715static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
 716static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
 717
 718static void __tasklet_schedule_common(struct tasklet_struct *t,
 719                                      struct tasklet_head __percpu *headp,
 720                                      unsigned int softirq_nr)
 721{
 722        struct tasklet_head *head;
 723        unsigned long flags;
 724
 725        local_irq_save(flags);
 726        head = this_cpu_ptr(headp);
 727        t->next = NULL;
 728        *head->tail = t;
 729        head->tail = &(t->next);
 730        raise_softirq_irqoff(softirq_nr);
 731        local_irq_restore(flags);
 732}
 733
 734void __tasklet_schedule(struct tasklet_struct *t)
 735{
 736        __tasklet_schedule_common(t, &tasklet_vec,
 737                                  TASKLET_SOFTIRQ);
 738}
 739EXPORT_SYMBOL(__tasklet_schedule);
 740
 741void __tasklet_hi_schedule(struct tasklet_struct *t)
 742{
 743        __tasklet_schedule_common(t, &tasklet_hi_vec,
 744                                  HI_SOFTIRQ);
 745}
 746EXPORT_SYMBOL(__tasklet_hi_schedule);
 747
 748static bool tasklet_clear_sched(struct tasklet_struct *t)
 749{
 750        if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
 751                wake_up_var(&t->state);
 752                return true;
 753        }
 754
 755        WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n",
 756                  t->use_callback ? "callback" : "func",
 757                  t->use_callback ? (void *)t->callback : (void *)t->func);
 758
 759        return false;
 760}
 761
 762static void tasklet_action_common(struct softirq_action *a,
 763                                  struct tasklet_head *tl_head,
 764                                  unsigned int softirq_nr)
 765{
 766        struct tasklet_struct *list;
 767
 768        local_irq_disable();
 769        list = tl_head->head;
 770        tl_head->head = NULL;
 771        tl_head->tail = &tl_head->head;
 772        local_irq_enable();
 773
 774        while (list) {
 775                struct tasklet_struct *t = list;
 776
 777                list = list->next;
 778
 779                if (tasklet_trylock(t)) {
 780                        if (!atomic_read(&t->count)) {
 781                                if (tasklet_clear_sched(t)) {
 782                                        if (t->use_callback)
 783                                                t->callback(t);
 784                                        else
 785                                                t->func(t->data);
 786                                }
 787                                tasklet_unlock(t);
 788                                continue;
 789                        }
 790                        tasklet_unlock(t);
 791                }
 792
 793                local_irq_disable();
 794                t->next = NULL;
 795                *tl_head->tail = t;
 796                tl_head->tail = &t->next;
 797                __raise_softirq_irqoff(softirq_nr);
 798                local_irq_enable();
 799        }
 800}
 801
 802static __latent_entropy void tasklet_action(struct softirq_action *a)
 803{
 804        tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
 805}
 806
 807static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
 808{
 809        tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
 810}
 811
 812void tasklet_setup(struct tasklet_struct *t,
 813                   void (*callback)(struct tasklet_struct *))
 814{
 815        t->next = NULL;
 816        t->state = 0;
 817        atomic_set(&t->count, 0);
 818        t->callback = callback;
 819        t->use_callback = true;
 820        t->data = 0;
 821}
 822EXPORT_SYMBOL(tasklet_setup);
 823
 824void tasklet_init(struct tasklet_struct *t,
 825                  void (*func)(unsigned long), unsigned long data)
 826{
 827        t->next = NULL;
 828        t->state = 0;
 829        atomic_set(&t->count, 0);
 830        t->func = func;
 831        t->use_callback = false;
 832        t->data = data;
 833}
 834EXPORT_SYMBOL(tasklet_init);
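
/*
 * Illustrative sketch, not part of the upstream file: the callback-style
 * tasklet API set up by tasklet_setup() above, as a driver would typically
 * use it.  struct example_dev, example_tasklet_fn() and example_dev_start()
 * are made-up names; from_tasklet() is the container_of() helper from
 * <linux/interrupt.h>.
 */
struct example_dev {
        struct tasklet_struct   tl;
};

static void example_tasklet_fn(struct tasklet_struct *t)
{
        struct example_dev *dev = from_tasklet(dev, t, tl);

        pr_debug("deferred work for %p in softirq context\n", dev);
}

static void example_dev_start(struct example_dev *dev)
{
        tasklet_setup(&dev->tl, example_tasklet_fn);
        tasklet_schedule(&dev->tl);     /* queue on this CPU's tasklet_vec */
}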
 835
 836#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
 837/*
 838 * Do not use in new code. Waiting for tasklets from atomic contexts is
 839 * error prone and should be avoided.
 840 */
 841void tasklet_unlock_spin_wait(struct tasklet_struct *t)
 842{
 843        while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
 844                if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
 845                        /*
  846                         * Prevent a live lock if current has preempted
  847                         * softirq processing or prevents ksoftirqd from
 848                         * running. If the tasklet runs on a different CPU
 849                         * then this has no effect other than doing the BH
 850                         * disable/enable dance for nothing.
 851                         */
 852                        local_bh_disable();
 853                        local_bh_enable();
 854                } else {
 855                        cpu_relax();
 856                }
 857        }
 858}
 859EXPORT_SYMBOL(tasklet_unlock_spin_wait);
 860#endif
 861
 862void tasklet_kill(struct tasklet_struct *t)
 863{
 864        if (in_interrupt())
 865                pr_notice("Attempt to kill tasklet from interrupt\n");
 866
 867        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
 868                wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state));
 869
 870        tasklet_unlock_wait(t);
 871        tasklet_clear_sched(t);
 872}
 873EXPORT_SYMBOL(tasklet_kill);
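
/*
 * Illustrative sketch, not part of the upstream file: the teardown
 * counterpart to the example after tasklet_init() above.  The caller must
 * first stop whatever schedules the tasklet (e.g. disable the interrupt);
 * tasklet_kill() then waits for the SCHED bit to clear and for a running
 * instance to finish, after which freeing the surrounding object is safe.
 * example_dev_stop() is a made-up name.
 */
static void example_dev_stop(struct example_dev *dev)
{
        tasklet_kill(&dev->tl);         /* wait out pending/running instances */
        /* dev->tl can no longer run; dev may be freed now */
}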
 874
 875#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
 876void tasklet_unlock(struct tasklet_struct *t)
 877{
 878        smp_mb__before_atomic();
 879        clear_bit(TASKLET_STATE_RUN, &t->state);
 880        smp_mb__after_atomic();
 881        wake_up_var(&t->state);
 882}
 883EXPORT_SYMBOL_GPL(tasklet_unlock);
 884
 885void tasklet_unlock_wait(struct tasklet_struct *t)
 886{
 887        wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
 888}
 889EXPORT_SYMBOL_GPL(tasklet_unlock_wait);
 890#endif
 891
 892void __init softirq_init(void)
 893{
 894        int cpu;
 895
 896        for_each_possible_cpu(cpu) {
 897                per_cpu(tasklet_vec, cpu).tail =
 898                        &per_cpu(tasklet_vec, cpu).head;
 899                per_cpu(tasklet_hi_vec, cpu).tail =
 900                        &per_cpu(tasklet_hi_vec, cpu).head;
 901        }
 902
 903        open_softirq(TASKLET_SOFTIRQ, tasklet_action);
 904        open_softirq(HI_SOFTIRQ, tasklet_hi_action);
 905}
 906
 907static int ksoftirqd_should_run(unsigned int cpu)
 908{
 909        return local_softirq_pending();
 910}
 911
 912static void run_ksoftirqd(unsigned int cpu)
 913{
 914        ksoftirqd_run_begin();
 915        if (local_softirq_pending()) {
 916                /*
  917                 * We can safely run softirqs on the current stack, as we are
  918                 * not deep in the task stack here.
 919                 */
 920                __do_softirq();
 921                ksoftirqd_run_end();
 922                cond_resched();
 923                return;
 924        }
 925        ksoftirqd_run_end();
 926}
 927
 928#ifdef CONFIG_HOTPLUG_CPU
 929static int takeover_tasklets(unsigned int cpu)
 930{
 931        /* CPU is dead, so no lock needed. */
 932        local_irq_disable();
 933
 934        /* Find end, append list for that CPU. */
 935        if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
 936                *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
 937                __this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
 938                per_cpu(tasklet_vec, cpu).head = NULL;
 939                per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
 940        }
 941        raise_softirq_irqoff(TASKLET_SOFTIRQ);
 942
 943        if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
 944                *__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
 945                __this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
 946                per_cpu(tasklet_hi_vec, cpu).head = NULL;
 947                per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
 948        }
 949        raise_softirq_irqoff(HI_SOFTIRQ);
 950
 951        local_irq_enable();
 952        return 0;
 953}
 954#else
 955#define takeover_tasklets       NULL
 956#endif /* CONFIG_HOTPLUG_CPU */
 957
 958static struct smp_hotplug_thread softirq_threads = {
 959        .store                  = &ksoftirqd,
 960        .thread_should_run      = ksoftirqd_should_run,
 961        .thread_fn              = run_ksoftirqd,
 962        .thread_comm            = "ksoftirqd/%u",
 963};
 964
 965static __init int spawn_ksoftirqd(void)
 966{
 967        cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
 968                                  takeover_tasklets);
 969        BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
 970
 971        return 0;
 972}
 973early_initcall(spawn_ksoftirqd);
 974
 975/*
 976 * [ These __weak aliases are kept in a separate compilation unit, so that
 977 *   GCC does not inline them incorrectly. ]
 978 */
 979
 980int __init __weak early_irq_init(void)
 981{
 982        return 0;
 983}
 984
 985int __init __weak arch_probe_nr_irqs(void)
 986{
 987        return NR_IRQS_LEGACY;
 988}
 989
 990int __init __weak arch_early_irq_init(void)
 991{
 992        return 0;
 993}
 994
 995unsigned int __weak arch_dynirq_lower_bound(unsigned int from)
 996{
 997        return from;
 998}
 999