linux/kernel/irq/spurious.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
 *
 * This file contains spurious interrupt handling.
 */

#include <linux/jiffies.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/moduleparam.h>
#include <linux/timer.h>

#include "internals.h"

static int irqfixup __read_mostly;

#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
static void poll_spurious_irqs(struct timer_list *unused);
static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs);
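/*
 * irq_poll_cpu records which CPU is currently polling, so that
 * irq_wait_for_poll() can detect a poll running on the local CPU.
 * irq_poll_active ensures that at most one poller runs at a time.
 */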
static int irq_poll_cpu;
static atomic_t irq_poll_active;

/*
 * We wait here for a poller to finish.
 *
 * If the poll runs on this CPU, then we yell loudly and return
 * false. That will leave the interrupt line disabled in the worst
 * case, but it should never happen.
 *
 * We wait until the poller is done and then recheck whether the
 * interrupt is still enabled and still has an action (both might
 * have gone away in the meantime). Only if it is still active do we
 * return true and let the handler run.
 */
bool irq_wait_for_poll(struct irq_desc *desc)
        __must_hold(&desc->lock)
{
        if (WARN_ONCE(irq_poll_cpu == smp_processor_id(),
                      "irq poll in progress on cpu %d for irq %d\n",
                      smp_processor_id(), desc->irq_data.irq))
                return false;

#ifdef CONFIG_SMP
        do {
                raw_spin_unlock(&desc->lock);
                while (irqd_irq_inprogress(&desc->irq_data))
                        cpu_relax();
                raw_spin_lock(&desc->lock);
        } while (irqd_irq_inprogress(&desc->irq_data));
        /* Might have been disabled in the meantime */
        return !irqd_irq_disabled(&desc->irq_data) && desc->action;
#else
        return false;
#endif
}

/*
 * Recovery handler for misrouted interrupts: poll one interrupt
 * descriptor and run its handlers. @force allows the spurious-disable
 * poller to poll interrupts that have been disabled by the spurious
 * detector.
 */
static int try_one_irq(struct irq_desc *desc, bool force)
{
        irqreturn_t ret = IRQ_NONE;
        struct irqaction *action;

        raw_spin_lock(&desc->lock);

        /*
         * PER_CPU, nested thread interrupts and interrupts explicitly
         * marked polled are excluded from polling.
         */
        if (irq_settings_is_per_cpu(desc) ||
            irq_settings_is_nested_thread(desc) ||
            irq_settings_is_polled(desc))
                goto out;

        /*
         * Do not poll disabled interrupts unless the spurious
         * disabled poller asks explicitly.
         */
        if (irqd_irq_disabled(&desc->irq_data) && !force)
                goto out;

        /*
         * All handlers must agree on IRQF_SHARED, so we test just the
         * first.
         */
        action = desc->action;
        if (!action || !(action->flags & IRQF_SHARED) ||
            (action->flags & __IRQF_TIMER))
                goto out;

        /* Already running on another processor */
        if (irqd_irq_inprogress(&desc->irq_data)) {
                /*
                 * Already running: If it is shared, get the other
                 * CPU to go looking for our mystery interrupt too.
                 */
                desc->istate |= IRQS_PENDING;
                goto out;
        }

        /* Mark it poll in progress */
        desc->istate |= IRQS_POLL_INPROGRESS;
        do {
                if (handle_irq_event(desc) == IRQ_HANDLED)
                        ret = IRQ_HANDLED;
                /* Make sure that there is still a valid action */
                action = desc->action;
        } while ((desc->istate & IRQS_PENDING) && action);
        desc->istate &= ~IRQS_POLL_INPROGRESS;
out:
        raw_spin_unlock(&desc->lock);
        return ret == IRQ_HANDLED;
}

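/*
 * Poll all interrupt lines except @irq, which was already tried, to
 * see whether a handler on some other line claims the misrouted
 * interrupt. The atomic irq_poll_active guard ensures that only one
 * poller runs at a time.
 */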
static int misrouted_irq(int irq)
{
        struct irq_desc *desc;
        int i, ok = 0;

        if (atomic_inc_return(&irq_poll_active) != 1)
                goto out;

        irq_poll_cpu = smp_processor_id();

        for_each_irq_desc(i, desc) {
                if (!i)
                        continue;

                if (i == irq)   /* Already tried */
                        continue;

                if (try_one_irq(desc, false))
                        ok = 1;
        }
out:
        atomic_dec(&irq_poll_active);
        /* So the caller can adjust the irq error counts */
        return ok;
}

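/*
 * Timer callback: periodically poll all interrupt lines that were
 * disabled by the spurious detector (IRQS_SPURIOUS_DISABLED), so a
 * wedged line still gets serviced, and rearm the timer.
 */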
static void poll_spurious_irqs(struct timer_list *unused)
{
        struct irq_desc *desc;
        int i;

        if (atomic_inc_return(&irq_poll_active) != 1)
                goto out;
        irq_poll_cpu = smp_processor_id();

        for_each_irq_desc(i, desc) {
                unsigned int state;

                if (!i)
                        continue;

                /* Racy but it doesn't matter */
                state = desc->istate;
                barrier();
                if (!(state & IRQS_SPURIOUS_DISABLED))
                        continue;

                local_irq_disable();
                try_one_irq(desc, true);
                local_irq_enable();
        }
out:
        atomic_dec(&irq_poll_active);
        mod_timer(&poll_spurious_irq_timer,
                  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}

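/*
 * A valid handler return is IRQ_NONE, IRQ_HANDLED, IRQ_WAKE_THREAD or
 * the OR of the latter two; anything above that bit pattern is bogus.
 */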
static inline int bad_action_ret(irqreturn_t action_ret)
{
        unsigned int r = action_ret;

        if (likely(r <= (IRQ_HANDLED | IRQ_WAKE_THREAD)))
                return 0;
        return 1;
}

/*
 * If 99,900 of the previous 100,000 interrupts have not been handled
 * then assume that the IRQ is stuck in some manner. Drop a diagnostic
 * and try to turn the IRQ off.
 *
 * (The other 100-of-100,000 interrupts may have been a correctly
 *  functioning device sharing an IRQ with the failing one)
 */
static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
        unsigned int irq = irq_desc_get_irq(desc);
        struct irqaction *action;
        unsigned long flags;

        if (bad_action_ret(action_ret)) {
                printk(KERN_ERR "irq event %d: bogus return value %x\n",
                                irq, action_ret);
        } else {
                printk(KERN_ERR "irq %d: nobody cared (try booting with "
                                "the \"irqpoll\" option)\n", irq);
        }
        dump_stack();
        printk(KERN_ERR "handlers:\n");

        /*
         * We need to take desc->lock here. note_interrupt() is called
         * w/o desc->lock held, but with the IRQ marked in progress. We
         * might race with something else removing an action. It's OK
         * to take desc->lock here. See synchronize_irq().
         */
        raw_spin_lock_irqsave(&desc->lock, flags);
        for_each_action_of_desc(desc, action) {
                printk(KERN_ERR "[<%p>] %ps", action->handler, action->handler);
                if (action->thread_fn)
                        printk(KERN_CONT " threaded [<%p>] %ps",
                                        action->thread_fn, action->thread_fn);
                printk(KERN_CONT "\n");
        }
        raw_spin_unlock_irqrestore(&desc->lock, flags);
}

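/*
 * Rate-limited variant of __report_bad_irq(): report at most 100 bad
 * interrupt events over the lifetime of the system to avoid flooding
 * the log.
 */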
static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
        static int count = 100;

        if (count > 0) {
                count--;
                __report_bad_irq(desc, action_ret);
        }
}

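/*
 * Decide whether an unhandled (or, for irqfixup == 2, even a handled)
 * interrupt should trigger a misrouted-IRQ poll. See the "irqfixup"
 * and "irqpoll" boot options at the bottom of this file.
 */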
static inline int
try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
                  irqreturn_t action_ret)
{
        struct irqaction *action;

        if (!irqfixup)
                return 0;

        /* We didn't actually handle the IRQ - was it misrouted? */
        if (action_ret == IRQ_NONE)
                return 1;

        /*
         * But for 'irqfixup == 2' we also do it for handled interrupts if
         * they are marked as IRQF_IRQPOLL (or for irq zero, which is the
         * traditional PC timer interrupt - legacy).
         */
        if (irqfixup < 2)
                return 0;

        if (!irq)
                return 1;

        /*
         * Since we don't get the descriptor lock, "action" can
         * change under us. We don't really care, but we don't
         * want to follow a NULL pointer. So tell the compiler to
         * just load it once by using a barrier.
         */
        action = desc->action;
        barrier();
        return action && (action->flags & IRQF_IRQPOLL);
}

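/*
 * Bit 31 of threads_handled_last flags that spurious detection has
 * been deferred to the next hardware interrupt; see below.
 */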
#define SPURIOUS_DEFERRED       0x80000000

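/*
 * Called from the hard interrupt path (unless "noirqdebug" is set)
 * with the combined return value of the handlers, to account
 * unhandled interrupts and disable the line if it appears stuck.
 */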
void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
{
        unsigned int irq;

        if (desc->istate & IRQS_POLL_INPROGRESS ||
            irq_settings_is_polled(desc))
                return;

        if (bad_action_ret(action_ret)) {
                report_bad_irq(desc, action_ret);
                return;
        }

        /*
         * We cannot call note_interrupt from the threaded handler
         * because we need to look at the compound of all handlers
         * (primary and threaded). Aside from that, in the threaded
         * shared case we have no serialization against an incoming
         * hardware interrupt while we are dealing with a threaded
         * result.
         *
         * So in case a thread is woken, we just note the fact and
         * defer the analysis to the next hardware interrupt.
         *
         * The threaded handlers store whether they successfully
         * handled an interrupt and we check whether that number
         * changed versus the last invocation.
         *
         * We could handle all interrupts with the delayed-by-one
         * mechanism, but for the non-forced threaded case we'd just
         * add pointless overhead to the straight hardirq interrupts
         * for the sake of a few lines less code.
         */
        if (action_ret & IRQ_WAKE_THREAD) {
                /*
                 * There is a thread woken. Check whether one of the
                 * shared primary handlers returned IRQ_HANDLED. If
                 * not we defer the spurious detection to the next
                 * interrupt.
                 */
                if (action_ret == IRQ_WAKE_THREAD) {
                        int handled;
                        /*
                         * We use bit 31 of threads_handled_last to
                         * denote the deferred spurious detection
                         * active. No locking necessary as
                         * threads_handled_last is only accessed here
                         * and we have the guarantee that hard
                         * interrupts are not reentrant.
                         */
                        if (!(desc->threads_handled_last & SPURIOUS_DEFERRED)) {
                                desc->threads_handled_last |= SPURIOUS_DEFERRED;
                                return;
                        }
                        /*
                         * Check whether one of the threaded handlers
                         * returned IRQ_HANDLED since the last
                         * interrupt happened.
                         *
                         * For simplicity we just set bit 31, as it is
                         * set in threads_handled_last as well. So we
                         * avoid extra masking. And we really do not
                         * care about the high bits of the handled
                         * count. We just care about the count being
                         * different from the one we saw before.
                         */
                        handled = atomic_read(&desc->threads_handled);
                        handled |= SPURIOUS_DEFERRED;
                        if (handled != desc->threads_handled_last) {
                                action_ret = IRQ_HANDLED;
                                /*
                                 * Note: We keep the SPURIOUS_DEFERRED
                                 * bit set. We are handling the
                                 * previous invocation right now.
                                 * Keep it for the current one, so the
                                 * next hardware interrupt will
                                 * account for it.
                                 */
                                desc->threads_handled_last = handled;
                        } else {
                                /*
                                 * None of the threaded handlers felt
                                 * responsible for the last interrupt.
                                 *
                                 * We keep the SPURIOUS_DEFERRED bit
                                 * set in threads_handled_last as we
                                 * need to account for the current
                                 * interrupt as well.
                                 */
                                action_ret = IRQ_NONE;
                        }
                } else {
                        /*
                         * One of the primary handlers returned
                         * IRQ_HANDLED. So we don't care about the
                         * threaded handlers on the same line. Clear
                         * the deferred detection bit.
                         *
                         * In theory we could/should check whether the
                         * deferred bit is set and take the result of
                         * the previous run into account here as
                         * well. But it's really not worth the
                         * trouble. If every other interrupt is
                         * handled we never trigger the spurious
                         * detector. And if this is just the one out
                         * of 100k unhandled ones which is handled
                         * then we merely delay the spurious detection
                         * by one hard interrupt. Not a real problem.
                         */
                        desc->threads_handled_last &= ~SPURIOUS_DEFERRED;
                }
        }

        if (unlikely(action_ret == IRQ_NONE)) {
                /*
                 * If we are seeing only the odd spurious IRQ caused by
                 * bus asynchronicity, then don't eventually trigger an
                 * error; otherwise the counter becomes a doomsday timer
                 * for otherwise working systems.
                 */
                if (time_after(jiffies, desc->last_unhandled + HZ/10))
                        desc->irqs_unhandled = 1;
                else
                        desc->irqs_unhandled++;
                desc->last_unhandled = jiffies;
        }

        irq = irq_desc_get_irq(desc);
        if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
                int ok = misrouted_irq(irq);
                if (action_ret == IRQ_NONE)
                        desc->irqs_unhandled -= ok;
        }

        desc->irq_count++;
        if (likely(desc->irq_count < 100000))
                return;

        desc->irq_count = 0;
        if (unlikely(desc->irqs_unhandled > 99900)) {
                /*
                 * The interrupt is stuck
                 */
                __report_bad_irq(desc, action_ret);
                /*
                 * Now kill the IRQ
                 */
                printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
                desc->istate |= IRQS_SPURIOUS_DISABLED;
                desc->depth++;
                irq_disable(desc);

                mod_timer(&poll_spurious_irq_timer,
                          jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
        }
        desc->irqs_unhandled = 0;
}

bool noirqdebug __read_mostly;

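/* Boot option: "noirqdebug" disables the spurious/lockup detection above. */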
int noirqdebug_setup(char *str)
{
        noirqdebug = 1;
        printk(KERN_INFO "IRQ lockup detection disabled\n");

        return 1;
}

__setup("noirqdebug", noirqdebug_setup);
module_param(noirqdebug, bool, 0644);
MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");

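/*
 * Boot option: "irqfixup" (irqfixup == 1) polls all other interrupt
 * lines when an interrupt is not handled, to recover from misrouting.
 */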
static int __init irqfixup_setup(char *str)
{
        irqfixup = 1;
        printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
        printk(KERN_WARNING "This may impact system performance.\n");

        return 1;
}

__setup("irqfixup", irqfixup_setup);
module_param(irqfixup, int, 0644);

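/*
 * Boot option: "irqpoll" (irqfixup == 2) additionally polls on handled
 * interrupts that are marked IRQF_IRQPOLL, and on irq 0.
 */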
static int __init irqpoll_setup(char *str)
{
        irqfixup = 2;
        printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
                                "enabled\n");
        printk(KERN_WARNING "This may significantly impact system "
                                "performance\n");
        return 1;
}

__setup("irqpoll", irqpoll_setup);