/*
 * linux/kernel/irq/spurious.c
 *
 * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
 *
 * This file contains spurious interrupt handling.
 */

#include <linux/jiffies.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
#include <linux/moduleparam.h>
#include <linux/timer.h>

#include "internals.h"

static int irqfixup __read_mostly;

#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
static void poll_spurious_irqs(unsigned long dummy);
static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
static int irq_poll_cpu;
static atomic_t irq_poll_active;

/*
 * We wait here for a poller to finish.
 *
 * If the poll runs on this CPU, then we yell loudly and return
 * false. That will leave the interrupt line disabled in the worst
 * case, but it should never happen.
 *
 * We wait until the poller is done and then recheck whether the
 * interrupt was disabled or its action removed in the meantime. Only
 * if it is still active do we return true and let the handler run.
 */
bool irq_wait_for_poll(struct irq_desc *desc)
{
        if (WARN_ONCE(irq_poll_cpu == smp_processor_id(),
                      "irq poll in progress on cpu %d for irq %d\n",
                      smp_processor_id(), desc->irq_data.irq))
                return false;

#ifdef CONFIG_SMP
        do {
                raw_spin_unlock(&desc->lock);
                while (irqd_irq_inprogress(&desc->irq_data))
                        cpu_relax();
                raw_spin_lock(&desc->lock);
        } while (irqd_irq_inprogress(&desc->irq_data));
        /* Might have been disabled in the meantime */
        return !irqd_irq_disabled(&desc->irq_data) && desc->action;
#else
        return false;
#endif
}
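
/*
 * Editor's note: callers do not invoke irq_wait_for_poll() directly on
 * every interrupt; the flow handlers first check IRQS_POLL_INPROGRESS.
 * A minimal sketch of that gate, paraphrased from irq_check_poll() in
 * kernel/irq/chip.c of the same kernel era (illustration only, not
 * part of this file):
 */
#if 0
static bool irq_check_poll(struct irq_desc *desc)
{
        /* Fast path: nobody is polling this descriptor */
        if (!(desc->istate & IRQS_POLL_INPROGRESS))
                return false;
        /* Wait for the poller; the result says whether to run the handler */
        return irq_wait_for_poll(desc);
}
#endif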

/*
 * Recovery handler for misrouted interrupts.
 */
static int try_one_irq(struct irq_desc *desc, bool force)
{
        irqreturn_t ret = IRQ_NONE;
        struct irqaction *action;

        raw_spin_lock(&desc->lock);

        /*
         * PER_CPU, nested thread interrupts and interrupts explicitly
         * marked polled are excluded from polling.
         */
        if (irq_settings_is_per_cpu(desc) ||
            irq_settings_is_nested_thread(desc) ||
            irq_settings_is_polled(desc))
                goto out;

        /*
         * Do not poll disabled interrupts unless the spurious
         * disabled poller asks explicitly.
         */
        if (irqd_irq_disabled(&desc->irq_data) && !force)
                goto out;

        /*
         * All handlers must agree on IRQF_SHARED, so we test just the
         * first.
         */
        action = desc->action;
        if (!action || !(action->flags & IRQF_SHARED) ||
            (action->flags & __IRQF_TIMER))
                goto out;

        /* Already running on another processor */
        if (irqd_irq_inprogress(&desc->irq_data)) {
                /*
                 * Already running: If it is shared, get the other
                 * CPU to go looking for our mystery interrupt too.
                 */
                desc->istate |= IRQS_PENDING;
                goto out;
        }

        /* Mark it poll in progress */
        desc->istate |= IRQS_POLL_INPROGRESS;
        do {
                if (handle_irq_event(desc) == IRQ_HANDLED)
                        ret = IRQ_HANDLED;
                /* Make sure that there is still a valid action */
                action = desc->action;
        } while ((desc->istate & IRQS_PENDING) && action);
        desc->istate &= ~IRQS_POLL_INPROGRESS;
out:
        raw_spin_unlock(&desc->lock);
        return ret == IRQ_HANDLED;
}

static int misrouted_irq(int irq)
{
        struct irq_desc *desc;
        int i, ok = 0;

        if (atomic_inc_return(&irq_poll_active) != 1)
                goto out;

        irq_poll_cpu = smp_processor_id();

        for_each_irq_desc(i, desc) {
                if (!i)
                        continue;

                if (i == irq)   /* Already tried */
                        continue;

                if (try_one_irq(desc, false))
                        ok = 1;
        }
out:
        atomic_dec(&irq_poll_active);
        /* So the caller can adjust the irq error counts */
        return ok;
}

static void poll_spurious_irqs(unsigned long dummy)
{
        struct irq_desc *desc;
        int i;

        if (atomic_inc_return(&irq_poll_active) != 1)
                goto out;
        irq_poll_cpu = smp_processor_id();

        for_each_irq_desc(i, desc) {
                unsigned int state;

                if (!i)
                        continue;

                /* Racy but it doesn't matter */
                state = desc->istate;
                barrier();
                if (!(state & IRQS_SPURIOUS_DISABLED))
                        continue;

                local_irq_disable();
                try_one_irq(desc, true);
                local_irq_enable();
        }
out:
        atomic_dec(&irq_poll_active);
        mod_timer(&poll_spurious_irq_timer,
                  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}
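
/*
 * Editor's note: once note_interrupt() arms poll_spurious_irq_timer,
 * poll_spurious_irqs() keeps re-arming itself every 100ms (HZ/10). A
 * minimal sketch of this self-rearming pattern with the pre-4.15 timer
 * API used above (all names below are hypothetical):
 */
#if 0
static void example_poll(unsigned long dummy);
static DEFINE_TIMER(example_timer, example_poll, 0, 0);

static void example_poll(unsigned long dummy)
{
        /* ... periodic work ... */

        /* Re-arm: run again in 100ms */
        mod_timer(&example_timer, jiffies + HZ / 10);
}

/* Started once via: mod_timer(&example_timer, jiffies + HZ / 10); */
#endif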

static inline int bad_action_ret(irqreturn_t action_ret)
{
        unsigned int r = action_ret;

        if (likely(r <= (IRQ_HANDLED | IRQ_WAKE_THREAD)))
                return 0;
        return 1;
}
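
/*
 * Editor's note: this works because of the numeric irqreturn_t values:
 * IRQ_NONE == 0, IRQ_HANDLED == 1, IRQ_WAKE_THREAD == 2, so any valid
 * value or OR-combination is <= (IRQ_HANDLED | IRQ_WAKE_THREAD) == 3.
 * A hypothetical handler returning a raw errno would trip the check:
 */
#if 0
static irqreturn_t buggy_handler(int irq, void *dev_id)
{
        return -EIO;    /* (unsigned)-5 > 3, flagged as a bogus return */
}
/* note_interrupt() would then log something like:
 *      irq event 17: bogus return value fffffffb
 */
#endif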

/*
 * If 99,900 of the previous 100,000 interrupts have not been handled
 * then assume that the IRQ is stuck in some manner. Drop a diagnostic
 * and try to turn the IRQ off.
 *
 * (The other 100-of-100,000 interrupts may have been a correctly
 *  functioning device sharing an IRQ with the failing one.)
 */
static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
        unsigned int irq = irq_desc_get_irq(desc);
        struct irqaction *action;
        unsigned long flags;

        if (bad_action_ret(action_ret)) {
                printk(KERN_ERR "irq event %d: bogus return value %x\n",
                                irq, action_ret);
        } else {
                printk(KERN_ERR "irq %d: nobody cared (try booting with "
                                "the \"irqpoll\" option)\n", irq);
        }
        dump_stack();
        printk(KERN_ERR "handlers:\n");

        /*
         * We need to take desc->lock here. note_interrupt() is called
         * w/o desc->lock held, but with the interrupt marked as in
         * progress. We might race with something else removing an
         * action. It's ok to take desc->lock here. See
         * synchronize_irq().
         */
        raw_spin_lock_irqsave(&desc->lock, flags);
        for_each_action_of_desc(desc, action) {
                printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler);
                if (action->thread_fn)
                        printk(KERN_CONT " threaded [<%p>] %pf",
                                        action->thread_fn, action->thread_fn);
                printk(KERN_CONT "\n");
        }
        raw_spin_unlock_irqrestore(&desc->lock, flags);
}

static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
        static int count = 100;

        if (count > 0) {
                count--;
                __report_bad_irq(desc, action_ret);
        }
}

static inline int
try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
                  irqreturn_t action_ret)
{
        struct irqaction *action;

        if (!irqfixup)
                return 0;

        /* We didn't actually handle the IRQ; see whether it was misrouted. */
        if (action_ret == IRQ_NONE)
                return 1;

        /*
         * But for 'irqfixup == 2' we also do it for handled interrupts if
         * they are marked as IRQF_IRQPOLL (or for irq zero, which is the
         * traditional PC timer interrupt... legacy).
         */
        if (irqfixup < 2)
                return 0;

        if (!irq)
                return 1;

        /*
         * Since we don't get the descriptor lock, "action" can
         * change under us. We don't really care, but we don't
         * want to follow a NULL pointer. So tell the compiler to
         * just load it once by using a barrier.
         */
        action = desc->action;
        barrier();
        return action && (action->flags & IRQF_IRQPOLL);
}
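
/*
 * Editor's summary of the irqfixup levels consulted above:
 *   0 (default):       no fixup polling
 *   1 ("irqfixup"):    poll other handlers when an interrupt was not
 *                      handled (action_ret == IRQ_NONE)
 *   2 ("irqpoll"):     additionally poll on *handled* interrupts, for
 *                      irq 0 and handlers marked IRQF_IRQPOLL
 *
 * A hypothetical driver opting its handler into the poll path:
 */
#if 0
ret = request_irq(irq, my_handler, IRQF_SHARED | IRQF_IRQPOLL,
                  "mydev", dev);
#endif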

#define SPURIOUS_DEFERRED       0x80000000

void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
{
        unsigned int irq;

        if (desc->istate & IRQS_POLL_INPROGRESS ||
            irq_settings_is_polled(desc))
                return;

        if (bad_action_ret(action_ret)) {
                report_bad_irq(desc, action_ret);
                return;
        }

        /*
         * We cannot call note_interrupt from the threaded handler
         * because we need to look at the compound result of all
         * handlers (primary and threaded). Aside from that, in the
         * threaded shared case we have no serialization against an
         * incoming hardware interrupt while we are dealing with a
         * threaded result.
         *
         * So in case a thread is woken, we just note the fact and
         * defer the analysis to the next hardware interrupt.
         *
         * The threaded handlers store whether they successfully
         * handled an interrupt and we check whether that number
         * changed versus the last invocation.
         *
         * We could handle all interrupts with the delayed-by-one
         * mechanism, but for the non-forced threaded case we'd just
         * add pointless overhead to the straight hardirq interrupts
         * for the sake of a few lines less code.
         */
        if (action_ret & IRQ_WAKE_THREAD) {
                /*
                 * A thread was woken. Check whether one of the
                 * shared primary handlers returned IRQ_HANDLED. If
                 * not, we defer the spurious detection to the next
                 * interrupt.
                 */
                if (action_ret == IRQ_WAKE_THREAD) {
                        int handled;
                        /*
                         * We use bit 31 of threads_handled_last to
                         * denote that the deferred spurious detection
                         * is active. No locking necessary as
                         * threads_handled_last is only accessed here
                         * and we have the guarantee that hard
                         * interrupts are not reentrant.
                         */
                        if (!(desc->threads_handled_last & SPURIOUS_DEFERRED)) {
                                desc->threads_handled_last |= SPURIOUS_DEFERRED;
                                return;
                        }
                        /*
                         * Check whether one of the threaded handlers
                         * returned IRQ_HANDLED since the last
                         * interrupt happened.
                         *
                         * For simplicity we just set bit 31, as it is
                         * set in threads_handled_last as well. So we
                         * avoid extra masking. And we really do not
                         * care about the high bits of the handled
                         * count. We just care about the count being
                         * different from the one we saw before.
                         */
                        handled = atomic_read(&desc->threads_handled);
                        handled |= SPURIOUS_DEFERRED;
                        if (handled != desc->threads_handled_last) {
                                action_ret = IRQ_HANDLED;
                                /*
                                 * Note: We keep the SPURIOUS_DEFERRED
                                 * bit set. We are handling the
                                 * previous invocation right now.
                                 * Keep it for the current one, so the
                                 * next hardware interrupt will
                                 * account for it.
                                 */
                                desc->threads_handled_last = handled;
                        } else {
                                /*
                                 * None of the threaded handlers felt
                                 * responsible for the last interrupt.
                                 *
                                 * We keep the SPURIOUS_DEFERRED bit
                                 * set in threads_handled_last as we
                                 * need to account for the current
                                 * interrupt as well.
                                 */
                                action_ret = IRQ_NONE;
                        }
                } else {
                        /*
                         * One of the primary handlers returned
                         * IRQ_HANDLED. So we don't care about the
                         * threaded handlers on the same line. Clear
                         * the deferred detection bit.
                         *
                         * In theory we could/should check whether the
                         * deferred bit is set and take the result of
                         * the previous run into account here as
                         * well. But it's really not worth the
                         * trouble. If every other interrupt is
                         * handled we never trigger the spurious
                         * detector. And if this is just the one out
                         * of 100k unhandled ones which is handled
                         * then we merely delay the spurious detection
                         * by one hard interrupt. Not a real problem.
                         */
                        desc->threads_handled_last &= ~SPURIOUS_DEFERRED;
                }
        }

        if (unlikely(action_ret == IRQ_NONE)) {
                /*
                 * If we are seeing only the odd spurious IRQ caused by
                 * bus asynchronicity then don't eventually trigger an error,
                 * otherwise the counter becomes a doomsday timer for otherwise
                 * working systems.
                 */
                if (time_after(jiffies, desc->last_unhandled + HZ/10))
                        desc->irqs_unhandled = 1;
                else
                        desc->irqs_unhandled++;
                desc->last_unhandled = jiffies;
        }

        irq = irq_desc_get_irq(desc);
        if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
                int ok = misrouted_irq(irq);
                if (action_ret == IRQ_NONE)
                        desc->irqs_unhandled -= ok;
        }

        desc->irq_count++;
        if (likely(desc->irq_count < 100000))
                return;

        desc->irq_count = 0;
        if (unlikely(desc->irqs_unhandled > 99900)) {
                /*
                 * The interrupt is stuck
                 */
                __report_bad_irq(desc, action_ret);
                /*
                 * Now kill the IRQ
                 */
                printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
                desc->istate |= IRQS_SPURIOUS_DISABLED;
                desc->depth++;
                irq_disable(desc);

                mod_timer(&poll_spurious_irq_timer,
                          jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
        }
        desc->irqs_unhandled = 0;
}
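
/*
 * Editor's worked example of the thresholds above (hypothetical
 * numbers): irq_count rolls over every 100,000 interrupts, and the
 * line is declared stuck only if more than 99,900 of those (>99.9%)
 * went unhandled. A device storming at 20kHz with no handler claiming
 * it hits 100,000 events in about 5 seconds and gets disabled. An IRQ
 * that misfires only occasionally never accumulates: any unhandled
 * event arriving more than 100ms (HZ/10) after the previous one resets
 * irqs_unhandled to 1.
 */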

bool noirqdebug __read_mostly;

int noirqdebug_setup(char *str)
{
        noirqdebug = 1;
        printk(KERN_INFO "IRQ lockup detection disabled\n");

        return 1;
}

__setup("noirqdebug", noirqdebug_setup);
module_param(noirqdebug, bool, 0644);
MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");

static int __init irqfixup_setup(char *str)
{
        irqfixup = 1;
        printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
        printk(KERN_WARNING "This may impact system performance.\n");

        return 1;
}

__setup("irqfixup", irqfixup_setup);
module_param(irqfixup, int, 0644);

static int __init irqpoll_setup(char *str)
{
        irqfixup = 2;
        printk(KERN_WARNING "Misrouted IRQ fixup and polling support enabled\n");
        printk(KERN_WARNING "This may significantly impact system performance\n");
        return 1;
}

__setup("irqpoll", irqpoll_setup);
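
/*
 * Editor's note: all three options above are boot parameters (see
 * Documentation/kernel-parameters.txt). Illustrative kernel command
 * lines:
 *
 *      ... noirqdebug          disable the unhandled-IRQ detector
 *      ... irqfixup            poll all handlers on an unhandled IRQ
 *      ... irqpoll             last resort: also poll handled IRQs
 *                              (irq 0 / IRQF_IRQPOLL)
 *
 * noirqdebug and irqfixup are also registered as 0644 module
 * parameters, so they can be changed at runtime through sysfs.
 */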