linux/arch/x86/kernel/apic/io_apic.c
<<
>>
Prefs
   1/*
   2 *      Intel IO-APIC support for multi-Pentium hosts.
   3 *
   4 *      Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
   5 *
   6 *      Many thanks to Stig Venaas for trying out countless experimental
   7 *      patches and reporting/debugging problems patiently!
   8 *
   9 *      (c) 1999, Multiple IO-APIC support, developed by
  10 *      Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
  11 *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
  12 *      further tested and cleaned up by Zach Brown <zab@redhat.com>
  13 *      and Ingo Molnar <mingo@redhat.com>
  14 *
  15 *      Fixes
  16 *      Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
  17 *                                      thanks to Eric Gilmore
  18 *                                      and Rolf G. Tews
  19 *                                      for testing these extensively
  20 *      Paul Diefenbaugh        :       Added full ACPI support
  21 */
  22
  23#include <linux/mm.h>
  24#include <linux/interrupt.h>
  25#include <linux/init.h>
  26#include <linux/delay.h>
  27#include <linux/sched.h>
  28#include <linux/pci.h>
  29#include <linux/mc146818rtc.h>
  30#include <linux/compiler.h>
  31#include <linux/acpi.h>
  32#include <linux/module.h>
  33#include <linux/syscore_ops.h>
  34#include <linux/irqdomain.h>
  35#include <linux/freezer.h>
  36#include <linux/kthread.h>
  37#include <linux/jiffies.h>      /* time_after() */
  38#include <linux/slab.h>
  39#include <linux/bootmem.h>
  40
  41#include <asm/idle.h>
  42#include <asm/io.h>
  43#include <asm/smp.h>
  44#include <asm/cpu.h>
  45#include <asm/desc.h>
  46#include <asm/proto.h>
  47#include <asm/acpi.h>
  48#include <asm/dma.h>
  49#include <asm/timer.h>
  50#include <asm/i8259.h>
  51#include <asm/setup.h>
  52#include <asm/irq_remapping.h>
  53#include <asm/hw_irq.h>
  54
  55#include <asm/apic.h>
  56
  57#define for_each_ioapic(idx)            \
  58        for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
  59#define for_each_ioapic_reverse(idx)    \
  60        for ((idx) = nr_ioapics - 1; (idx) >= 0; (idx)--)
  61#define for_each_pin(idx, pin)          \
  62        for ((pin) = 0; (pin) < ioapics[(idx)].nr_registers; (pin)++)
  63#define for_each_ioapic_pin(idx, pin)   \
  64        for_each_ioapic((idx))          \
  65                for_each_pin((idx), (pin))
  66
  67#define for_each_irq_pin(entry, head) \
  68        list_for_each_entry(entry, &head, list)
  69
  70/*
  71 *      Is the SiS APIC rmw bug present ?
  72 *      -1 = don't know, 0 = no, 1 = yes
  73 */
  74int sis_apic_bug = -1;
  75
  76static DEFINE_RAW_SPINLOCK(ioapic_lock);
  77static DEFINE_MUTEX(ioapic_mutex);
  78static unsigned int ioapic_dynirq_base;
  79static int ioapic_initialized;
  80
  81struct mp_pin_info {
  82        int trigger;
  83        int polarity;
  84        int node;
  85        int set;
  86        u32 count;
  87};
  88
  89static struct ioapic {
  90        /*
  91         * # of IRQ routing registers
  92         */
  93        int nr_registers;
  94        /*
  95         * Saved state during suspend/resume, or while enabling intr-remap.
  96         */
  97        struct IO_APIC_route_entry *saved_registers;
  98        /* I/O APIC config */
  99        struct mpc_ioapic mp_config;
 100        /* IO APIC gsi routing info */
 101        struct mp_ioapic_gsi  gsi_config;
 102        struct ioapic_domain_cfg irqdomain_cfg;
 103        struct irq_domain *irqdomain;
 104        struct mp_pin_info *pin_info;
 105        struct resource *iomem_res;
 106} ioapics[MAX_IO_APICS];
 107
 108#define mpc_ioapic_ver(ioapic_idx)      ioapics[ioapic_idx].mp_config.apicver
 109
 110int mpc_ioapic_id(int ioapic_idx)
 111{
 112        return ioapics[ioapic_idx].mp_config.apicid;
 113}
 114
 115unsigned int mpc_ioapic_addr(int ioapic_idx)
 116{
 117        return ioapics[ioapic_idx].mp_config.apicaddr;
 118}
 119
 120struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
 121{
 122        return &ioapics[ioapic_idx].gsi_config;
 123}
 124
 125static inline int mp_ioapic_pin_count(int ioapic)
 126{
 127        struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
 128
 129        return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
 130}
 131
 132u32 mp_pin_to_gsi(int ioapic, int pin)
 133{
 134        return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin;
 135}
 136
 137/*
 138 * Initialize all legacy IRQs and all pins on the first IOAPIC
 139 * if we have legacy interrupt controller. Kernel boot option "pirq="
 140 * may rely on non-legacy pins on the first IOAPIC.
 141 */
 142static inline int mp_init_irq_at_boot(int ioapic, int irq)
 143{
 144        if (!nr_legacy_irqs())
 145                return 0;
 146
 147        return ioapic == 0 || (irq >= 0 && irq < nr_legacy_irqs());
 148}
 149
 150static inline struct mp_pin_info *mp_pin_info(int ioapic_idx, int pin)
 151{
 152        return ioapics[ioapic_idx].pin_info + pin;
 153}
 154
 155static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic)
 156{
 157        return ioapics[ioapic].irqdomain;
 158}
 159
 160int nr_ioapics;
 161
 162/* The one past the highest gsi number used */
 163u32 gsi_top;
 164
 165/* MP IRQ source entries */
 166struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 167
 168/* # of MP IRQ source entries */
 169int mp_irq_entries;
 170
 171#ifdef CONFIG_EISA
 172int mp_bus_id_to_type[MAX_MP_BUSSES];
 173#endif
 174
 175DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 176
 177int skip_ioapic_setup;
 178
 179/**
 180 * disable_ioapic_support() - disables ioapic support at runtime
 181 */
 182void disable_ioapic_support(void)
 183{
 184#ifdef CONFIG_PCI
 185        noioapicquirk = 1;
 186        noioapicreroute = -1;
 187#endif
 188        skip_ioapic_setup = 1;
 189}
 190
 191static int __init parse_noapic(char *str)
 192{
 193        /* disable IO-APIC */
 194        disable_ioapic_support();
 195        return 0;
 196}
 197early_param("noapic", parse_noapic);
 198
 199/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 200void mp_save_irq(struct mpc_intsrc *m)
 201{
 202        int i;
 203
 204        apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
 205                " IRQ %02x, APIC ID %x, APIC INT %02x\n",
 206                m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
 207                m->srcbusirq, m->dstapic, m->dstirq);
 208
 209        for (i = 0; i < mp_irq_entries; i++) {
 210                if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
 211                        return;
 212        }
 213
 214        memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m));
 215        if (++mp_irq_entries == MAX_IRQ_SOURCES)
 216                panic("Max # of irq sources exceeded!!\n");
 217}
 218
 219struct irq_pin_list {
 220        struct list_head list;
 221        int apic, pin;
 222};
 223
 224static struct irq_pin_list *alloc_irq_pin_list(int node)
 225{
 226        return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
 227}
 228
 229static void alloc_ioapic_saved_registers(int idx)
 230{
 231        size_t size;
 232
 233        if (ioapics[idx].saved_registers)
 234                return;
 235
 236        size = sizeof(struct IO_APIC_route_entry) * ioapics[idx].nr_registers;
 237        ioapics[idx].saved_registers = kzalloc(size, GFP_KERNEL);
 238        if (!ioapics[idx].saved_registers)
 239                pr_err("IOAPIC %d: suspend/resume impossible!\n", idx);
 240}
 241
 242static void free_ioapic_saved_registers(int idx)
 243{
 244        kfree(ioapics[idx].saved_registers);
 245        ioapics[idx].saved_registers = NULL;
 246}
 247
 248int __init arch_early_ioapic_init(void)
 249{
 250        struct irq_cfg *cfg;
 251        int i, node = cpu_to_node(0);
 252
 253        if (!nr_legacy_irqs())
 254                io_apic_irqs = ~0UL;
 255
 256        for_each_ioapic(i)
 257                alloc_ioapic_saved_registers(i);
 258
 259        /*
 260         * For legacy IRQ's, start with assigning irq0 to irq15 to
 261         * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
 262         */
 263        for (i = 0; i < nr_legacy_irqs(); i++) {
 264                cfg = alloc_irq_and_cfg_at(i, node);
 265                cfg->vector = IRQ0_VECTOR + i;
 266                cpumask_setall(cfg->domain);
 267        }
 268
 269        return 0;
 270}
 271
 272struct io_apic {
 273        unsigned int index;
 274        unsigned int unused[3];
 275        unsigned int data;
 276        unsigned int unused2[11];
 277        unsigned int eoi;
 278};
 279
 280static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
 281{
 282        return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
 283                + (mpc_ioapic_addr(idx) & ~PAGE_MASK);
 284}
 285
 286void io_apic_eoi(unsigned int apic, unsigned int vector)
 287{
 288        struct io_apic __iomem *io_apic = io_apic_base(apic);
 289        writel(vector, &io_apic->eoi);
 290}
 291
 292unsigned int native_io_apic_read(unsigned int apic, unsigned int reg)
 293{
 294        struct io_apic __iomem *io_apic = io_apic_base(apic);
 295        writel(reg, &io_apic->index);
 296        return readl(&io_apic->data);
 297}
 298
 299void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
 300{
 301        struct io_apic __iomem *io_apic = io_apic_base(apic);
 302
 303        writel(reg, &io_apic->index);
 304        writel(value, &io_apic->data);
 305}
 306
 307/*
 308 * Re-write a value: to be used for read-modify-write
 309 * cycles where the read already set up the index register.
 310 *
 311 * Older SiS APIC requires we rewrite the index register
 312 */
 313void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 314{
 315        struct io_apic __iomem *io_apic = io_apic_base(apic);
 316
 317        if (sis_apic_bug)
 318                writel(reg, &io_apic->index);
 319        writel(value, &io_apic->data);
 320}
 321
 322union entry_union {
 323        struct { u32 w1, w2; };
 324        struct IO_APIC_route_entry entry;
 325};
 326
 327static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
 328{
 329        union entry_union eu;
 330
 331        eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
 332        eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
 333
 334        return eu.entry;
 335}
 336
 337static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 338{
 339        union entry_union eu;
 340        unsigned long flags;
 341
 342        raw_spin_lock_irqsave(&ioapic_lock, flags);
 343        eu.entry = __ioapic_read_entry(apic, pin);
 344        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 345
 346        return eu.entry;
 347}
 348
 349/*
 350 * When we write a new IO APIC routing entry, we need to write the high
 351 * word first! If the mask bit in the low word is clear, we will enable
 352 * the interrupt, and we need to make sure the entry is fully populated
 353 * before that happens.
 354 */
 355static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 356{
 357        union entry_union eu = {{0, 0}};
 358
 359        eu.entry = e;
 360        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
 361        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
 362}
 363
 364static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 365{
 366        unsigned long flags;
 367
 368        raw_spin_lock_irqsave(&ioapic_lock, flags);
 369        __ioapic_write_entry(apic, pin, e);
 370        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 371}
 372
 373/*
 374 * When we mask an IO APIC routing entry, we need to write the low
 375 * word first, in order to set the mask bit before we change the
 376 * high bits!
 377 */
 378static void ioapic_mask_entry(int apic, int pin)
 379{
 380        unsigned long flags;
 381        union entry_union eu = { .entry.mask = 1 };
 382
 383        raw_spin_lock_irqsave(&ioapic_lock, flags);
 384        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
 385        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
 386        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 387}
 388
 389/*
 390 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
 391 * shared ISA-space IRQs, so we have to support them. We are super
 392 * fast in the common case, and fast for shared ISA-space IRQs.
 393 */
 394static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 395{
 396        struct irq_pin_list *entry;
 397
 398        /* don't allow duplicates */
 399        for_each_irq_pin(entry, cfg->irq_2_pin)
 400                if (entry->apic == apic && entry->pin == pin)
 401                        return 0;
 402
 403        entry = alloc_irq_pin_list(node);
 404        if (!entry) {
 405                pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
 406                       node, apic, pin);
 407                return -ENOMEM;
 408        }
 409        entry->apic = apic;
 410        entry->pin = pin;
 411
 412        list_add_tail(&entry->list, &cfg->irq_2_pin);
 413        return 0;
 414}
 415
 416static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin)
 417{
 418        struct irq_pin_list *tmp, *entry;
 419
 420        list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list)
 421                if (entry->apic == apic && entry->pin == pin) {
 422                        list_del(&entry->list);
 423                        kfree(entry);
 424                        return;
 425                }
 426}
 427
 428static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 429{
 430        if (__add_pin_to_irq_node(cfg, node, apic, pin))
 431                panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
 432}
 433
 434/*
 435 * Reroute an IRQ to a different pin.
 436 */
 437static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
 438                                           int oldapic, int oldpin,
 439                                           int newapic, int newpin)
 440{
 441        struct irq_pin_list *entry;
 442
 443        for_each_irq_pin(entry, cfg->irq_2_pin) {
 444                if (entry->apic == oldapic && entry->pin == oldpin) {
 445                        entry->apic = newapic;
 446                        entry->pin = newpin;
 447                        /* every one is different, right? */
 448                        return;
 449                }
 450        }
 451
 452        /* old apic/pin didn't exist, so just add new ones */
 453        add_pin_to_irq_node(cfg, node, newapic, newpin);
 454}
 455
 456static void __io_apic_modify_irq(struct irq_pin_list *entry,
 457                                 int mask_and, int mask_or,
 458                                 void (*final)(struct irq_pin_list *entry))
 459{
 460        unsigned int reg, pin;
 461
 462        pin = entry->pin;
 463        reg = io_apic_read(entry->apic, 0x10 + pin * 2);
 464        reg &= mask_and;
 465        reg |= mask_or;
 466        io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
 467        if (final)
 468                final(entry);
 469}
 470
 471static void io_apic_modify_irq(struct irq_cfg *cfg,
 472                               int mask_and, int mask_or,
 473                               void (*final)(struct irq_pin_list *entry))
 474{
 475        struct irq_pin_list *entry;
 476
 477        for_each_irq_pin(entry, cfg->irq_2_pin)
 478                __io_apic_modify_irq(entry, mask_and, mask_or, final);
 479}
 480
 481static void io_apic_sync(struct irq_pin_list *entry)
 482{
 483        /*
 484         * Synchronize the IO-APIC and the CPU by doing
 485         * a dummy read from the IO-APIC
 486         */
 487        struct io_apic __iomem *io_apic;
 488
 489        io_apic = io_apic_base(entry->apic);
 490        readl(&io_apic->data);
 491}
 492
 493static void mask_ioapic(struct irq_cfg *cfg)
 494{
 495        unsigned long flags;
 496
 497        raw_spin_lock_irqsave(&ioapic_lock, flags);
 498        io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 499        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 500}
 501
 502static void mask_ioapic_irq(struct irq_data *data)
 503{
 504        mask_ioapic(irqd_cfg(data));
 505}
 506
 507static void __unmask_ioapic(struct irq_cfg *cfg)
 508{
 509        io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
 510}
 511
 512static void unmask_ioapic(struct irq_cfg *cfg)
 513{
 514        unsigned long flags;
 515
 516        raw_spin_lock_irqsave(&ioapic_lock, flags);
 517        __unmask_ioapic(cfg);
 518        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 519}
 520
 521static void unmask_ioapic_irq(struct irq_data *data)
 522{
 523        unmask_ioapic(irqd_cfg(data));
 524}
 525
 526/*
 527 * IO-APIC versions below 0x20 don't support EOI register.
 528 * For the record, here is the information about various versions:
 529 *     0Xh     82489DX
 530 *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
 531 *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
 532 *     30h-FFh Reserved
 533 *
 534 * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
 535 * version as 0x2. This is an error with documentation and these ICH chips
 536 * use io-apic's of version 0x20.
 537 *
 538 * For IO-APIC's with EOI register, we use that to do an explicit EOI.
 539 * Otherwise, we simulate the EOI message manually by changing the trigger
 540 * mode to edge and then back to level, with RTE being masked during this.
 541 */
 542void native_eoi_ioapic_pin(int apic, int pin, int vector)
 543{
 544        if (mpc_ioapic_ver(apic) >= 0x20) {
 545                io_apic_eoi(apic, vector);
 546        } else {
 547                struct IO_APIC_route_entry entry, entry1;
 548
 549                entry = entry1 = __ioapic_read_entry(apic, pin);
 550
 551                /*
 552                 * Mask the entry and change the trigger mode to edge.
 553                 */
 554                entry1.mask = 1;
 555                entry1.trigger = IOAPIC_EDGE;
 556
 557                __ioapic_write_entry(apic, pin, entry1);
 558
 559                /*
 560                 * Restore the previous level triggered entry.
 561                 */
 562                __ioapic_write_entry(apic, pin, entry);
 563        }
 564}
 565
 566void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
 567{
 568        struct irq_pin_list *entry;
 569        unsigned long flags;
 570
 571        raw_spin_lock_irqsave(&ioapic_lock, flags);
 572        for_each_irq_pin(entry, cfg->irq_2_pin)
 573                x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin,
 574                                               cfg->vector);
 575        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 576}
 577
 578static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 579{
 580        struct IO_APIC_route_entry entry;
 581
 582        /* Check delivery_mode to be sure we're not clearing an SMI pin */
 583        entry = ioapic_read_entry(apic, pin);
 584        if (entry.delivery_mode == dest_SMI)
 585                return;
 586
 587        /*
 588         * Make sure the entry is masked and re-read the contents to check
 589         * if it is a level triggered pin and if the remote-IRR is set.
 590         */
 591        if (!entry.mask) {
 592                entry.mask = 1;
 593                ioapic_write_entry(apic, pin, entry);
 594                entry = ioapic_read_entry(apic, pin);
 595        }
 596
 597        if (entry.irr) {
 598                unsigned long flags;
 599
 600                /*
 601                 * Make sure the trigger mode is set to level. Explicit EOI
 602                 * doesn't clear the remote-IRR if the trigger mode is not
 603                 * set to level.
 604                 */
 605                if (!entry.trigger) {
 606                        entry.trigger = IOAPIC_LEVEL;
 607                        ioapic_write_entry(apic, pin, entry);
 608                }
 609
 610                raw_spin_lock_irqsave(&ioapic_lock, flags);
 611                x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector);
 612                raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 613        }
 614
 615        /*
 616         * Clear the rest of the bits in the IO-APIC RTE except for the mask
 617         * bit.
 618         */
 619        ioapic_mask_entry(apic, pin);
 620        entry = ioapic_read_entry(apic, pin);
 621        if (entry.irr)
 622                pr_err("Unable to reset IRR for apic: %d, pin :%d\n",
 623                       mpc_ioapic_id(apic), pin);
 624}
 625
 626static void clear_IO_APIC (void)
 627{
 628        int apic, pin;
 629
 630        for_each_ioapic_pin(apic, pin)
 631                clear_IO_APIC_pin(apic, pin);
 632}
 633
 634#ifdef CONFIG_X86_32
 635/*
 636 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
 637 * specific CPU-side IRQs.
 638 */
 639
 640#define MAX_PIRQS 8
 641static int pirq_entries[MAX_PIRQS] = {
 642        [0 ... MAX_PIRQS - 1] = -1
 643};
 644
 645static int __init ioapic_pirq_setup(char *str)
 646{
 647        int i, max;
 648        int ints[MAX_PIRQS+1];
 649
 650        get_options(str, ARRAY_SIZE(ints), ints);
 651
 652        apic_printk(APIC_VERBOSE, KERN_INFO
 653                        "PIRQ redirection, working around broken MP-BIOS.\n");
 654        max = MAX_PIRQS;
 655        if (ints[0] < MAX_PIRQS)
 656                max = ints[0];
 657
 658        for (i = 0; i < max; i++) {
 659                apic_printk(APIC_VERBOSE, KERN_DEBUG
 660                                "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
 661                /*
 662                 * PIRQs are mapped upside down, usually.
 663                 */
 664                pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
 665        }
 666        return 1;
 667}
 668
 669__setup("pirq=", ioapic_pirq_setup);
 670#endif /* CONFIG_X86_32 */
 671
 672/*
 673 * Saves all the IO-APIC RTE's
 674 */
 675int save_ioapic_entries(void)
 676{
 677        int apic, pin;
 678        int err = 0;
 679
 680        for_each_ioapic(apic) {
 681                if (!ioapics[apic].saved_registers) {
 682                        err = -ENOMEM;
 683                        continue;
 684                }
 685
 686                for_each_pin(apic, pin)
 687                        ioapics[apic].saved_registers[pin] =
 688                                ioapic_read_entry(apic, pin);
 689        }
 690
 691        return err;
 692}
 693
 694/*
 695 * Mask all IO APIC entries.
 696 */
 697void mask_ioapic_entries(void)
 698{
 699        int apic, pin;
 700
 701        for_each_ioapic(apic) {
 702                if (!ioapics[apic].saved_registers)
 703                        continue;
 704
 705                for_each_pin(apic, pin) {
 706                        struct IO_APIC_route_entry entry;
 707
 708                        entry = ioapics[apic].saved_registers[pin];
 709                        if (!entry.mask) {
 710                                entry.mask = 1;
 711                                ioapic_write_entry(apic, pin, entry);
 712                        }
 713                }
 714        }
 715}
 716
 717/*
 718 * Restore IO APIC entries which was saved in the ioapic structure.
 719 */
 720int restore_ioapic_entries(void)
 721{
 722        int apic, pin;
 723
 724        for_each_ioapic(apic) {
 725                if (!ioapics[apic].saved_registers)
 726                        continue;
 727
 728                for_each_pin(apic, pin)
 729                        ioapic_write_entry(apic, pin,
 730                                           ioapics[apic].saved_registers[pin]);
 731        }
 732        return 0;
 733}
 734
 735/*
 736 * Find the IRQ entry number of a certain pin.
 737 */
 738static int find_irq_entry(int ioapic_idx, int pin, int type)
 739{
 740        int i;
 741
 742        for (i = 0; i < mp_irq_entries; i++)
 743                if (mp_irqs[i].irqtype == type &&
 744                    (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) ||
 745                     mp_irqs[i].dstapic == MP_APIC_ALL) &&
 746                    mp_irqs[i].dstirq == pin)
 747                        return i;
 748
 749        return -1;
 750}
 751
 752/*
 753 * Find the pin to which IRQ[irq] (ISA) is connected
 754 */
 755static int __init find_isa_irq_pin(int irq, int type)
 756{
 757        int i;
 758
 759        for (i = 0; i < mp_irq_entries; i++) {
 760                int lbus = mp_irqs[i].srcbus;
 761
 762                if (test_bit(lbus, mp_bus_not_pci) &&
 763                    (mp_irqs[i].irqtype == type) &&
 764                    (mp_irqs[i].srcbusirq == irq))
 765
 766                        return mp_irqs[i].dstirq;
 767        }
 768        return -1;
 769}
 770
 771static int __init find_isa_irq_apic(int irq, int type)
 772{
 773        int i;
 774
 775        for (i = 0; i < mp_irq_entries; i++) {
 776                int lbus = mp_irqs[i].srcbus;
 777
 778                if (test_bit(lbus, mp_bus_not_pci) &&
 779                    (mp_irqs[i].irqtype == type) &&
 780                    (mp_irqs[i].srcbusirq == irq))
 781                        break;
 782        }
 783
 784        if (i < mp_irq_entries) {
 785                int ioapic_idx;
 786
 787                for_each_ioapic(ioapic_idx)
 788                        if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
 789                                return ioapic_idx;
 790        }
 791
 792        return -1;
 793}
 794
 795#ifdef CONFIG_EISA
 796/*
 797 * EISA Edge/Level control register, ELCR
 798 */
 799static int EISA_ELCR(unsigned int irq)
 800{
 801        if (irq < nr_legacy_irqs()) {
 802                unsigned int port = 0x4d0 + (irq >> 3);
 803                return (inb(port) >> (irq & 7)) & 1;
 804        }
 805        apic_printk(APIC_VERBOSE, KERN_INFO
 806                        "Broken MPtable reports ISA irq %d\n", irq);
 807        return 0;
 808}
 809
 810#endif
 811
 812/* ISA interrupts are always polarity zero edge triggered,
 813 * when listed as conforming in the MP table. */
 814
 815#define default_ISA_trigger(idx)        (0)
 816#define default_ISA_polarity(idx)       (0)
 817
 818/* EISA interrupts are always polarity zero and can be edge or level
 819 * trigger depending on the ELCR value.  If an interrupt is listed as
 820 * EISA conforming in the MP table, that means its trigger type must
 821 * be read in from the ELCR */
 822
 823#define default_EISA_trigger(idx)       (EISA_ELCR(mp_irqs[idx].srcbusirq))
 824#define default_EISA_polarity(idx)      default_ISA_polarity(idx)
 825
 826/* PCI interrupts are always polarity one level triggered,
 827 * when listed as conforming in the MP table. */
 828
 829#define default_PCI_trigger(idx)        (1)
 830#define default_PCI_polarity(idx)       (1)
 831
 832static int irq_polarity(int idx)
 833{
 834        int bus = mp_irqs[idx].srcbus;
 835        int polarity;
 836
 837        /*
 838         * Determine IRQ line polarity (high active or low active):
 839         */
 840        switch (mp_irqs[idx].irqflag & 3)
 841        {
 842                case 0: /* conforms, ie. bus-type dependent polarity */
 843                        if (test_bit(bus, mp_bus_not_pci))
 844                                polarity = default_ISA_polarity(idx);
 845                        else
 846                                polarity = default_PCI_polarity(idx);
 847                        break;
 848                case 1: /* high active */
 849                {
 850                        polarity = 0;
 851                        break;
 852                }
 853                case 2: /* reserved */
 854                {
 855                        pr_warn("broken BIOS!!\n");
 856                        polarity = 1;
 857                        break;
 858                }
 859                case 3: /* low active */
 860                {
 861                        polarity = 1;
 862                        break;
 863                }
 864                default: /* invalid */
 865                {
 866                        pr_warn("broken BIOS!!\n");
 867                        polarity = 1;
 868                        break;
 869                }
 870        }
 871        return polarity;
 872}
 873
 874static int irq_trigger(int idx)
 875{
 876        int bus = mp_irqs[idx].srcbus;
 877        int trigger;
 878
 879        /*
 880         * Determine IRQ trigger mode (edge or level sensitive):
 881         */
 882        switch ((mp_irqs[idx].irqflag>>2) & 3)
 883        {
 884                case 0: /* conforms, ie. bus-type dependent */
 885                        if (test_bit(bus, mp_bus_not_pci))
 886                                trigger = default_ISA_trigger(idx);
 887                        else
 888                                trigger = default_PCI_trigger(idx);
 889#ifdef CONFIG_EISA
 890                        switch (mp_bus_id_to_type[bus]) {
 891                                case MP_BUS_ISA: /* ISA pin */
 892                                {
 893                                        /* set before the switch */
 894                                        break;
 895                                }
 896                                case MP_BUS_EISA: /* EISA pin */
 897                                {
 898                                        trigger = default_EISA_trigger(idx);
 899                                        break;
 900                                }
 901                                case MP_BUS_PCI: /* PCI pin */
 902                                {
 903                                        /* set before the switch */
 904                                        break;
 905                                }
 906                                default:
 907                                {
 908                                        pr_warn("broken BIOS!!\n");
 909                                        trigger = 1;
 910                                        break;
 911                                }
 912                        }
 913#endif
 914                        break;
 915                case 1: /* edge */
 916                {
 917                        trigger = 0;
 918                        break;
 919                }
 920                case 2: /* reserved */
 921                {
 922                        pr_warn("broken BIOS!!\n");
 923                        trigger = 1;
 924                        break;
 925                }
 926                case 3: /* level */
 927                {
 928                        trigger = 1;
 929                        break;
 930                }
 931                default: /* invalid */
 932                {
 933                        pr_warn("broken BIOS!!\n");
 934                        trigger = 0;
 935                        break;
 936                }
 937        }
 938        return trigger;
 939}
 940
 941static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin)
 942{
 943        int irq = -1;
 944        int ioapic = (int)(long)domain->host_data;
 945        int type = ioapics[ioapic].irqdomain_cfg.type;
 946
 947        switch (type) {
 948        case IOAPIC_DOMAIN_LEGACY:
 949                /*
 950                 * Dynamically allocate IRQ number for non-ISA IRQs in the first 16
 951                 * GSIs on some weird platforms.
 952                 */
 953                if (gsi < nr_legacy_irqs())
 954                        irq = irq_create_mapping(domain, pin);
 955                else if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0)
 956                        irq = gsi;
 957                break;
 958        case IOAPIC_DOMAIN_STRICT:
 959                if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0)
 960                        irq = gsi;
 961                break;
 962        case IOAPIC_DOMAIN_DYNAMIC:
 963                irq = irq_create_mapping(domain, pin);
 964                break;
 965        default:
 966                WARN(1, "ioapic: unknown irqdomain type %d\n", type);
 967                break;
 968        }
 969
 970        return irq > 0 ? irq : -1;
 971}
 972
 973static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
 974                             unsigned int flags)
 975{
 976        int irq;
 977        struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
 978        struct mp_pin_info *info = mp_pin_info(ioapic, pin);
 979
 980        if (!domain)
 981                return -1;
 982
 983        mutex_lock(&ioapic_mutex);
 984
 985        /*
 986         * Don't use irqdomain to manage ISA IRQs because there may be
 987         * multiple IOAPIC pins sharing the same ISA IRQ number and
 988         * irqdomain only supports 1:1 mapping between IOAPIC pin and
 989         * IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are used
 990         * for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H).
 991         * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are
 992         * available, and some BIOSes may use MP Interrupt Source records
 993         * to override IRQ numbers for PIRQs instead of reprogramming
 994         * the interrupt routing logic. Thus there may be multiple pins
 995         * sharing the same legacy IRQ number when ACPI is disabled.
 996         */
 997        if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) {
 998                irq = mp_irqs[idx].srcbusirq;
 999                if (flags & IOAPIC_MAP_ALLOC) {
1000                        if (info->count == 0 &&
1001                            mp_irqdomain_map(domain, irq, pin) != 0)
1002                                irq = -1;
1003
1004                        /* special handling for timer IRQ0 */
1005                        if (irq == 0)
1006                                info->count++;
1007                }
1008        } else {
1009                irq = irq_find_mapping(domain, pin);
1010                if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC))
1011                        irq = alloc_irq_from_domain(domain, gsi, pin);
1012        }
1013
1014        if (flags & IOAPIC_MAP_ALLOC) {
1015                /* special handling for legacy IRQs */
1016                if (irq < nr_legacy_irqs() && info->count == 1 &&
1017                    mp_irqdomain_map(domain, irq, pin) != 0)
1018                        irq = -1;
1019
1020                if (irq > 0)
1021                        info->count++;
1022                else if (info->count == 0)
1023                        info->set = 0;
1024        }
1025
1026        mutex_unlock(&ioapic_mutex);
1027
1028        return irq > 0 ? irq : -1;
1029}
1030
1031static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags)
1032{
1033        u32 gsi = mp_pin_to_gsi(ioapic, pin);
1034
1035        /*
1036         * Debugging check, we are in big trouble if this message pops up!
1037         */
1038        if (mp_irqs[idx].dstirq != pin)
1039                pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
1040
1041#ifdef CONFIG_X86_32
1042        /*
1043         * PCI IRQ command line redirection. Yes, limits are hardcoded.
1044         */
1045        if ((pin >= 16) && (pin <= 23)) {
1046                if (pirq_entries[pin-16] != -1) {
1047                        if (!pirq_entries[pin-16]) {
1048                                apic_printk(APIC_VERBOSE, KERN_DEBUG
1049                                                "disabling PIRQ%d\n", pin-16);
1050                        } else {
1051                                int irq = pirq_entries[pin-16];
1052                                apic_printk(APIC_VERBOSE, KERN_DEBUG
1053                                                "using PIRQ%d -> IRQ %d\n",
1054                                                pin-16, irq);
1055                                return irq;
1056                        }
1057                }
1058        }
1059#endif
1060
1061        return  mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags);
1062}
1063
1064int mp_map_gsi_to_irq(u32 gsi, unsigned int flags)
1065{
1066        int ioapic, pin, idx;
1067
1068        ioapic = mp_find_ioapic(gsi);
1069        if (ioapic < 0)
1070                return -1;
1071
1072        pin = mp_find_ioapic_pin(ioapic, gsi);
1073        idx = find_irq_entry(ioapic, pin, mp_INT);
1074        if ((flags & IOAPIC_MAP_CHECK) && idx < 0)
1075                return -1;
1076
1077        return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags);
1078}
1079
1080void mp_unmap_irq(int irq)
1081{
1082        struct irq_data *data = irq_get_irq_data(irq);
1083        struct mp_pin_info *info;
1084        int ioapic, pin;
1085
1086        if (!data || !data->domain)
1087                return;
1088
1089        ioapic = (int)(long)data->domain->host_data;
1090        pin = (int)data->hwirq;
1091        info = mp_pin_info(ioapic, pin);
1092
1093        mutex_lock(&ioapic_mutex);
1094        if (--info->count == 0) {
1095                info->set = 0;
1096                if (irq < nr_legacy_irqs() &&
1097                    ioapics[ioapic].irqdomain_cfg.type == IOAPIC_DOMAIN_LEGACY)
1098                        mp_irqdomain_unmap(data->domain, irq);
1099                else
1100                        irq_dispose_mapping(irq);
1101        }
1102        mutex_unlock(&ioapic_mutex);
1103}
1104
1105/*
1106 * Find a specific PCI IRQ entry.
1107 * Not an __init, possibly needed by modules
1108 */
1109int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
1110{
1111        int irq, i, best_ioapic = -1, best_idx = -1;
1112
1113        apic_printk(APIC_DEBUG,
1114                    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
1115                    bus, slot, pin);
1116        if (test_bit(bus, mp_bus_not_pci)) {
1117                apic_printk(APIC_VERBOSE,
1118                            "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
1119                return -1;
1120        }
1121
1122        for (i = 0; i < mp_irq_entries; i++) {
1123                int lbus = mp_irqs[i].srcbus;
1124                int ioapic_idx, found = 0;
1125
1126                if (bus != lbus || mp_irqs[i].irqtype != mp_INT ||
1127                    slot != ((mp_irqs[i].srcbusirq >> 2) & 0x1f))
1128                        continue;
1129
1130                for_each_ioapic(ioapic_idx)
1131                        if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
1132                            mp_irqs[i].dstapic == MP_APIC_ALL) {
1133                                found = 1;
1134                                break;
1135                        }
1136                if (!found)
1137                        continue;
1138
1139                /* Skip ISA IRQs */
1140                irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq, 0);
1141                if (irq > 0 && !IO_APIC_IRQ(irq))
1142                        continue;
1143
1144                if (pin == (mp_irqs[i].srcbusirq & 3)) {
1145                        best_idx = i;
1146                        best_ioapic = ioapic_idx;
1147                        goto out;
1148                }
1149
1150                /*
1151                 * Use the first all-but-pin matching entry as a
1152                 * best-guess fuzzy result for broken mptables.
1153                 */
1154                if (best_idx < 0) {
1155                        best_idx = i;
1156                        best_ioapic = ioapic_idx;
1157                }
1158        }
1159        if (best_idx < 0)
1160                return -1;
1161
1162out:
1163        return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
1164                         IOAPIC_MAP_ALLOC);
1165}
1166EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
1167
1168static struct irq_chip ioapic_chip;
1169
1170#ifdef CONFIG_X86_32
1171static inline int IO_APIC_irq_trigger(int irq)
1172{
1173        int apic, idx, pin;
1174
1175        for_each_ioapic_pin(apic, pin) {
1176                idx = find_irq_entry(apic, pin, mp_INT);
1177                if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin, 0)))
1178                        return irq_trigger(idx);
1179        }
1180        /*
1181         * nonexistent IRQs are edge default
1182         */
1183        return 0;
1184}
1185#else
1186static inline int IO_APIC_irq_trigger(int irq)
1187{
1188        return 1;
1189}
1190#endif
1191
1192static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
1193                                 unsigned long trigger)
1194{
1195        struct irq_chip *chip = &ioapic_chip;
1196        irq_flow_handler_t hdl;
1197        bool fasteoi;
1198
1199        if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1200            trigger == IOAPIC_LEVEL) {
1201                irq_set_status_flags(irq, IRQ_LEVEL);
1202                fasteoi = true;
1203        } else {
1204                irq_clear_status_flags(irq, IRQ_LEVEL);
1205                fasteoi = false;
1206        }
1207
1208        if (setup_remapped_irq(irq, cfg, chip))
1209                fasteoi = trigger != 0;
1210
1211        hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
1212        irq_set_chip_and_handler_name(irq, chip, hdl,
1213                                      fasteoi ? "fasteoi" : "edge");
1214}
1215
1216int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
1217                              unsigned int destination, int vector,
1218                              struct io_apic_irq_attr *attr)
1219{
1220        memset(entry, 0, sizeof(*entry));
1221
1222        entry->delivery_mode = apic->irq_delivery_mode;
1223        entry->dest_mode     = apic->irq_dest_mode;
1224        entry->dest          = destination;
1225        entry->vector        = vector;
1226        entry->mask          = 0;                       /* enable IRQ */
1227        entry->trigger       = attr->trigger;
1228        entry->polarity      = attr->polarity;
1229
1230        /*
1231         * Mask level triggered irqs.
1232         * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1233         */
1234        if (attr->trigger)
1235                entry->mask = 1;
1236
1237        return 0;
1238}
1239
1240static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
1241                                struct io_apic_irq_attr *attr)
1242{
1243        struct IO_APIC_route_entry entry;
1244        unsigned int dest;
1245
1246        if (!IO_APIC_IRQ(irq))
1247                return;
1248
1249        if (assign_irq_vector(irq, cfg, apic->target_cpus()))
1250                return;
1251
1252        if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(),
1253                                         &dest)) {
1254                pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n",
1255                        mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
1256                clear_irq_vector(irq, cfg);
1257
1258                return;
1259        }
1260
1261        apic_printk(APIC_VERBOSE,KERN_DEBUG
1262                    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1263                    "IRQ %d Mode:%i Active:%i Dest:%d)\n",
1264                    attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
1265                    cfg->vector, irq, attr->trigger, attr->polarity, dest);
1266
1267        if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
1268                pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
1269                        mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
1270                clear_irq_vector(irq, cfg);
1271
1272                return;
1273        }
1274
1275        ioapic_register_intr(irq, cfg, attr->trigger);
1276        if (irq < nr_legacy_irqs())
1277                legacy_pic->mask(irq);
1278
1279        ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry);
1280}
1281
1282static void __init setup_IO_APIC_irqs(void)
1283{
1284        unsigned int ioapic, pin;
1285        int idx;
1286
1287        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1288
1289        for_each_ioapic_pin(ioapic, pin) {
1290                idx = find_irq_entry(ioapic, pin, mp_INT);
1291                if (idx < 0)
1292                        apic_printk(APIC_VERBOSE,
1293                                    KERN_DEBUG " apic %d pin %d not connected\n",
1294                                    mpc_ioapic_id(ioapic), pin);
1295                else
1296                        pin_2_irq(idx, ioapic, pin,
1297                                  ioapic ? 0 : IOAPIC_MAP_ALLOC);
1298        }
1299}
1300
1301/*
1302 * Set up the timer pin, possibly with the 8259A-master behind.
1303 */
1304static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
1305                                        unsigned int pin, int vector)
1306{
1307        struct IO_APIC_route_entry entry;
1308        unsigned int dest;
1309
1310        memset(&entry, 0, sizeof(entry));
1311
1312        /*
1313         * We use logical delivery to get the timer IRQ
1314         * to the first CPU.
1315         */
1316        if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(),
1317                                                  apic->target_cpus(), &dest)))
1318                dest = BAD_APICID;
1319
1320        entry.dest_mode = apic->irq_dest_mode;
1321        entry.mask = 0;                 /* don't mask IRQ for edge */
1322        entry.dest = dest;
1323        entry.delivery_mode = apic->irq_delivery_mode;
1324        entry.polarity = 0;
1325        entry.trigger = 0;
1326        entry.vector = vector;
1327
1328        /*
1329         * The timer IRQ doesn't have to know that behind the
1330         * scene we may have a 8259A-master in AEOI mode ...
1331         */
1332        irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
1333                                      "edge");
1334
1335        /*
1336         * Add it to the IO-APIC irq-routing table:
1337         */
1338        ioapic_write_entry(ioapic_idx, pin, entry);
1339}
1340
1341void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
1342{
1343        int i;
1344
1345        pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n");
1346
1347        for (i = 0; i <= nr_entries; i++) {
1348                struct IO_APIC_route_entry entry;
1349
1350                entry = ioapic_read_entry(apic, i);
1351
1352                pr_debug(" %02x %02X  ", i, entry.dest);
1353                pr_cont("%1d    %1d    %1d   %1d   %1d    "
1354                        "%1d    %1d    %02X\n",
1355                        entry.mask,
1356                        entry.trigger,
1357                        entry.irr,
1358                        entry.polarity,
1359                        entry.delivery_status,
1360                        entry.dest_mode,
1361                        entry.delivery_mode,
1362                        entry.vector);
1363        }
1364}
1365
1366void intel_ir_io_apic_print_entries(unsigned int apic,
1367                                    unsigned int nr_entries)
1368{
1369        int i;
1370
1371        pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
1372
1373        for (i = 0; i <= nr_entries; i++) {
1374                struct IR_IO_APIC_route_entry *ir_entry;
1375                struct IO_APIC_route_entry entry;
1376
1377                entry = ioapic_read_entry(apic, i);
1378
1379                ir_entry = (struct IR_IO_APIC_route_entry *)&entry;
1380
1381                pr_debug(" %02x %04X ", i, ir_entry->index);
1382                pr_cont("%1d   %1d    %1d    %1d   %1d   "
1383                        "%1d    %1d     %X    %02X\n",
1384                        ir_entry->format,
1385                        ir_entry->mask,
1386                        ir_entry->trigger,
1387                        ir_entry->irr,
1388                        ir_entry->polarity,
1389                        ir_entry->delivery_status,
1390                        ir_entry->index2,
1391                        ir_entry->zero,
1392                        ir_entry->vector);
1393        }
1394}
1395
1396void ioapic_zap_locks(void)
1397{
1398        raw_spin_lock_init(&ioapic_lock);
1399}
1400
1401static void __init print_IO_APIC(int ioapic_idx)
1402{
1403        union IO_APIC_reg_00 reg_00;
1404        union IO_APIC_reg_01 reg_01;
1405        union IO_APIC_reg_02 reg_02;
1406        union IO_APIC_reg_03 reg_03;
1407        unsigned long flags;
1408
1409        raw_spin_lock_irqsave(&ioapic_lock, flags);
1410        reg_00.raw = io_apic_read(ioapic_idx, 0);
1411        reg_01.raw = io_apic_read(ioapic_idx, 1);
1412        if (reg_01.bits.version >= 0x10)
1413                reg_02.raw = io_apic_read(ioapic_idx, 2);
1414        if (reg_01.bits.version >= 0x20)
1415                reg_03.raw = io_apic_read(ioapic_idx, 3);
1416        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1417
1418        printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
1419        printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1420        printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
1421        printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
1422        printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
1423
1424        printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
1425        printk(KERN_DEBUG ".......     : max redirection entries: %02X\n",
1426                reg_01.bits.entries);
1427
1428        printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
1429        printk(KERN_DEBUG ".......     : IO APIC version: %02X\n",
1430                reg_01.bits.version);
1431
1432        /*
1433         * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
1434         * but the value of reg_02 is read as the previous read register
1435         * value, so ignore it if reg_02 == reg_01.
1436         */
1437        if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1438                printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1439                printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
1440        }
1441
1442        /*
1443         * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
1444         * or reg_03, but the value of reg_0[23] is read as the previous read
1445         * register value, so ignore it if reg_03 == reg_0[12].
1446         */
1447        if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
1448            reg_03.raw != reg_01.raw) {
1449                printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1450                printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
1451        }
1452
1453        printk(KERN_DEBUG ".... IRQ redirection table:\n");
1454
1455        x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
1456}
1457
1458void __init print_IO_APICs(void)
1459{
1460        int ioapic_idx;
1461        struct irq_cfg *cfg;
1462        unsigned int irq;
1463        struct irq_chip *chip;
1464
1465        printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1466        for_each_ioapic(ioapic_idx)
1467                printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1468                       mpc_ioapic_id(ioapic_idx),
1469                       ioapics[ioapic_idx].nr_registers);
1470
1471        /*
1472         * We are a bit conservative about what we expect.  We have to
1473         * know about every hardware change ASAP.
1474         */
1475        printk(KERN_INFO "testing the IO APIC.......................\n");
1476
1477        for_each_ioapic(ioapic_idx)
1478                print_IO_APIC(ioapic_idx);
1479
1480        printk(KERN_DEBUG "IRQ to pin mappings:\n");
1481        for_each_active_irq(irq) {
1482                struct irq_pin_list *entry;
1483
1484                chip = irq_get_chip(irq);
1485                if (chip != &ioapic_chip)
1486                        continue;
1487
1488                cfg = irq_cfg(irq);
1489                if (!cfg)
1490                        continue;
1491                if (list_empty(&cfg->irq_2_pin))
1492                        continue;
1493                printk(KERN_DEBUG "IRQ%d ", irq);
1494                for_each_irq_pin(entry, cfg->irq_2_pin)
1495                        pr_cont("-> %d:%d", entry->apic, entry->pin);
1496                pr_cont("\n");
1497        }
1498
1499        printk(KERN_INFO ".................................... done.\n");
1500}
1501
/*
 * Where, if anywhere, the i8259 is connected in external interrupt (ExtINT)
 * mode.  Both fields start out as -1, meaning nothing has been recorded.
 */
static struct { int pin, apic; } ioapic_i8259 = { .pin = -1, .apic = -1 };
1504
1505void __init enable_IO_APIC(void)
1506{
1507        int i8259_apic, i8259_pin;
1508        int apic, pin;
1509
1510        if (!nr_legacy_irqs())
1511                return;
1512
1513        for_each_ioapic_pin(apic, pin) {
1514                /* See if any of the pins is in ExtINT mode */
1515                struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin);
1516
1517                /* If the interrupt line is enabled and in ExtInt mode
1518                 * I have found the pin where the i8259 is connected.
1519                 */
1520                if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
1521                        ioapic_i8259.apic = apic;
1522                        ioapic_i8259.pin  = pin;
1523                        goto found_i8259;
1524                }
1525        }
1526 found_i8259:
1527        /* Look to see what if the MP table has reported the ExtINT */
1528        /* If we could not find the appropriate pin by looking at the ioapic
1529         * the i8259 probably is not connected the ioapic but give the
1530         * mptable a chance anyway.
1531         */
1532        i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
1533        i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1534        /* Trust the MP table if nothing is setup in the hardware */
1535        if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
1536                printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
1537                ioapic_i8259.pin  = i8259_pin;
1538                ioapic_i8259.apic = i8259_apic;
1539        }
1540        /* Complain if the MP table and the hardware disagree */
1541        if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
1542                (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
1543        {
1544                printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
1545        }
1546
1547        /*
1548         * Do not trust the IO-APIC being empty at bootup
1549         */
1550        clear_IO_APIC();
1551}
1552
1553void native_disable_io_apic(void)
1554{
1555        /*
1556         * If the i8259 is routed through an IOAPIC
1557         * Put that IOAPIC in virtual wire mode
1558         * so legacy interrupts can be delivered.
1559         */
1560        if (ioapic_i8259.pin != -1) {
1561                struct IO_APIC_route_entry entry;
1562
1563                memset(&entry, 0, sizeof(entry));
1564                entry.mask            = 0; /* Enabled */
1565                entry.trigger         = 0; /* Edge */
1566                entry.irr             = 0;
1567                entry.polarity        = 0; /* High */
1568                entry.delivery_status = 0;
1569                entry.dest_mode       = 0; /* Physical */
1570                entry.delivery_mode   = dest_ExtINT; /* ExtInt */
1571                entry.vector          = 0;
1572                entry.dest            = read_apic_id();
1573
1574                /*
1575                 * Add it to the IO-APIC irq-routing table:
1576                 */
1577                ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1578        }
1579
1580        if (cpu_has_apic || apic_from_smp_config())
1581                disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1582
1583}
1584
1585/*
1586 * Not an __init, needed by the reboot code
1587 */
1588void disable_IO_APIC(void)
1589{
1590        /*
1591         * Clear the IO-APIC before rebooting:
1592         */
1593        clear_IO_APIC();
1594
1595        if (!nr_legacy_irqs())
1596                return;
1597
1598        x86_io_apic_ops.disable();
1599}
1600
1601#ifdef CONFIG_X86_32
1602/*
1603 * function to set the IO-APIC physical IDs based on the
1604 * values stored in the MPC table.
1605 *
1606 * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
1607 */
1608void __init setup_ioapic_ids_from_mpc_nocheck(void)
1609{
1610        union IO_APIC_reg_00 reg_00;
1611        physid_mask_t phys_id_present_map;
1612        int ioapic_idx;
1613        int i;
1614        unsigned char old_id;
1615        unsigned long flags;
1616
1617        /*
1618         * This is broken; anything with a real cpu count has to
1619         * circumvent this idiocy regardless.
1620         */
1621        apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
1622
1623        /*
1624         * Set the IOAPIC ID to the value stored in the MPC table.
1625         */
1626        for_each_ioapic(ioapic_idx) {
1627                /* Read the register 0 value */
1628                raw_spin_lock_irqsave(&ioapic_lock, flags);
1629                reg_00.raw = io_apic_read(ioapic_idx, 0);
1630                raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1631
1632                old_id = mpc_ioapic_id(ioapic_idx);
1633
1634                if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) {
1635                        printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1636                                ioapic_idx, mpc_ioapic_id(ioapic_idx));
1637                        printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1638                                reg_00.bits.ID);
1639                        ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID;
1640                }
1641
1642                /*
1643                 * Sanity check, is the ID really free? Every APIC in a
1644                 * system must have a unique ID or we get lots of nice
1645                 * 'stuck on smp_invalidate_needed IPI wait' messages.
1646                 */
1647                if (apic->check_apicid_used(&phys_id_present_map,
1648                                            mpc_ioapic_id(ioapic_idx))) {
1649                        printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1650                                ioapic_idx, mpc_ioapic_id(ioapic_idx));
1651                        for (i = 0; i < get_physical_broadcast(); i++)
1652                                if (!physid_isset(i, phys_id_present_map))
1653                                        break;
1654                        if (i >= get_physical_broadcast())
1655                                panic("Max APIC ID exceeded!\n");
1656                        printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1657                                i);
1658                        physid_set(i, phys_id_present_map);
1659                        ioapics[ioapic_idx].mp_config.apicid = i;
1660                } else {
1661                        physid_mask_t tmp;
1662                        apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx),
1663                                                    &tmp);
1664                        apic_printk(APIC_VERBOSE, "Setting %d in the "
1665                                        "phys_id_present_map\n",
1666                                        mpc_ioapic_id(ioapic_idx));
1667                        physids_or(phys_id_present_map, phys_id_present_map, tmp);
1668                }
1669
1670                /*
1671                 * We need to adjust the IRQ routing table
1672                 * if the ID changed.
1673                 */
1674                if (old_id != mpc_ioapic_id(ioapic_idx))
1675                        for (i = 0; i < mp_irq_entries; i++)
1676                                if (mp_irqs[i].dstapic == old_id)
1677                                        mp_irqs[i].dstapic
1678                                                = mpc_ioapic_id(ioapic_idx);
1679
1680                /*
1681                 * Update the ID register according to the right value
1682                 * from the MPC table if they are different.
1683                 */
1684                if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID)
1685                        continue;
1686
1687                apic_printk(APIC_VERBOSE, KERN_INFO
1688                        "...changing IO-APIC physical APIC ID to %d ...",
1689                        mpc_ioapic_id(ioapic_idx));
1690
1691                reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
1692                raw_spin_lock_irqsave(&ioapic_lock, flags);
1693                io_apic_write(ioapic_idx, 0, reg_00.raw);
1694                raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1695
1696                /*
1697                 * Sanity check
1698                 */
1699                raw_spin_lock_irqsave(&ioapic_lock, flags);
1700                reg_00.raw = io_apic_read(ioapic_idx, 0);
1701                raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1702                if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
1703                        pr_cont("could not set ID!\n");
1704                else
1705                        apic_printk(APIC_VERBOSE, " ok.\n");
1706        }
1707}
1708
1709void __init setup_ioapic_ids_from_mpc(void)
1710{
1711
1712        if (acpi_ioapic)
1713                return;
1714        /*
1715         * Don't check I/O APIC IDs for xAPIC systems.  They have
1716         * no meaning without the serial APIC bus.
1717         */
1718        if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1719                || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
1720                return;
1721        setup_ioapic_ids_from_mpc_nocheck();
1722}
1723#endif
1724
1725int no_timer_check __initdata;
1726
1727static int __init notimercheck(char *s)
1728{
1729        no_timer_check = 1;
1730        return 1;
1731}
1732__setup("no_timer_check", notimercheck);
1733
1734/*
1735 * There is a nasty bug in some older SMP boards, their mptable lies
1736 * about the timer IRQ. We do the following to work around the situation:
1737 *
1738 *      - timer IRQ defaults to IO-APIC IRQ
1739 *      - if this function detects that timer IRQs are defunct, then we fall
1740 *        back to ISA timer IRQs
1741 */
1742static int __init timer_irq_works(void)
1743{
1744        unsigned long t1 = jiffies;
1745        unsigned long flags;
1746
1747        if (no_timer_check)
1748                return 1;
1749
1750        local_save_flags(flags);
1751        local_irq_enable();
1752        /* Let ten ticks pass... */
1753        mdelay((10 * 1000) / HZ);
1754        local_irq_restore(flags);
1755
1756        /*
1757         * Expect a few ticks at least, to be sure some possible
1758         * glue logic does not lock up after one or two first
1759         * ticks in a non-ExtINT mode.  Also the local APIC
1760         * might have cached one ExtINT interrupt.  Finally, at
1761         * least one tick may be lost due to delays.
1762         */
1763
1764        /* jiffies wrap? */
1765        if (time_after(jiffies, t1 + 4))
1766                return 1;
1767        return 0;
1768}
1769
/*
 * In the SMP+IOAPIC case it might happen that there is an unspecified
 * number of pending IRQ events left unhandled. These cases are very rare,
 * so we 'resend' these IRQs via IPIs to the same CPU. It's much better
 * to do it this way, because then we do not have to be aware of
 * 'pending' interrupts in the IRQ path, except at this point.
 */
1777/*
1778 * Edge triggered needs to resend any interrupt
1779 * that was delayed but this is now handled in the device
1780 * independent code.
1781 */
1782
1783/*
1784 * Starting up a edge-triggered IO-APIC interrupt is
1785 * nasty - we need to make sure that we get the edge.
1786 * If it is already asserted for some reason, we need
1787 * return 1 to indicate that is was pending.
1788 *
1789 * This is not complete - we should be able to fake
1790 * an edge even if it isn't on the 8259A...
1791 */
1792
1793static unsigned int startup_ioapic_irq(struct irq_data *data)
1794{
1795        int was_pending = 0, irq = data->irq;
1796        unsigned long flags;
1797
1798        raw_spin_lock_irqsave(&ioapic_lock, flags);
1799        if (irq < nr_legacy_irqs()) {
1800                legacy_pic->mask(irq);
1801                if (legacy_pic->irq_pending(irq))
1802                        was_pending = 1;
1803        }
1804        __unmask_ioapic(irqd_cfg(data));
1805        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1806
1807        return was_pending;
1808}
1809
1810/*
1811 * Level and edge triggered IO-APIC interrupts need different handling,
1812 * so we use two separate IRQ descriptors. Edge triggered IRQs can be
1813 * handled with the level-triggered descriptor, but that one has slightly
1814 * more overhead. Level-triggered interrupts cannot be handled with the
1815 * edge-triggered handler, without risking IRQ storms and other ugly
1816 * races.
1817 */
1818
1819static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
1820{
1821        int apic, pin;
1822        struct irq_pin_list *entry;
1823        u8 vector = cfg->vector;
1824
1825        for_each_irq_pin(entry, cfg->irq_2_pin) {
1826                unsigned int reg;
1827
1828                apic = entry->apic;
1829                pin = entry->pin;
1830
1831                io_apic_write(apic, 0x11 + pin*2, dest);
1832                reg = io_apic_read(apic, 0x10 + pin*2);
1833                reg &= ~IO_APIC_REDIR_VECTOR_MASK;
1834                reg |= vector;
1835                io_apic_modify(apic, 0x10 + pin*2, reg);
1836        }
1837}
1838
1839int native_ioapic_set_affinity(struct irq_data *data,
1840                               const struct cpumask *mask,
1841                               bool force)
1842{
1843        unsigned int dest, irq = data->irq;
1844        unsigned long flags;
1845        int ret;
1846
1847        if (!config_enabled(CONFIG_SMP))
1848                return -EPERM;
1849
1850        raw_spin_lock_irqsave(&ioapic_lock, flags);
1851        ret = apic_set_affinity(data, mask, &dest);
1852        if (!ret) {
1853                /* Only the high 8 bits are valid. */
1854                dest = SET_APIC_LOGICAL_ID(dest);
1855                __target_IO_APIC_irq(irq, dest, irqd_cfg(data));
1856                ret = IRQ_SET_MASK_OK_NOCOPY;
1857        }
1858        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1859        return ret;
1860}
1861
1862atomic_t irq_mis_count;
1863
1864#ifdef CONFIG_GENERIC_PENDING_IRQ
1865static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
1866{
1867        struct irq_pin_list *entry;
1868        unsigned long flags;
1869
1870        raw_spin_lock_irqsave(&ioapic_lock, flags);
1871        for_each_irq_pin(entry, cfg->irq_2_pin) {
1872                unsigned int reg;
1873                int pin;
1874
1875                pin = entry->pin;
1876                reg = io_apic_read(entry->apic, 0x10 + pin*2);
1877                /* Is the remote IRR bit set? */
1878                if (reg & IO_APIC_REDIR_REMOTE_IRR) {
1879                        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1880                        return true;
1881                }
1882        }
1883        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1884
1885        return false;
1886}
1887
1888static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
1889{
1890        /* If we are moving the irq we need to mask it */
1891        if (unlikely(irqd_is_setaffinity_pending(data))) {
1892                mask_ioapic(cfg);
1893                return true;
1894        }
1895        return false;
1896}
1897
1898static inline void ioapic_irqd_unmask(struct irq_data *data,
1899                                      struct irq_cfg *cfg, bool masked)
1900{
1901        if (unlikely(masked)) {
1902                /* Only migrate the irq if the ack has been received.
1903                 *
1904                 * On rare occasions the broadcast level triggered ack gets
1905                 * delayed going to ioapics, and if we reprogram the
1906                 * vector while Remote IRR is still set the irq will never
1907                 * fire again.
1908                 *
1909                 * To prevent this scenario we read the Remote IRR bit
1910                 * of the ioapic.  This has two effects.
1911                 * - On any sane system the read of the ioapic will
1912                 *   flush writes (and acks) going to the ioapic from
1913                 *   this cpu.
1914                 * - We get to see if the ACK has actually been delivered.
1915                 *
1916                 * Based on failed experiments of reprogramming the
1917                 * ioapic entry from outside of irq context starting
1918                 * with masking the ioapic entry and then polling until
1919                 * Remote IRR was clear before reprogramming the
1920                 * ioapic I don't trust the Remote IRR bit to be
1921                 * completey accurate.
1922                 *
1923                 * However there appears to be no other way to plug
1924                 * this race, so if the Remote IRR bit is not
1925                 * accurate and is causing problems then it is a hardware bug
1926                 * and you can go talk to the chipset vendor about it.
1927                 */
1928                if (!io_apic_level_ack_pending(cfg))
1929                        irq_move_masked_irq(data);
1930                unmask_ioapic(cfg);
1931        }
1932}
1933#else
1934static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
1935{
1936        return false;
1937}
1938static inline void ioapic_irqd_unmask(struct irq_data *data,
1939                                      struct irq_cfg *cfg, bool masked)
1940{
1941}
1942#endif
1943
1944static void ack_ioapic_level(struct irq_data *data)
1945{
1946        struct irq_cfg *cfg = irqd_cfg(data);
1947        int i, irq = data->irq;
1948        unsigned long v;
1949        bool masked;
1950
1951        irq_complete_move(cfg);
1952        masked = ioapic_irqd_mask(data, cfg);
1953
1954        /*
1955         * It appears there is an erratum which affects at least version 0x11
1956         * of I/O APIC (that's the 82093AA and cores integrated into various
1957         * chipsets).  Under certain conditions a level-triggered interrupt is
1958         * erroneously delivered as edge-triggered one but the respective IRR
1959         * bit gets set nevertheless.  As a result the I/O unit expects an EOI
1960         * message but it will never arrive and further interrupts are blocked
1961         * from the source.  The exact reason is so far unknown, but the
1962         * phenomenon was observed when two consecutive interrupt requests
1963         * from a given source get delivered to the same CPU and the source is
1964         * temporarily disabled in between.
1965         *
1966         * A workaround is to simulate an EOI message manually.  We achieve it
1967         * by setting the trigger mode to edge and then to level when the edge
1968         * trigger mode gets detected in the TMR of a local APIC for a
1969         * level-triggered interrupt.  We mask the source for the time of the
1970         * operation to prevent an edge-triggered interrupt escaping meanwhile.
1971         * The idea is from Manfred Spraul.  --macro
1972         *
1973         * Also in the case when cpu goes offline, fixup_irqs() will forward
1974         * any unhandled interrupt on the offlined cpu to the new cpu
1975         * destination that is handling the corresponding interrupt. This
1976         * interrupt forwarding is done via IPI's. Hence, in this case also
1977         * level-triggered io-apic interrupt will be seen as an edge
1978         * interrupt in the IRR. And we can't rely on the cpu's EOI
1979         * to be broadcasted to the IO-APIC's which will clear the remoteIRR
1980         * corresponding to the level-triggered interrupt. Hence on IO-APIC's
1981         * supporting EOI register, we do an explicit EOI to clear the
1982         * remote IRR and on IO-APIC's which don't have an EOI register,
1983         * we use the above logic (mask+edge followed by unmask+level) from
1984         * Manfred Spraul to clear the remote IRR.
1985         */
1986        i = cfg->vector;
1987        v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
1988
1989        /*
1990         * We must acknowledge the irq before we move it or the acknowledge will
1991         * not propagate properly.
1992         */
1993        ack_APIC_irq();
1994
1995        /*
1996         * Tail end of clearing remote IRR bit (either by delivering the EOI
1997         * message via io-apic EOI register write or simulating it using
1998         * mask+edge followed by unnask+level logic) manually when the
1999         * level triggered interrupt is seen as the edge triggered interrupt
2000         * at the cpu.
2001         */
2002        if (!(v & (1 << (i & 0x1f)))) {
2003                atomic_inc(&irq_mis_count);
2004
2005                eoi_ioapic_irq(irq, cfg);
2006        }
2007
2008        ioapic_irqd_unmask(data, cfg, masked);
2009}
2010
2011static struct irq_chip ioapic_chip __read_mostly = {
2012        .name                   = "IO-APIC",
2013        .irq_startup            = startup_ioapic_irq,
2014        .irq_mask               = mask_ioapic_irq,
2015        .irq_unmask             = unmask_ioapic_irq,
2016        .irq_ack                = apic_ack_edge,
2017        .irq_eoi                = ack_ioapic_level,
2018        .irq_set_affinity       = native_ioapic_set_affinity,
2019        .irq_retrigger          = apic_retrigger_irq,
2020        .flags                  = IRQCHIP_SKIP_SET_WAKE,
2021};
2022
2023static inline void init_IO_APIC_traps(void)
2024{
2025        struct irq_cfg *cfg;
2026        unsigned int irq;
2027
2028        for_each_active_irq(irq) {
2029                cfg = irq_cfg(irq);
2030                if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2031                        /*
2032                         * Hmm.. We don't have an entry for this,
2033                         * so default to an old-fashioned 8259
2034                         * interrupt if we can..
2035                         */
2036                        if (irq < nr_legacy_irqs())
2037                                legacy_pic->make_irq(irq);
2038                        else
2039                                /* Strange. Oh, well.. */
2040                                irq_set_chip(irq, &no_irq_chip);
2041                }
2042        }
2043}
2044
2045/*
2046 * The local APIC irq-chip implementation:
2047 */
2048
2049static void mask_lapic_irq(struct irq_data *data)
2050{
2051        unsigned long v;
2052
2053        v = apic_read(APIC_LVT0);
2054        apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
2055}
2056
2057static void unmask_lapic_irq(struct irq_data *data)
2058{
2059        unsigned long v;
2060
2061        v = apic_read(APIC_LVT0);
2062        apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2063}
2064
/* irq_ack callback of lapic_chip: acknowledge at the local APIC. */
static void ack_lapic_irq(struct irq_data *data)
{
        ack_APIC_irq();
}
2069
2070static struct irq_chip lapic_chip __read_mostly = {
2071        .name           = "local-APIC",
2072        .irq_mask       = mask_lapic_irq,
2073        .irq_unmask     = unmask_lapic_irq,
2074        .irq_ack        = ack_lapic_irq,
2075};
2076
2077static void lapic_register_intr(int irq)
2078{
2079        irq_clear_status_flags(irq, IRQ_LEVEL);
2080        irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2081                                      "edge");
2082}
2083
2084/*
2085 * This looks a bit hackish but it's about the only one way of sending
2086 * a few INTA cycles to 8259As and any associated glue logic.  ICR does
2087 * not support the ExtINT mode, unfortunately.  We need to send these
2088 * cycles as some i82489DX-based boards have glue logic that keeps the
2089 * 8259A interrupt line asserted until INTA.  --macro
2090 */
2091static inline void __init unlock_ExtINT_logic(void)
2092{
2093        int apic, pin, i;
2094        struct IO_APIC_route_entry entry0, entry1;
2095        unsigned char save_control, save_freq_select;
2096
2097        pin  = find_isa_irq_pin(8, mp_INT);
2098        if (pin == -1) {
2099                WARN_ON_ONCE(1);
2100                return;
2101        }
2102        apic = find_isa_irq_apic(8, mp_INT);
2103        if (apic == -1) {
2104                WARN_ON_ONCE(1);
2105                return;
2106        }
2107
2108        entry0 = ioapic_read_entry(apic, pin);
2109        clear_IO_APIC_pin(apic, pin);
2110
2111        memset(&entry1, 0, sizeof(entry1));
2112
2113        entry1.dest_mode = 0;                   /* physical delivery */
2114        entry1.mask = 0;                        /* unmask IRQ now */
2115        entry1.dest = hard_smp_processor_id();
2116        entry1.delivery_mode = dest_ExtINT;
2117        entry1.polarity = entry0.polarity;
2118        entry1.trigger = 0;
2119        entry1.vector = 0;
2120
2121        ioapic_write_entry(apic, pin, entry1);
2122
2123        save_control = CMOS_READ(RTC_CONTROL);
2124        save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2125        CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
2126                   RTC_FREQ_SELECT);
2127        CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
2128
2129        i = 100;
2130        while (i-- > 0) {
2131                mdelay(10);
2132                if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
2133                        i -= 10;
2134        }
2135
2136        CMOS_WRITE(save_control, RTC_CONTROL);
2137        CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2138        clear_IO_APIC_pin(apic, pin);
2139
2140        ioapic_write_entry(apic, pin, entry0);
2141}
2142
2143static int disable_timer_pin_1 __initdata;
2144/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
2145static int __init disable_timer_pin_setup(char *arg)
2146{
2147        disable_timer_pin_1 = 1;
2148        return 0;
2149}
2150early_param("disable_timer_pin_1", disable_timer_pin_setup);
2151
2152/*
2153 * This code may look a bit paranoid, but it's supposed to cooperate with
2154 * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
2155 * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
2156 * fanatically on his truly buggy board.
2157 *
2158 * FIXME: really need to revamp this for all platforms.
2159 */
2160static inline void __init check_timer(void)
2161{
2162        struct irq_cfg *cfg = irq_cfg(0);
2163        int node = cpu_to_node(0);
2164        int apic1, pin1, apic2, pin2;
2165        unsigned long flags;
2166        int no_pin1 = 0;
2167
2168        local_irq_save(flags);
2169
2170        /*
2171         * get/set the timer IRQ vector:
2172         */
2173        legacy_pic->mask(0);
2174        assign_irq_vector(0, cfg, apic->target_cpus());
2175
2176        /*
2177         * As IRQ0 is to be enabled in the 8259A, the virtual
2178         * wire has to be disabled in the local APIC.  Also
2179         * timer interrupts need to be acknowledged manually in
2180         * the 8259A for the i82489DX when using the NMI
2181         * watchdog as that APIC treats NMIs as level-triggered.
2182         * The AEOI mode will finish them in the 8259A
2183         * automatically.
2184         */
2185        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2186        legacy_pic->init(1);
2187
2188        pin1  = find_isa_irq_pin(0, mp_INT);
2189        apic1 = find_isa_irq_apic(0, mp_INT);
2190        pin2  = ioapic_i8259.pin;
2191        apic2 = ioapic_i8259.apic;
2192
2193        apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
2194                    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
2195                    cfg->vector, apic1, pin1, apic2, pin2);
2196
2197        /*
2198         * Some BIOS writers are clueless and report the ExtINTA
2199         * I/O APIC input from the cascaded 8259A as the timer
2200         * interrupt input.  So just in case, if only one pin
2201         * was found above, try it both directly and through the
2202         * 8259A.
2203         */
2204        if (pin1 == -1) {
2205                panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
2206                pin1 = pin2;
2207                apic1 = apic2;
2208                no_pin1 = 1;
2209        } else if (pin2 == -1) {
2210                pin2 = pin1;
2211                apic2 = apic1;
2212        }
2213
2214        if (pin1 != -1) {
2215                /*
2216                 * Ok, does IRQ0 through the IOAPIC work?
2217                 */
2218                if (no_pin1) {
2219                        add_pin_to_irq_node(cfg, node, apic1, pin1);
2220                        setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2221                } else {
2222                        /* for edge trigger, setup_ioapic_irq already
2223                         * leave it unmasked.
2224                         * so only need to unmask if it is level-trigger
2225                         * do we really have level trigger timer?
2226                         */
2227                        int idx;
2228                        idx = find_irq_entry(apic1, pin1, mp_INT);
2229                        if (idx != -1 && irq_trigger(idx))
2230                                unmask_ioapic(cfg);
2231                }
2232                if (timer_irq_works()) {
2233                        if (disable_timer_pin_1 > 0)
2234                                clear_IO_APIC_pin(0, pin1);
2235                        goto out;
2236                }
2237                panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
2238                local_irq_disable();
2239                clear_IO_APIC_pin(apic1, pin1);
2240                if (!no_pin1)
2241                        apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
2242                                    "8254 timer not connected to IO-APIC\n");
2243
2244                apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
2245                            "(IRQ0) through the 8259A ...\n");
2246                apic_printk(APIC_QUIET, KERN_INFO
2247                            "..... (found apic %d pin %d) ...\n", apic2, pin2);
2248                /*
2249                 * legacy devices should be connected to IO APIC #0
2250                 */
2251                replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
2252                setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2253                legacy_pic->unmask(0);
2254                if (timer_irq_works()) {
2255                        apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2256                        goto out;
2257                }
2258                /*
2259                 * Cleanup, just in case ...
2260                 */
2261                local_irq_disable();
2262                legacy_pic->mask(0);
2263                clear_IO_APIC_pin(apic2, pin2);
2264                apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
2265        }
2266
2267        apic_printk(APIC_QUIET, KERN_INFO
2268                    "...trying to set up timer as Virtual Wire IRQ...\n");
2269
2270        lapic_register_intr(0);
2271        apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);     /* Fixed mode */
2272        legacy_pic->unmask(0);
2273
2274        if (timer_irq_works()) {
2275                apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2276                goto out;
2277        }
2278        local_irq_disable();
2279        legacy_pic->mask(0);
2280        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
2281        apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
2282
2283        apic_printk(APIC_QUIET, KERN_INFO
2284                    "...trying to set up timer as ExtINT IRQ...\n");
2285
2286        legacy_pic->init(0);
2287        legacy_pic->make_irq(0);
2288        apic_write(APIC_LVT0, APIC_DM_EXTINT);
2289
2290        unlock_ExtINT_logic();
2291
2292        if (timer_irq_works()) {
2293                apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2294                goto out;
2295        }
2296        local_irq_disable();
2297        apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
2298        if (x2apic_preenabled)
2299                apic_printk(APIC_QUIET, KERN_INFO
2300                            "Perhaps problem with the pre-enabled x2apic mode\n"
2301                            "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
2302        panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
2303                "report.  Then try booting with the 'noapic' option.\n");
2304out:
2305        local_irq_restore(flags);
2306}
2307
2308/*
2309 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2310 * to devices.  However there may be an I/O APIC pin available for
2311 * this interrupt regardless.  The pin may be left unconnected, but
2312 * typically it will be reused as an ExtINT cascade interrupt for
2313 * the master 8259A.  In the MPS case such a pin will normally be
2314 * reported as an ExtINT interrupt in the MP table.  With ACPI
2315 * there is no provision for ExtINT interrupts, and in the absence
2316 * of an override it would be treated as an ordinary ISA I/O APIC
2317 * interrupt, that is edge-triggered and unmasked by default.  We
2318 * used to do this, but it caused problems on some systems because
2319 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
2320 * the same ExtINT cascade interrupt to drive the local APIC of the
2321 * bootstrap processor.  Therefore we refrain from routing IRQ2 to
2322 * the I/O APIC in all cases now.  No actual device should request
2323 * it anyway.  --macro
2324 */
/* Mask with only the bit of the PIC cascade IRQ set. */
#define PIC_IRQS        (1UL << PIC_CASCADE_IR)
2326
2327static int mp_irqdomain_create(int ioapic)
2328{
2329        size_t size;
2330        int hwirqs = mp_ioapic_pin_count(ioapic);
2331        struct ioapic *ip = &ioapics[ioapic];
2332        struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
2333        struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
2334
2335        size = sizeof(struct mp_pin_info) * mp_ioapic_pin_count(ioapic);
2336        ip->pin_info = kzalloc(size, GFP_KERNEL);
2337        if (!ip->pin_info)
2338                return -ENOMEM;
2339
2340        if (cfg->type == IOAPIC_DOMAIN_INVALID)
2341                return 0;
2342
2343        ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops,
2344                                              (void *)(long)ioapic);
2345        if(!ip->irqdomain) {
2346                kfree(ip->pin_info);
2347                ip->pin_info = NULL;
2348                return -ENOMEM;
2349        }
2350
2351        if (cfg->type == IOAPIC_DOMAIN_LEGACY ||
2352            cfg->type == IOAPIC_DOMAIN_STRICT)
2353                ioapic_dynirq_base = max(ioapic_dynirq_base,
2354                                         gsi_cfg->gsi_end + 1);
2355
2356        if (gsi_cfg->gsi_base == 0)
2357                irq_set_default_host(ip->irqdomain);
2358
2359        return 0;
2360}
2361
2362static void ioapic_destroy_irqdomain(int idx)
2363{
2364        if (ioapics[idx].irqdomain) {
2365                irq_domain_remove(ioapics[idx].irqdomain);
2366                ioapics[idx].irqdomain = NULL;
2367        }
2368        kfree(ioapics[idx].pin_info);
2369        ioapics[idx].pin_info = NULL;
2370}
2371
2372void __init setup_IO_APIC(void)
2373{
2374        int ioapic;
2375
2376        /*
2377         * calling enable_IO_APIC() is moved to setup_local_APIC for BP
2378         */
2379        io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL;
2380
2381        apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
2382        for_each_ioapic(ioapic)
2383                BUG_ON(mp_irqdomain_create(ioapic));
2384
2385        /*
2386         * Set up IO-APIC IRQ routing.
2387         */
2388        x86_init.mpparse.setup_ioapic_ids();
2389
2390        sync_Arb_IDs();
2391        setup_IO_APIC_irqs();
2392        init_IO_APIC_traps();
2393        if (nr_legacy_irqs())
2394                check_timer();
2395
2396        ioapic_initialized = 1;
2397}
2398
2399/*
2400 *      Called after all the initialization is done. If we didn't find any
2401 *      APIC bugs then we can allow the modify fast path
2402 */
2403
2404static int __init io_apic_bug_finalize(void)
2405{
2406        if (sis_apic_bug == -1)
2407                sis_apic_bug = 0;
2408        return 0;
2409}
2410
2411late_initcall(io_apic_bug_finalize);
2412
2413static void resume_ioapic_id(int ioapic_idx)
2414{
2415        unsigned long flags;
2416        union IO_APIC_reg_00 reg_00;
2417
2418        raw_spin_lock_irqsave(&ioapic_lock, flags);
2419        reg_00.raw = io_apic_read(ioapic_idx, 0);
2420        if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) {
2421                reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
2422                io_apic_write(ioapic_idx, 0, reg_00.raw);
2423        }
2424        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2425}
2426
2427static void ioapic_resume(void)
2428{
2429        int ioapic_idx;
2430
2431        for_each_ioapic_reverse(ioapic_idx)
2432                resume_ioapic_id(ioapic_idx);
2433
2434        restore_ioapic_entries();
2435}
2436
2437static struct syscore_ops ioapic_syscore_ops = {
2438        .suspend = save_ioapic_entries,
2439        .resume = ioapic_resume,
2440};
2441
2442static int __init ioapic_init_ops(void)
2443{
2444        register_syscore_ops(&ioapic_syscore_ops);
2445
2446        return 0;
2447}
2448
2449device_initcall(ioapic_init_ops);
2450
2451static int
2452io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
2453{
2454        struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
2455        int ret;
2456
2457        if (!cfg)
2458                return -EINVAL;
2459        ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
2460        if (!ret)
2461                setup_ioapic_irq(irq, cfg, attr);
2462        return ret;
2463}
2464
2465static int io_apic_get_redir_entries(int ioapic)
2466{
2467        union IO_APIC_reg_01    reg_01;
2468        unsigned long flags;
2469
2470        raw_spin_lock_irqsave(&ioapic_lock, flags);
2471        reg_01.raw = io_apic_read(ioapic, 1);
2472        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2473
2474        /* The register returns the maximum index redir index
2475         * supported, which is one less than the total number of redir
2476         * entries.
2477         */
2478        return reg_01.bits.entries + 1;
2479}
2480
2481unsigned int arch_dynirq_lower_bound(unsigned int from)
2482{
2483        /*
2484         * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
2485         * gsi_top if ioapic_dynirq_base hasn't been initialized yet.
2486         */
2487        return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
2488}
2489
2490#ifdef CONFIG_X86_32
2491static int io_apic_get_unique_id(int ioapic, int apic_id)
2492{
2493        union IO_APIC_reg_00 reg_00;
2494        static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
2495        physid_mask_t tmp;
2496        unsigned long flags;
2497        int i = 0;
2498
2499        /*
2500         * The P4 platform supports up to 256 APIC IDs on two separate APIC
2501         * buses (one for LAPICs, one for IOAPICs), where predecessors only
2502         * supports up to 16 on one shared APIC bus.
2503         *
2504         * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
2505         *      advantage of new APIC bus architecture.
2506         */
2507
2508        if (physids_empty(apic_id_map))
2509                apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
2510
2511        raw_spin_lock_irqsave(&ioapic_lock, flags);
2512        reg_00.raw = io_apic_read(ioapic, 0);
2513        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2514
2515        if (apic_id >= get_physical_broadcast()) {
2516                printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
2517                        "%d\n", ioapic, apic_id, reg_00.bits.ID);
2518                apic_id = reg_00.bits.ID;
2519        }
2520
2521        /*
2522         * Every APIC in a system must have a unique ID or we get lots of nice
2523         * 'stuck on smp_invalidate_needed IPI wait' messages.
2524         */
2525        if (apic->check_apicid_used(&apic_id_map, apic_id)) {
2526
2527                for (i = 0; i < get_physical_broadcast(); i++) {
2528                        if (!apic->check_apicid_used(&apic_id_map, i))
2529                                break;
2530                }
2531
2532                if (i == get_physical_broadcast())
2533                        panic("Max apic_id exceeded!\n");
2534
2535                printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
2536                        "trying %d\n", ioapic, apic_id, i);
2537
2538                apic_id = i;
2539        }
2540
2541        apic->apicid_to_cpu_present(apic_id, &tmp);
2542        physids_or(apic_id_map, apic_id_map, tmp);
2543
2544        if (reg_00.bits.ID != apic_id) {
2545                reg_00.bits.ID = apic_id;
2546
2547                raw_spin_lock_irqsave(&ioapic_lock, flags);
2548                io_apic_write(ioapic, 0, reg_00.raw);
2549                reg_00.raw = io_apic_read(ioapic, 0);
2550                raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2551
2552                /* Sanity check */
2553                if (reg_00.bits.ID != apic_id) {
2554                        pr_err("IOAPIC[%d]: Unable to change apic_id!\n",
2555                               ioapic);
2556                        return -1;
2557                }
2558        }
2559
2560        apic_printk(APIC_VERBOSE, KERN_INFO
2561                        "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
2562
2563        return apic_id;
2564}
2565
2566static u8 io_apic_unique_id(int idx, u8 id)
2567{
2568        if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
2569            !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
2570                return io_apic_get_unique_id(idx, id);
2571        else
2572                return id;
2573}
2574#else
2575static u8 io_apic_unique_id(int idx, u8 id)
2576{
2577        union IO_APIC_reg_00 reg_00;
2578        DECLARE_BITMAP(used, 256);
2579        unsigned long flags;
2580        u8 new_id;
2581        int i;
2582
2583        bitmap_zero(used, 256);
2584        for_each_ioapic(i)
2585                __set_bit(mpc_ioapic_id(i), used);
2586
2587        /* Hand out the requested id if available */
2588        if (!test_bit(id, used))
2589                return id;
2590
2591        /*
2592         * Read the current id from the ioapic and keep it if
2593         * available.
2594         */
2595        raw_spin_lock_irqsave(&ioapic_lock, flags);
2596        reg_00.raw = io_apic_read(idx, 0);
2597        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2598        new_id = reg_00.bits.ID;
2599        if (!test_bit(new_id, used)) {
2600                apic_printk(APIC_VERBOSE, KERN_INFO
2601                        "IOAPIC[%d]: Using reg apic_id %d instead of %d\n",
2602                         idx, new_id, id);
2603                return new_id;
2604        }
2605
2606        /*
2607         * Get the next free id and write it to the ioapic.
2608         */
2609        new_id = find_first_zero_bit(used, 256);
2610        reg_00.bits.ID = new_id;
2611        raw_spin_lock_irqsave(&ioapic_lock, flags);
2612        io_apic_write(idx, 0, reg_00.raw);
2613        reg_00.raw = io_apic_read(idx, 0);
2614        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2615        /* Sanity check */
2616        BUG_ON(reg_00.bits.ID != new_id);
2617
2618        return new_id;
2619}
2620#endif
2621
2622static int io_apic_get_version(int ioapic)
2623{
2624        union IO_APIC_reg_01    reg_01;
2625        unsigned long flags;
2626
2627        raw_spin_lock_irqsave(&ioapic_lock, flags);
2628        reg_01.raw = io_apic_read(ioapic, 1);
2629        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2630
2631        return reg_01.bits.version;
2632}
2633
2634int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
2635{
2636        int ioapic, pin, idx;
2637
2638        if (skip_ioapic_setup)
2639                return -1;
2640
2641        ioapic = mp_find_ioapic(gsi);
2642        if (ioapic < 0)
2643                return -1;
2644
2645        pin = mp_find_ioapic_pin(ioapic, gsi);
2646        if (pin < 0)
2647                return -1;
2648
2649        idx = find_irq_entry(ioapic, pin, mp_INT);
2650        if (idx < 0)
2651                return -1;
2652
2653        *trigger = irq_trigger(idx);
2654        *polarity = irq_polarity(idx);
2655        return 0;
2656}
2657
/*
 * This function is currently only a helper for the i386 SMP boot process,
 * where the ioredtbls need to be reprogrammed to cater for the CPUs which
 * have come online. The mask is apic->target_cpus(), except for interrupts
 * that cannot be balanced or whose affinity has already been set; those keep
 * their current affinity.
 */
2663#ifdef CONFIG_SMP
2664void __init setup_ioapic_dest(void)
2665{
2666        int pin, ioapic, irq, irq_entry;
2667        const struct cpumask *mask;
2668        struct irq_data *idata;
2669
2670        if (skip_ioapic_setup == 1)
2671                return;
2672
2673        for_each_ioapic_pin(ioapic, pin) {
2674                irq_entry = find_irq_entry(ioapic, pin, mp_INT);
2675                if (irq_entry == -1)
2676                        continue;
2677
2678                irq = pin_2_irq(irq_entry, ioapic, pin, 0);
2679                if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
2680                        continue;
2681
2682                idata = irq_get_irq_data(irq);
2683
2684                /*
2685                 * Honour affinities which have been set in early boot
2686                 */
2687                if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
2688                        mask = idata->affinity;
2689                else
2690                        mask = apic->target_cpus();
2691
2692                x86_io_apic_ops.set_affinity(idata, mask, false);
2693        }
2694
2695}
2696#endif
2697
2698#define IOAPIC_RESOURCE_NAME_SIZE 11
2699
2700static struct resource *ioapic_resources;
2701
2702static struct resource * __init ioapic_setup_resources(void)
2703{
2704        unsigned long n;
2705        struct resource *res;
2706        char *mem;
2707        int i, num = 0;
2708
2709        for_each_ioapic(i)
2710                num++;
2711        if (num == 0)
2712                return NULL;
2713
2714        n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
2715        n *= num;
2716
2717        mem = alloc_bootmem(n);
2718        res = (void *)mem;
2719
2720        mem += sizeof(struct resource) * num;
2721
2722        num = 0;
2723        for_each_ioapic(i) {
2724                res[num].name = mem;
2725                res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
2726                snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
2727                mem += IOAPIC_RESOURCE_NAME_SIZE;
2728                num++;
2729                ioapics[i].iomem_res = res;
2730        }
2731
2732        ioapic_resources = res;
2733
2734        return res;
2735}
2736
2737void __init native_io_apic_init_mappings(void)
2738{
2739        unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
2740        struct resource *ioapic_res;
2741        int i;
2742
2743        ioapic_res = ioapic_setup_resources();
2744        for_each_ioapic(i) {
2745                if (smp_found_config) {
2746                        ioapic_phys = mpc_ioapic_addr(i);
2747#ifdef CONFIG_X86_32
2748                        if (!ioapic_phys) {
2749                                printk(KERN_ERR
2750                                       "WARNING: bogus zero IO-APIC "
2751                                       "address found in MPTABLE, "
2752                                       "disabling IO/APIC support!\n");
2753                                smp_found_config = 0;
2754                                skip_ioapic_setup = 1;
2755                                goto fake_ioapic_page;
2756                        }
2757#endif
2758                } else {
2759#ifdef CONFIG_X86_32
2760fake_ioapic_page:
2761#endif
2762                        ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
2763                        ioapic_phys = __pa(ioapic_phys);
2764                }
2765                set_fixmap_nocache(idx, ioapic_phys);
2766                apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
2767                        __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
2768                        ioapic_phys);
2769                idx++;
2770
2771                ioapic_res->start = ioapic_phys;
2772                ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
2773                ioapic_res++;
2774        }
2775}
2776
2777void __init ioapic_insert_resources(void)
2778{
2779        int i;
2780        struct resource *r = ioapic_resources;
2781
2782        if (!r) {
2783                if (nr_ioapics > 0)
2784                        printk(KERN_ERR
2785                                "IO APIC resources couldn't be allocated.\n");
2786                return;
2787        }
2788
2789        for_each_ioapic(i) {
2790                insert_resource(&iomem_resource, r);
2791                r++;
2792        }
2793}
2794
2795int mp_find_ioapic(u32 gsi)
2796{
2797        int i;
2798
2799        if (nr_ioapics == 0)
2800                return -1;
2801
2802        /* Find the IOAPIC that manages this GSI. */
2803        for_each_ioapic(i) {
2804                struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i);
2805                if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end)
2806                        return i;
2807        }
2808
2809        printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
2810        return -1;
2811}
2812
2813int mp_find_ioapic_pin(int ioapic, u32 gsi)
2814{
2815        struct mp_ioapic_gsi *gsi_cfg;
2816
2817        if (WARN_ON(ioapic < 0))
2818                return -1;
2819
2820        gsi_cfg = mp_ioapic_gsi_routing(ioapic);
2821        if (WARN_ON(gsi > gsi_cfg->gsi_end))
2822                return -1;
2823
2824        return gsi - gsi_cfg->gsi_base;
2825}
2826
2827static int bad_ioapic_register(int idx)
2828{
2829        union IO_APIC_reg_00 reg_00;
2830        union IO_APIC_reg_01 reg_01;
2831        union IO_APIC_reg_02 reg_02;
2832
2833        reg_00.raw = io_apic_read(idx, 0);
2834        reg_01.raw = io_apic_read(idx, 1);
2835        reg_02.raw = io_apic_read(idx, 2);
2836
2837        if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) {
2838                pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n",
2839                        mpc_ioapic_addr(idx));
2840                return 1;
2841        }
2842
2843        return 0;
2844}
2845
2846static int find_free_ioapic_entry(void)
2847{
2848        int idx;
2849
2850        for (idx = 0; idx < MAX_IO_APICS; idx++)
2851                if (ioapics[idx].nr_registers == 0)
2852                        return idx;
2853
2854        return MAX_IO_APICS;
2855}
2856
2857/**
2858 * mp_register_ioapic - Register an IOAPIC device
2859 * @id:         hardware IOAPIC ID
2860 * @address:    physical address of IOAPIC register area
2861 * @gsi_base:   base of GSI associated with the IOAPIC
2862 * @cfg:        configuration information for the IOAPIC
2863 */
2864int mp_register_ioapic(int id, u32 address, u32 gsi_base,
2865                       struct ioapic_domain_cfg *cfg)
2866{
2867        bool hotplug = !!ioapic_initialized;
2868        struct mp_ioapic_gsi *gsi_cfg;
2869        int idx, ioapic, entries;
2870        u32 gsi_end;
2871
2872        if (!address) {
2873                pr_warn("Bogus (zero) I/O APIC address found, skipping!\n");
2874                return -EINVAL;
2875        }
2876        for_each_ioapic(ioapic)
2877                if (ioapics[ioapic].mp_config.apicaddr == address) {
2878                        pr_warn("address 0x%x conflicts with IOAPIC%d\n",
2879                                address, ioapic);
2880                        return -EEXIST;
2881                }
2882
2883        idx = find_free_ioapic_entry();
2884        if (idx >= MAX_IO_APICS) {
2885                pr_warn("Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
2886                        MAX_IO_APICS, idx);
2887                return -ENOSPC;
2888        }
2889
2890        ioapics[idx].mp_config.type = MP_IOAPIC;
2891        ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
2892        ioapics[idx].mp_config.apicaddr = address;
2893
2894        set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
2895        if (bad_ioapic_register(idx)) {
2896                clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
2897                return -ENODEV;
2898        }
2899
2900        ioapics[idx].mp_config.apicid = io_apic_unique_id(idx, id);
2901        ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
2902
2903        /*
2904         * Build basic GSI lookup table to facilitate gsi->io_apic lookups
2905         * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
2906         */
2907        entries = io_apic_get_redir_entries(idx);
2908        gsi_end = gsi_base + entries - 1;
2909        for_each_ioapic(ioapic) {
2910                gsi_cfg = mp_ioapic_gsi_routing(ioapic);
2911                if ((gsi_base >= gsi_cfg->gsi_base &&
2912                     gsi_base <= gsi_cfg->gsi_end) ||
2913                    (gsi_end >= gsi_cfg->gsi_base &&
2914                     gsi_end <= gsi_cfg->gsi_end)) {
2915                        pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n",
2916                                gsi_base, gsi_end,
2917                                gsi_cfg->gsi_base, gsi_cfg->gsi_end);
2918                        clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
2919                        return -ENOSPC;
2920                }
2921        }
2922        gsi_cfg = mp_ioapic_gsi_routing(idx);
2923        gsi_cfg->gsi_base = gsi_base;
2924        gsi_cfg->gsi_end = gsi_end;
2925
2926        ioapics[idx].irqdomain = NULL;
2927        ioapics[idx].irqdomain_cfg = *cfg;
2928
2929        /*
2930         * If mp_register_ioapic() is called during early boot stage when
2931         * walking ACPI/SFI/DT tables, it's too early to create irqdomain,
2932         * we are still using bootmem allocator. So delay it to setup_IO_APIC().
2933         */
2934        if (hotplug) {
2935                if (mp_irqdomain_create(idx)) {
2936                        clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
2937                        return -ENOMEM;
2938                }
2939                alloc_ioapic_saved_registers(idx);
2940        }
2941
2942        if (gsi_cfg->gsi_end >= gsi_top)
2943                gsi_top = gsi_cfg->gsi_end + 1;
2944        if (nr_ioapics <= idx)
2945                nr_ioapics = idx + 1;
2946
2947        /* Set nr_registers to mark entry present */
2948        ioapics[idx].nr_registers = entries;
2949
2950        pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
2951                idx, mpc_ioapic_id(idx),
2952                mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
2953                gsi_cfg->gsi_base, gsi_cfg->gsi_end);
2954
2955        return 0;
2956}
2957
2958int mp_unregister_ioapic(u32 gsi_base)
2959{
2960        int ioapic, pin;
2961        int found = 0;
2962        struct mp_pin_info *pin_info;
2963
2964        for_each_ioapic(ioapic)
2965                if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) {
2966                        found = 1;
2967                        break;
2968                }
2969        if (!found) {
2970                pr_warn("can't find IOAPIC for GSI %d\n", gsi_base);
2971                return -ENODEV;
2972        }
2973
2974        for_each_pin(ioapic, pin) {
2975                pin_info = mp_pin_info(ioapic, pin);
2976                if (pin_info->count) {
2977                        pr_warn("pin%d on IOAPIC%d is still in use.\n",
2978                                pin, ioapic);
2979                        return -EBUSY;
2980                }
2981        }
2982
2983        /* Mark entry not present */
2984        ioapics[ioapic].nr_registers  = 0;
2985        ioapic_destroy_irqdomain(ioapic);
2986        free_ioapic_saved_registers(ioapic);
2987        if (ioapics[ioapic].iomem_res)
2988                release_resource(ioapics[ioapic].iomem_res);
2989        clear_fixmap(FIX_IO_APIC_BASE_0 + ioapic);
2990        memset(&ioapics[ioapic], 0, sizeof(ioapics[ioapic]));
2991
2992        return 0;
2993}
2994
2995int mp_ioapic_registered(u32 gsi_base)
2996{
2997        int ioapic;
2998
2999        for_each_ioapic(ioapic)
3000                if (ioapics[ioapic].gsi_config.gsi_base == gsi_base)
3001                        return 1;
3002
3003        return 0;
3004}
3005
3006static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
3007                                        int ioapic, int ioapic_pin,
3008                                        int trigger, int polarity)
3009{
3010        irq_attr->ioapic        = ioapic;
3011        irq_attr->ioapic_pin    = ioapic_pin;
3012        irq_attr->trigger       = trigger;
3013        irq_attr->polarity      = polarity;
3014}
3015
3016int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
3017                     irq_hw_number_t hwirq)
3018{
3019        int ioapic = (int)(long)domain->host_data;
3020        struct mp_pin_info *info = mp_pin_info(ioapic, hwirq);
3021        struct io_apic_irq_attr attr;
3022
3023        /* Get default attribute if not set by caller yet */
3024        if (!info->set) {
3025                u32 gsi = mp_pin_to_gsi(ioapic, hwirq);
3026
3027                if (acpi_get_override_irq(gsi, &info->trigger,
3028                                          &info->polarity) < 0) {
3029                        /*
3030                         * PCI interrupts are always polarity one level
3031                         * triggered.
3032                         */
3033                        info->trigger = 1;
3034                        info->polarity = 1;
3035                }
3036                info->node = NUMA_NO_NODE;
3037
3038                /*
3039                 * setup_IO_APIC_irqs() programs all legacy IRQs with default
3040                 * trigger and polarity attributes. Don't set the flag for that
3041                 * case so the first legacy IRQ user could reprogram the pin
3042                 * with real trigger and polarity attributes.
3043                 */
3044                if (virq >= nr_legacy_irqs() || info->count)
3045                        info->set = 1;
3046        }
3047        set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger,
3048                             info->polarity);
3049
3050        return io_apic_setup_irq_pin(virq, info->node, &attr);
3051}
3052
3053void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq)
3054{
3055        struct irq_data *data = irq_get_irq_data(virq);
3056        struct irq_cfg *cfg = irq_cfg(virq);
3057        int ioapic = (int)(long)domain->host_data;
3058        int pin = (int)data->hwirq;
3059
3060        ioapic_mask_entry(ioapic, pin);
3061        __remove_pin_from_irq(cfg, ioapic, pin);
3062        WARN_ON(!list_empty(&cfg->irq_2_pin));
3063        arch_teardown_hwirq(virq);
3064}
3065
3066int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node)
3067{
3068        int ret = 0;
3069        int ioapic, pin;
3070        struct mp_pin_info *info;
3071
3072        ioapic = mp_find_ioapic(gsi);
3073        if (ioapic < 0)
3074                return -ENODEV;
3075
3076        pin = mp_find_ioapic_pin(ioapic, gsi);
3077        info = mp_pin_info(ioapic, pin);
3078        trigger = trigger ? 1 : 0;
3079        polarity = polarity ? 1 : 0;
3080
3081        mutex_lock(&ioapic_mutex);
3082        if (!info->set) {
3083                info->trigger = trigger;
3084                info->polarity = polarity;
3085                info->node = node;
3086                info->set = 1;
3087        } else if (info->trigger != trigger || info->polarity != polarity) {
3088                ret = -EBUSY;
3089        }
3090        mutex_unlock(&ioapic_mutex);
3091
3092        return ret;
3093}
3094
3095/* Enable IOAPIC early just for system timer */
3096void __init pre_init_apic_IRQ0(void)
3097{
3098        struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
3099
3100        printk(KERN_INFO "Early APIC setup for system timer0\n");
3101#ifndef CONFIG_SMP
3102        physid_set_mask_of_physid(boot_cpu_physical_apicid,
3103                                         &phys_cpu_present_map);
3104#endif
3105        setup_local_APIC();
3106
3107        io_apic_setup_irq_pin(0, 0, &attr);
3108        irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
3109                                      "edge");
3110}
3111