linux/arch/x86/kernel/apic/io_apic.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *      Intel IO-APIC support for multi-Pentium hosts.
   4 *
   5 *      Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
   6 *
   7 *      Many thanks to Stig Venaas for trying out countless experimental
   8 *      patches and reporting/debugging problems patiently!
   9 *
  10 *      (c) 1999, Multiple IO-APIC support, developed by
  11 *      Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
  12 *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
  13 *      further tested and cleaned up by Zach Brown <zab@redhat.com>
  14 *      and Ingo Molnar <mingo@redhat.com>
  15 *
  16 *      Fixes
  17 *      Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
  18 *                                      thanks to Eric Gilmore
  19 *                                      and Rolf G. Tews
  20 *                                      for testing these extensively
  21 *      Paul Diefenbaugh        :       Added full ACPI support
  22 *
   23 * Historical information which is worth preserving:
  24 *
  25 * - SiS APIC rmw bug:
  26 *
   27 *      We used to have a workaround for a bug in SiS chips which
   28 *      required rewriting the index register for a read-modify-write
   29 *      operation, as the chip lost the index information which was
   30 *      set up for the read already. We cache the data now, so that
   31 *      workaround has been removed.
  32 */
  33
  34#include <linux/mm.h>
  35#include <linux/interrupt.h>
  36#include <linux/init.h>
  37#include <linux/delay.h>
  38#include <linux/sched.h>
  39#include <linux/pci.h>
  40#include <linux/mc146818rtc.h>
  41#include <linux/compiler.h>
  42#include <linux/acpi.h>
  43#include <linux/export.h>
  44#include <linux/syscore_ops.h>
  45#include <linux/freezer.h>
  46#include <linux/kthread.h>
  47#include <linux/jiffies.h>      /* time_after() */
  48#include <linux/slab.h>
  49#include <linux/bootmem.h>
  50
  51#include <asm/irqdomain.h>
  52#include <asm/io.h>
  53#include <asm/smp.h>
  54#include <asm/cpu.h>
  55#include <asm/desc.h>
  56#include <asm/proto.h>
  57#include <asm/acpi.h>
  58#include <asm/dma.h>
  59#include <asm/timer.h>
  60#include <asm/i8259.h>
  61#include <asm/setup.h>
  62#include <asm/irq_remapping.h>
  63#include <asm/hw_irq.h>
  64
  65#include <asm/apic.h>
  66
  67#define for_each_ioapic(idx)            \
  68        for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
  69#define for_each_ioapic_reverse(idx)    \
  70        for ((idx) = nr_ioapics - 1; (idx) >= 0; (idx)--)
  71#define for_each_pin(idx, pin)          \
  72        for ((pin) = 0; (pin) < ioapics[(idx)].nr_registers; (pin)++)
  73#define for_each_ioapic_pin(idx, pin)   \
  74        for_each_ioapic((idx))          \
  75                for_each_pin((idx), (pin))
  76#define for_each_irq_pin(entry, head) \
  77        list_for_each_entry(entry, &head, list)
  78
  79static DEFINE_RAW_SPINLOCK(ioapic_lock);
  80static DEFINE_MUTEX(ioapic_mutex);
  81static unsigned int ioapic_dynirq_base;
  82static int ioapic_initialized;
  83
  84struct irq_pin_list {
  85        struct list_head list;
  86        int apic, pin;
  87};
  88
  89struct mp_chip_data {
  90        struct list_head irq_2_pin;
  91        struct IO_APIC_route_entry entry;
  92        int trigger;
  93        int polarity;
  94        u32 count;
  95        bool isa_irq;
  96};
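     /*
      * A short orientation note on the fields above: irq_2_pin lists every
      * (apic, pin) pair the IRQ is routed through, entry caches the last
      * RTE value written (see the SiS rmw note at the top of this file),
      * trigger/polarity mirror the programmed attributes, count tracks how
      * many mappings reference the IRQ (see mp_map_pin_to_irq() and
      * mp_unmap_irq()), and isa_irq marks legacy ISA IRQs, which are never
      * freed.
      */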
  97
  98struct mp_ioapic_gsi {
  99        u32 gsi_base;
 100        u32 gsi_end;
 101};
 102
 103static struct ioapic {
 104        /*
 105         * # of IRQ routing registers
 106         */
 107        int nr_registers;
 108        /*
 109         * Saved state during suspend/resume, or while enabling intr-remap.
 110         */
 111        struct IO_APIC_route_entry *saved_registers;
 112        /* I/O APIC config */
 113        struct mpc_ioapic mp_config;
 114        /* IO APIC gsi routing info */
 115        struct mp_ioapic_gsi  gsi_config;
 116        struct ioapic_domain_cfg irqdomain_cfg;
 117        struct irq_domain *irqdomain;
 118        struct resource *iomem_res;
 119} ioapics[MAX_IO_APICS];
 120
 121#define mpc_ioapic_ver(ioapic_idx)      ioapics[ioapic_idx].mp_config.apicver
 122
 123int mpc_ioapic_id(int ioapic_idx)
 124{
 125        return ioapics[ioapic_idx].mp_config.apicid;
 126}
 127
 128unsigned int mpc_ioapic_addr(int ioapic_idx)
 129{
 130        return ioapics[ioapic_idx].mp_config.apicaddr;
 131}
 132
 133static inline struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
 134{
 135        return &ioapics[ioapic_idx].gsi_config;
 136}
 137
 138static inline int mp_ioapic_pin_count(int ioapic)
 139{
 140        struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
 141
 142        return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
 143}
 144
 145static inline u32 mp_pin_to_gsi(int ioapic, int pin)
 146{
 147        return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin;
 148}
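     /*
      * A worked example of the GSI arithmetic above: an IO-APIC with
      * gsi_base 0 and 24 pins covers GSI 0..23 (pin count is
      * gsi_end - gsi_base + 1 = 24), so its pin 5 maps to GSI 5; a second
      * IO-APIC with gsi_base 24 would map its pin 5 to GSI 29.
      */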
 149
 150static inline bool mp_is_legacy_irq(int irq)
 151{
 152        return irq >= 0 && irq < nr_legacy_irqs();
 153}
 154
 155/*
 156 * Initialize all legacy IRQs and all pins on the first IOAPIC
  157 * if we have a legacy interrupt controller. The kernel boot option "pirq="
 158 * may rely on non-legacy pins on the first IOAPIC.
 159 */
 160static inline int mp_init_irq_at_boot(int ioapic, int irq)
 161{
 162        if (!nr_legacy_irqs())
 163                return 0;
 164
 165        return ioapic == 0 || mp_is_legacy_irq(irq);
 166}
 167
 168static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic)
 169{
 170        return ioapics[ioapic].irqdomain;
 171}
 172
 173int nr_ioapics;
 174
 175/* The one past the highest gsi number used */
 176u32 gsi_top;
 177
 178/* MP IRQ source entries */
 179struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 180
 181/* # of MP IRQ source entries */
 182int mp_irq_entries;
 183
 184#ifdef CONFIG_EISA
 185int mp_bus_id_to_type[MAX_MP_BUSSES];
 186#endif
 187
 188DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 189
 190int skip_ioapic_setup;
 191
 192/**
 193 * disable_ioapic_support() - disables ioapic support at runtime
 194 */
 195void disable_ioapic_support(void)
 196{
 197#ifdef CONFIG_PCI
 198        noioapicquirk = 1;
 199        noioapicreroute = -1;
 200#endif
 201        skip_ioapic_setup = 1;
 202}
 203
 204static int __init parse_noapic(char *str)
 205{
 206        /* disable IO-APIC */
 207        disable_ioapic_support();
 208        return 0;
 209}
 210early_param("noapic", parse_noapic);
 211
  212/* Called by the mpparse/acpi/sfi code to save IRQ info */
 213void mp_save_irq(struct mpc_intsrc *m)
 214{
 215        int i;
 216
 217        apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
 218                " IRQ %02x, APIC ID %x, APIC INT %02x\n",
 219                m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
 220                m->srcbusirq, m->dstapic, m->dstirq);
 221
 222        for (i = 0; i < mp_irq_entries; i++) {
 223                if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
 224                        return;
 225        }
 226
 227        memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m));
 228        if (++mp_irq_entries == MAX_IRQ_SOURCES)
 229                panic("Max # of irq sources exceeded!!\n");
 230}
 231
 232static void alloc_ioapic_saved_registers(int idx)
 233{
 234        size_t size;
 235
 236        if (ioapics[idx].saved_registers)
 237                return;
 238
 239        size = sizeof(struct IO_APIC_route_entry) * ioapics[idx].nr_registers;
 240        ioapics[idx].saved_registers = kzalloc(size, GFP_KERNEL);
 241        if (!ioapics[idx].saved_registers)
 242                pr_err("IOAPIC %d: suspend/resume impossible!\n", idx);
 243}
 244
 245static void free_ioapic_saved_registers(int idx)
 246{
 247        kfree(ioapics[idx].saved_registers);
 248        ioapics[idx].saved_registers = NULL;
 249}
 250
 251int __init arch_early_ioapic_init(void)
 252{
 253        int i;
 254
 255        if (!nr_legacy_irqs())
 256                io_apic_irqs = ~0UL;
 257
 258        for_each_ioapic(i)
 259                alloc_ioapic_saved_registers(i);
 260
 261        return 0;
 262}
 263
 264struct io_apic {
 265        unsigned int index;
 266        unsigned int unused[3];
 267        unsigned int data;
 268        unsigned int unused2[11];
 269        unsigned int eoi;
 270};
 271
 272static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
 273{
 274        return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
 275                + (mpc_ioapic_addr(idx) & ~PAGE_MASK);
 276}
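     /*
      * The struct io_apic above mirrors the memory-mapped register window
      * returned by io_apic_base(): with 4-byte members, index sits at
      * offset 0x00, data at 0x10 and eoi at 0x40.  Registers are accessed
      * indirectly: write the register number to index, then read or write
      * data, as the accessors below do.  A rough sketch of reading the
      * version register (register 0x01):
      *
      *     writel(0x01, &io_apic->index);
      *     version = readl(&io_apic->data) & 0xff;
      */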
 277
 278static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
 279{
 280        struct io_apic __iomem *io_apic = io_apic_base(apic);
 281        writel(vector, &io_apic->eoi);
 282}
 283
 284unsigned int native_io_apic_read(unsigned int apic, unsigned int reg)
 285{
 286        struct io_apic __iomem *io_apic = io_apic_base(apic);
 287        writel(reg, &io_apic->index);
 288        return readl(&io_apic->data);
 289}
 290
 291static void io_apic_write(unsigned int apic, unsigned int reg,
 292                          unsigned int value)
 293{
 294        struct io_apic __iomem *io_apic = io_apic_base(apic);
 295
 296        writel(reg, &io_apic->index);
 297        writel(value, &io_apic->data);
 298}
 299
 300union entry_union {
 301        struct { u32 w1, w2; };
 302        struct IO_APIC_route_entry entry;
 303};
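     /*
      * Each redirection table entry (RTE) is 64 bits wide and is accessed
      * as two consecutive indirect registers: 0x10 + 2*pin holds the low
      * word (vector, delivery mode, trigger, polarity, mask) and
      * 0x11 + 2*pin holds the high word (destination).  For example, pin 3
      * lives in registers 0x16 and 0x17.  The union above lets the same
      * storage be viewed either as the structured entry or as the raw
      * words w1/w2.
      */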
 304
 305static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
 306{
 307        union entry_union eu;
 308
 309        eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
 310        eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
 311
 312        return eu.entry;
 313}
 314
 315static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 316{
 317        union entry_union eu;
 318        unsigned long flags;
 319
 320        raw_spin_lock_irqsave(&ioapic_lock, flags);
 321        eu.entry = __ioapic_read_entry(apic, pin);
 322        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 323
 324        return eu.entry;
 325}
 326
 327/*
 328 * When we write a new IO APIC routing entry, we need to write the high
 329 * word first! If the mask bit in the low word is clear, we will enable
 330 * the interrupt, and we need to make sure the entry is fully populated
 331 * before that happens.
 332 */
 333static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 334{
 335        union entry_union eu = {{0, 0}};
 336
 337        eu.entry = e;
 338        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
 339        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
 340}
 341
 342static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 343{
 344        unsigned long flags;
 345
 346        raw_spin_lock_irqsave(&ioapic_lock, flags);
 347        __ioapic_write_entry(apic, pin, e);
 348        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 349}
 350
 351/*
 352 * When we mask an IO APIC routing entry, we need to write the low
 353 * word first, in order to set the mask bit before we change the
 354 * high bits!
 355 */
 356static void ioapic_mask_entry(int apic, int pin)
 357{
 358        unsigned long flags;
 359        union entry_union eu = { .entry.mask = IOAPIC_MASKED };
 360
 361        raw_spin_lock_irqsave(&ioapic_lock, flags);
 362        io_apic_write(apic, 0x10 + 2*pin, eu.w1);
 363        io_apic_write(apic, 0x11 + 2*pin, eu.w2);
 364        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 365}
 366
 367/*
 368 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
 369 * shared ISA-space IRQs, so we have to support them. We are super
 370 * fast in the common case, and fast for shared ISA-space IRQs.
 371 */
 372static int __add_pin_to_irq_node(struct mp_chip_data *data,
 373                                 int node, int apic, int pin)
 374{
 375        struct irq_pin_list *entry;
 376
 377        /* don't allow duplicates */
 378        for_each_irq_pin(entry, data->irq_2_pin)
 379                if (entry->apic == apic && entry->pin == pin)
 380                        return 0;
 381
 382        entry = kzalloc_node(sizeof(struct irq_pin_list), GFP_ATOMIC, node);
 383        if (!entry) {
 384                pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
 385                       node, apic, pin);
 386                return -ENOMEM;
 387        }
 388        entry->apic = apic;
 389        entry->pin = pin;
 390        list_add_tail(&entry->list, &data->irq_2_pin);
 391
 392        return 0;
 393}
 394
 395static void __remove_pin_from_irq(struct mp_chip_data *data, int apic, int pin)
 396{
 397        struct irq_pin_list *tmp, *entry;
 398
 399        list_for_each_entry_safe(entry, tmp, &data->irq_2_pin, list)
 400                if (entry->apic == apic && entry->pin == pin) {
 401                        list_del(&entry->list);
 402                        kfree(entry);
 403                        return;
 404                }
 405}
 406
 407static void add_pin_to_irq_node(struct mp_chip_data *data,
 408                                int node, int apic, int pin)
 409{
 410        if (__add_pin_to_irq_node(data, node, apic, pin))
 411                panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
 412}
 413
 414/*
 415 * Reroute an IRQ to a different pin.
 416 */
 417static void __init replace_pin_at_irq_node(struct mp_chip_data *data, int node,
 418                                           int oldapic, int oldpin,
 419                                           int newapic, int newpin)
 420{
 421        struct irq_pin_list *entry;
 422
 423        for_each_irq_pin(entry, data->irq_2_pin) {
 424                if (entry->apic == oldapic && entry->pin == oldpin) {
 425                        entry->apic = newapic;
 426                        entry->pin = newpin;
 427                        /* every one is different, right? */
 428                        return;
 429                }
 430        }
 431
 432        /* old apic/pin didn't exist, so just add new ones */
 433        add_pin_to_irq_node(data, node, newapic, newpin);
 434}
 435
 436static void io_apic_modify_irq(struct mp_chip_data *data,
 437                               int mask_and, int mask_or,
 438                               void (*final)(struct irq_pin_list *entry))
 439{
 440        union entry_union eu;
 441        struct irq_pin_list *entry;
 442
 443        eu.entry = data->entry;
 444        eu.w1 &= mask_and;
 445        eu.w1 |= mask_or;
 446        data->entry = eu.entry;
 447
 448        for_each_irq_pin(entry, data->irq_2_pin) {
 449                io_apic_write(entry->apic, 0x10 + 2 * entry->pin, eu.w1);
 450                if (final)
 451                        final(entry);
 452        }
 453}
 454
 455static void io_apic_sync(struct irq_pin_list *entry)
 456{
 457        /*
 458         * Synchronize the IO-APIC and the CPU by doing
 459         * a dummy read from the IO-APIC
 460         */
 461        struct io_apic __iomem *io_apic;
 462
 463        io_apic = io_apic_base(entry->apic);
 464        readl(&io_apic->data);
 465}
 466
 467static void mask_ioapic_irq(struct irq_data *irq_data)
 468{
 469        struct mp_chip_data *data = irq_data->chip_data;
 470        unsigned long flags;
 471
 472        raw_spin_lock_irqsave(&ioapic_lock, flags);
 473        io_apic_modify_irq(data, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 474        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 475}
 476
 477static void __unmask_ioapic(struct mp_chip_data *data)
 478{
 479        io_apic_modify_irq(data, ~IO_APIC_REDIR_MASKED, 0, NULL);
 480}
 481
 482static void unmask_ioapic_irq(struct irq_data *irq_data)
 483{
 484        struct mp_chip_data *data = irq_data->chip_data;
 485        unsigned long flags;
 486
 487        raw_spin_lock_irqsave(&ioapic_lock, flags);
 488        __unmask_ioapic(data);
 489        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 490}
 491
 492/*
  493 * IO-APIC versions below 0x20 don't support the EOI register.
 494 * For the record, here is the information about various versions:
 495 *     0Xh     82489DX
 496 *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
 497 *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
 498 *     30h-FFh Reserved
 499 *
  500 * Some of the Intel ICH specs (ICH2 to ICH5) document the io-apic
  501 * version as 0x2. This is a documentation error: these ICH chips
  502 * actually use io-apics of version 0x20.
 503 *
  504 * For IO-APICs with an EOI register, we use it to do an explicit EOI.
 505 * Otherwise, we simulate the EOI message manually by changing the trigger
 506 * mode to edge and then back to level, with RTE being masked during this.
 507 */
 508static void __eoi_ioapic_pin(int apic, int pin, int vector)
 509{
 510        if (mpc_ioapic_ver(apic) >= 0x20) {
 511                io_apic_eoi(apic, vector);
 512        } else {
 513                struct IO_APIC_route_entry entry, entry1;
 514
 515                entry = entry1 = __ioapic_read_entry(apic, pin);
 516
 517                /*
 518                 * Mask the entry and change the trigger mode to edge.
 519                 */
 520                entry1.mask = IOAPIC_MASKED;
 521                entry1.trigger = IOAPIC_EDGE;
 522
 523                __ioapic_write_entry(apic, pin, entry1);
 524
 525                /*
 526                 * Restore the previous level triggered entry.
 527                 */
 528                __ioapic_write_entry(apic, pin, entry);
 529        }
 530}
 531
 532static void eoi_ioapic_pin(int vector, struct mp_chip_data *data)
 533{
 534        unsigned long flags;
 535        struct irq_pin_list *entry;
 536
 537        raw_spin_lock_irqsave(&ioapic_lock, flags);
 538        for_each_irq_pin(entry, data->irq_2_pin)
 539                __eoi_ioapic_pin(entry->apic, entry->pin, vector);
 540        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 541}
 542
 543static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 544{
 545        struct IO_APIC_route_entry entry;
 546
 547        /* Check delivery_mode to be sure we're not clearing an SMI pin */
 548        entry = ioapic_read_entry(apic, pin);
 549        if (entry.delivery_mode == dest_SMI)
 550                return;
 551
 552        /*
 553         * Make sure the entry is masked and re-read the contents to check
 554         * if it is a level triggered pin and if the remote-IRR is set.
 555         */
 556        if (entry.mask == IOAPIC_UNMASKED) {
 557                entry.mask = IOAPIC_MASKED;
 558                ioapic_write_entry(apic, pin, entry);
 559                entry = ioapic_read_entry(apic, pin);
 560        }
 561
 562        if (entry.irr) {
 563                unsigned long flags;
 564
 565                /*
 566                 * Make sure the trigger mode is set to level. Explicit EOI
 567                 * doesn't clear the remote-IRR if the trigger mode is not
 568                 * set to level.
 569                 */
 570                if (entry.trigger == IOAPIC_EDGE) {
 571                        entry.trigger = IOAPIC_LEVEL;
 572                        ioapic_write_entry(apic, pin, entry);
 573                }
 574                raw_spin_lock_irqsave(&ioapic_lock, flags);
 575                __eoi_ioapic_pin(apic, pin, entry.vector);
 576                raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 577        }
 578
 579        /*
 580         * Clear the rest of the bits in the IO-APIC RTE except for the mask
 581         * bit.
 582         */
 583        ioapic_mask_entry(apic, pin);
 584        entry = ioapic_read_entry(apic, pin);
 585        if (entry.irr)
  586                pr_err("Unable to reset IRR for apic: %d, pin: %d\n",
 587                       mpc_ioapic_id(apic), pin);
 588}
 589
  590static void clear_IO_APIC(void)
 591{
 592        int apic, pin;
 593
 594        for_each_ioapic_pin(apic, pin)
 595                clear_IO_APIC_pin(apic, pin);
 596}
 597
 598#ifdef CONFIG_X86_32
 599/*
  600 * Support for broken MP BIOSes: enables hand-redirection of PIRQ0-7 to
 601 * specific CPU-side IRQs.
 602 */
 603
 604#define MAX_PIRQS 8
 605static int pirq_entries[MAX_PIRQS] = {
 606        [0 ... MAX_PIRQS - 1] = -1
 607};
 608
 609static int __init ioapic_pirq_setup(char *str)
 610{
 611        int i, max;
 612        int ints[MAX_PIRQS+1];
 613
 614        get_options(str, ARRAY_SIZE(ints), ints);
 615
 616        apic_printk(APIC_VERBOSE, KERN_INFO
 617                        "PIRQ redirection, working around broken MP-BIOS.\n");
 618        max = MAX_PIRQS;
 619        if (ints[0] < MAX_PIRQS)
 620                max = ints[0];
 621
 622        for (i = 0; i < max; i++) {
 623                apic_printk(APIC_VERBOSE, KERN_DEBUG
 624                                "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
 625                /*
 626                 * PIRQs are mapped upside down, usually.
 627                 */
 628                pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
 629        }
 630        return 1;
 631}
 632
 633__setup("pirq=", ioapic_pirq_setup);
 634#endif /* CONFIG_X86_32 */
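     /*
      * A sketch of the "pirq=" option handled above (the numbers are made
      * up): booting with "pirq=5,11" parses two values; each value is the
      * IRQ to use for the corresponding PIRQ, and a value of 0 disables
      * that PIRQ (see pin_2_irq() below).  Because PIRQs are mapped upside
      * down, the i-th value is stored in pirq_entries[MAX_PIRQS - i - 1],
      * which pin_2_irq() then indexes as pirq_entries[pin - 16] for
      * pins 16-23.
      */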
 635
 636/*
 637 * Saves all the IO-APIC RTE's
 638 */
 639int save_ioapic_entries(void)
 640{
 641        int apic, pin;
 642        int err = 0;
 643
 644        for_each_ioapic(apic) {
 645                if (!ioapics[apic].saved_registers) {
 646                        err = -ENOMEM;
 647                        continue;
 648                }
 649
 650                for_each_pin(apic, pin)
 651                        ioapics[apic].saved_registers[pin] =
 652                                ioapic_read_entry(apic, pin);
 653        }
 654
 655        return err;
 656}
 657
 658/*
 659 * Mask all IO APIC entries.
 660 */
 661void mask_ioapic_entries(void)
 662{
 663        int apic, pin;
 664
 665        for_each_ioapic(apic) {
 666                if (!ioapics[apic].saved_registers)
 667                        continue;
 668
 669                for_each_pin(apic, pin) {
 670                        struct IO_APIC_route_entry entry;
 671
 672                        entry = ioapics[apic].saved_registers[pin];
 673                        if (entry.mask == IOAPIC_UNMASKED) {
 674                                entry.mask = IOAPIC_MASKED;
 675                                ioapic_write_entry(apic, pin, entry);
 676                        }
 677                }
 678        }
 679}
 680
 681/*
  682 * Restore IO APIC entries which were saved in the ioapic structure.
 683 */
 684int restore_ioapic_entries(void)
 685{
 686        int apic, pin;
 687
 688        for_each_ioapic(apic) {
 689                if (!ioapics[apic].saved_registers)
 690                        continue;
 691
 692                for_each_pin(apic, pin)
 693                        ioapic_write_entry(apic, pin,
 694                                           ioapics[apic].saved_registers[pin]);
 695        }
 696        return 0;
 697}
 698
 699/*
 700 * Find the IRQ entry number of a certain pin.
 701 */
 702static int find_irq_entry(int ioapic_idx, int pin, int type)
 703{
 704        int i;
 705
 706        for (i = 0; i < mp_irq_entries; i++)
 707                if (mp_irqs[i].irqtype == type &&
 708                    (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) ||
 709                     mp_irqs[i].dstapic == MP_APIC_ALL) &&
 710                    mp_irqs[i].dstirq == pin)
 711                        return i;
 712
 713        return -1;
 714}
 715
 716/*
 717 * Find the pin to which IRQ[irq] (ISA) is connected
 718 */
 719static int __init find_isa_irq_pin(int irq, int type)
 720{
 721        int i;
 722
 723        for (i = 0; i < mp_irq_entries; i++) {
 724                int lbus = mp_irqs[i].srcbus;
 725
 726                if (test_bit(lbus, mp_bus_not_pci) &&
 727                    (mp_irqs[i].irqtype == type) &&
 728                    (mp_irqs[i].srcbusirq == irq))
 729
 730                        return mp_irqs[i].dstirq;
 731        }
 732        return -1;
 733}
 734
 735static int __init find_isa_irq_apic(int irq, int type)
 736{
 737        int i;
 738
 739        for (i = 0; i < mp_irq_entries; i++) {
 740                int lbus = mp_irqs[i].srcbus;
 741
 742                if (test_bit(lbus, mp_bus_not_pci) &&
 743                    (mp_irqs[i].irqtype == type) &&
 744                    (mp_irqs[i].srcbusirq == irq))
 745                        break;
 746        }
 747
 748        if (i < mp_irq_entries) {
 749                int ioapic_idx;
 750
 751                for_each_ioapic(ioapic_idx)
 752                        if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
 753                                return ioapic_idx;
 754        }
 755
 756        return -1;
 757}
 758
 759#ifdef CONFIG_EISA
 760/*
 761 * EISA Edge/Level control register, ELCR
 762 */
 763static int EISA_ELCR(unsigned int irq)
 764{
 765        if (irq < nr_legacy_irqs()) {
 766                unsigned int port = 0x4d0 + (irq >> 3);
 767                return (inb(port) >> (irq & 7)) & 1;
 768        }
 769        apic_printk(APIC_VERBOSE, KERN_INFO
 770                        "Broken MPtable reports ISA irq %d\n", irq);
 771        return 0;
 772}
 773
 774#endif
 775
 776/* ISA interrupts are always active high edge triggered,
 777 * when listed as conforming in the MP table. */
 778
 779#define default_ISA_trigger(idx)        (IOAPIC_EDGE)
 780#define default_ISA_polarity(idx)       (IOAPIC_POL_HIGH)
 781
 782/* EISA interrupts are always polarity zero and can be edge or level
 783 * trigger depending on the ELCR value.  If an interrupt is listed as
 784 * EISA conforming in the MP table, that means its trigger type must
 785 * be read in from the ELCR */
 786
 787#define default_EISA_trigger(idx)       (EISA_ELCR(mp_irqs[idx].srcbusirq))
 788#define default_EISA_polarity(idx)      default_ISA_polarity(idx)
 789
 790/* PCI interrupts are always active low level triggered,
 791 * when listed as conforming in the MP table. */
 792
 793#define default_PCI_trigger(idx)        (IOAPIC_LEVEL)
 794#define default_PCI_polarity(idx)       (IOAPIC_POL_LOW)
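     /*
      * For reference, the MP table encodes these attributes in
      * mpc_intsrc.irqflag: bits 0-1 are the polarity (00 = conforms to the
      * bus, 01 = active high, 10 = reserved, 11 = active low) and bits 2-3
      * are the trigger mode (00 = conforms, 01 = edge, 10 = reserved,
      * 11 = level), matching the "irqflag & 3" / "(irqflag >> 2) & 3"
      * decoding in mp_save_irq().  The macros above supply the per-bus
      * defaults used for the "conforms to bus" case in irq_polarity() and
      * irq_trigger() below.
      */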
 795
 796static int irq_polarity(int idx)
 797{
 798        int bus = mp_irqs[idx].srcbus;
 799
 800        /*
 801         * Determine IRQ line polarity (high active or low active):
 802         */
 803        switch (mp_irqs[idx].irqflag & MP_IRQPOL_MASK) {
 804        case MP_IRQPOL_DEFAULT:
 805                /* conforms to spec, ie. bus-type dependent polarity */
 806                if (test_bit(bus, mp_bus_not_pci))
 807                        return default_ISA_polarity(idx);
 808                else
 809                        return default_PCI_polarity(idx);
 810        case MP_IRQPOL_ACTIVE_HIGH:
 811                return IOAPIC_POL_HIGH;
 812        case MP_IRQPOL_RESERVED:
 813                pr_warn("IOAPIC: Invalid polarity: 2, defaulting to low\n");
 814        case MP_IRQPOL_ACTIVE_LOW:
  815        default: /* Pointless default required due to gcc stupidity */
 816                return IOAPIC_POL_LOW;
 817        }
 818}
 819
 820#ifdef CONFIG_EISA
 821static int eisa_irq_trigger(int idx, int bus, int trigger)
 822{
 823        switch (mp_bus_id_to_type[bus]) {
 824        case MP_BUS_PCI:
 825        case MP_BUS_ISA:
 826                return trigger;
 827        case MP_BUS_EISA:
 828                return default_EISA_trigger(idx);
 829        }
 830        pr_warn("IOAPIC: Invalid srcbus: %d defaulting to level\n", bus);
 831        return IOAPIC_LEVEL;
 832}
 833#else
 834static inline int eisa_irq_trigger(int idx, int bus, int trigger)
 835{
 836        return trigger;
 837}
 838#endif
 839
 840static int irq_trigger(int idx)
 841{
 842        int bus = mp_irqs[idx].srcbus;
 843        int trigger;
 844
 845        /*
 846         * Determine IRQ trigger mode (edge or level sensitive):
 847         */
 848        switch (mp_irqs[idx].irqflag & MP_IRQTRIG_MASK) {
 849        case MP_IRQTRIG_DEFAULT:
 850                /* conforms to spec, ie. bus-type dependent trigger mode */
 851                if (test_bit(bus, mp_bus_not_pci))
 852                        trigger = default_ISA_trigger(idx);
 853                else
 854                        trigger = default_PCI_trigger(idx);
 855                /* Take EISA into account */
 856                return eisa_irq_trigger(idx, bus, trigger);
 857        case MP_IRQTRIG_EDGE:
 858                return IOAPIC_EDGE;
 859        case MP_IRQTRIG_RESERVED:
 860                pr_warn("IOAPIC: Invalid trigger mode 2 defaulting to level\n");
 861        case MP_IRQTRIG_LEVEL:
  862        default: /* Pointless default required due to gcc stupidity */
 863                return IOAPIC_LEVEL;
 864        }
 865}
 866
 867void ioapic_set_alloc_attr(struct irq_alloc_info *info, int node,
 868                           int trigger, int polarity)
 869{
 870        init_irq_alloc_info(info, NULL);
 871        info->type = X86_IRQ_ALLOC_TYPE_IOAPIC;
 872        info->ioapic_node = node;
 873        info->ioapic_trigger = trigger;
 874        info->ioapic_polarity = polarity;
 875        info->ioapic_valid = 1;
 876}
 877
 878#ifndef CONFIG_ACPI
 879int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity);
 880#endif
 881
 882static void ioapic_copy_alloc_attr(struct irq_alloc_info *dst,
 883                                   struct irq_alloc_info *src,
 884                                   u32 gsi, int ioapic_idx, int pin)
 885{
 886        int trigger, polarity;
 887
 888        copy_irq_alloc_info(dst, src);
 889        dst->type = X86_IRQ_ALLOC_TYPE_IOAPIC;
 890        dst->ioapic_id = mpc_ioapic_id(ioapic_idx);
 891        dst->ioapic_pin = pin;
 892        dst->ioapic_valid = 1;
 893        if (src && src->ioapic_valid) {
 894                dst->ioapic_node = src->ioapic_node;
 895                dst->ioapic_trigger = src->ioapic_trigger;
 896                dst->ioapic_polarity = src->ioapic_polarity;
 897        } else {
 898                dst->ioapic_node = NUMA_NO_NODE;
 899                if (acpi_get_override_irq(gsi, &trigger, &polarity) >= 0) {
 900                        dst->ioapic_trigger = trigger;
 901                        dst->ioapic_polarity = polarity;
 902                } else {
 903                        /*
 904                         * PCI interrupts are always active low level
 905                         * triggered.
 906                         */
 907                        dst->ioapic_trigger = IOAPIC_LEVEL;
 908                        dst->ioapic_polarity = IOAPIC_POL_LOW;
 909                }
 910        }
 911}
 912
 913static int ioapic_alloc_attr_node(struct irq_alloc_info *info)
 914{
 915        return (info && info->ioapic_valid) ? info->ioapic_node : NUMA_NO_NODE;
 916}
 917
 918static void mp_register_handler(unsigned int irq, unsigned long trigger)
 919{
 920        irq_flow_handler_t hdl;
 921        bool fasteoi;
 922
 923        if (trigger) {
 924                irq_set_status_flags(irq, IRQ_LEVEL);
 925                fasteoi = true;
 926        } else {
 927                irq_clear_status_flags(irq, IRQ_LEVEL);
 928                fasteoi = false;
 929        }
 930
 931        hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
 932        __irq_set_handler(irq, hdl, 0, fasteoi ? "fasteoi" : "edge");
 933}
 934
 935static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info)
 936{
 937        struct mp_chip_data *data = irq_get_chip_data(irq);
 938
 939        /*
 940         * setup_IO_APIC_irqs() programs all legacy IRQs with default trigger
  941         * and polarity attributes. So allow the first user to reprogram the
 942         * pin with real trigger and polarity attributes.
 943         */
 944        if (irq < nr_legacy_irqs() && data->count == 1) {
 945                if (info->ioapic_trigger != data->trigger)
 946                        mp_register_handler(irq, info->ioapic_trigger);
 947                data->entry.trigger = data->trigger = info->ioapic_trigger;
 948                data->entry.polarity = data->polarity = info->ioapic_polarity;
 949        }
 950
 951        return data->trigger == info->ioapic_trigger &&
 952               data->polarity == info->ioapic_polarity;
 953}
 954
 955static int alloc_irq_from_domain(struct irq_domain *domain, int ioapic, u32 gsi,
 956                                 struct irq_alloc_info *info)
 957{
 958        bool legacy = false;
 959        int irq = -1;
 960        int type = ioapics[ioapic].irqdomain_cfg.type;
 961
 962        switch (type) {
 963        case IOAPIC_DOMAIN_LEGACY:
 964                /*
 965                 * Dynamically allocate IRQ number for non-ISA IRQs in the first
 966                 * 16 GSIs on some weird platforms.
 967                 */
 968                if (!ioapic_initialized || gsi >= nr_legacy_irqs())
 969                        irq = gsi;
 970                legacy = mp_is_legacy_irq(irq);
 971                break;
 972        case IOAPIC_DOMAIN_STRICT:
 973                irq = gsi;
 974                break;
 975        case IOAPIC_DOMAIN_DYNAMIC:
 976                break;
 977        default:
 978                WARN(1, "ioapic: unknown irqdomain type %d\n", type);
 979                return -1;
 980        }
 981
 982        return __irq_domain_alloc_irqs(domain, irq, 1,
 983                                       ioapic_alloc_attr_node(info),
 984                                       info, legacy, NULL);
 985}
 986
 987/*
 988 * Need special handling for ISA IRQs because there may be multiple IOAPIC pins
 989 * sharing the same ISA IRQ number and irqdomain only supports 1:1 mapping
 990 * between IOAPIC pin and IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are
 991 * used for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H).
 992 * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are available, and
 993 * some BIOSes may use MP Interrupt Source records to override IRQ numbers for
 994 * PIRQs instead of reprogramming the interrupt routing logic. Thus there may be
 995 * multiple pins sharing the same legacy IRQ number when ACPI is disabled.
 996 */
 997static int alloc_isa_irq_from_domain(struct irq_domain *domain,
 998                                     int irq, int ioapic, int pin,
 999                                     struct irq_alloc_info *info)
1000{
1001        struct mp_chip_data *data;
1002        struct irq_data *irq_data = irq_get_irq_data(irq);
1003        int node = ioapic_alloc_attr_node(info);
1004
1005        /*
 1006         * The legacy ISA IRQ has already been allocated, so just add
 1007         * the pin to the pin list associated with this IRQ and program
 1008         * the IOAPIC entry.
 1009         */
1010        if (irq_data && irq_data->parent_data) {
1011                if (!mp_check_pin_attr(irq, info))
1012                        return -EBUSY;
1013                if (__add_pin_to_irq_node(irq_data->chip_data, node, ioapic,
1014                                          info->ioapic_pin))
1015                        return -ENOMEM;
1016        } else {
1017                info->flags |= X86_IRQ_ALLOC_LEGACY;
1018                irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true,
1019                                              NULL);
1020                if (irq >= 0) {
1021                        irq_data = irq_domain_get_irq_data(domain, irq);
1022                        data = irq_data->chip_data;
1023                        data->isa_irq = true;
1024                }
1025        }
1026
1027        return irq;
1028}
1029
1030static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
1031                             unsigned int flags, struct irq_alloc_info *info)
1032{
1033        int irq;
1034        bool legacy = false;
1035        struct irq_alloc_info tmp;
1036        struct mp_chip_data *data;
1037        struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
1038
1039        if (!domain)
1040                return -ENOSYS;
1041
1042        if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) {
1043                irq = mp_irqs[idx].srcbusirq;
1044                legacy = mp_is_legacy_irq(irq);
1045        }
1046
1047        mutex_lock(&ioapic_mutex);
1048        if (!(flags & IOAPIC_MAP_ALLOC)) {
1049                if (!legacy) {
1050                        irq = irq_find_mapping(domain, pin);
1051                        if (irq == 0)
1052                                irq = -ENOENT;
1053                }
1054        } else {
1055                ioapic_copy_alloc_attr(&tmp, info, gsi, ioapic, pin);
1056                if (legacy)
1057                        irq = alloc_isa_irq_from_domain(domain, irq,
1058                                                        ioapic, pin, &tmp);
1059                else if ((irq = irq_find_mapping(domain, pin)) == 0)
1060                        irq = alloc_irq_from_domain(domain, ioapic, gsi, &tmp);
1061                else if (!mp_check_pin_attr(irq, &tmp))
1062                        irq = -EBUSY;
1063                if (irq >= 0) {
1064                        data = irq_get_chip_data(irq);
1065                        data->count++;
1066                }
1067        }
1068        mutex_unlock(&ioapic_mutex);
1069
1070        return irq;
1071}
1072
1073static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags)
1074{
1075        u32 gsi = mp_pin_to_gsi(ioapic, pin);
1076
1077        /*
1078         * Debugging check, we are in big trouble if this message pops up!
1079         */
1080        if (mp_irqs[idx].dstirq != pin)
1081                pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
1082
1083#ifdef CONFIG_X86_32
1084        /*
1085         * PCI IRQ command line redirection. Yes, limits are hardcoded.
1086         */
1087        if ((pin >= 16) && (pin <= 23)) {
1088                if (pirq_entries[pin-16] != -1) {
1089                        if (!pirq_entries[pin-16]) {
1090                                apic_printk(APIC_VERBOSE, KERN_DEBUG
1091                                                "disabling PIRQ%d\n", pin-16);
1092                        } else {
1093                                int irq = pirq_entries[pin-16];
1094                                apic_printk(APIC_VERBOSE, KERN_DEBUG
1095                                                "using PIRQ%d -> IRQ %d\n",
1096                                                pin-16, irq);
1097                                return irq;
1098                        }
1099                }
1100        }
1101#endif
1102
1103        return  mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, NULL);
1104}
1105
1106int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info)
1107{
1108        int ioapic, pin, idx;
1109
1110        ioapic = mp_find_ioapic(gsi);
1111        if (ioapic < 0)
1112                return -ENODEV;
1113
1114        pin = mp_find_ioapic_pin(ioapic, gsi);
1115        idx = find_irq_entry(ioapic, pin, mp_INT);
1116        if ((flags & IOAPIC_MAP_CHECK) && idx < 0)
1117                return -ENODEV;
1118
1119        return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, info);
1120}
1121
1122void mp_unmap_irq(int irq)
1123{
1124        struct irq_data *irq_data = irq_get_irq_data(irq);
1125        struct mp_chip_data *data;
1126
1127        if (!irq_data || !irq_data->domain)
1128                return;
1129
1130        data = irq_data->chip_data;
1131        if (!data || data->isa_irq)
1132                return;
1133
1134        mutex_lock(&ioapic_mutex);
1135        if (--data->count == 0)
1136                irq_domain_free_irqs(irq, 1);
1137        mutex_unlock(&ioapic_mutex);
1138}
1139
1140/*
1141 * Find a specific PCI IRQ entry.
1142 * Not an __init, possibly needed by modules
1143 */
1144int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
1145{
1146        int irq, i, best_ioapic = -1, best_idx = -1;
1147
1148        apic_printk(APIC_DEBUG,
1149                    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
1150                    bus, slot, pin);
1151        if (test_bit(bus, mp_bus_not_pci)) {
1152                apic_printk(APIC_VERBOSE,
1153                            "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
1154                return -1;
1155        }
1156
1157        for (i = 0; i < mp_irq_entries; i++) {
1158                int lbus = mp_irqs[i].srcbus;
1159                int ioapic_idx, found = 0;
1160
1161                if (bus != lbus || mp_irqs[i].irqtype != mp_INT ||
1162                    slot != ((mp_irqs[i].srcbusirq >> 2) & 0x1f))
1163                        continue;
1164
1165                for_each_ioapic(ioapic_idx)
1166                        if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
1167                            mp_irqs[i].dstapic == MP_APIC_ALL) {
1168                                found = 1;
1169                                break;
1170                        }
1171                if (!found)
1172                        continue;
1173
1174                /* Skip ISA IRQs */
1175                irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq, 0);
1176                if (irq > 0 && !IO_APIC_IRQ(irq))
1177                        continue;
1178
1179                if (pin == (mp_irqs[i].srcbusirq & 3)) {
1180                        best_idx = i;
1181                        best_ioapic = ioapic_idx;
1182                        goto out;
1183                }
1184
1185                /*
1186                 * Use the first all-but-pin matching entry as a
1187                 * best-guess fuzzy result for broken mptables.
1188                 */
1189                if (best_idx < 0) {
1190                        best_idx = i;
1191                        best_ioapic = ioapic_idx;
1192                }
1193        }
1194        if (best_idx < 0)
1195                return -1;
1196
1197out:
1198        return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
1199                         IOAPIC_MAP_ALLOC);
1200}
1201EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
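     /*
      * For PCI buses the MP table packs the source into
      * mpc_intsrc.srcbusirq as (device << 2) | INT#, which is why the
      * lookup above compares the slot against (srcbusirq >> 2) & 0x1f and
      * the pin against srcbusirq & 3.  Worked example: a device in slot 3
      * asserting INTB (pin value 1) is encoded as (3 << 2) | 1 = 0x0d.
      */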
1202
1203static struct irq_chip ioapic_chip, ioapic_ir_chip;
1204
1205static void __init setup_IO_APIC_irqs(void)
1206{
1207        unsigned int ioapic, pin;
1208        int idx;
1209
1210        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1211
1212        for_each_ioapic_pin(ioapic, pin) {
1213                idx = find_irq_entry(ioapic, pin, mp_INT);
1214                if (idx < 0)
1215                        apic_printk(APIC_VERBOSE,
1216                                    KERN_DEBUG " apic %d pin %d not connected\n",
1217                                    mpc_ioapic_id(ioapic), pin);
1218                else
1219                        pin_2_irq(idx, ioapic, pin,
1220                                  ioapic ? 0 : IOAPIC_MAP_ALLOC);
1221        }
1222}
1223
1224void ioapic_zap_locks(void)
1225{
1226        raw_spin_lock_init(&ioapic_lock);
1227}
1228
1229static void io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
1230{
1231        int i;
1232        char buf[256];
1233        struct IO_APIC_route_entry entry;
1234        struct IR_IO_APIC_route_entry *ir_entry = (void *)&entry;
1235
1236        printk(KERN_DEBUG "IOAPIC %d:\n", apic);
1237        for (i = 0; i <= nr_entries; i++) {
1238                entry = ioapic_read_entry(apic, i);
1239                snprintf(buf, sizeof(buf),
1240                         " pin%02x, %s, %s, %s, V(%02X), IRR(%1d), S(%1d)",
1241                         i,
1242                         entry.mask == IOAPIC_MASKED ? "disabled" : "enabled ",
1243                         entry.trigger == IOAPIC_LEVEL ? "level" : "edge ",
1244                         entry.polarity == IOAPIC_POL_LOW ? "low " : "high",
1245                         entry.vector, entry.irr, entry.delivery_status);
1246                if (ir_entry->format)
1247                        printk(KERN_DEBUG "%s, remapped, I(%04X),  Z(%X)\n",
1248                               buf, (ir_entry->index2 << 15) | ir_entry->index,
1249                               ir_entry->zero);
1250                else
1251                        printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n",
1252                               buf,
1253                               entry.dest_mode == IOAPIC_DEST_MODE_LOGICAL ?
1254                               "logical " : "physical",
1255                               entry.dest, entry.delivery_mode);
1256        }
1257}
1258
1259static void __init print_IO_APIC(int ioapic_idx)
1260{
1261        union IO_APIC_reg_00 reg_00;
1262        union IO_APIC_reg_01 reg_01;
1263        union IO_APIC_reg_02 reg_02;
1264        union IO_APIC_reg_03 reg_03;
1265        unsigned long flags;
1266
1267        raw_spin_lock_irqsave(&ioapic_lock, flags);
1268        reg_00.raw = io_apic_read(ioapic_idx, 0);
1269        reg_01.raw = io_apic_read(ioapic_idx, 1);
1270        if (reg_01.bits.version >= 0x10)
1271                reg_02.raw = io_apic_read(ioapic_idx, 2);
1272        if (reg_01.bits.version >= 0x20)
1273                reg_03.raw = io_apic_read(ioapic_idx, 3);
1274        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1275
1276        printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
1277        printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1278        printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
1279        printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
1280        printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
1281
1282        printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
1283        printk(KERN_DEBUG ".......     : max redirection entries: %02X\n",
1284                reg_01.bits.entries);
1285
1286        printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
1287        printk(KERN_DEBUG ".......     : IO APIC version: %02X\n",
1288                reg_01.bits.version);
1289
1290        /*
1291         * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
1292         * but the value of reg_02 is read as the previous read register
1293         * value, so ignore it if reg_02 == reg_01.
1294         */
1295        if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1296                printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1297                printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
1298        }
1299
1300        /*
1301         * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
1302         * or reg_03, but the value of reg_0[23] is read as the previous read
1303         * register value, so ignore it if reg_03 == reg_0[12].
1304         */
1305        if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
1306            reg_03.raw != reg_01.raw) {
1307                printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1308                printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
1309        }
1310
1311        printk(KERN_DEBUG ".... IRQ redirection table:\n");
1312        io_apic_print_entries(ioapic_idx, reg_01.bits.entries);
1313}
1314
1315void __init print_IO_APICs(void)
1316{
1317        int ioapic_idx;
1318        unsigned int irq;
1319
1320        printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1321        for_each_ioapic(ioapic_idx)
1322                printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1323                       mpc_ioapic_id(ioapic_idx),
1324                       ioapics[ioapic_idx].nr_registers);
1325
1326        /*
1327         * We are a bit conservative about what we expect.  We have to
1328         * know about every hardware change ASAP.
1329         */
1330        printk(KERN_INFO "testing the IO APIC.......................\n");
1331
1332        for_each_ioapic(ioapic_idx)
1333                print_IO_APIC(ioapic_idx);
1334
1335        printk(KERN_DEBUG "IRQ to pin mappings:\n");
1336        for_each_active_irq(irq) {
1337                struct irq_pin_list *entry;
1338                struct irq_chip *chip;
1339                struct mp_chip_data *data;
1340
1341                chip = irq_get_chip(irq);
1342                if (chip != &ioapic_chip && chip != &ioapic_ir_chip)
1343                        continue;
1344                data = irq_get_chip_data(irq);
1345                if (!data)
1346                        continue;
1347                if (list_empty(&data->irq_2_pin))
1348                        continue;
1349
1350                printk(KERN_DEBUG "IRQ%d ", irq);
1351                for_each_irq_pin(entry, data->irq_2_pin)
1352                        pr_cont("-> %d:%d", entry->apic, entry->pin);
1353                pr_cont("\n");
1354        }
1355
1356        printk(KERN_INFO ".................................... done.\n");
1357}
1358
 1359/* Where, if anywhere, is the i8259 connected in external int mode */
1360static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
1361
1362void __init enable_IO_APIC(void)
1363{
1364        int i8259_apic, i8259_pin;
1365        int apic, pin;
1366
1367        if (skip_ioapic_setup)
1368                nr_ioapics = 0;
1369
1370        if (!nr_legacy_irqs() || !nr_ioapics)
1371                return;
1372
1373        for_each_ioapic_pin(apic, pin) {
1374                /* See if any of the pins is in ExtINT mode */
1375                struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin);
1376
1377                /* If the interrupt line is enabled and in ExtInt mode
1378                 * I have found the pin where the i8259 is connected.
1379                 */
1380                if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
1381                        ioapic_i8259.apic = apic;
1382                        ioapic_i8259.pin  = pin;
1383                        goto found_i8259;
1384                }
1385        }
1386 found_i8259:
 1387        /* Look to see whether the MP table has reported the ExtINT */
 1388        /* If we could not find the appropriate pin by looking at the ioapic,
 1389         * the i8259 probably is not connected to the ioapic, but give the
 1390         * mptable a chance anyway.
 1391         */
1392        i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
1393        i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1394        /* Trust the MP table if nothing is setup in the hardware */
1395        if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
1396                printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
1397                ioapic_i8259.pin  = i8259_pin;
1398                ioapic_i8259.apic = i8259_apic;
1399        }
1400        /* Complain if the MP table and the hardware disagree */
1401        if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
1402                (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
1403        {
1404                printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
1405        }
1406
1407        /*
1408         * Do not trust the IO-APIC being empty at bootup
1409         */
1410        clear_IO_APIC();
1411}
1412
1413void native_disable_io_apic(void)
1414{
1415        /*
1416         * If the i8259 is routed through an IOAPIC
1417         * Put that IOAPIC in virtual wire mode
1418         * so legacy interrupts can be delivered.
1419         */
1420        if (ioapic_i8259.pin != -1) {
1421                struct IO_APIC_route_entry entry;
1422
1423                memset(&entry, 0, sizeof(entry));
1424                entry.mask              = IOAPIC_UNMASKED;
1425                entry.trigger           = IOAPIC_EDGE;
1426                entry.polarity          = IOAPIC_POL_HIGH;
1427                entry.dest_mode         = IOAPIC_DEST_MODE_PHYSICAL;
1428                entry.delivery_mode     = dest_ExtINT;
1429                entry.dest              = read_apic_id();
1430
1431                /*
1432                 * Add it to the IO-APIC irq-routing table:
1433                 */
1434                ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1435        }
1436
1437        if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
1438                disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1439}
1440
1441/*
1442 * Not an __init, needed by the reboot code
1443 */
1444void disable_IO_APIC(void)
1445{
1446        /*
1447         * Clear the IO-APIC before rebooting:
1448         */
1449        clear_IO_APIC();
1450
1451        if (!nr_legacy_irqs())
1452                return;
1453
1454        x86_io_apic_ops.disable();
1455}
1456
1457#ifdef CONFIG_X86_32
1458/*
1459 * function to set the IO-APIC physical IDs based on the
1460 * values stored in the MPC table.
1461 *
1462 * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
1463 */
1464void __init setup_ioapic_ids_from_mpc_nocheck(void)
1465{
1466        union IO_APIC_reg_00 reg_00;
1467        physid_mask_t phys_id_present_map;
1468        int ioapic_idx;
1469        int i;
1470        unsigned char old_id;
1471        unsigned long flags;
1472
1473        /*
1474         * This is broken; anything with a real cpu count has to
1475         * circumvent this idiocy regardless.
1476         */
1477        apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
1478
1479        /*
1480         * Set the IOAPIC ID to the value stored in the MPC table.
1481         */
1482        for_each_ioapic(ioapic_idx) {
1483                /* Read the register 0 value */
1484                raw_spin_lock_irqsave(&ioapic_lock, flags);
1485                reg_00.raw = io_apic_read(ioapic_idx, 0);
1486                raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1487
1488                old_id = mpc_ioapic_id(ioapic_idx);
1489
1490                if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) {
1491                        printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1492                                ioapic_idx, mpc_ioapic_id(ioapic_idx));
1493                        printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1494                                reg_00.bits.ID);
1495                        ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID;
1496                }
1497
1498                /*
1499                 * Sanity check, is the ID really free? Every APIC in a
1500                 * system must have a unique ID or we get lots of nice
1501                 * 'stuck on smp_invalidate_needed IPI wait' messages.
1502                 */
1503                if (apic->check_apicid_used(&phys_id_present_map,
1504                                            mpc_ioapic_id(ioapic_idx))) {
1505                        printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1506                                ioapic_idx, mpc_ioapic_id(ioapic_idx));
1507                        for (i = 0; i < get_physical_broadcast(); i++)
1508                                if (!physid_isset(i, phys_id_present_map))
1509                                        break;
1510                        if (i >= get_physical_broadcast())
1511                                panic("Max APIC ID exceeded!\n");
1512                        printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1513                                i);
1514                        physid_set(i, phys_id_present_map);
1515                        ioapics[ioapic_idx].mp_config.apicid = i;
1516                } else {
1517                        physid_mask_t tmp;
1518                        apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx),
1519                                                    &tmp);
1520                        apic_printk(APIC_VERBOSE, "Setting %d in the "
1521                                        "phys_id_present_map\n",
1522                                        mpc_ioapic_id(ioapic_idx));
1523                        physids_or(phys_id_present_map, phys_id_present_map, tmp);
1524                }
1525
1526                /*
1527                 * We need to adjust the IRQ routing table
1528                 * if the ID changed.
1529                 */
1530                if (old_id != mpc_ioapic_id(ioapic_idx))
1531                        for (i = 0; i < mp_irq_entries; i++)
1532                                if (mp_irqs[i].dstapic == old_id)
1533                                        mp_irqs[i].dstapic
1534                                                = mpc_ioapic_id(ioapic_idx);
1535
1536                /*
1537                 * Update the ID register according to the right value
1538                 * from the MPC table if they are different.
1539                 */
1540                if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID)
1541                        continue;
1542
1543                apic_printk(APIC_VERBOSE, KERN_INFO
1544                        "...changing IO-APIC physical APIC ID to %d ...",
1545                        mpc_ioapic_id(ioapic_idx));
1546
1547                reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
1548                raw_spin_lock_irqsave(&ioapic_lock, flags);
1549                io_apic_write(ioapic_idx, 0, reg_00.raw);
1550                raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1551
1552                /*
1553                 * Sanity check
1554                 */
1555                raw_spin_lock_irqsave(&ioapic_lock, flags);
1556                reg_00.raw = io_apic_read(ioapic_idx, 0);
1557                raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1558                if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
1559                        pr_cont("could not set ID!\n");
1560                else
1561                        apic_printk(APIC_VERBOSE, " ok.\n");
1562        }
1563}
1564
1565void __init setup_ioapic_ids_from_mpc(void)
1566{
1567
1568        if (acpi_ioapic)
1569                return;
1570        /*
1571         * Don't check I/O APIC IDs for xAPIC systems.  They have
1572         * no meaning without the serial APIC bus.
1573         */
1574        if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1575                || APIC_XAPIC(boot_cpu_apic_version))
1576                return;
1577        setup_ioapic_ids_from_mpc_nocheck();
1578}
1579#endif
1580
1581int no_timer_check __initdata;
1582
1583static int __init notimercheck(char *s)
1584{
1585        no_timer_check = 1;
1586        return 1;
1587}
1588__setup("no_timer_check", notimercheck);
1589
1590static void __init delay_with_tsc(void)
1591{
1592        unsigned long long start, now;
1593        unsigned long end = jiffies + 4;
1594
1595        start = rdtsc();
1596
1597        /*
1598         * We don't know the TSC frequency yet, but waiting for
1599         * 40000000000/HZ TSC cycles is safe:
1600         * 4 GHz == 10 jiffies
1601         * 1 GHz == 40 jiffies
1602         */
1603        do {
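                /* rep_nop() executes REP;NOP (the PAUSE hint) to relax the CPU while spinning. */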
1604                rep_nop();
1605                now = rdtsc();
1606        } while ((now - start) < 40000000000ULL / HZ &&
1607                time_before_eq(jiffies, end));
1608}
1609
1610static void __init delay_without_tsc(void)
1611{
1612        unsigned long end = jiffies + 4;
1613        int band = 1;
1614
1615        /*
1616         * We don't know any frequency yet, but waiting for
1617         * 40940000000/HZ cycles is safe:
1618         * 4 GHz == 10 jiffies
1619         * 1 GHz == 40 jiffies
1620         * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094
1621         */
1622        do {
1623                __delay(((1U << band++) * 10000000UL) / HZ);
1624        } while (band < 12 && time_before_eq(jiffies, end));
1625}
1626
1627/*
1628 * There is a nasty bug in some older SMP boards, their mptable lies
1629 * about the timer IRQ. We do the following to work around the situation:
1630 *
1631 *      - timer IRQ defaults to IO-APIC IRQ
1632 *      - if this function detects that timer IRQs are defunct, then we fall
1633 *        back to ISA timer IRQs
1634 */
1635static int __init timer_irq_works(void)
1636{
1637        unsigned long t1 = jiffies;
1638        unsigned long flags;
1639
1640        if (no_timer_check)
1641                return 1;
1642
1643        local_save_flags(flags);
1644        local_irq_enable();
1645
1646        if (boot_cpu_has(X86_FEATURE_TSC))
1647                delay_with_tsc();
1648        else
1649                delay_without_tsc();
1650
1651        local_irq_restore(flags);
1652
1653        /*
1654         * Expect a few ticks at least, to be sure some possible
1655         * glue logic does not lock up after the first one or two
1656         * ticks in a non-ExtINT mode.  Also the local APIC
1657         * might have cached one ExtINT interrupt.  Finally, at
1658         * least one tick may be lost due to delays.
1659         */
1660
1661        /* jiffies wrap? */
1662        if (time_after(jiffies, t1 + 4))
1663                return 1;
1664        return 0;
1665}
1666
1667/*
1668 * In the SMP+IOAPIC case it might happen that there are an unspecified
1669 * number of pending IRQ events unhandled. These cases are very rare,
1670 * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
1671 * better to do it this way because then we do not have to be aware of
1672 * 'pending' interrupts in the IRQ path, except at this point.
1673 */
1674/*
1675 * Edge triggered needs to resend any interrupt
1676 * that was delayed but this is now handled in the device
1677 * independent code.
1678 */
1679
1680/*
1681 * Starting up an edge-triggered IO-APIC interrupt is
1682 * nasty - we need to make sure that we get the edge.
1683 * If it is already asserted for some reason, we need to
1684 * return 1 to indicate that it was pending.
1685 *
1686 * This is not complete - we should be able to fake
1687 * an edge even if it isn't on the 8259A...
1688 */
1689static unsigned int startup_ioapic_irq(struct irq_data *data)
1690{
1691        int was_pending = 0, irq = data->irq;
1692        unsigned long flags;
1693
1694        raw_spin_lock_irqsave(&ioapic_lock, flags);
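        /*
         * For legacy IRQs: keep the 8259A pin masked and note whether it
         * already had this interrupt pending, so the caller can resend a
         * lost edge.
         */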
1695        if (irq < nr_legacy_irqs()) {
1696                legacy_pic->mask(irq);
1697                if (legacy_pic->irq_pending(irq))
1698                        was_pending = 1;
1699        }
1700        __unmask_ioapic(data->chip_data);
1701        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1702
1703        return was_pending;
1704}
1705
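/*
 * Incremented in ioapic_ack_level() whenever a level-triggered interrupt
 * shows up with its TMR bit clear and needs the explicit EOI workaround.
 */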
1706atomic_t irq_mis_count;
1707
1708#ifdef CONFIG_GENERIC_PENDING_IRQ
1709static bool io_apic_level_ack_pending(struct mp_chip_data *data)
1710{
1711        struct irq_pin_list *entry;
1712        unsigned long flags;
1713
1714        raw_spin_lock_irqsave(&ioapic_lock, flags);
1715        for_each_irq_pin(entry, data->irq_2_pin) {
1716                unsigned int reg;
1717                int pin;
1718
1719                pin = entry->pin;
1720                reg = io_apic_read(entry->apic, 0x10 + pin*2);
1721                /* Is the remote IRR bit set? */
1722                if (reg & IO_APIC_REDIR_REMOTE_IRR) {
1723                        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1724                        return true;
1725                }
1726        }
1727        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1728
1729        return false;
1730}
1731
1732static inline bool ioapic_irqd_mask(struct irq_data *data)
1733{
1734        /* If we are moving the irq we need to mask it */
1735        if (unlikely(irqd_is_setaffinity_pending(data))) {
1736                mask_ioapic_irq(data);
1737                return true;
1738        }
1739        return false;
1740}
1741
1742static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
1743{
1744        if (unlikely(masked)) {
1745                /* Only migrate the irq if the ack has been received.
1746                 *
1747                 * On rare occasions the broadcast level triggered ack gets
1748                 * delayed going to ioapics, and if we reprogram the
1749                 * vector while Remote IRR is still set the irq will never
1750                 * fire again.
1751                 *
1752                 * To prevent this scenario we read the Remote IRR bit
1753                 * of the ioapic.  This has two effects.
1754                 * - On any sane system the read of the ioapic will
1755                 *   flush writes (and acks) going to the ioapic from
1756                 *   this cpu.
1757                 * - We get to see if the ACK has actually been delivered.
1758                 *
1759                 * Based on failed experiments of reprogramming the
1760                 * ioapic entry from outside of irq context starting
1761                 * with masking the ioapic entry and then polling until
1762                 * Remote IRR was clear before reprogramming the
1763                 * ioapic I don't trust the Remote IRR bit to be
1764                 * completely accurate.
1765                 *
1766                 * However there appears to be no other way to plug
1767                 * this race, so if the Remote IRR bit is not
1768                 * accurate and is causing problems then it is a hardware bug
1769                 * and you can go talk to the chipset vendor about it.
1770                 */
1771                if (!io_apic_level_ack_pending(data->chip_data))
1772                        irq_move_masked_irq(data);
1773                unmask_ioapic_irq(data);
1774        }
1775}
1776#else
1777static inline bool ioapic_irqd_mask(struct irq_data *data)
1778{
1779        return false;
1780}
1781static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
1782{
1783}
1784#endif
1785
1786static void ioapic_ack_level(struct irq_data *irq_data)
1787{
1788        struct irq_cfg *cfg = irqd_cfg(irq_data);
1789        unsigned long v;
1790        bool masked;
1791        int i;
1792
1793        irq_complete_move(cfg);
1794        masked = ioapic_irqd_mask(irq_data);
1795
1796        /*
1797         * It appears there is an erratum which affects at least version 0x11
1798         * of I/O APIC (that's the 82093AA and cores integrated into various
1799         * chipsets).  Under certain conditions a level-triggered interrupt is
1800         * erroneously delivered as an edge-triggered one but the respective IRR
1801         * bit gets set nevertheless.  As a result the I/O unit expects an EOI
1802         * message but it will never arrive and further interrupts are blocked
1803         * from the source.  The exact reason is so far unknown, but the
1804         * phenomenon was observed when two consecutive interrupt requests
1805         * from a given source get delivered to the same CPU and the source is
1806         * temporarily disabled in between.
1807         *
1808         * A workaround is to simulate an EOI message manually.  We achieve it
1809         * by setting the trigger mode to edge and then to level when the edge
1810         * trigger mode gets detected in the TMR of a local APIC for a
1811         * level-triggered interrupt.  We mask the source for the time of the
1812         * operation to prevent an edge-triggered interrupt escaping meanwhile.
1813         * The idea is from Manfred Spraul.  --macro
1814         *
1815         * Also, when a cpu goes offline, fixup_irqs() will forward
1816         * any unhandled interrupt on the offlined cpu to the new cpu
1817         * destination that is handling the corresponding interrupt. This
1818         * interrupt forwarding is done via IPIs. Hence, in this case a
1819         * level-triggered io-apic interrupt will also be seen as an edge
1820         * interrupt in the IRR. And we can't rely on the cpu's EOI
1821         * being broadcast to the IO-APICs, which would clear the remote IRR
1822         * corresponding to the level-triggered interrupt. Hence on IO-APICs
1823         * supporting an EOI register, we do an explicit EOI to clear the
1824         * remote IRR, and on IO-APICs which don't have an EOI register,
1825         * we use the above logic (mask+edge followed by unmask+level) from
1826         * Manfred Spraul to clear the remote IRR.
1827         */
1828        i = cfg->vector;
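        /*
         * APIC_TMR is a block of eight 32-bit registers spaced 0x10 apart,
         * each covering 32 vectors, so vector i is found at offset
         * (i / 32) * 0x10 == ((i & ~0x1f) >> 1), bit (i & 0x1f).
         */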
1829        v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
1830
1831        /*
1832         * We must acknowledge the irq before we move it or the acknowledge will
1833         * not propagate properly.
1834         */
1835        ack_APIC_irq();
1836
1837        /*
1838         * Tail end of clearing remote IRR bit (either by delivering the EOI
1839         * message via io-apic EOI register write or simulating it using
1840         * mask+edge followed by unmask+level logic) manually when the
1841         * level triggered interrupt is seen as the edge triggered interrupt
1842         * at the cpu.
1843         */
1844        if (!(v & (1 << (i & 0x1f)))) {
1845                atomic_inc(&irq_mis_count);
1846                eoi_ioapic_pin(cfg->vector, irq_data->chip_data);
1847        }
1848
1849        ioapic_irqd_unmask(irq_data, masked);
1850}
1851
1852static void ioapic_ir_ack_level(struct irq_data *irq_data)
1853{
1854        struct mp_chip_data *data = irq_data->chip_data;
1855
1856        /*
1857         * Intr-remapping uses pin number as the virtual vector
1858         * in the RTE. Actual vector is programmed in
1859         * intr-remapping table entry. Hence for the io-apic
1860         * EOI we use the pin number.
1861         */
1862        ack_APIC_irq();
1863        eoi_ioapic_pin(data->entry.vector, data);
1864}
1865
1866static void ioapic_configure_entry(struct irq_data *irqd)
1867{
1868        struct mp_chip_data *mpd = irqd->chip_data;
1869        struct irq_cfg *cfg = irqd_cfg(irqd);
1870        struct irq_pin_list *entry;
1871
1872        /*
1873         * Only update when the parent is the vector domain, don't touch it
1874         * if the parent is the remapping domain. Check the installed
1875         * ioapic chip to verify that.
1876         */
1877        if (irqd->chip == &ioapic_chip) {
1878                mpd->entry.dest = cfg->dest_apicid;
1879                mpd->entry.vector = cfg->vector;
1880        }
1881        for_each_irq_pin(entry, mpd->irq_2_pin)
1882                __ioapic_write_entry(entry->apic, entry->pin, mpd->entry);
1883}
1884
1885static int ioapic_set_affinity(struct irq_data *irq_data,
1886                               const struct cpumask *mask, bool force)
1887{
1888        struct irq_data *parent = irq_data->parent_data;
1889        unsigned long flags;
1890        int ret;
1891
1892        ret = parent->chip->irq_set_affinity(parent, mask, force);
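        /*
         * Rewrite the routing entry only if the parent changed the vector or
         * destination and did not already finalize it (IRQ_SET_MASK_OK_DONE).
         */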
1893        raw_spin_lock_irqsave(&ioapic_lock, flags);
1894        if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE)
1895                ioapic_configure_entry(irq_data);
1896        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1897
1898        return ret;
1899}
1900
1901static struct irq_chip ioapic_chip __read_mostly = {
1902        .name                   = "IO-APIC",
1903        .irq_startup            = startup_ioapic_irq,
1904        .irq_mask               = mask_ioapic_irq,
1905        .irq_unmask             = unmask_ioapic_irq,
1906        .irq_ack                = irq_chip_ack_parent,
1907        .irq_eoi                = ioapic_ack_level,
1908        .irq_set_affinity       = ioapic_set_affinity,
1909        .irq_retrigger          = irq_chip_retrigger_hierarchy,
1910        .flags                  = IRQCHIP_SKIP_SET_WAKE,
1911};
1912
1913static struct irq_chip ioapic_ir_chip __read_mostly = {
1914        .name                   = "IR-IO-APIC",
1915        .irq_startup            = startup_ioapic_irq,
1916        .irq_mask               = mask_ioapic_irq,
1917        .irq_unmask             = unmask_ioapic_irq,
1918        .irq_ack                = irq_chip_ack_parent,
1919        .irq_eoi                = ioapic_ir_ack_level,
1920        .irq_set_affinity       = ioapic_set_affinity,
1921        .irq_retrigger          = irq_chip_retrigger_hierarchy,
1922        .flags                  = IRQCHIP_SKIP_SET_WAKE,
1923};
1924
1925static inline void init_IO_APIC_traps(void)
1926{
1927        struct irq_cfg *cfg;
1928        unsigned int irq;
1929
1930        for_each_active_irq(irq) {
1931                cfg = irq_cfg(irq);
1932                if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
1933                        /*
1934                         * Hmm.. We don't have an entry for this,
1935                         * so default to an old-fashioned 8259
1936                         * interrupt if we can..
1937                         */
1938                        if (irq < nr_legacy_irqs())
1939                                legacy_pic->make_irq(irq);
1940                        else
1941                                /* Strange. Oh, well.. */
1942                                irq_set_chip(irq, &no_irq_chip);
1943                }
1944        }
1945}
1946
1947/*
1948 * The local APIC irq-chip implementation:
1949 */
1950
1951static void mask_lapic_irq(struct irq_data *data)
1952{
1953        unsigned long v;
1954
1955        v = apic_read(APIC_LVT0);
1956        apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
1957}
1958
1959static void unmask_lapic_irq(struct irq_data *data)
1960{
1961        unsigned long v;
1962
1963        v = apic_read(APIC_LVT0);
1964        apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
1965}
1966
1967static void ack_lapic_irq(struct irq_data *data)
1968{
1969        ack_APIC_irq();
1970}
1971
1972static struct irq_chip lapic_chip __read_mostly = {
1973        .name           = "local-APIC",
1974        .irq_mask       = mask_lapic_irq,
1975        .irq_unmask     = unmask_lapic_irq,
1976        .irq_ack        = ack_lapic_irq,
1977};
1978
1979static void lapic_register_intr(int irq)
1980{
1981        irq_clear_status_flags(irq, IRQ_LEVEL);
1982        irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
1983                                      "edge");
1984}
1985
1986/*
1987 * This looks a bit hackish but it's about the only way of sending
1988 * a few INTA cycles to 8259As and any associated glue logic.  ICR does
1989 * not support the ExtINT mode, unfortunately.  We need to send these
1990 * cycles as some i82489DX-based boards have glue logic that keeps the
1991 * 8259A interrupt line asserted until INTA.  --macro
1992 */
1993static inline void __init unlock_ExtINT_logic(void)
1994{
1995        int apic, pin, i;
1996        struct IO_APIC_route_entry entry0, entry1;
1997        unsigned char save_control, save_freq_select;
1998
1999        pin  = find_isa_irq_pin(8, mp_INT);
2000        if (pin == -1) {
2001                WARN_ON_ONCE(1);
2002                return;
2003        }
2004        apic = find_isa_irq_apic(8, mp_INT);
2005        if (apic == -1) {
2006                WARN_ON_ONCE(1);
2007                return;
2008        }
2009
2010        entry0 = ioapic_read_entry(apic, pin);
2011        clear_IO_APIC_pin(apic, pin);
2012
2013        memset(&entry1, 0, sizeof(entry1));
2014
2015        entry1.dest_mode = IOAPIC_DEST_MODE_PHYSICAL;
2016        entry1.mask = IOAPIC_UNMASKED;
2017        entry1.dest = hard_smp_processor_id();
2018        entry1.delivery_mode = dest_ExtINT;
2019        entry1.polarity = entry0.polarity;
2020        entry1.trigger = IOAPIC_EDGE;
2021        entry1.vector = 0;
2022
2023        ioapic_write_entry(apic, pin, entry1);
2024
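        /*
         * Program the RTC for 1024 Hz periodic interrupts (rate select 6)
         * and enable them; each assertion on pin 8 is delivered as ExtINT
         * and therefore generates the desired INTA cycles to the 8259A.
         */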
2025        save_control = CMOS_READ(RTC_CONTROL);
2026        save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2027        CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
2028                   RTC_FREQ_SELECT);
2029        CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
2030
2031        i = 100;
2032        while (i-- > 0) {
2033                mdelay(10);
2034                if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
2035                        i -= 10;
2036        }
2037
2038        CMOS_WRITE(save_control, RTC_CONTROL);
2039        CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2040        clear_IO_APIC_pin(apic, pin);
2041
2042        ioapic_write_entry(apic, pin, entry0);
2043}
2044
2045static int disable_timer_pin_1 __initdata;
2046/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
2047static int __init disable_timer_pin_setup(char *arg)
2048{
2049        disable_timer_pin_1 = 1;
2050        return 0;
2051}
2052early_param("disable_timer_pin_1", disable_timer_pin_setup);
2053
2054static int mp_alloc_timer_irq(int ioapic, int pin)
2055{
2056        int irq = -1;
2057        struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
2058
2059        if (domain) {
2060                struct irq_alloc_info info;
2061
2062                ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 0, 0);
2063                info.ioapic_id = mpc_ioapic_id(ioapic);
2064                info.ioapic_pin = pin;
2065                mutex_lock(&ioapic_mutex);
2066                irq = alloc_isa_irq_from_domain(domain, 0, ioapic, pin, &info);
2067                mutex_unlock(&ioapic_mutex);
2068        }
2069
2070        return irq;
2071}
2072
2073/*
2074 * This code may look a bit paranoid, but it's supposed to cooperate with
2075 * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
2076 * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
2077 * fanatically on his truly buggy board.
2078 *
2079 * FIXME: really need to revamp this for all platforms.
2080 */
2081static inline void __init check_timer(void)
2082{
2083        struct irq_data *irq_data = irq_get_irq_data(0);
2084        struct mp_chip_data *data = irq_data->chip_data;
2085        struct irq_cfg *cfg = irqd_cfg(irq_data);
2086        int node = cpu_to_node(0);
2087        int apic1, pin1, apic2, pin2;
2088        unsigned long flags;
2089        int no_pin1 = 0;
2090
2091        local_irq_save(flags);
2092
2093        /*
2094         * get/set the timer IRQ vector:
2095         */
2096        legacy_pic->mask(0);
2097
2098        /*
2099         * As IRQ0 is to be enabled in the 8259A, the virtual
2100         * wire has to be disabled in the local APIC.  Also
2101         * timer interrupts need to be acknowledged manually in
2102         * the 8259A for the i82489DX when using the NMI
2103         * watchdog as that APIC treats NMIs as level-triggered.
2104         * The AEOI mode will finish them in the 8259A
2105         * automatically.
2106         */
2107        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2108        legacy_pic->init(1);
2109
2110        pin1  = find_isa_irq_pin(0, mp_INT);
2111        apic1 = find_isa_irq_apic(0, mp_INT);
2112        pin2  = ioapic_i8259.pin;
2113        apic2 = ioapic_i8259.apic;
2114
2115        apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
2116                    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
2117                    cfg->vector, apic1, pin1, apic2, pin2);
2118
2119        /*
2120         * Some BIOS writers are clueless and report the ExtINTA
2121         * I/O APIC input from the cascaded 8259A as the timer
2122         * interrupt input.  So just in case, if only one pin
2123         * was found above, try it both directly and through the
2124         * 8259A.
2125         */
2126        if (pin1 == -1) {
2127                panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
2128                pin1 = pin2;
2129                apic1 = apic2;
2130                no_pin1 = 1;
2131        } else if (pin2 == -1) {
2132                pin2 = pin1;
2133                apic2 = apic1;
2134        }
2135
2136        if (pin1 != -1) {
2137                /* Ok, does IRQ0 through the IOAPIC work? */
2138                if (no_pin1) {
2139                        mp_alloc_timer_irq(apic1, pin1);
2140                } else {
2141                        /*
2142                         * For edge trigger it's already unmasked, so we
2143                         * only need to unmask if it is level-triggered.
2144                         * Do we really have a level-triggered timer?
2145                         */
2146                        int idx;
2147                        idx = find_irq_entry(apic1, pin1, mp_INT);
2148                        if (idx != -1 && irq_trigger(idx))
2149                                unmask_ioapic_irq(irq_get_irq_data(0));
2150                }
2151                irq_domain_deactivate_irq(irq_data);
2152                irq_domain_activate_irq(irq_data, false);
2153                if (timer_irq_works()) {
2154                        if (disable_timer_pin_1 > 0)
2155                                clear_IO_APIC_pin(0, pin1);
2156                        goto out;
2157                }
2158                panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
2159                local_irq_disable();
2160                clear_IO_APIC_pin(apic1, pin1);
2161                if (!no_pin1)
2162                        apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
2163                                    "8254 timer not connected to IO-APIC\n");
2164
2165                apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
2166                            "(IRQ0) through the 8259A ...\n");
2167                apic_printk(APIC_QUIET, KERN_INFO
2168                            "..... (found apic %d pin %d) ...\n", apic2, pin2);
2169                /*
2170                 * legacy devices should be connected to IO APIC #0
2171                 */
2172                replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2);
2173                irq_domain_deactivate_irq(irq_data);
2174                irq_domain_activate_irq(irq_data, false);
2175                legacy_pic->unmask(0);
2176                if (timer_irq_works()) {
2177                        apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2178                        goto out;
2179                }
2180                /*
2181                 * Cleanup, just in case ...
2182                 */
2183                local_irq_disable();
2184                legacy_pic->mask(0);
2185                clear_IO_APIC_pin(apic2, pin2);
2186                apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
2187        }
2188
2189        apic_printk(APIC_QUIET, KERN_INFO
2190                    "...trying to set up timer as Virtual Wire IRQ...\n");
2191
2192        lapic_register_intr(0);
2193        apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);     /* Fixed mode */
2194        legacy_pic->unmask(0);
2195
2196        if (timer_irq_works()) {
2197                apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2198                goto out;
2199        }
2200        local_irq_disable();
2201        legacy_pic->mask(0);
2202        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
2203        apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
2204
2205        apic_printk(APIC_QUIET, KERN_INFO
2206                    "...trying to set up timer as ExtINT IRQ...\n");
2207
2208        legacy_pic->init(0);
2209        legacy_pic->make_irq(0);
2210        apic_write(APIC_LVT0, APIC_DM_EXTINT);
2211
2212        unlock_ExtINT_logic();
2213
2214        if (timer_irq_works()) {
2215                apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2216                goto out;
2217        }
2218        local_irq_disable();
2219        apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
2220        if (apic_is_x2apic_enabled())
2221                apic_printk(APIC_QUIET, KERN_INFO
2222                            "Perhaps problem with the pre-enabled x2apic mode\n"
2223                            "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
2224        panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
2225                "report.  Then try booting with the 'noapic' option.\n");
2226out:
2227        local_irq_restore(flags);
2228}
2229
2230/*
2231 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2232 * to devices.  However there may be an I/O APIC pin available for
2233 * this interrupt regardless.  The pin may be left unconnected, but
2234 * typically it will be reused as an ExtINT cascade interrupt for
2235 * the master 8259A.  In the MPS case such a pin will normally be
2236 * reported as an ExtINT interrupt in the MP table.  With ACPI
2237 * there is no provision for ExtINT interrupts, and in the absence
2238 * of an override it would be treated as an ordinary ISA I/O APIC
2239 * interrupt, that is edge-triggered and unmasked by default.  We
2240 * used to do this, but it caused problems on some systems because
2241 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
2242 * the same ExtINT cascade interrupt to drive the local APIC of the
2243 * bootstrap processor.  Therefore we refrain from routing IRQ2 to
2244 * the I/O APIC in all cases now.  No actual device should request
2245 * it anyway.  --macro
2246 */
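/*
 * PIC_CASCADE_IR is IRQ2; setup_IO_APIC() masks PIC_IRQS out of
 * io_apic_irqs below so the cascade is never handled via the IO-APIC.
 */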
2247#define PIC_IRQS        (1UL << PIC_CASCADE_IR)
2248
2249static int mp_irqdomain_create(int ioapic)
2250{
2251        struct irq_alloc_info info;
2252        struct irq_domain *parent;
2253        int hwirqs = mp_ioapic_pin_count(ioapic);
2254        struct ioapic *ip = &ioapics[ioapic];
2255        struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
2256        struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
2257        struct fwnode_handle *fn;
2258        char *name = "IO-APIC";
2259
2260        if (cfg->type == IOAPIC_DOMAIN_INVALID)
2261                return 0;
2262
2263        init_irq_alloc_info(&info, NULL);
2264        info.type = X86_IRQ_ALLOC_TYPE_IOAPIC;
2265        info.ioapic_id = mpc_ioapic_id(ioapic);
2266        parent = irq_remapping_get_ir_irq_domain(&info);
2267        if (!parent)
2268                parent = x86_vector_domain;
2269        else
2270                name = "IO-APIC-IR";
2271
2272        /* Handle device tree enumerated APICs properly */
2273        if (cfg->dev) {
2274                fn = of_node_to_fwnode(cfg->dev);
2275        } else {
2276                fn = irq_domain_alloc_named_id_fwnode(name, ioapic);
2277                if (!fn)
2278                        return -ENOMEM;
2279        }
2280
2281        ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops,
2282                                                 (void *)(long)ioapic);
2283
2284        /* Release fw handle if it was allocated above */
2285        if (!cfg->dev)
2286                irq_domain_free_fwnode(fn);
2287
2288        if (!ip->irqdomain)
2289                return -ENOMEM;
2290
2291        ip->irqdomain->parent = parent;
2292
2293        if (cfg->type == IOAPIC_DOMAIN_LEGACY ||
2294            cfg->type == IOAPIC_DOMAIN_STRICT)
2295                ioapic_dynirq_base = max(ioapic_dynirq_base,
2296                                         gsi_cfg->gsi_end + 1);
2297
2298        return 0;
2299}
2300
2301static void ioapic_destroy_irqdomain(int idx)
2302{
2303        if (ioapics[idx].irqdomain) {
2304                irq_domain_remove(ioapics[idx].irqdomain);
2305                ioapics[idx].irqdomain = NULL;
2306        }
2307}
2308
2309void __init setup_IO_APIC(void)
2310{
2311        int ioapic;
2312
2313        if (skip_ioapic_setup || !nr_ioapics)
2314                return;
2315
2316        io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL;
2317
2318        apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
2319        for_each_ioapic(ioapic)
2320                BUG_ON(mp_irqdomain_create(ioapic));
2321
2322        /*
2323         * Set up IO-APIC IRQ routing.
2324         */
2325        x86_init.mpparse.setup_ioapic_ids();
2326
2327        sync_Arb_IDs();
2328        setup_IO_APIC_irqs();
2329        init_IO_APIC_traps();
2330        if (nr_legacy_irqs())
2331                check_timer();
2332
2333        ioapic_initialized = 1;
2334}
2335
2336static void resume_ioapic_id(int ioapic_idx)
2337{
2338        unsigned long flags;
2339        union IO_APIC_reg_00 reg_00;
2340
2341        raw_spin_lock_irqsave(&ioapic_lock, flags);
2342        reg_00.raw = io_apic_read(ioapic_idx, 0);
2343        if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) {
2344                reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
2345                io_apic_write(ioapic_idx, 0, reg_00.raw);
2346        }
2347        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2348}
2349
2350static void ioapic_resume(void)
2351{
2352        int ioapic_idx;
2353
2354        for_each_ioapic_reverse(ioapic_idx)
2355                resume_ioapic_id(ioapic_idx);
2356
2357        restore_ioapic_entries();
2358}
2359
2360static struct syscore_ops ioapic_syscore_ops = {
2361        .suspend = save_ioapic_entries,
2362        .resume = ioapic_resume,
2363};
2364
2365static int __init ioapic_init_ops(void)
2366{
2367        register_syscore_ops(&ioapic_syscore_ops);
2368
2369        return 0;
2370}
2371
2372device_initcall(ioapic_init_ops);
2373
2374static int io_apic_get_redir_entries(int ioapic)
2375{
2376        union IO_APIC_reg_01    reg_01;
2377        unsigned long flags;
2378
2379        raw_spin_lock_irqsave(&ioapic_lock, flags);
2380        reg_01.raw = io_apic_read(ioapic, 1);
2381        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2382
2383        /* The register returns the maximum redirection index
2384         * supported, which is one less than the total number of
2385         * redirection entries.
2386         */
2387        return reg_01.bits.entries + 1;
2388}
2389
2390unsigned int arch_dynirq_lower_bound(unsigned int from)
2391{
2392        /*
2393         * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
2394         * gsi_top if ioapic_dynirq_base hasn't been initialized yet.
2395         */
2396        return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
2397}
2398
2399#ifdef CONFIG_X86_32
2400static int io_apic_get_unique_id(int ioapic, int apic_id)
2401{
2402        union IO_APIC_reg_00 reg_00;
2403        static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
2404        physid_mask_t tmp;
2405        unsigned long flags;
2406        int i = 0;
2407
2408        /*
2409         * The P4 platform supports up to 256 APIC IDs on two separate APIC
2410         * buses (one for LAPICs, one for IOAPICs), where predecessors only
2411         * support up to 16 on one shared APIC bus.
2412         *
2413         * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
2414         *      advantage of new APIC bus architecture.
2415         */
2416
2417        if (physids_empty(apic_id_map))
2418                apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
2419
2420        raw_spin_lock_irqsave(&ioapic_lock, flags);
2421        reg_00.raw = io_apic_read(ioapic, 0);
2422        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2423
2424        if (apic_id >= get_physical_broadcast()) {
2425                printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
2426                        "%d\n", ioapic, apic_id, reg_00.bits.ID);
2427                apic_id = reg_00.bits.ID;
2428        }
2429
2430        /*
2431         * Every APIC in a system must have a unique ID or we get lots of nice
2432         * 'stuck on smp_invalidate_needed IPI wait' messages.
2433         */
2434        if (apic->check_apicid_used(&apic_id_map, apic_id)) {
2435
2436                for (i = 0; i < get_physical_broadcast(); i++) {
2437                        if (!apic->check_apicid_used(&apic_id_map, i))
2438                                break;
2439                }
2440
2441                if (i == get_physical_broadcast())
2442                        panic("Max apic_id exceeded!\n");
2443
2444                printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
2445                        "trying %d\n", ioapic, apic_id, i);
2446
2447                apic_id = i;
2448        }
2449
2450        apic->apicid_to_cpu_present(apic_id, &tmp);
2451        physids_or(apic_id_map, apic_id_map, tmp);
2452
2453        if (reg_00.bits.ID != apic_id) {
2454                reg_00.bits.ID = apic_id;
2455
2456                raw_spin_lock_irqsave(&ioapic_lock, flags);
2457                io_apic_write(ioapic, 0, reg_00.raw);
2458                reg_00.raw = io_apic_read(ioapic, 0);
2459                raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2460
2461                /* Sanity check */
2462                if (reg_00.bits.ID != apic_id) {
2463                        pr_err("IOAPIC[%d]: Unable to change apic_id!\n",
2464                               ioapic);
2465                        return -1;
2466                }
2467        }
2468
2469        apic_printk(APIC_VERBOSE, KERN_INFO
2470                        "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
2471
2472        return apic_id;
2473}
2474
2475static u8 io_apic_unique_id(int idx, u8 id)
2476{
2477        if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
2478            !APIC_XAPIC(boot_cpu_apic_version))
2479                return io_apic_get_unique_id(idx, id);
2480        else
2481                return id;
2482}
2483#else
2484static u8 io_apic_unique_id(int idx, u8 id)
2485{
2486        union IO_APIC_reg_00 reg_00;
2487        DECLARE_BITMAP(used, 256);
2488        unsigned long flags;
2489        u8 new_id;
2490        int i;
2491
2492        bitmap_zero(used, 256);
2493        for_each_ioapic(i)
2494                __set_bit(mpc_ioapic_id(i), used);
2495
2496        /* Hand out the requested id if available */
2497        if (!test_bit(id, used))
2498                return id;
2499
2500        /*
2501         * Read the current id from the ioapic and keep it if
2502         * available.
2503         */
2504        raw_spin_lock_irqsave(&ioapic_lock, flags);
2505        reg_00.raw = io_apic_read(idx, 0);
2506        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2507        new_id = reg_00.bits.ID;
2508        if (!test_bit(new_id, used)) {
2509                apic_printk(APIC_VERBOSE, KERN_INFO
2510                        "IOAPIC[%d]: Using reg apic_id %d instead of %d\n",
2511                         idx, new_id, id);
2512                return new_id;
2513        }
2514
2515        /*
2516         * Get the next free id and write it to the ioapic.
2517         */
2518        new_id = find_first_zero_bit(used, 256);
2519        reg_00.bits.ID = new_id;
2520        raw_spin_lock_irqsave(&ioapic_lock, flags);
2521        io_apic_write(idx, 0, reg_00.raw);
2522        reg_00.raw = io_apic_read(idx, 0);
2523        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2524        /* Sanity check */
2525        BUG_ON(reg_00.bits.ID != new_id);
2526
2527        return new_id;
2528}
2529#endif
2530
2531static int io_apic_get_version(int ioapic)
2532{
2533        union IO_APIC_reg_01    reg_01;
2534        unsigned long flags;
2535
2536        raw_spin_lock_irqsave(&ioapic_lock, flags);
2537        reg_01.raw = io_apic_read(ioapic, 1);
2538        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2539
2540        return reg_01.bits.version;
2541}
2542
2543int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
2544{
2545        int ioapic, pin, idx;
2546
2547        if (skip_ioapic_setup)
2548                return -1;
2549
2550        ioapic = mp_find_ioapic(gsi);
2551        if (ioapic < 0)
2552                return -1;
2553
2554        pin = mp_find_ioapic_pin(ioapic, gsi);
2555        if (pin < 0)
2556                return -1;
2557
2558        idx = find_irq_entry(ioapic, pin, mp_INT);
2559        if (idx < 0)
2560                return -1;
2561
2562        *trigger = irq_trigger(idx);
2563        *polarity = irq_polarity(idx);
2564        return 0;
2565}
2566
2567/*
2568 * This function updates target affinity of IOAPIC interrupts to include
2569 * the CPUs which came online during SMP bringup.
2570 */
2571#define IOAPIC_RESOURCE_NAME_SIZE 11
2572
2573static struct resource *ioapic_resources;
2574
2575static struct resource * __init ioapic_setup_resources(void)
2576{
2577        unsigned long n;
2578        struct resource *res;
2579        char *mem;
2580        int i;
2581
2582        if (nr_ioapics == 0)
2583                return NULL;
2584
2585        n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
2586        n *= nr_ioapics;
2587
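        /*
         * A single bootmem allocation holds the nr_ioapics struct resource
         * entries followed by their name strings.
         */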
2588        mem = alloc_bootmem(n);
2589        res = (void *)mem;
2590
2591        mem += sizeof(struct resource) * nr_ioapics;
2592
2593        for_each_ioapic(i) {
2594                res[i].name = mem;
2595                res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
2596                snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
2597                mem += IOAPIC_RESOURCE_NAME_SIZE;
2598                ioapics[i].iomem_res = &res[i];
2599        }
2600
2601        ioapic_resources = res;
2602
2603        return res;
2604}
2605
2606void __init io_apic_init_mappings(void)
2607{
2608        unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
2609        struct resource *ioapic_res;
2610        int i;
2611
2612        ioapic_res = ioapic_setup_resources();
2613        for_each_ioapic(i) {
2614                if (smp_found_config) {
2615                        ioapic_phys = mpc_ioapic_addr(i);
2616#ifdef CONFIG_X86_32
2617                        if (!ioapic_phys) {
2618                                printk(KERN_ERR
2619                                       "WARNING: bogus zero IO-APIC "
2620                                       "address found in MPTABLE, "
2621                                       "disabling IO/APIC support!\n");
2622                                smp_found_config = 0;
2623                                skip_ioapic_setup = 1;
2624                                goto fake_ioapic_page;
2625                        }
2626#endif
2627                } else {
2628#ifdef CONFIG_X86_32
2629fake_ioapic_page:
2630#endif
2631                        ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
2632                        ioapic_phys = __pa(ioapic_phys);
2633                }
2634                set_fixmap_nocache(idx, ioapic_phys);
2635                apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
2636                        __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
2637                        ioapic_phys);
2638                idx++;
2639
2640                ioapic_res->start = ioapic_phys;
2641                ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
2642                ioapic_res++;
2643        }
2644}
2645
2646void __init ioapic_insert_resources(void)
2647{
2648        int i;
2649        struct resource *r = ioapic_resources;
2650
2651        if (!r) {
2652                if (nr_ioapics > 0)
2653                        printk(KERN_ERR
2654                                "IO APIC resources couldn't be allocated.\n");
2655                return;
2656        }
2657
2658        for_each_ioapic(i) {
2659                insert_resource(&iomem_resource, r);
2660                r++;
2661        }
2662}
2663
2664int mp_find_ioapic(u32 gsi)
2665{
2666        int i;
2667
2668        if (nr_ioapics == 0)
2669                return -1;
2670
2671        /* Find the IOAPIC that manages this GSI. */
2672        for_each_ioapic(i) {
2673                struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i);
2674                if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end)
2675                        return i;
2676        }
2677
2678        printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
2679        return -1;
2680}
2681
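/*
 * Translate a GSI into a pin index on @ioapic: GSIs map linearly onto
 * pins starting at the IO-APIC's gsi_base.
 */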
2682int mp_find_ioapic_pin(int ioapic, u32 gsi)
2683{
2684        struct mp_ioapic_gsi *gsi_cfg;
2685
2686        if (WARN_ON(ioapic < 0))
2687                return -1;
2688
2689        gsi_cfg = mp_ioapic_gsi_routing(ioapic);
2690        if (WARN_ON(gsi > gsi_cfg->gsi_end))
2691                return -1;
2692
2693        return gsi - gsi_cfg->gsi_base;
2694}
2695
2696static int bad_ioapic_register(int idx)
2697{
2698        union IO_APIC_reg_00 reg_00;
2699        union IO_APIC_reg_01 reg_01;
2700        union IO_APIC_reg_02 reg_02;
2701
2702        reg_00.raw = io_apic_read(idx, 0);
2703        reg_01.raw = io_apic_read(idx, 1);
2704        reg_02.raw = io_apic_read(idx, 2);
2705
2706        if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) {
2707                pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n",
2708                        mpc_ioapic_addr(idx));
2709                return 1;
2710        }
2711
2712        return 0;
2713}
2714
2715static int find_free_ioapic_entry(void)
2716{
2717        int idx;
2718
2719        for (idx = 0; idx < MAX_IO_APICS; idx++)
2720                if (ioapics[idx].nr_registers == 0)
2721                        return idx;
2722
2723        return MAX_IO_APICS;
2724}
2725
2726/**
2727 * mp_register_ioapic - Register an IOAPIC device
2728 * @id:         hardware IOAPIC ID
2729 * @address:    physical address of IOAPIC register area
2730 * @gsi_base:   base of GSI associated with the IOAPIC
2731 * @cfg:        configuration information for the IOAPIC
2732 */
2733int mp_register_ioapic(int id, u32 address, u32 gsi_base,
2734                       struct ioapic_domain_cfg *cfg)
2735{
2736        bool hotplug = !!ioapic_initialized;
2737        struct mp_ioapic_gsi *gsi_cfg;
2738        int idx, ioapic, entries;
2739        u32 gsi_end;
2740
2741        if (!address) {
2742                pr_warn("Bogus (zero) I/O APIC address found, skipping!\n");
2743                return -EINVAL;
2744        }
2745        for_each_ioapic(ioapic)
2746                if (ioapics[ioapic].mp_config.apicaddr == address) {
2747                        pr_warn("address 0x%x conflicts with IOAPIC%d\n",
2748                                address, ioapic);
2749                        return -EEXIST;
2750                }
2751
2752        idx = find_free_ioapic_entry();
2753        if (idx >= MAX_IO_APICS) {
2754                pr_warn("Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
2755                        MAX_IO_APICS, idx);
2756                return -ENOSPC;
2757        }
2758
2759        ioapics[idx].mp_config.type = MP_IOAPIC;
2760        ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
2761        ioapics[idx].mp_config.apicaddr = address;
2762
2763        set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
2764        if (bad_ioapic_register(idx)) {
2765                clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
2766                return -ENODEV;
2767        }
2768
2769        ioapics[idx].mp_config.apicid = io_apic_unique_id(idx, id);
2770        ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
2771
2772        /*
2773         * Build basic GSI lookup table to facilitate gsi->io_apic lookups
2774         * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
2775         */
2776        entries = io_apic_get_redir_entries(idx);
2777        gsi_end = gsi_base + entries - 1;
2778        for_each_ioapic(ioapic) {
2779                gsi_cfg = mp_ioapic_gsi_routing(ioapic);
2780                if ((gsi_base >= gsi_cfg->gsi_base &&
2781                     gsi_base <= gsi_cfg->gsi_end) ||
2782                    (gsi_end >= gsi_cfg->gsi_base &&
2783                     gsi_end <= gsi_cfg->gsi_end)) {
2784                        pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n",
2785                                gsi_base, gsi_end,
2786                                gsi_cfg->gsi_base, gsi_cfg->gsi_end);
2787                        clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
2788                        return -ENOSPC;
2789                }
2790        }
2791        gsi_cfg = mp_ioapic_gsi_routing(idx);
2792        gsi_cfg->gsi_base = gsi_base;
2793        gsi_cfg->gsi_end = gsi_end;
2794
2795        ioapics[idx].irqdomain = NULL;
2796        ioapics[idx].irqdomain_cfg = *cfg;
2797
2798        /*
2799         * If mp_register_ioapic() is called during the early boot stage while
2800         * walking ACPI/SFI/DT tables, it's too early to create the irqdomain
2801         * as we are still using the bootmem allocator. So delay it to setup_IO_APIC().
2802         */
2803        if (hotplug) {
2804                if (mp_irqdomain_create(idx)) {
2805                        clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
2806                        return -ENOMEM;
2807                }
2808                alloc_ioapic_saved_registers(idx);
2809        }
2810
2811        if (gsi_cfg->gsi_end >= gsi_top)
2812                gsi_top = gsi_cfg->gsi_end + 1;
2813        if (nr_ioapics <= idx)
2814                nr_ioapics = idx + 1;
2815
2816        /* Set nr_registers to mark entry present */
2817        ioapics[idx].nr_registers = entries;
2818
2819        pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
2820                idx, mpc_ioapic_id(idx),
2821                mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
2822                gsi_cfg->gsi_base, gsi_cfg->gsi_end);
2823
2824        return 0;
2825}
2826
2827int mp_unregister_ioapic(u32 gsi_base)
2828{
2829        int ioapic, pin;
2830        int found = 0;
2831
2832        for_each_ioapic(ioapic)
2833                if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) {
2834                        found = 1;
2835                        break;
2836                }
2837        if (!found) {
2838                pr_warn("can't find IOAPIC for GSI %d\n", gsi_base);
2839                return -ENODEV;
2840        }
2841
2842        for_each_pin(ioapic, pin) {
2843                u32 gsi = mp_pin_to_gsi(ioapic, pin);
2844                int irq = mp_map_gsi_to_irq(gsi, 0, NULL);
2845                struct mp_chip_data *data;
2846
2847                if (irq >= 0) {
2848                        data = irq_get_chip_data(irq);
2849                        if (data && data->count) {
2850                                pr_warn("pin%d on IOAPIC%d is still in use.\n",
2851                                        pin, ioapic);
2852                                return -EBUSY;
2853                        }
2854                }
2855        }
2856
2857        /* Mark entry not present */
2858        ioapics[ioapic].nr_registers  = 0;
2859        ioapic_destroy_irqdomain(ioapic);
2860        free_ioapic_saved_registers(ioapic);
2861        if (ioapics[ioapic].iomem_res)
2862                release_resource(ioapics[ioapic].iomem_res);
2863        clear_fixmap(FIX_IO_APIC_BASE_0 + ioapic);
2864        memset(&ioapics[ioapic], 0, sizeof(ioapics[ioapic]));
2865
2866        return 0;
2867}
2868
2869int mp_ioapic_registered(u32 gsi_base)
2870{
2871        int ioapic;
2872
2873        for_each_ioapic(ioapic)
2874                if (ioapics[ioapic].gsi_config.gsi_base == gsi_base)
2875                        return 1;
2876
2877        return 0;
2878}
2879
2880static void mp_irqdomain_get_attr(u32 gsi, struct mp_chip_data *data,
2881                                  struct irq_alloc_info *info)
2882{
2883        if (info && info->ioapic_valid) {
2884                data->trigger = info->ioapic_trigger;
2885                data->polarity = info->ioapic_polarity;
2886        } else if (acpi_get_override_irq(gsi, &data->trigger,
2887                                         &data->polarity) < 0) {
2888                /* PCI interrupts are always active low level triggered. */
2889                data->trigger = IOAPIC_LEVEL;
2890                data->polarity = IOAPIC_POL_LOW;
2891        }
2892}
2893
2894static void mp_setup_entry(struct irq_cfg *cfg, struct mp_chip_data *data,
2895                           struct IO_APIC_route_entry *entry)
2896{
2897        memset(entry, 0, sizeof(*entry));
2898        entry->delivery_mode = apic->irq_delivery_mode;
2899        entry->dest_mode     = apic->irq_dest_mode;
2900        entry->dest          = cfg->dest_apicid;
2901        entry->vector        = cfg->vector;
2902        entry->trigger       = data->trigger;
2903        entry->polarity      = data->polarity;
2904        /*
2905         * Mask level triggered irqs. Edge triggered irqs are masked
2906         * by the irq core code in case they fire.
2907         */
2908        if (data->trigger == IOAPIC_LEVEL)
2909                entry->mask = IOAPIC_MASKED;
2910        else
2911                entry->mask = IOAPIC_UNMASKED;
2912}
2913
2914int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
2915                       unsigned int nr_irqs, void *arg)
2916{
2917        int ret, ioapic, pin;
2918        struct irq_cfg *cfg;
2919        struct irq_data *irq_data;
2920        struct mp_chip_data *data;
2921        struct irq_alloc_info *info = arg;
2922        unsigned long flags;
2923
2924        if (!info || nr_irqs > 1)
2925                return -EINVAL;
2926        irq_data = irq_domain_get_irq_data(domain, virq);
2927        if (!irq_data)
2928                return -EINVAL;
2929
2930        ioapic = mp_irqdomain_ioapic_idx(domain);
2931        pin = info->ioapic_pin;
2932        if (irq_find_mapping(domain, (irq_hw_number_t)pin) > 0)
2933                return -EEXIST;
2934
2935        data = kzalloc(sizeof(*data), GFP_KERNEL);
2936        if (!data)
2937                return -ENOMEM;
2938
2939        info->ioapic_entry = &data->entry;
2940        ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
2941        if (ret < 0) {
2942                kfree(data);
2943                return ret;
2944        }
2945
2946        INIT_LIST_HEAD(&data->irq_2_pin);
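        /* Within an IO-APIC irqdomain the hardware irq number is the pin index. */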
2947        irq_data->hwirq = info->ioapic_pin;
2948        irq_data->chip = (domain->parent == x86_vector_domain) ?
2949                          &ioapic_chip : &ioapic_ir_chip;
2950        irq_data->chip_data = data;
2951        mp_irqdomain_get_attr(mp_pin_to_gsi(ioapic, pin), data, info);
2952
2953        cfg = irqd_cfg(irq_data);
2954        add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);
2955
2956        local_irq_save(flags);
2957        if (info->ioapic_entry)
2958                mp_setup_entry(cfg, data, info->ioapic_entry);
2959        mp_register_handler(virq, data->trigger);
2960        if (virq < nr_legacy_irqs())
2961                legacy_pic->mask(virq);
2962        local_irq_restore(flags);
2963
2964        apic_printk(APIC_VERBOSE, KERN_DEBUG
2965                    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n",
2966                    ioapic, mpc_ioapic_id(ioapic), pin, cfg->vector,
2967                    virq, data->trigger, data->polarity, cfg->dest_apicid);
2968
2969        return 0;
2970}
2971
2972void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
2973                       unsigned int nr_irqs)
2974{
2975        struct irq_data *irq_data;
2976        struct mp_chip_data *data;
2977
2978        BUG_ON(nr_irqs != 1);
2979        irq_data = irq_domain_get_irq_data(domain, virq);
2980        if (irq_data && irq_data->chip_data) {
2981                data = irq_data->chip_data;
2982                __remove_pin_from_irq(data, mp_irqdomain_ioapic_idx(domain),
2983                                      (int)irq_data->hwirq);
2984                WARN_ON(!list_empty(&data->irq_2_pin));
2985                kfree(irq_data->chip_data);
2986        }
2987        irq_domain_free_irqs_top(domain, virq, nr_irqs);
2988}
2989
2990int mp_irqdomain_activate(struct irq_domain *domain,
2991                          struct irq_data *irq_data, bool reserve)
2992{
2993        unsigned long flags;
2994
2995        raw_spin_lock_irqsave(&ioapic_lock, flags);
2996        ioapic_configure_entry(irq_data);
2997        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2998        return 0;
2999}
3000
3001void mp_irqdomain_deactivate(struct irq_domain *domain,
3002                             struct irq_data *irq_data)
3003{
3004        /* This won't be called for an IRQ with multiple IOAPIC pins associated */
3005        ioapic_mask_entry(mp_irqdomain_ioapic_idx(domain),
3006                          (int)irq_data->hwirq);
3007}
3008
3009int mp_irqdomain_ioapic_idx(struct irq_domain *domain)
3010{
3011        return (int)(long)domain->host_data;
3012}
3013
3014const struct irq_domain_ops mp_ioapic_irqdomain_ops = {
3015        .alloc          = mp_irqdomain_alloc,
3016        .free           = mp_irqdomain_free,
3017        .activate       = mp_irqdomain_activate,
3018        .deactivate     = mp_irqdomain_deactivate,
3019};
3020