qemu/hw/intc/ioapic.c
<<
>>
Prefs
   1/*
   2 *  ioapic.c IOAPIC emulation logic
   3 *
   4 *  Copyright (c) 2004-2005 Fabrice Bellard
   5 *
   6 *  Split the ioapic logic from apic.c
   7 *  Xiantao Zhang <xiantao.zhang@intel.com>
   8 *
   9 * This library is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU Lesser General Public
  11 * License as published by the Free Software Foundation; either
  12 * version 2 of the License, or (at your option) any later version.
  13 *
  14 * This library is distributed in the hope that it will be useful,
  15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * Lesser General Public License for more details.
  18 *
  19 * You should have received a copy of the GNU Lesser General Public
  20 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21 */
  22
  23#include "qemu/osdep.h"
  24#include "qemu/error-report.h"
  25#include "monitor/monitor.h"
  26#include "hw/hw.h"
  27#include "hw/i386/pc.h"
  28#include "hw/i386/apic.h"
  29#include "hw/i386/ioapic.h"
  30#include "hw/i386/ioapic_internal.h"
  31#include "include/hw/pci/msi.h"
  32#include "sysemu/kvm.h"
  33#include "target/i386/cpu.h"
  34#include "hw/i386/apic-msidef.h"
  35#include "hw/i386/x86-iommu.h"
  36#include "trace.h"
  37
  38//#define DEBUG_IOAPIC
  39
  40#ifdef DEBUG_IOAPIC
  41#define DPRINTF(fmt, ...)                                       \
  42    do { printf("ioapic: " fmt , ## __VA_ARGS__); } while (0)
  43#else
  44#define DPRINTF(fmt, ...)
  45#endif
  46
  47#define APIC_DELIVERY_MODE_SHIFT 8 /* NOTE(review): these three APIC_*_SHIFT macros are not referenced in the visible part of this file; presumably bit offsets of the named fields - confirm before relying on them */
  48#define APIC_POLARITY_SHIFT 14
  49#define APIC_TRIG_MODE_SHIFT 15
  50/* IOAPIC devices known to this file; ioapic_realize() stores each device at index ioapic_no, unused slots stay NULL. */
  51static IOAPICCommonState *ioapics[MAX_IOAPICS];
  52
  53/* Defined in ioapic_common.c; read here as the slot index into ioapics[] when a device is realized. */
  54extern int ioapic_no;
  55
  56struct ioapic_entry_info {
  57    /* fields parsed from IOAPIC entries */
  58    uint8_t masked;
  59    uint8_t trig_mode;
  60    uint16_t dest_idx;
  61    uint8_t dest_mode;
  62    uint8_t delivery_mode;
  63    uint8_t vector;
  64
  65    /* MSI message generated from above parsed fields */
  66    uint32_t addr;
  67    uint32_t data;
  68};
  69
  70static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info)
  71{
  72    memset(info, 0, sizeof(*info));
  73    info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1;
  74    info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
  75    /*
  76     * By default, this would be dest_id[8] + reserved[8]. When IR
  77     * is enabled, this would be interrupt_index[15] +
  78     * interrupt_format[1]. This field never means anything, but
  79     * only used to generate corresponding MSI.
  80     */
  81    info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff;
  82    info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
  83    info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \
  84        & IOAPIC_DM_MASK;
  85    if (info->delivery_mode == IOAPIC_DM_EXTINT) {
  86        info->vector = pic_read_irq(isa_pic);
  87    } else {
  88        info->vector = entry & IOAPIC_VECTOR_MASK;
  89    }
  90
  91    info->addr = APIC_DEFAULT_ADDRESS | \
  92        (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \
  93        (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT);
  94    info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \
  95        (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \
  96        (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT);
  97}
  98
  99static void ioapic_service(IOAPICCommonState *s)
 100{
 101    AddressSpace *ioapic_as = PC_MACHINE(qdev_get_machine())->ioapic_as;
 102    struct ioapic_entry_info info;
 103    uint8_t i;
 104    uint32_t mask;
 105    uint64_t entry;
 106
 107    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
 108        mask = 1 << i;
 109        if (s->irr & mask) {
 110            int coalesce = 0;
 111
 112            entry = s->ioredtbl[i];
 113            ioapic_entry_parse(entry, &info);
 114            if (!info.masked) {
 115                if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
 116                    s->irr &= ~mask;
 117                } else {
 118                    coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR;
 119                    trace_ioapic_set_remote_irr(i);
 120                    s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR;
 121                }
 122
 123                if (coalesce) {
 124                    /* We are level triggered interrupts, and the
 125                     * guest should be still working on previous one,
 126                     * so skip it. */
 127                    continue;
 128                }
 129
 130#ifdef CONFIG_KVM
 131                if (kvm_irqchip_is_split()) {
 132                    if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
 133                        kvm_set_irq(kvm_state, i, 1);
 134                        kvm_set_irq(kvm_state, i, 0);
 135                    } else {
 136                        kvm_set_irq(kvm_state, i, 1);
 137                    }
 138                    continue;
 139                }
 140#endif
 141
 142                /* No matter whether IR is enabled, we translate
 143                 * the IOAPIC message into a MSI one, and its
 144                 * address space will decide whether we need a
 145                 * translation. */
 146                stl_le_phys(ioapic_as, info.addr, info.data);
 147            }
 148        }
 149    }
 150}
 151
 152static void ioapic_set_irq(void *opaque, int vector, int level)
 153{
 154    IOAPICCommonState *s = opaque;
 155
 156    /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps
 157     * to GSI 2.  GSI maps to ioapic 1-1.  This is not
 158     * the cleanest way of doing it but it should work. */
 159
 160    DPRINTF("%s: %s vec %x\n", __func__, level ? "raise" : "lower", vector);
 161    if (vector == 0) {
 162        vector = 2;
 163    }
 164    if (vector >= 0 && vector < IOAPIC_NUM_PINS) {
 165        uint32_t mask = 1 << vector;
 166        uint64_t entry = s->ioredtbl[vector];
 167
 168        if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) ==
 169            IOAPIC_TRIGGER_LEVEL) {
 170            /* level triggered */
 171            if (level) {
 172                s->irr |= mask;
 173                if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
 174                    ioapic_service(s);
 175                }
 176            } else {
 177                s->irr &= ~mask;
 178            }
 179        } else {
 180            /* According to the 82093AA manual, we must ignore edge requests
 181             * if the input pin is masked. */
 182            if (level && !(entry & IOAPIC_LVT_MASKED)) {
 183                s->irr |= mask;
 184                ioapic_service(s);
 185            }
 186        }
 187    }
 188}
 189
 190static void ioapic_update_kvm_routes(IOAPICCommonState *s)
 191{
 192#ifdef CONFIG_KVM
 193    int i;
 194
 195    if (kvm_irqchip_is_split()) {
 196        for (i = 0; i < IOAPIC_NUM_PINS; i++) {
 197            MSIMessage msg;
 198            struct ioapic_entry_info info;
 199            ioapic_entry_parse(s->ioredtbl[i], &info);
 200            msg.address = info.addr;
 201            msg.data = info.data;
 202            kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL);
 203        }
 204        kvm_irqchip_commit_routes(kvm_state);
 205    }
 206#endif
 207}
 208
 209#ifdef CONFIG_KVM
 210static void ioapic_iec_notifier(void *private, bool global,
 211                                uint32_t index, uint32_t mask)
 212{
 213    IOAPICCommonState *s = (IOAPICCommonState *)private;
 214    /* For simplicity, we just update all the routes */
 215    ioapic_update_kvm_routes(s);
 216}
 217#endif
 218
 219void ioapic_eoi_broadcast(int vector)
 220{
 221    IOAPICCommonState *s;
 222    uint64_t entry;
 223    int i, n;
 224
 225    trace_ioapic_eoi_broadcast(vector);
 226
 227    for (i = 0; i < MAX_IOAPICS; i++) {
 228        s = ioapics[i];
 229        if (!s) {
 230            continue;
 231        }
 232        for (n = 0; n < IOAPIC_NUM_PINS; n++) {
 233            entry = s->ioredtbl[n];
 234            if ((entry & IOAPIC_LVT_REMOTE_IRR)
 235                && (entry & IOAPIC_VECTOR_MASK) == vector) {
 236                trace_ioapic_clear_remote_irr(n, vector);
 237                s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
 238                if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
 239                    ioapic_service(s);
 240                }
 241            }
 242        }
 243    }
 244}
 245
 246void ioapic_dump_state(Monitor *mon, const QDict *qdict)
 247{
 248    int i;
 249
 250    for (i = 0; i < MAX_IOAPICS; i++) {
 251        if (ioapics[i] != 0) {
 252            ioapic_print_redtbl(mon, ioapics[i]);
 253        }
 254    }
 255}
 256
 257static uint64_t
 258ioapic_mem_read(void *opaque, hwaddr addr, unsigned int size)
 259{
 260    IOAPICCommonState *s = opaque;
 261    int index;
 262    uint32_t val = 0;
 263
 264    addr &= 0xff;
 265
 266    switch (addr) {
 267    case IOAPIC_IOREGSEL:
 268        val = s->ioregsel;
 269        break;
 270    case IOAPIC_IOWIN:
 271        if (size != 4) {
 272            break;
 273        }
 274        switch (s->ioregsel) {
 275        case IOAPIC_REG_ID:
 276        case IOAPIC_REG_ARB:
 277            val = s->id << IOAPIC_ID_SHIFT;
 278            break;
 279        case IOAPIC_REG_VER:
 280            val = s->version |
 281                ((IOAPIC_NUM_PINS - 1) << IOAPIC_VER_ENTRIES_SHIFT);
 282            break;
 283        default:
 284            index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
 285            if (index >= 0 && index < IOAPIC_NUM_PINS) {
 286                if (s->ioregsel & 1) {
 287                    val = s->ioredtbl[index] >> 32;
 288                } else {
 289                    val = s->ioredtbl[index] & 0xffffffff;
 290                }
 291            }
 292        }
 293        DPRINTF("read: %08x = %08x\n", s->ioregsel, val);
 294        break;
 295    }
 296
 297    trace_ioapic_mem_read(addr, size, val);
 298
 299    return val;
 300}
 301
 302/*
 303 * This is to satisfy the hack in Linux kernel. One hack of it is to
 304 * simulate clearing the Remote IRR bit of IOAPIC entry using the
 305 * following:
 306 *
 307 * "For IO-APIC's with EOI register, we use that to do an explicit EOI.
 308 * Otherwise, we simulate the EOI message manually by changing the trigger
 309 * mode to edge and then back to level, with RTE being masked during
 310 * this."
 311 *
 312 * (See linux kernel __eoi_ioapic_pin() comment in commit c0205701)
 313 *
 314 * This is based on the assumption that, Remote IRR bit will be
 315 * cleared by IOAPIC hardware when configured as edge-triggered
 316 * interrupts.
 317 *
 318 * Without this, level-triggered interrupts in IR mode might fail to
 319 * work correctly.
 320 */
 321static inline void
 322ioapic_fix_edge_remote_irr(uint64_t *entry)
 323{
 324    if (!(*entry & IOAPIC_LVT_TRIGGER_MODE)) {
 325        /* Edge-triggered interrupts, make sure remote IRR is zero */
 326        *entry &= ~((uint64_t)IOAPIC_LVT_REMOTE_IRR);
 327    }
 328}
 329
 330static void
 331ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
 332                 unsigned int size)
 333{
 334    IOAPICCommonState *s = opaque;
 335    int index;
 336
 337    addr &= 0xff;
 338    trace_ioapic_mem_write(addr, size, val);
 339
 340    switch (addr) {
 341    case IOAPIC_IOREGSEL:
 342        s->ioregsel = val;
 343        break;
 344    case IOAPIC_IOWIN:
 345        if (size != 4) {
 346            break;
 347        }
 348        DPRINTF("write: %08x = %08" PRIx64 "\n", s->ioregsel, val);
 349        switch (s->ioregsel) {
 350        case IOAPIC_REG_ID:
 351            s->id = (val >> IOAPIC_ID_SHIFT) & IOAPIC_ID_MASK;
 352            break;
 353        case IOAPIC_REG_VER:
 354        case IOAPIC_REG_ARB:
 355            break;
 356        default:
 357            index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
 358            if (index >= 0 && index < IOAPIC_NUM_PINS) {
 359                uint64_t ro_bits = s->ioredtbl[index] & IOAPIC_RO_BITS;
 360                if (s->ioregsel & 1) {
 361                    s->ioredtbl[index] &= 0xffffffff;
 362                    s->ioredtbl[index] |= (uint64_t)val << 32;
 363                } else {
 364                    s->ioredtbl[index] &= ~0xffffffffULL;
 365                    s->ioredtbl[index] |= val;
 366                }
 367                /* restore RO bits */
 368                s->ioredtbl[index] &= IOAPIC_RW_BITS;
 369                s->ioredtbl[index] |= ro_bits;
 370                ioapic_fix_edge_remote_irr(&s->ioredtbl[index]);
 371                ioapic_service(s);
 372            }
 373        }
 374        break;
 375    case IOAPIC_EOI:
 376        /* Explicit EOI is only supported for IOAPIC version 0x20 */
 377        if (size != 4 || s->version != 0x20) {
 378            break;
 379        }
 380        ioapic_eoi_broadcast(val);
 381        break;
 382    }
 383
 384    ioapic_update_kvm_routes(s);
 385}
 386
 387static const MemoryRegionOps ioapic_io_ops = {
 388    .read = ioapic_mem_read,
 389    .write = ioapic_mem_write,
 390    .endianness = DEVICE_NATIVE_ENDIAN,
 391};
 392
 393static void ioapic_machine_done_notify(Notifier *notifier, void *data)
 394{
 395#ifdef CONFIG_KVM
 396    IOAPICCommonState *s = container_of(notifier, IOAPICCommonState,
 397                                        machine_done);
 398
 399    if (kvm_irqchip_is_split()) {
 400        X86IOMMUState *iommu = x86_iommu_get_default();
 401        if (iommu) {
 402            /* Register this IOAPIC with IOMMU IEC notifier, so that
 403             * when there are IR invalidates, we can be notified to
 404             * update kernel IR cache. */
 405            x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s);
 406        }
 407    }
 408#endif
 409}
 410
 411#define IOAPIC_VER_DEF 0x20
 412
 413static void ioapic_realize(DeviceState *dev, Error **errp)
 414{
 415    IOAPICCommonState *s = IOAPIC_COMMON(dev);
 416
 417    if (s->version != 0x11 && s->version != 0x20) {
 418        error_report("IOAPIC only supports version 0x11 or 0x20 "
 419                     "(default: 0x%x).", IOAPIC_VER_DEF);
 420        exit(1);
 421    }
 422
 423    memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
 424                          "ioapic", 0x1000);
 425
 426    qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);
 427
 428    ioapics[ioapic_no] = s;
 429    s->machine_done.notify = ioapic_machine_done_notify;
 430    qemu_add_machine_init_done_notifier(&s->machine_done);
 431}
 432
 433static Property ioapic_properties[] = {
 434    DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF),
 435    DEFINE_PROP_END_OF_LIST(),
 436};
 437
 438static void ioapic_class_init(ObjectClass *klass, void *data)
 439{
 440    IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass);
 441    DeviceClass *dc = DEVICE_CLASS(klass);
 442
 443    k->realize = ioapic_realize;
 444    /*
 445     * If APIC is in kernel, we need to update the kernel cache after
 446     * migration, otherwise first 24 gsi routes will be invalid.
 447     */
 448    k->post_load = ioapic_update_kvm_routes;
 449    dc->reset = ioapic_reset_common;
 450    dc->props = ioapic_properties;
 451}
 452
 453static const TypeInfo ioapic_info = {
 454    .name          = "ioapic",
 455    .parent        = TYPE_IOAPIC_COMMON,
 456    .instance_size = sizeof(IOAPICCommonState),
 457    .class_init    = ioapic_class_init,
 458};
 459
 460static void ioapic_register_types(void)
 461{
 462    type_register_static(&ioapic_info);
 463}
 464
 465type_init(ioapic_register_types)
 466