qemu/hw/intc/ioapic.c
<<
>>
Prefs
   1/*
   2 *  ioapic.c IOAPIC emulation logic
   3 *
   4 *  Copyright (c) 2004-2005 Fabrice Bellard
   5 *
   6 *  Split the ioapic logic from apic.c
   7 *  Xiantao Zhang <xiantao.zhang@intel.com>
   8 *
   9 * This library is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU Lesser General Public
  11 * License as published by the Free Software Foundation; either
  12 * version 2 of the License, or (at your option) any later version.
  13 *
  14 * This library is distributed in the hope that it will be useful,
  15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * Lesser General Public License for more details.
  18 *
  19 * You should have received a copy of the GNU Lesser General Public
  20 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  21 */
  22
  23#include "qemu/osdep.h"
  24#include "qapi/error.h"
  25#include "monitor/monitor.h"
  26#include "hw/hw.h"
  27#include "hw/i386/pc.h"
  28#include "hw/i386/apic.h"
  29#include "hw/i386/ioapic.h"
  30#include "hw/i386/ioapic_internal.h"
  31#include "hw/pci/msi.h"
  32#include "sysemu/kvm.h"
  33#include "hw/i386/apic-msidef.h"
  34#include "hw/i386/x86-iommu.h"
  35#include "trace.h"
  36
  37#define APIC_DELIVERY_MODE_SHIFT 8
  38#define APIC_POLARITY_SHIFT 14
  39#define APIC_TRIG_MODE_SHIFT 15
  40
  41static IOAPICCommonState *ioapics[MAX_IOAPICS];
  42
  43/* global variable from ioapic_common.c */
  44extern int ioapic_no;
  45
  46struct ioapic_entry_info {
  47    /* fields parsed from IOAPIC entries */
  48    uint8_t masked;
  49    uint8_t trig_mode;
  50    uint16_t dest_idx;
  51    uint8_t dest_mode;
  52    uint8_t delivery_mode;
  53    uint8_t vector;
  54
  55    /* MSI message generated from above parsed fields */
  56    uint32_t addr;
  57    uint32_t data;
  58};
  59
  60static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info)
  61{
  62    memset(info, 0, sizeof(*info));
  63    info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1;
  64    info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
  65    /*
  66     * By default, this would be dest_id[8] + reserved[8]. When IR
  67     * is enabled, this would be interrupt_index[15] +
  68     * interrupt_format[1]. This field never means anything, but
  69     * only used to generate corresponding MSI.
  70     */
  71    info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff;
  72    info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
  73    info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \
  74        & IOAPIC_DM_MASK;
  75    if (info->delivery_mode == IOAPIC_DM_EXTINT) {
  76        info->vector = pic_read_irq(isa_pic);
  77    } else {
  78        info->vector = entry & IOAPIC_VECTOR_MASK;
  79    }
  80
  81    info->addr = APIC_DEFAULT_ADDRESS | \
  82        (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \
  83        (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT);
  84    info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \
  85        (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \
  86        (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT);
  87}
  88
  89static void ioapic_service(IOAPICCommonState *s)
  90{
  91    AddressSpace *ioapic_as = PC_MACHINE(qdev_get_machine())->ioapic_as;
  92    struct ioapic_entry_info info;
  93    uint8_t i;
  94    uint32_t mask;
  95    uint64_t entry;
  96
  97    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
  98        mask = 1 << i;
  99        if (s->irr & mask) {
 100            int coalesce = 0;
 101
 102            entry = s->ioredtbl[i];
 103            ioapic_entry_parse(entry, &info);
 104            if (!info.masked) {
 105                if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
 106                    s->irr &= ~mask;
 107                } else {
 108                    coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR;
 109                    trace_ioapic_set_remote_irr(i);
 110                    s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR;
 111                }
 112
 113                if (coalesce) {
 114                    /* We are level triggered interrupts, and the
 115                     * guest should be still working on previous one,
 116                     * so skip it. */
 117                    continue;
 118                }
 119
 120#ifdef CONFIG_KVM
 121                if (kvm_irqchip_is_split()) {
 122                    if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
 123                        kvm_set_irq(kvm_state, i, 1);
 124                        kvm_set_irq(kvm_state, i, 0);
 125                    } else {
 126                        kvm_set_irq(kvm_state, i, 1);
 127                    }
 128                    continue;
 129                }
 130#endif
 131
 132                /* No matter whether IR is enabled, we translate
 133                 * the IOAPIC message into a MSI one, and its
 134                 * address space will decide whether we need a
 135                 * translation. */
 136                stl_le_phys(ioapic_as, info.addr, info.data);
 137            }
 138        }
 139    }
 140}
 141
 142#define SUCCESSIVE_IRQ_MAX_COUNT 10000
 143
 144static void delayed_ioapic_service_cb(void *opaque)
 145{
 146    IOAPICCommonState *s = opaque;
 147
 148    ioapic_service(s);
 149}
 150
 151static void ioapic_set_irq(void *opaque, int vector, int level)
 152{
 153    IOAPICCommonState *s = opaque;
 154
 155    /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps
 156     * to GSI 2.  GSI maps to ioapic 1-1.  This is not
 157     * the cleanest way of doing it but it should work. */
 158
 159    trace_ioapic_set_irq(vector, level);
 160    ioapic_stat_update_irq(s, vector, level);
 161    if (vector == 0) {
 162        vector = 2;
 163    }
 164    if (vector < IOAPIC_NUM_PINS) {
 165        uint32_t mask = 1 << vector;
 166        uint64_t entry = s->ioredtbl[vector];
 167
 168        if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) ==
 169            IOAPIC_TRIGGER_LEVEL) {
 170            /* level triggered */
 171            if (level) {
 172                s->irr |= mask;
 173                if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
 174                    ioapic_service(s);
 175                }
 176            } else {
 177                s->irr &= ~mask;
 178            }
 179        } else {
 180            /* According to the 82093AA manual, we must ignore edge requests
 181             * if the input pin is masked. */
 182            if (level && !(entry & IOAPIC_LVT_MASKED)) {
 183                s->irr |= mask;
 184                ioapic_service(s);
 185            }
 186        }
 187    }
 188}
 189
 190static void ioapic_update_kvm_routes(IOAPICCommonState *s)
 191{
 192#ifdef CONFIG_KVM
 193    int i;
 194
 195    if (kvm_irqchip_is_split()) {
 196        for (i = 0; i < IOAPIC_NUM_PINS; i++) {
 197            MSIMessage msg;
 198            struct ioapic_entry_info info;
 199            ioapic_entry_parse(s->ioredtbl[i], &info);
 200            if (!info.masked) {
 201                msg.address = info.addr;
 202                msg.data = info.data;
 203                kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL);
 204            }
 205        }
 206        kvm_irqchip_commit_routes(kvm_state);
 207    }
 208#endif
 209}
 210
 211#ifdef CONFIG_KVM
 212static void ioapic_iec_notifier(void *private, bool global,
 213                                uint32_t index, uint32_t mask)
 214{
 215    IOAPICCommonState *s = (IOAPICCommonState *)private;
 216    /* For simplicity, we just update all the routes */
 217    ioapic_update_kvm_routes(s);
 218}
 219#endif
 220
 221void ioapic_eoi_broadcast(int vector)
 222{
 223    IOAPICCommonState *s;
 224    uint64_t entry;
 225    int i, n;
 226
 227    trace_ioapic_eoi_broadcast(vector);
 228
 229    for (i = 0; i < MAX_IOAPICS; i++) {
 230        s = ioapics[i];
 231        if (!s) {
 232            continue;
 233        }
 234        for (n = 0; n < IOAPIC_NUM_PINS; n++) {
 235            entry = s->ioredtbl[n];
 236
 237            if ((entry & IOAPIC_VECTOR_MASK) != vector ||
 238                ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) != IOAPIC_TRIGGER_LEVEL) {
 239                continue;
 240            }
 241
 242            if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
 243                continue;
 244            }
 245
 246            trace_ioapic_clear_remote_irr(n, vector);
 247            s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
 248
 249            if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
 250                ++s->irq_eoi[n];
 251                if (s->irq_eoi[n] >= SUCCESSIVE_IRQ_MAX_COUNT) {
 252                    /*
 253                     * Real hardware does not deliver the interrupt immediately
 254                     * during eoi broadcast, and this lets a buggy guest make
 255                     * slow progress even if it does not correctly handle a
 256                     * level-triggered interrupt. Emulate this behavior if we
 257                     * detect an interrupt storm.
 258                     */
 259                    s->irq_eoi[n] = 0;
 260                    timer_mod_anticipate(s->delayed_ioapic_service_timer,
 261                                         qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 262                                         NANOSECONDS_PER_SECOND / 100);
 263                    trace_ioapic_eoi_delayed_reassert(n);
 264                } else {
 265                    ioapic_service(s);
 266                }
 267            } else {
 268                s->irq_eoi[n] = 0;
 269            }
 270        }
 271    }
 272}
 273
 274static uint64_t
 275ioapic_mem_read(void *opaque, hwaddr addr, unsigned int size)
 276{
 277    IOAPICCommonState *s = opaque;
 278    int index;
 279    uint32_t val = 0;
 280
 281    addr &= 0xff;
 282
 283    switch (addr) {
 284    case IOAPIC_IOREGSEL:
 285        val = s->ioregsel;
 286        break;
 287    case IOAPIC_IOWIN:
 288        if (size != 4) {
 289            break;
 290        }
 291        switch (s->ioregsel) {
 292        case IOAPIC_REG_ID:
 293        case IOAPIC_REG_ARB:
 294            val = s->id << IOAPIC_ID_SHIFT;
 295            break;
 296        case IOAPIC_REG_VER:
 297            val = s->version |
 298                ((IOAPIC_NUM_PINS - 1) << IOAPIC_VER_ENTRIES_SHIFT);
 299            break;
 300        default:
 301            index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
 302            if (index >= 0 && index < IOAPIC_NUM_PINS) {
 303                if (s->ioregsel & 1) {
 304                    val = s->ioredtbl[index] >> 32;
 305                } else {
 306                    val = s->ioredtbl[index] & 0xffffffff;
 307                }
 308            }
 309        }
 310        break;
 311    }
 312
 313    trace_ioapic_mem_read(addr, s->ioregsel, size, val);
 314
 315    return val;
 316}
 317
 318/*
 319 * This is to satisfy the hack in Linux kernel. One hack of it is to
 320 * simulate clearing the Remote IRR bit of IOAPIC entry using the
 321 * following:
 322 *
 323 * "For IO-APIC's with EOI register, we use that to do an explicit EOI.
 324 * Otherwise, we simulate the EOI message manually by changing the trigger
 325 * mode to edge and then back to level, with RTE being masked during
 326 * this."
 327 *
 328 * (See linux kernel __eoi_ioapic_pin() comment in commit c0205701)
 329 *
 330 * This is based on the assumption that, Remote IRR bit will be
 331 * cleared by IOAPIC hardware when configured as edge-triggered
 332 * interrupts.
 333 *
 334 * Without this, level-triggered interrupts in IR mode might fail to
 335 * work correctly.
 336 */
 337static inline void
 338ioapic_fix_edge_remote_irr(uint64_t *entry)
 339{
 340    if (!(*entry & IOAPIC_LVT_TRIGGER_MODE)) {
 341        /* Edge-triggered interrupts, make sure remote IRR is zero */
 342        *entry &= ~((uint64_t)IOAPIC_LVT_REMOTE_IRR);
 343    }
 344}
 345
 346static void
 347ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
 348                 unsigned int size)
 349{
 350    IOAPICCommonState *s = opaque;
 351    int index;
 352
 353    addr &= 0xff;
 354    trace_ioapic_mem_write(addr, s->ioregsel, size, val);
 355
 356    switch (addr) {
 357    case IOAPIC_IOREGSEL:
 358        s->ioregsel = val;
 359        break;
 360    case IOAPIC_IOWIN:
 361        if (size != 4) {
 362            break;
 363        }
 364        switch (s->ioregsel) {
 365        case IOAPIC_REG_ID:
 366            s->id = (val >> IOAPIC_ID_SHIFT) & IOAPIC_ID_MASK;
 367            break;
 368        case IOAPIC_REG_VER:
 369        case IOAPIC_REG_ARB:
 370            break;
 371        default:
 372            index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
 373            if (index >= 0 && index < IOAPIC_NUM_PINS) {
 374                uint64_t ro_bits = s->ioredtbl[index] & IOAPIC_RO_BITS;
 375                if (s->ioregsel & 1) {
 376                    s->ioredtbl[index] &= 0xffffffff;
 377                    s->ioredtbl[index] |= (uint64_t)val << 32;
 378                } else {
 379                    s->ioredtbl[index] &= ~0xffffffffULL;
 380                    s->ioredtbl[index] |= val;
 381                }
 382                /* restore RO bits */
 383                s->ioredtbl[index] &= IOAPIC_RW_BITS;
 384                s->ioredtbl[index] |= ro_bits;
 385                s->irq_eoi[index] = 0;
 386                ioapic_fix_edge_remote_irr(&s->ioredtbl[index]);
 387                ioapic_service(s);
 388            }
 389        }
 390        break;
 391    case IOAPIC_EOI:
 392        /* Explicit EOI is only supported for IOAPIC version 0x20 */
 393        if (size != 4 || s->version != 0x20) {
 394            break;
 395        }
 396        ioapic_eoi_broadcast(val);
 397        break;
 398    }
 399
 400    ioapic_update_kvm_routes(s);
 401}
 402
 403static const MemoryRegionOps ioapic_io_ops = {
 404    .read = ioapic_mem_read,
 405    .write = ioapic_mem_write,
 406    .endianness = DEVICE_NATIVE_ENDIAN,
 407};
 408
 409static void ioapic_machine_done_notify(Notifier *notifier, void *data)
 410{
 411#ifdef CONFIG_KVM
 412    IOAPICCommonState *s = container_of(notifier, IOAPICCommonState,
 413                                        machine_done);
 414
 415    if (kvm_irqchip_is_split()) {
 416        X86IOMMUState *iommu = x86_iommu_get_default();
 417        if (iommu) {
 418            /* Register this IOAPIC with IOMMU IEC notifier, so that
 419             * when there are IR invalidates, we can be notified to
 420             * update kernel IR cache. */
 421            x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s);
 422        }
 423    }
 424#endif
 425}
 426
 427#define IOAPIC_VER_DEF 0x20
 428
 429static void ioapic_realize(DeviceState *dev, Error **errp)
 430{
 431    IOAPICCommonState *s = IOAPIC_COMMON(dev);
 432
 433    if (s->version != 0x11 && s->version != 0x20) {
 434        error_setg(errp, "IOAPIC only supports version 0x11 or 0x20 "
 435                   "(default: 0x%x).", IOAPIC_VER_DEF);
 436        return;
 437    }
 438
 439    memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
 440                          "ioapic", 0x1000);
 441
 442    s->delayed_ioapic_service_timer =
 443        timer_new_ns(QEMU_CLOCK_VIRTUAL, delayed_ioapic_service_cb, s);
 444
 445    qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);
 446
 447    ioapics[ioapic_no] = s;
 448    s->machine_done.notify = ioapic_machine_done_notify;
 449    qemu_add_machine_init_done_notifier(&s->machine_done);
 450}
 451
 452static void ioapic_unrealize(DeviceState *dev, Error **errp)
 453{
 454    IOAPICCommonState *s = IOAPIC_COMMON(dev);
 455
 456    timer_del(s->delayed_ioapic_service_timer);
 457    timer_free(s->delayed_ioapic_service_timer);
 458}
 459
 460static Property ioapic_properties[] = {
 461    DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF),
 462    DEFINE_PROP_END_OF_LIST(),
 463};
 464
 465static void ioapic_class_init(ObjectClass *klass, void *data)
 466{
 467    IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass);
 468    DeviceClass *dc = DEVICE_CLASS(klass);
 469
 470    k->realize = ioapic_realize;
 471    k->unrealize = ioapic_unrealize;
 472    /*
 473     * If APIC is in kernel, we need to update the kernel cache after
 474     * migration, otherwise first 24 gsi routes will be invalid.
 475     */
 476    k->post_load = ioapic_update_kvm_routes;
 477    dc->reset = ioapic_reset_common;
 478    dc->props = ioapic_properties;
 479}
 480
 481static const TypeInfo ioapic_info = {
 482    .name          = TYPE_IOAPIC,
 483    .parent        = TYPE_IOAPIC_COMMON,
 484    .instance_size = sizeof(IOAPICCommonState),
 485    .class_init    = ioapic_class_init,
 486};
 487
 488static void ioapic_register_types(void)
 489{
 490    type_register_static(&ioapic_info);
 491}
 492
 493type_init(ioapic_register_types)
 494