qemu/hw/pci/msi.c
<<
>>
Prefs
   1/*
   2 * msi.c
   3 *
   4 * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
   5 *                    VA Linux Systems Japan K.K.
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 * GNU General Public License for more details.
  16
  17 * You should have received a copy of the GNU General Public License along
  18 * with this program; if not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21#include "qemu/osdep.h"
  22#include "hw/pci/msi.h"
  23#include "hw/xen/xen.h"
  24#include "qemu/range.h"
  25#include "qapi/error.h"
  26
  27#include "hw/i386/kvm/xen_evtchn.h"
  28
  29/* PCI_MSI_ADDRESS_LO */
  30#define PCI_MSI_ADDRESS_LO_MASK         (~0x3)
  31
  32/* If we get rid of cap allocator, we won't need those. */
  33#define PCI_MSI_32_SIZEOF       0x0a
  34#define PCI_MSI_64_SIZEOF       0x0e
  35#define PCI_MSI_32M_SIZEOF      0x14
  36#define PCI_MSI_64M_SIZEOF      0x18
  37
  38#define PCI_MSI_VECTORS_MAX     32
  39
  40/*
  41 * Flag for interrupt controllers to declare broken MSI/MSI-X support.
  42 * values: false - broken; true - non-broken.
  43 *
  44 * Setting this flag to false will remove MSI/MSI-X capability from all devices.
  45 *
  46 * It is preferable for controllers to set this to true (non-broken) even if
  47 * they do not actually support MSI/MSI-X: guests normally probe the controller
  48 * type and do not attempt to enable MSI/MSI-X with interrupt controllers not
  49 * supporting such, so removing the capability is not required, and
  50 * it seems cleaner to have a given device look the same for all boards.
  51 *
  52 * TODO: some existing controllers violate the above rule. Identify and fix them.
  53 */
  54bool msi_nonbroken;
  55
  56/* If we get rid of cap allocator, we won't need this. */
  57static inline uint8_t msi_cap_sizeof(uint16_t flags)
  58{
  59    switch (flags & (PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT)) {
  60    case PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT:
  61        return PCI_MSI_64M_SIZEOF;
  62    case PCI_MSI_FLAGS_64BIT:
  63        return PCI_MSI_64_SIZEOF;
  64    case PCI_MSI_FLAGS_MASKBIT:
  65        return PCI_MSI_32M_SIZEOF;
  66    case 0:
  67        return PCI_MSI_32_SIZEOF;
  68    default:
  69        abort();
  70        break;
  71    }
  72    return 0;
  73}
  74
  75//#define MSI_DEBUG
  76
  77#ifdef MSI_DEBUG
  78# define MSI_DPRINTF(fmt, ...)                                          \
  79    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
  80#else
  81# define MSI_DPRINTF(fmt, ...)  do { } while (0)
  82#endif
  83#define MSI_DEV_PRINTF(dev, fmt, ...)                                   \
  84    MSI_DPRINTF("%s:%x " fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)
  85
  86static inline unsigned int msi_nr_vectors(uint16_t flags)
  87{
  88    return 1U <<
  89        ((flags & PCI_MSI_FLAGS_QSIZE) >> ctz32(PCI_MSI_FLAGS_QSIZE));
  90}
  91
  92static inline uint8_t msi_flags_off(const PCIDevice* dev)
  93{
  94    return dev->msi_cap + PCI_MSI_FLAGS;
  95}
  96
  97static inline uint8_t msi_address_lo_off(const PCIDevice* dev)
  98{
  99    return dev->msi_cap + PCI_MSI_ADDRESS_LO;
 100}
 101
 102static inline uint8_t msi_address_hi_off(const PCIDevice* dev)
 103{
 104    return dev->msi_cap + PCI_MSI_ADDRESS_HI;
 105}
 106
 107static inline uint8_t msi_data_off(const PCIDevice* dev, bool msi64bit)
 108{
 109    return dev->msi_cap + (msi64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32);
 110}
 111
 112static inline uint8_t msi_mask_off(const PCIDevice* dev, bool msi64bit)
 113{
 114    return dev->msi_cap + (msi64bit ? PCI_MSI_MASK_64 : PCI_MSI_MASK_32);
 115}
 116
 117static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit)
 118{
 119    return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : PCI_MSI_PENDING_32);
 120}
 121
 122/*
 123 * Special API for POWER to configure the vectors through
 124 * a side channel. Should never be used by devices.
 125 */
 126void msi_set_message(PCIDevice *dev, MSIMessage msg)
 127{
 128    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
 129    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
 130
 131    if (msi64bit) {
 132        pci_set_quad(dev->config + msi_address_lo_off(dev), msg.address);
 133    } else {
 134        pci_set_long(dev->config + msi_address_lo_off(dev), msg.address);
 135    }
 136    pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
 137}
 138
 139static MSIMessage msi_prepare_message(PCIDevice *dev, unsigned int vector)
 140{
 141    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
 142    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
 143    unsigned int nr_vectors = msi_nr_vectors(flags);
 144    MSIMessage msg;
 145
 146    assert(vector < nr_vectors);
 147
 148    if (msi64bit) {
 149        msg.address = pci_get_quad(dev->config + msi_address_lo_off(dev));
 150    } else {
 151        msg.address = pci_get_long(dev->config + msi_address_lo_off(dev));
 152    }
 153
 154    /* upper bit 31:16 is zero */
 155    msg.data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
 156    if (nr_vectors > 1) {
 157        msg.data &= ~(nr_vectors - 1);
 158        msg.data |= vector;
 159    }
 160
 161    return msg;
 162}
 163
 164MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
 165{
 166    return dev->msi_prepare_message(dev, vector);
 167}
 168
 169bool msi_enabled(const PCIDevice *dev)
 170{
 171    return msi_present(dev) &&
 172        (pci_get_word(dev->config + msi_flags_off(dev)) &
 173         PCI_MSI_FLAGS_ENABLE);
 174}
 175
 176/*
 177 * Make PCI device @dev MSI-capable.
 178 * Non-zero @offset puts capability MSI at that offset in PCI config
 179 * space.
 180 * @nr_vectors is the number of MSI vectors (1, 2, 4, 8, 16 or 32).
 181 * If @msi64bit, make the device capable of sending a 64-bit message
 182 * address.
 183 * If @msi_per_vector_mask, make the device support per-vector masking.
 184 * @errp is for returning errors.
 185 * Return 0 on success; set @errp and return -errno on error.
 186 *
 187 * -ENOTSUP means lacking msi support for a msi-capable platform.
 188 * -EINVAL means capability overlap, happens when @offset is non-zero,
 189 *  also means a programming error, except device assignment, which can check
 190 *  if a real HW is broken.
 191 */
 192int msi_init(struct PCIDevice *dev, uint8_t offset,
 193             unsigned int nr_vectors, bool msi64bit,
 194             bool msi_per_vector_mask, Error **errp)
 195{
 196    unsigned int vectors_order;
 197    uint16_t flags;
 198    uint8_t cap_size;
 199    int config_offset;
 200
 201    if (!msi_nonbroken) {
 202        error_setg(errp, "MSI is not supported by interrupt controller");
 203        return -ENOTSUP;
 204    }
 205
 206    MSI_DEV_PRINTF(dev,
 207                   "init offset: 0x%"PRIx8" vector: %"PRId8
 208                   " 64bit %d mask %d\n",
 209                   offset, nr_vectors, msi64bit, msi_per_vector_mask);
 210
 211    assert(!(nr_vectors & (nr_vectors - 1)));   /* power of 2 */
 212    assert(nr_vectors > 0);
 213    assert(nr_vectors <= PCI_MSI_VECTORS_MAX);
 214    /* the nr of MSI vectors is up to 32 */
 215    vectors_order = ctz32(nr_vectors);
 216
 217    flags = vectors_order << ctz32(PCI_MSI_FLAGS_QMASK);
 218    if (msi64bit) {
 219        flags |= PCI_MSI_FLAGS_64BIT;
 220    }
 221    if (msi_per_vector_mask) {
 222        flags |= PCI_MSI_FLAGS_MASKBIT;
 223    }
 224
 225    cap_size = msi_cap_sizeof(flags);
 226    config_offset = pci_add_capability(dev, PCI_CAP_ID_MSI, offset,
 227                                        cap_size, errp);
 228    if (config_offset < 0) {
 229        return config_offset;
 230    }
 231
 232    dev->msi_cap = config_offset;
 233    dev->cap_present |= QEMU_PCI_CAP_MSI;
 234
 235    pci_set_word(dev->config + msi_flags_off(dev), flags);
 236    pci_set_word(dev->wmask + msi_flags_off(dev),
 237                 PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
 238    pci_set_long(dev->wmask + msi_address_lo_off(dev),
 239                 PCI_MSI_ADDRESS_LO_MASK);
 240    if (msi64bit) {
 241        pci_set_long(dev->wmask + msi_address_hi_off(dev), 0xffffffff);
 242    }
 243    pci_set_word(dev->wmask + msi_data_off(dev, msi64bit), 0xffff);
 244
 245    if (msi_per_vector_mask) {
 246        /* Make mask bits 0 to nr_vectors - 1 writable. */
 247        pci_set_long(dev->wmask + msi_mask_off(dev, msi64bit),
 248                     0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors));
 249    }
 250
 251    dev->msi_prepare_message = msi_prepare_message;
 252
 253    return 0;
 254}
 255
 256void msi_uninit(struct PCIDevice *dev)
 257{
 258    uint16_t flags;
 259    uint8_t cap_size;
 260
 261    if (!msi_present(dev)) {
 262        return;
 263    }
 264    flags = pci_get_word(dev->config + msi_flags_off(dev));
 265    cap_size = msi_cap_sizeof(flags);
 266    pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size);
 267    dev->cap_present &= ~QEMU_PCI_CAP_MSI;
 268    dev->msi_prepare_message = NULL;
 269
 270    MSI_DEV_PRINTF(dev, "uninit\n");
 271}
 272
 273void msi_reset(PCIDevice *dev)
 274{
 275    uint16_t flags;
 276    bool msi64bit;
 277
 278    if (!msi_present(dev)) {
 279        return;
 280    }
 281
 282    flags = pci_get_word(dev->config + msi_flags_off(dev));
 283    flags &= ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
 284    msi64bit = flags & PCI_MSI_FLAGS_64BIT;
 285
 286    pci_set_word(dev->config + msi_flags_off(dev), flags);
 287    pci_set_long(dev->config + msi_address_lo_off(dev), 0);
 288    if (msi64bit) {
 289        pci_set_long(dev->config + msi_address_hi_off(dev), 0);
 290    }
 291    pci_set_word(dev->config + msi_data_off(dev, msi64bit), 0);
 292    if (flags & PCI_MSI_FLAGS_MASKBIT) {
 293        pci_set_long(dev->config + msi_mask_off(dev, msi64bit), 0);
 294        pci_set_long(dev->config + msi_pending_off(dev, msi64bit), 0);
 295    }
 296    MSI_DEV_PRINTF(dev, "reset\n");
 297}
 298
 299bool msi_is_masked(const PCIDevice *dev, unsigned int vector)
 300{
 301    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
 302    uint32_t mask, data;
 303    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
 304    assert(vector < PCI_MSI_VECTORS_MAX);
 305
 306    if (!(flags & PCI_MSI_FLAGS_MASKBIT)) {
 307        return false;
 308    }
 309
 310    data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
 311    if (xen_is_pirq_msi(data)) {
 312        return false;
 313    }
 314
 315    mask = pci_get_long(dev->config +
 316                        msi_mask_off(dev, flags & PCI_MSI_FLAGS_64BIT));
 317    return mask & (1U << vector);
 318}
 319
 320void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp)
 321{
 322    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
 323    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
 324    uint32_t irq_state, vector_mask, pending;
 325
 326    if (vector >= PCI_MSI_VECTORS_MAX) {
 327        error_setg(errp, "msi: vector %d not allocated. max vector is %d",
 328                   vector, (PCI_MSI_VECTORS_MAX - 1));
 329        return;
 330    }
 331
 332    vector_mask = (1U << vector);
 333
 334    irq_state = pci_get_long(dev->config + msi_mask_off(dev, msi64bit));
 335
 336    if (mask) {
 337        irq_state |= vector_mask;
 338    } else {
 339        irq_state &= ~vector_mask;
 340    }
 341
 342    pci_set_long(dev->config + msi_mask_off(dev, msi64bit), irq_state);
 343
 344    pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit));
 345    if (!mask && (pending & vector_mask)) {
 346        pending &= ~vector_mask;
 347        pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending);
 348        msi_notify(dev, vector);
 349    }
 350}
 351
 352void msi_notify(PCIDevice *dev, unsigned int vector)
 353{
 354    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
 355    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
 356    unsigned int nr_vectors = msi_nr_vectors(flags);
 357    MSIMessage msg;
 358
 359    assert(vector < nr_vectors);
 360    if (msi_is_masked(dev, vector)) {
 361        assert(flags & PCI_MSI_FLAGS_MASKBIT);
 362        pci_long_test_and_set_mask(
 363            dev->config + msi_pending_off(dev, msi64bit), 1U << vector);
 364        MSI_DEV_PRINTF(dev, "pending vector 0x%x\n", vector);
 365        return;
 366    }
 367
 368    msg = msi_get_message(dev, vector);
 369
 370    MSI_DEV_PRINTF(dev,
 371                   "notify vector 0x%x"
 372                   " address: 0x%"PRIx64" data: 0x%"PRIx32"\n",
 373                   vector, msg.address, msg.data);
 374    msi_send_message(dev, msg);
 375}
 376
 377void msi_send_message(PCIDevice *dev, MSIMessage msg)
 378{
 379    dev->msi_trigger(dev, msg);
 380}
 381
 382/* Normally called by pci_default_write_config(). */
 383void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
 384{
 385    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
 386    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
 387    bool msi_per_vector_mask = flags & PCI_MSI_FLAGS_MASKBIT;
 388    unsigned int nr_vectors;
 389    uint8_t log_num_vecs;
 390    uint8_t log_max_vecs;
 391    unsigned int vector;
 392    uint32_t pending;
 393
 394    if (!msi_present(dev) ||
 395        !ranges_overlap(addr, len, dev->msi_cap, msi_cap_sizeof(flags))) {
 396        return;
 397    }
 398
 399#ifdef MSI_DEBUG
 400    MSI_DEV_PRINTF(dev, "addr 0x%"PRIx32" val 0x%"PRIx32" len %d\n",
 401                   addr, val, len);
 402    MSI_DEV_PRINTF(dev, "ctrl: 0x%"PRIx16" address: 0x%"PRIx32,
 403                   flags,
 404                   pci_get_long(dev->config + msi_address_lo_off(dev)));
 405    if (msi64bit) {
 406        fprintf(stderr, " address-hi: 0x%"PRIx32,
 407                pci_get_long(dev->config + msi_address_hi_off(dev)));
 408    }
 409    fprintf(stderr, " data: 0x%"PRIx16,
 410            pci_get_word(dev->config + msi_data_off(dev, msi64bit)));
 411    if (flags & PCI_MSI_FLAGS_MASKBIT) {
 412        fprintf(stderr, " mask 0x%"PRIx32" pending 0x%"PRIx32,
 413                pci_get_long(dev->config + msi_mask_off(dev, msi64bit)),
 414                pci_get_long(dev->config + msi_pending_off(dev, msi64bit)));
 415    }
 416    fprintf(stderr, "\n");
 417#endif
 418
 419    if (xen_mode == XEN_EMULATE) {
 420        for (vector = 0; vector < msi_nr_vectors(flags); vector++) {
 421            MSIMessage msg = msi_prepare_message(dev, vector);
 422
 423            xen_evtchn_snoop_msi(dev, false, vector, msg.address, msg.data,
 424                                 msi_is_masked(dev, vector));
 425        }
 426    }
 427
 428    if (!(flags & PCI_MSI_FLAGS_ENABLE)) {
 429        return;
 430    }
 431
 432    /*
 433     * Now MSI is enabled, clear INTx# interrupts.
 434     * the driver is prohibited from writing enable bit to mask
 435     * a service request. But the guest OS could do this.
 436     * So we just discard the interrupts as moderate fallback.
 437     *
 438     * 6.8.3.3. Enabling Operation
 439     *   While enabled for MSI or MSI-X operation, a function is prohibited
 440     *   from using its INTx# pin (if implemented) to request
 441     *   service (MSI, MSI-X, and INTx# are mutually exclusive).
 442     */
 443    pci_device_deassert_intx(dev);
 444
 445    /*
 446     * nr_vectors might be set bigger than capable. So clamp it.
 447     * This is not legal by spec, so we can do anything we like,
 448     * just don't crash the host
 449     */
 450    log_num_vecs =
 451        (flags & PCI_MSI_FLAGS_QSIZE) >> ctz32(PCI_MSI_FLAGS_QSIZE);
 452    log_max_vecs =
 453        (flags & PCI_MSI_FLAGS_QMASK) >> ctz32(PCI_MSI_FLAGS_QMASK);
 454    if (log_num_vecs > log_max_vecs) {
 455        flags &= ~PCI_MSI_FLAGS_QSIZE;
 456        flags |= log_max_vecs << ctz32(PCI_MSI_FLAGS_QSIZE);
 457        pci_set_word(dev->config + msi_flags_off(dev), flags);
 458    }
 459
 460    if (!msi_per_vector_mask) {
 461        /* if per vector masking isn't supported,
 462           there is no pending interrupt. */
 463        return;
 464    }
 465
 466    nr_vectors = msi_nr_vectors(flags);
 467
 468    /* This will discard pending interrupts, if any. */
 469    pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit));
 470    pending &= 0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors);
 471    pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending);
 472
 473    /* deliver pending interrupts which are unmasked */
 474    for (vector = 0; vector < nr_vectors; ++vector) {
 475        if (msi_is_masked(dev, vector) || !(pending & (1U << vector))) {
 476            continue;
 477        }
 478
 479        pci_long_test_and_clear_mask(
 480            dev->config + msi_pending_off(dev, msi64bit), 1U << vector);
 481        msi_notify(dev, vector);
 482    }
 483}
 484
 485unsigned int msi_nr_vectors_allocated(const PCIDevice *dev)
 486{
 487    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
 488    return msi_nr_vectors(flags);
 489}
 490