/* linux/drivers/pci/msi/msi.c */
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * PCI Message Signaled Interrupt (MSI)
   4 *
   5 * Copyright (C) 2003-2004 Intel
   6 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
   7 * Copyright (C) 2016 Christoph Hellwig.
   8 */
   9#include <linux/err.h>
  10#include <linux/export.h>
  11#include <linux/irq.h>
  12
  13#include "../pci.h"
  14#include "msi.h"
  15
/* Global kill switch for MSI support; cleared elsewhere to disable MSI */
static int pci_msi_enable = 1;
/* When non-zero (e.g. under Xen, see msi_setup_msi_desc()), per-vector mask bits are not used */
int pci_msi_ignore_mask;
  18
  19static noinline void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set)
  20{
  21        raw_spinlock_t *lock = &to_pci_dev(desc->dev)->msi_lock;
  22        unsigned long flags;
  23
  24        if (!desc->pci.msi_attrib.can_mask)
  25                return;
  26
  27        raw_spin_lock_irqsave(lock, flags);
  28        desc->pci.msi_mask &= ~clear;
  29        desc->pci.msi_mask |= set;
  30        pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->pci.mask_pos,
  31                               desc->pci.msi_mask);
  32        raw_spin_unlock_irqrestore(lock, flags);
  33}
  34
/* Set @mask bits in the MSI mask register (masked vectors are not delivered) */
static inline void pci_msi_mask(struct msi_desc *desc, u32 mask)
{
	pci_msi_update_mask(desc, 0, mask);
}
  39
/* Clear @mask bits in the MSI mask register (re-enable delivery) */
static inline void pci_msi_unmask(struct msi_desc *desc, u32 mask)
{
	pci_msi_update_mask(desc, mask, 0);
}
  44
/* Return the MMIO address of this descriptor's MSI-X table entry */
static inline void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
{
	return desc->pci.mask_base + desc->msi_index * PCI_MSIX_ENTRY_SIZE;
}
  49
/*
 * This internal function does not flush PCI writes to the device.  All
 * users must ensure that they read from the device before either assuming
 * that the device state is up to date, or returning out of this file.
 * It does not affect the msi_desc::msix_ctrl cache either. Use with care!
 */
static void pci_msix_write_vector_ctrl(struct msi_desc *desc, u32 ctrl)
{
	void __iomem *desc_addr = pci_msix_desc_addr(desc);

	/* Entries without a usable mask bit are never written */
	if (desc->pci.msi_attrib.can_mask)
		writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
}
  63
/* Mask one MSI-X vector via its Vector Control register and flush the write */
static inline void pci_msix_mask(struct msi_desc *desc)
{
	desc->pci.msix_ctrl |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
	pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl);
	/* Flush write to device */
	readl(desc->pci.mask_base);
}
  71
/* Unmask one MSI-X vector. Unlike pci_msix_mask(), no readback flush here. */
static inline void pci_msix_unmask(struct msi_desc *desc)
{
	desc->pci.msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
	pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl);
}
  77
/* Mask a vector; @mask is only meaningful for multi-vector MSI, not MSI-X */
static void __pci_msi_mask_desc(struct msi_desc *desc, u32 mask)
{
	if (desc->pci.msi_attrib.is_msix)
		pci_msix_mask(desc);
	else
		pci_msi_mask(desc, mask);
}
  85
/* Unmask a vector; @mask is only meaningful for multi-vector MSI, not MSI-X */
static void __pci_msi_unmask_desc(struct msi_desc *desc, u32 mask)
{
	if (desc->pci.msi_attrib.is_msix)
		pci_msix_unmask(desc);
	else
		pci_msi_unmask(desc, mask);
}
  93
/**
 * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts
 * @data:       pointer to irqdata associated to that interrupt
 */
void pci_msi_mask_irq(struct irq_data *data)
{
	struct msi_desc *desc = irq_data_get_msi_desc(data);

	/* For multi-MSI, (irq - desc->irq) is this vector's bit in the mask */
	__pci_msi_mask_desc(desc, BIT(data->irq - desc->irq));
}
EXPORT_SYMBOL_GPL(pci_msi_mask_irq);
 105
/**
 * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts
 * @data:       pointer to irqdata associated to that interrupt
 */
void pci_msi_unmask_irq(struct irq_data *data)
{
	struct msi_desc *desc = irq_data_get_msi_desc(data);

	/* For multi-MSI, (irq - desc->irq) is this vector's bit in the mask */
	__pci_msi_unmask_desc(desc, BIT(data->irq - desc->irq));
}
EXPORT_SYMBOL_GPL(pci_msi_unmask_irq);
 117
/*
 * Read the currently programmed message for @entry back from the device
 * into @msg. The device must be in D0 (enforced by the BUG_ON below),
 * otherwise the MSI-X table / MSI config registers are not accessible.
 */
void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(entry);

	BUG_ON(dev->current_state != PCI_D0);

	if (entry->pci.msi_attrib.is_msix) {
		void __iomem *base = pci_msix_desc_addr(entry);

		/* Virtual entries have no backing table slot to read */
		if (WARN_ON_ONCE(entry->pci.msi_attrib.is_virtual))
			return;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		int pos = dev->msi_cap;
		u16 data;

		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				      &msg->address_lo);
		/* Data register offset depends on 64-bit address capability */
		if (entry->pci.msi_attrib.is_64) {
			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					      &msg->address_hi);
			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
		}
		msg->data = data;
	}
}
 150
/*
 * Program @msg into the device for @entry and cache it in entry->msg.
 * Hardware is skipped when the device is not in D0 or is disconnected;
 * the cached copy is still updated so a later restore can replay it.
 */
void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	struct pci_dev *dev = msi_desc_to_pci_dev(entry);

	if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) {
		/* Don't touch the hardware now */
	} else if (entry->pci.msi_attrib.is_msix) {
		void __iomem *base = pci_msix_desc_addr(entry);
		u32 ctrl = entry->pci.msix_ctrl;
		bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT);

		/* Virtual entries have no table slot; only cache the message */
		if (entry->pci.msi_attrib.is_virtual)
			goto skip;

		/*
		 * The specification mandates that the entry is masked
		 * when the message is modified:
		 *
		 * "If software changes the Address or Data value of an
		 * entry while the entry is unmasked, the result is
		 * undefined."
		 */
		if (unmasked)
			pci_msix_write_vector_ctrl(entry, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT);

		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);

		/* Restore the original (unmasked) vector control state */
		if (unmasked)
			pci_msix_write_vector_ctrl(entry, ctrl);

		/* Ensure that the writes are visible in the device */
		readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		int pos = dev->msi_cap;
		u16 msgctl;

		/* Re-assert the number of enabled vectors in MSI control */
		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
		msgctl |= entry->pci.msi_attrib.multiple << 4;
		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);

		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				       msg->address_lo);
		if (entry->pci.msi_attrib.is_64) {
			pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					       msg->address_hi);
			pci_write_config_word(dev, pos + PCI_MSI_DATA_64,
					      msg->data);
		} else {
			pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
					      msg->data);
		}
		/* Ensure that the writes are visible in the device */
		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
	}

skip:
	entry->msg = *msg;

	/* Optional per-descriptor hook, invoked after the message is cached */
	if (entry->write_msi_msg)
		entry->write_msi_msg(entry, entry->write_msi_msg_data);

}
 216
/* Look up the descriptor for @irq and program @msg into the device */
void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct msi_desc *entry = irq_get_msi_desc(irq);

	__pci_write_msi_msg(entry, msg);
}
EXPORT_SYMBOL_GPL(pci_write_msi_msg);
 224
/* Tear down the MSI IRQ mappings and drop the MSI-X table mapping, if any */
static void free_msi_irqs(struct pci_dev *dev)
{
	pci_msi_teardown_msi_irqs(dev);

	if (dev->msix_base) {
		iounmap(dev->msix_base);
		dev->msix_base = NULL;
	}
}
 234
/*
 * Toggle INTx around MSI setup/teardown, except on devices quirked with
 * PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG where touching INTx is not safe.
 */
static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
		pci_intx(dev, enable);
}
 240
 241static void pci_msi_set_enable(struct pci_dev *dev, int enable)
 242{
 243        u16 control;
 244
 245        pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
 246        control &= ~PCI_MSI_FLAGS_ENABLE;
 247        if (enable)
 248                control |= PCI_MSI_FLAGS_ENABLE;
 249        pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
 250}
 251
/*
 * Architecture override returns true when the PCI MSI message should be
 * written by the generic restore function.
 */
bool __weak arch_restore_msi_irqs(struct pci_dev *dev)
{
	/* Default: the generic code writes the messages itself */
	return true;
}
 260
/*
 * Re-program MSI state (message, mask, vector count, enable bit) after
 * e.g. a power transition. No-op unless MSI is marked enabled.
 */
static void __pci_restore_msi_state(struct pci_dev *dev)
{
	struct msi_desc *entry;
	u16 control;

	if (!dev->msi_enabled)
		return;

	entry = irq_get_msi_desc(dev->irq);

	/* Keep the device quiet while the message is rewritten */
	pci_intx_for_msi(dev, 0);
	pci_msi_set_enable(dev, 0);
	if (arch_restore_msi_irqs(dev))
		__pci_write_msi_msg(entry, &entry->msg);

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	/* clear/set of 0/0 simply rewrites the cached mask to the device */
	pci_msi_update_mask(entry, 0, 0);
	control &= ~PCI_MSI_FLAGS_QSIZE;
	control |= (entry->pci.msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}
 282
 283static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
 284{
 285        u16 ctrl;
 286
 287        pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
 288        ctrl &= ~clear;
 289        ctrl |= set;
 290        pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
 291}
 292
/*
 * Re-program MSI-X state (messages and per-vector control words) after
 * e.g. a power transition. No-op unless MSI-X is marked enabled.
 */
static void __pci_restore_msix_state(struct pci_dev *dev)
{
	struct msi_desc *entry;
	bool write_msg;

	if (!dev->msix_enabled)
		return;

	/* route the table */
	pci_intx_for_msi(dev, 0);
	/* MASKALL keeps vectors quiet while entries are rewritten */
	pci_msix_clear_and_set_ctrl(dev, 0,
				PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);

	write_msg = arch_restore_msi_irqs(dev);

	msi_lock_descs(&dev->dev);
	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
		if (write_msg)
			__pci_write_msi_msg(entry, &entry->msg);
		/* Restore the cached per-vector mask state */
		pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl);
	}
	msi_unlock_descs(&dev->dev);

	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
}
 318
/**
 * pci_restore_msi_state - Restore cached MSI/MSI-X state for a device
 * @dev: the PCI device to restore
 *
 * Each helper is a no-op when the respective mode is not enabled.
 */
void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 325
/* devres action: release managed MSI resources on device teardown */
static void pcim_msi_release(void *pcidev)
{
	struct pci_dev *dev = pcidev;

	dev->is_msi_managed = false;
	pci_free_irq_vectors(dev);
}
 333
/*
 * Needs to be separate from pcim_release to prevent an ordering problem
 * vs. msi_device_data_release() in the MSI core code.
 *
 * Returns 0 on success (or when no action is needed), negative errno
 * if registering the devres action failed.
 */
static int pcim_setup_msi_release(struct pci_dev *dev)
{
	int ret;

	/* Only once, and only for devres-managed devices */
	if (!pci_is_managed(dev) || dev->is_msi_managed)
		return 0;

	ret = devm_add_action(&dev->dev, pcim_msi_release, dev);
	if (!ret)
		dev->is_msi_managed = true;
	return ret;
}
 350
/*
 * Ordering vs. devres: msi device data has to be installed first so that
 * pcim_msi_release() is invoked before it on device release.
 */
static int pci_setup_msi_context(struct pci_dev *dev)
{
	int ret = msi_setup_device_data(&dev->dev);

	if (!ret)
		ret = pcim_setup_msi_release(dev);
	return ret;
}
 363
/*
 * Build and register the single MSI descriptor covering @nvec vectors.
 * Attributes are derived from the device's MSI Message Control register,
 * with quirk/Xen overrides applied to the maskbit capability.
 */
static int msi_setup_msi_desc(struct pci_dev *dev, int nvec,
			      struct irq_affinity_desc *masks)
{
	struct msi_desc desc;
	u16 control;

	/* MSI Entry Initialization */
	memset(&desc, 0, sizeof(desc));

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	/* Lies, damned lies, and MSIs */
	if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
		control |= PCI_MSI_FLAGS_MASKBIT;
	/* Respect XEN's mask disabling */
	if (pci_msi_ignore_mask)
		control &= ~PCI_MSI_FLAGS_MASKBIT;

	desc.nvec_used			= nvec;
	desc.pci.msi_attrib.is_64	= !!(control & PCI_MSI_FLAGS_64BIT);
	desc.pci.msi_attrib.can_mask	= !!(control & PCI_MSI_FLAGS_MASKBIT);
	desc.pci.msi_attrib.default_irq	= dev->irq;
	desc.pci.msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
	/* log2 of the vector count actually enabled, rounded up */
	desc.pci.msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));
	desc.affinity			= masks;

	/* Mask register position depends on 64-bit address capability */
	if (control & PCI_MSI_FLAGS_64BIT)
		desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
	else
		desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_32;

	/* Save the initial mask status */
	if (desc.pci.msi_attrib.can_mask)
		pci_read_config_dword(dev, desc.pci.mask_pos, &desc.pci.msi_mask);

	return msi_add_msi_desc(&dev->dev, &desc);
}
 400
/*
 * For devices limited to 32-bit MSI addresses, verify that no assigned
 * message uses the upper address bits. Returns 0 if all entries are
 * valid, -EIO if any entry carries a 64-bit address.
 */
static int msi_verify_entries(struct pci_dev *dev)
{
	struct msi_desc *entry;

	if (!dev->no_64bit_msi)
		return 0;

	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
		if (entry->msg.address_hi) {
			pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n",
				entry->msg.address_hi, entry->msg.address_lo);
			break;
		}
	}
	/* entry is non-NULL iff the loop broke out on a bad descriptor */
	return !entry ? 0 : -EIO;
}
 417
/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: number of interrupts to allocate
 * @affd: description of automatic IRQ affinity assignments (may be %NULL)
 *
 * Setup the MSI capability structure of the device with the requested
 * number of interrupts.  A return value of zero indicates the successful
 * setup of an entry with the new MSI IRQ.  A negative return value indicates
 * an error, and a positive return value indicates the number of interrupts
 * which could have been allocated.
 */
static int msi_capability_init(struct pci_dev *dev, int nvec,
			       struct irq_affinity *affd)
{
	struct irq_affinity_desc *masks = NULL;
	struct msi_desc *entry;
	int ret;

	/*
	 * Disable MSI during setup in the hardware, but mark it enabled
	 * so that setup code can evaluate it.
	 */
	pci_msi_set_enable(dev, 0);
	dev->msi_enabled = 1;

	if (affd)
		masks = irq_create_affinity_masks(nvec, affd);

	msi_lock_descs(&dev->dev);
	ret = msi_setup_msi_desc(dev, nvec, masks);
	if (ret)
		goto fail;

	/* All MSIs are unmasked by default; mask them all */
	entry = msi_first_desc(&dev->dev, MSI_DESC_ALL);
	pci_msi_mask(entry, msi_multi_mask(entry));

	/* Configure MSI capability structure */
	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
	if (ret)
		goto err;

	ret = msi_verify_entries(dev);
	if (ret)
		goto err;

	/* Set MSI enabled bits */
	pci_intx_for_msi(dev, 0);
	pci_msi_set_enable(dev, 1);

	pcibios_free_irq(dev);
	dev->irq = entry->irq;
	goto unlock;

err:
	/* Undo the mask-all applied above before freeing the vectors */
	pci_msi_unmask(entry, msi_multi_mask(entry));
	free_msi_irqs(dev);
fail:
	dev->msi_enabled = 0;
unlock:
	msi_unlock_descs(&dev->dev);
	kfree(masks);
	return ret;
}
 483
/*
 * Map the device's MSI-X table: locate the BAR and offset from the
 * MSI-X Table register and ioremap() enough space for @nr_entries.
 * Returns NULL when the BAR is invalid/unset or the mapping fails.
 */
static void __iomem *msix_map_region(struct pci_dev *dev,
				     unsigned int nr_entries)
{
	resource_size_t phys_addr;
	u32 table_offset;
	unsigned long flags;
	u8 bir;

	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
			      &table_offset);
	/* Low bits select the BAR (BIR); the rest is the table offset */
	bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
	flags = pci_resource_flags(dev, bir);
	if (!flags || (flags & IORESOURCE_UNSET))
		return NULL;

	table_offset &= PCI_MSIX_TABLE_OFFSET;
	phys_addr = pci_resource_start(dev, bir) + table_offset;

	return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
}
 504
/*
 * Create one MSI descriptor per requested MSI-X vector. When @entries
 * is provided it selects the table indices, otherwise indices 0..nvec-1
 * are used. Indices beyond the hardware table size become "virtual"
 * entries with no backing table slot. Returns 0 or the first error.
 */
static int msix_setup_msi_descs(struct pci_dev *dev, void __iomem *base,
				struct msix_entry *entries, int nvec,
				struct irq_affinity_desc *masks)
{
	int ret = 0, i, vec_count = pci_msix_vec_count(dev);
	struct irq_affinity_desc *curmsk;
	struct msi_desc desc;
	void __iomem *addr;

	memset(&desc, 0, sizeof(desc));

	desc.nvec_used			= 1;
	desc.pci.msi_attrib.is_msix	= 1;
	desc.pci.msi_attrib.is_64	= 1;
	desc.pci.msi_attrib.default_irq	= dev->irq;
	desc.pci.mask_base		= base;

	for (i = 0, curmsk = masks; i < nvec; i++, curmsk++) {
		desc.msi_index = entries ? entries[i].entry : i;
		desc.affinity = masks ? curmsk : NULL;
		desc.pci.msi_attrib.is_virtual = desc.msi_index >= vec_count;
		desc.pci.msi_attrib.can_mask = !pci_msi_ignore_mask &&
					       !desc.pci.msi_attrib.is_virtual;

		/* Cache the hardware vector-control word for unmaskable entries */
		if (!desc.pci.msi_attrib.can_mask) {
			addr = pci_msix_desc_addr(&desc);
			desc.pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
		}

		ret = msi_add_msi_desc(&dev->dev, &desc);
		if (ret)
			break;
	}
	return ret;
}
 540
/*
 * Report the allocated Linux IRQ numbers back to the caller's
 * msix_entry array (in descriptor iteration order). No-op when the
 * caller did not pass an array.
 */
static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries)
{
	struct msi_desc *desc;

	if (entries) {
		msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) {
			entries->vector = desc->irq;
			entries++;
		}
	}
}
 552
 553static void msix_mask_all(void __iomem *base, int tsize)
 554{
 555        u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
 556        int i;
 557
 558        if (pci_msi_ignore_mask)
 559                return;
 560
 561        for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
 562                writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
 563}
 564
/*
 * Allocate descriptors and IRQs for @nvec MSI-X vectors under the MSI
 * descriptor lock, verify them against device restrictions, and report
 * the resulting IRQ numbers back through @entries. On any failure all
 * partially-allocated IRQs are freed. Returns 0 or a negative errno.
 */
static int msix_setup_interrupts(struct pci_dev *dev, void __iomem *base,
				 struct msix_entry *entries, int nvec,
				 struct irq_affinity *affd)
{
	struct irq_affinity_desc *masks = NULL;
	int ret;

	if (affd)
		masks = irq_create_affinity_masks(nvec, affd);

	msi_lock_descs(&dev->dev);
	ret = msix_setup_msi_descs(dev, base, entries, nvec, masks);
	if (ret)
		goto out_free;

	ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
	if (ret)
		goto out_free;

	/* Check if all MSI entries honor device restrictions */
	ret = msi_verify_entries(dev);
	if (ret)
		goto out_free;

	msix_update_entries(dev, entries);
	goto out_unlock;

out_free:
	free_msi_irqs(dev);
out_unlock:
	msi_unlock_descs(&dev->dev);
	kfree(masks);
	return ret;
}
 599
/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
 * @affd: Optional pointer to enable automatic affinity assignment
 *
 * Setup the MSI-X capability structure of device function with a
 * single MSI-X IRQ. A return of zero indicates the successful setup of
 * requested MSI-X entries with allocated IRQs or non-zero for otherwise.
 **/
static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
				int nvec, struct irq_affinity *affd)
{
	void __iomem *base;
	int ret, tsize;
	u16 control;

	/*
	 * Some devices require MSI-X to be enabled before the MSI-X
	 * registers can be accessed.  Mask all the vectors to prevent
	 * interrupts coming in before they're fully set up.
	 */
	pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL |
				    PCI_MSIX_FLAGS_ENABLE);

	/* Mark it enabled so setup functions can query it */
	dev->msix_enabled = 1;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
	/* Request & Map MSI-X table region */
	tsize = msix_table_size(control);
	base = msix_map_region(dev, tsize);
	if (!base) {
		ret = -ENOMEM;
		goto out_disable;
	}

	/* Stored so free_msi_irqs() can unmap the table later */
	dev->msix_base = base;

	ret = msix_setup_interrupts(dev, base, entries, nvec, affd);
	if (ret)
		goto out_disable;

	/* Disable INTX */
	pci_intx_for_msi(dev, 0);

	/*
	 * Ensure that all table entries are masked to prevent
	 * stale entries from firing in a crash kernel.
	 *
	 * Done late to deal with a broken Marvell NVME device
	 * which takes the MSI-X mask bits into account even
	 * when MSI-X is disabled, which prevents MSI delivery.
	 */
	msix_mask_all(base, tsize);
	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);

	pcibios_free_irq(dev);
	return 0;

out_disable:
	dev->msix_enabled = 0;
	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0);

	return ret;
}
 667
/**
 * pci_msi_supported - check whether MSI may be enabled on a device
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: how many MSIs have been requested?
 *
 * Look at global flags, the device itself, and its parent buses
 * to determine if MSI/-X are supported for the device. If MSI/-X is
 * supported return 1, else return 0.
 **/
static int pci_msi_supported(struct pci_dev *dev, int nvec)
{
	struct pci_bus *bus;

	/* MSI must be globally enabled and supported by the device */
	if (!pci_msi_enable)
		return 0;

	if (!dev || dev->no_msi)
		return 0;

	/*
	 * You can't ask to have 0 or less MSIs configured.
	 *  a) it's stupid ..
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return 0;

	/*
	 * Any bridge which does NOT route MSI transactions from its
	 * secondary bus to its primary bus must set NO_MSI flag on
	 * the secondary pci_bus.
	 *
	 * The NO_MSI flag can either be set directly by:
	 * - arch-specific PCI host bus controller drivers (deprecated)
	 * - quirks for specific PCI bridges
	 *
	 * or indirectly by platform-specific PCI host bridge drivers by
	 * advertising the 'msi_domain' property, which results in
	 * the NO_MSI flag when no MSI domain is found for this bridge
	 * at probe time.
	 */
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return 0;

	return 1;
}
 716
 717/**
 718 * pci_msi_vec_count - Return the number of MSI vectors a device can send
 719 * @dev: device to report about
 720 *
 721 * This function returns the number of MSI vectors a device requested via
 722 * Multiple Message Capable register. It returns a negative errno if the
 723 * device is not capable sending MSI interrupts. Otherwise, the call succeeds
 724 * and returns a power of two, up to a maximum of 2^5 (32), according to the
 725 * MSI specification.
 726 **/
 727int pci_msi_vec_count(struct pci_dev *dev)
 728{
 729        int ret;
 730        u16 msgctl;
 731
 732        if (!dev->msi_cap)
 733                return -EINVAL;
 734
 735        pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
 736        ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
 737
 738        return ret;
 739}
 740EXPORT_SYMBOL(pci_msi_vec_count);
 741
 742static void pci_msi_shutdown(struct pci_dev *dev)
 743{
 744        struct msi_desc *desc;
 745
 746        if (!pci_msi_enable || !dev || !dev->msi_enabled)
 747                return;
 748
 749        pci_msi_set_enable(dev, 0);
 750        pci_intx_for_msi(dev, 1);
 751        dev->msi_enabled = 0;
 752
 753        /* Return the device with MSI unmasked as initial states */
 754        desc = msi_first_desc(&dev->dev, MSI_DESC_ALL);
 755        if (!WARN_ON_ONCE(!desc))
 756                pci_msi_unmask(desc, msi_multi_mask(desc));
 757
 758        /* Restore dev->irq to its default pin-assertion IRQ */
 759        dev->irq = desc->pci.msi_attrib.default_irq;
 760        pcibios_alloc_irq(dev);
 761}
 762
/*
 * Disable MSI and free all associated IRQ resources for @dev.
 * No-op when MSI is globally disabled or not enabled on this device.
 */
void pci_disable_msi(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	msi_lock_descs(&dev->dev);
	pci_msi_shutdown(dev);
	free_msi_irqs(dev);
	msi_unlock_descs(&dev->dev);
}
EXPORT_SYMBOL(pci_disable_msi);
 774
 775/**
 776 * pci_msix_vec_count - return the number of device's MSI-X table entries
 777 * @dev: pointer to the pci_dev data structure of MSI-X device function
 778 * This function returns the number of device's MSI-X table entries and
 779 * therefore the number of MSI-X vectors device is capable of sending.
 780 * It returns a negative errno if the device is not capable of sending MSI-X
 781 * interrupts.
 782 **/
 783int pci_msix_vec_count(struct pci_dev *dev)
 784{
 785        u16 control;
 786
 787        if (!dev->msix_cap)
 788                return -EINVAL;
 789
 790        pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
 791        return msix_table_size(control);
 792}
 793EXPORT_SYMBOL(pci_msix_vec_count);
 794
/*
 * Validate the MSI-X request and enable @nvec vectors. Returns 0 on
 * success, a negative errno on error, or the supported vector count
 * when @nvec exceeds it (so callers can retry with fewer vectors).
 */
static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
			     int nvec, struct irq_affinity *affd, int flags)
{
	int nr_entries;
	int i, j;

	if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0)
		return -EINVAL;

	nr_entries = pci_msix_vec_count(dev);
	if (nr_entries < 0)
		return nr_entries;
	/* PCI_IRQ_VIRTUAL allows allocating beyond the hardware table size */
	if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
		return nr_entries;

	if (entries) {
		/* Check for any invalid entries */
		for (i = 0; i < nvec; i++) {
			if (entries[i].entry >= nr_entries)
				return -EINVAL;		/* invalid entry */
			for (j = i + 1; j < nvec; j++) {
				if (entries[i].entry == entries[j].entry)
					return -EINVAL;	/* duplicate entry */
			}
		}
	}

	/* Check whether driver already requested for MSI IRQ */
	if (dev->msi_enabled) {
		pci_info(dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
		return -EINVAL;
	}
	return msix_capability_init(dev, entries, nvec, affd);
}
 829
/*
 * Disable MSI-X on @dev and re-enable INTx. For a disconnected device
 * only the software state is cleared — hardware is not touched.
 */
static void pci_msix_shutdown(struct pci_dev *dev)
{
	struct msi_desc *desc;

	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	if (pci_dev_is_disconnected(dev)) {
		dev->msix_enabled = 0;
		return;
	}

	/* Return the device with MSI-X masked as initial states */
	msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL)
		pci_msix_mask(desc);

	pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
	pci_intx_for_msi(dev, 1);
	dev->msix_enabled = 0;
	pcibios_alloc_irq(dev);
}
 851
/*
 * Disable MSI-X and free all associated IRQ resources for @dev.
 * No-op when MSI is globally disabled or MSI-X is not enabled here.
 */
void pci_disable_msix(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	msi_lock_descs(&dev->dev);
	pci_msix_shutdown(dev);
	free_msi_irqs(dev);
	msi_unlock_descs(&dev->dev);
}
EXPORT_SYMBOL(pci_disable_msix);
 863
/*
 * Allocate between @minvec and @maxvec MSI vectors for @dev, retrying
 * with the count suggested by msi_capability_init() when the full
 * request cannot be met. Returns the number of vectors allocated or a
 * negative errno.
 */
static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
				  struct irq_affinity *affd)
{
	int nvec;
	int rc;

	if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0)
		return -EINVAL;

	/* Check whether driver already requested MSI-X IRQs */
	if (dev->msix_enabled) {
		pci_info(dev, "can't enable MSI (MSI-X already enabled)\n");
		return -EINVAL;
	}

	if (maxvec < minvec)
		return -ERANGE;

	if (WARN_ON_ONCE(dev->msi_enabled))
		return -EINVAL;

	nvec = pci_msi_vec_count(dev);
	if (nvec < 0)
		return nvec;
	if (nvec < minvec)
		return -ENOSPC;

	if (nvec > maxvec)
		nvec = maxvec;

	rc = pci_setup_msi_context(dev);
	if (rc)
		return rc;

	for (;;) {
		if (affd) {
			nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
			if (nvec < minvec)
				return -ENOSPC;
		}

		rc = msi_capability_init(dev, nvec, affd);
		if (rc == 0)
			return nvec;

		if (rc < 0)
			return rc;
		if (rc < minvec)
			return -ENOSPC;

		/* Positive rc is the vector count that could be allocated */
		nvec = rc;
	}
}
 917
 918/* deprecated, don't use */
 919int pci_enable_msi(struct pci_dev *dev)
 920{
 921        int rc = __pci_enable_msi_range(dev, 1, 1, NULL);
 922        if (rc < 0)
 923                return rc;
 924        return 0;
 925}
 926EXPORT_SYMBOL(pci_enable_msi);
 927
 928static int __pci_enable_msix_range(struct pci_dev *dev,
 929                                   struct msix_entry *entries, int minvec,
 930                                   int maxvec, struct irq_affinity *affd,
 931                                   int flags)
 932{
 933        int rc, nvec = maxvec;
 934
 935        if (maxvec < minvec)
 936                return -ERANGE;
 937
 938        if (WARN_ON_ONCE(dev->msix_enabled))
 939                return -EINVAL;
 940
 941        rc = pci_setup_msi_context(dev);
 942        if (rc)
 943                return rc;
 944
 945        for (;;) {
 946                if (affd) {
 947                        nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
 948                        if (nvec < minvec)
 949                                return -ENOSPC;
 950                }
 951
 952                rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
 953                if (rc == 0)
 954                        return nvec;
 955
 956                if (rc < 0)
 957                        return rc;
 958                if (rc < minvec)
 959                        return -ENOSPC;
 960
 961                nvec = rc;
 962        }
 963}
 964
 965/**
 966 * pci_enable_msix_range - configure device's MSI-X capability structure
 967 * @dev: pointer to the pci_dev data structure of MSI-X device function
 968 * @entries: pointer to an array of MSI-X entries
 969 * @minvec: minimum number of MSI-X IRQs requested
 970 * @maxvec: maximum number of MSI-X IRQs requested
 971 *
 972 * Setup the MSI-X capability structure of device function with a maximum
 973 * possible number of interrupts in the range between @minvec and @maxvec
 974 * upon its software driver call to request for MSI-X mode enabled on its
 975 * hardware device function. It returns a negative errno if an error occurs.
 976 * If it succeeds, it returns the actual number of interrupts allocated and
 977 * indicates the successful configuration of MSI-X capability structure
 978 * with new allocated MSI-X interrupts.
 979 **/
 980int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
 981                int minvec, int maxvec)
 982{
 983        return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
 984}
 985EXPORT_SYMBOL(pci_enable_msix_range);
 986
 987/**
 988 * pci_alloc_irq_vectors_affinity - allocate multiple IRQs for a device
 989 * @dev:                PCI device to operate on
 990 * @min_vecs:           minimum number of vectors required (must be >= 1)
 991 * @max_vecs:           maximum (desired) number of vectors
 992 * @flags:              flags or quirks for the allocation
 993 * @affd:               optional description of the affinity requirements
 994 *
 995 * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI
 996 * vectors if available, and fall back to a single legacy vector
 997 * if neither is available.  Return the number of vectors allocated,
 998 * (which might be smaller than @max_vecs) if successful, or a negative
 999 * error code on error. If less than @min_vecs interrupt vectors are
1000 * available for @dev the function will fail with -ENOSPC.
1001 *
1002 * To get the Linux IRQ number used for a vector that can be passed to
1003 * request_irq() use the pci_irq_vector() helper.
1004 */
1005int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
1006                                   unsigned int max_vecs, unsigned int flags,
1007                                   struct irq_affinity *affd)
1008{
1009        struct irq_affinity msi_default_affd = {0};
1010        int nvecs = -ENOSPC;
1011
1012        if (flags & PCI_IRQ_AFFINITY) {
1013                if (!affd)
1014                        affd = &msi_default_affd;
1015        } else {
1016                if (WARN_ON(affd))
1017                        affd = NULL;
1018        }
1019
1020        if (flags & PCI_IRQ_MSIX) {
1021                nvecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
1022                                                affd, flags);
1023                if (nvecs > 0)
1024                        return nvecs;
1025        }
1026
1027        if (flags & PCI_IRQ_MSI) {
1028                nvecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd);
1029                if (nvecs > 0)
1030                        return nvecs;
1031        }
1032
1033        /* use legacy IRQ if allowed */
1034        if (flags & PCI_IRQ_LEGACY) {
1035                if (min_vecs == 1 && dev->irq) {
1036                        /*
1037                         * Invoke the affinity spreading logic to ensure that
1038                         * the device driver can adjust queue configuration
1039                         * for the single interrupt case.
1040                         */
1041                        if (affd)
1042                                irq_create_affinity_masks(1, affd);
1043                        pci_intx(dev, 1);
1044                        return 1;
1045                }
1046        }
1047
1048        return nvecs;
1049}
1050EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity);
1051
1052/**
1053 * pci_free_irq_vectors - free previously allocated IRQs for a device
1054 * @dev:                PCI device to operate on
1055 *
1056 * Undoes the allocations and enabling in pci_alloc_irq_vectors().
1057 */
1058void pci_free_irq_vectors(struct pci_dev *dev)
1059{
1060        pci_disable_msix(dev);
1061        pci_disable_msi(dev);
1062}
1063EXPORT_SYMBOL(pci_free_irq_vectors);
1064
1065/**
1066 * pci_irq_vector - return Linux IRQ number of a device vector
1067 * @dev:        PCI device to operate on
1068 * @nr:         Interrupt vector index (0-based)
1069 *
1070 * @nr has the following meanings depending on the interrupt mode:
1071 *   MSI-X:     The index in the MSI-X vector table
1072 *   MSI:       The index of the enabled MSI vectors
1073 *   INTx:      Must be 0
1074 *
1075 * Return: The Linux interrupt number or -EINVAl if @nr is out of range.
1076 */
1077int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
1078{
1079        unsigned int irq;
1080
1081        if (!dev->msi_enabled && !dev->msix_enabled)
1082                return !nr ? dev->irq : -EINVAL;
1083
1084        irq = msi_get_virq(&dev->dev, nr);
1085        return irq ? irq : -EINVAL;
1086}
1087EXPORT_SYMBOL(pci_irq_vector);
1088
1089/**
1090 * pci_irq_get_affinity - return the affinity of a particular MSI vector
1091 * @dev:        PCI device to operate on
1092 * @nr:         device-relative interrupt vector index (0-based).
1093 *
1094 * @nr has the following meanings depending on the interrupt mode:
1095 *   MSI-X:     The index in the MSI-X vector table
1096 *   MSI:       The index of the enabled MSI vectors
1097 *   INTx:      Must be 0
1098 *
1099 * Return: A cpumask pointer or NULL if @nr is out of range
1100 */
1101const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
1102{
1103        int idx, irq = pci_irq_vector(dev, nr);
1104        struct msi_desc *desc;
1105
1106        if (WARN_ON_ONCE(irq <= 0))
1107                return NULL;
1108
1109        desc = irq_get_msi_desc(irq);
1110        /* Non-MSI does not have the information handy */
1111        if (!desc)
1112                return cpu_possible_mask;
1113
1114        /* MSI[X] interrupts can be allocated without affinity descriptor */
1115        if (!desc->affinity)
1116                return NULL;
1117
1118        /*
1119         * MSI has a mask array in the descriptor.
1120         * MSI-X has a single mask.
1121         */
1122        idx = dev->msi_enabled ? nr : 0;
1123        return &desc->affinity[idx].mask;
1124}
1125EXPORT_SYMBOL(pci_irq_get_affinity);
1126
/* Return the PCI device that owns the generic device @desc is attached to */
struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
{
        return to_pci_dev(desc->dev);
}
EXPORT_SYMBOL(msi_desc_to_pci_dev);
1132
/* Globally disable MSI/MSI-X (invoked for the "pci=nomsi" command line option) */
void pci_no_msi(void)
{
        pci_msi_enable = 0;
}
1137
1138/**
1139 * pci_msi_enabled - is MSI enabled?
1140 *
1141 * Returns true if MSI has not been disabled by the command-line option
1142 * pci=nomsi.
1143 **/
1144int pci_msi_enabled(void)
1145{
1146        return pci_msi_enable;
1147}
1148EXPORT_SYMBOL(pci_msi_enabled);
1149