linux/drivers/pci/controller/vmd.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Volume Management Device driver
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/pci.h>
#include <linux/srcu.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>

#include <asm/irqdomain.h>
#include <asm/device.h>
#include <asm/msi.h>
#include <asm/msidef.h>

#define VMD_CFGBAR      0
#define VMD_MEMBAR1     2
#define VMD_MEMBAR2     4

#define PCI_REG_VMCAP           0x40
#define BUS_RESTRICT_CAP(vmcap) (vmcap & 0x1)
#define PCI_REG_VMCONFIG        0x44
#define BUS_RESTRICT_CFG(vmcfg) ((vmcfg >> 8) & 0x3)
#define PCI_REG_VMLOCK          0x70
#define MB2_SHADOW_EN(vmlock)   (vmlock & 0x2)

#define MB2_SHADOW_OFFSET       0x2000
#define MB2_SHADOW_SIZE         16
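
/*
 * MEMBAR2 shadow layout as consumed in vmd_enable_domain() below (an
 * informal sketch, inferred from the readq() offsets used there): two
 * 64-bit registers starting at MB2_SHADOW_OFFSET hold the host physical
 * bases of the membars.
 *
 *   MEMBAR2 + 0x2000: host physical base of MEMBAR1
 *   MEMBAR2 + 0x2008: host physical base of MEMBAR2
 */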

enum vmd_features {
        /*
         * Device may contain registers which hint at the physical location
         * of the membars, in order to allow proper address translation
         * during resource assignment and enable guest virtualization.
         */
        VMD_FEAT_HAS_MEMBAR_SHADOW              = (1 << 0),

        /*
         * Device may provide root port configuration information which
         * limits bus numbering.
         */
        VMD_FEAT_HAS_BUS_RESTRICTIONS           = (1 << 1),

        /*
         * Device contains physical location shadow registers in
         * vendor-specific capability space.
         */
        VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP        = (1 << 2),
};
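
/*
 * The feature bits above are wired to specific device IDs in vmd_ids[] at
 * the bottom of this file: device 0x28c0 uses the VMLOCK/MEMBAR shadow
 * registers, while 0x201d and the later parts expose the vendor-specific
 * capability variant instead.
 */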

/*
 * Lock for manipulating VMD IRQ lists.
 */
static DEFINE_RAW_SPINLOCK(list_lock);

/**
 * struct vmd_irq - private data to map driver IRQ to the VMD shared vector
 * @node:       list item for parent traversal.
 * @irq:        back pointer to parent vmd_irq_list.
 * @enabled:    true if driver enabled IRQ
 * @virq:       the virtual IRQ value provided to the requesting driver.
 *
 * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
 * a VMD IRQ using this structure.
 */
struct vmd_irq {
        struct list_head        node;
        struct vmd_irq_list     *irq;
        bool                    enabled;
        unsigned int            virq;
};

/**
 * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
 * @irq_list:   the list of child IRQs the VMD vector demuxes to.
 * @srcu:       SRCU struct for local synchronization.
 * @count:      number of child IRQs assigned to this vector; used to track
 *              sharing.
 */
struct vmd_irq_list {
        struct list_head        irq_list;
        struct srcu_struct      srcu;
        unsigned int            count;
};

struct vmd_dev {
        struct pci_dev          *dev;

        spinlock_t              cfg_lock;
        char __iomem            *cfgbar;

        int msix_count;
        struct vmd_irq_list     *irqs;

        struct pci_sysdata      sysdata;
        struct resource         resources[3];
        struct irq_domain       *irq_domain;
        struct pci_bus          *bus;
        u8                      busn_start;
};
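
/*
 * vmd->resources[] as filled in by vmd_enable_domain(): [0] is the bus
 * number range derived from the CFGBAR size, while [1] and [2] mirror
 * MEMBAR1/MEMBAR2 and become the windows from which child device BARs are
 * assigned.
 */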

static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
{
        return container_of(bus->sysdata, struct vmd_dev, sysdata);
}

static inline unsigned int index_from_irqs(struct vmd_dev *vmd,
                                           struct vmd_irq_list *irqs)
{
        return irqs - vmd->irqs;
}

/*
 * Drivers managing a device in a VMD domain allocate their own IRQs as before,
 * but the MSI entry for the hardware it's driving will be programmed with a
 * destination ID for the VMD MSI-X table.  The VMD muxes interrupts in its
 * domain into one of its own, and the VMD driver de-muxes these for the
 * handlers sharing that VMD IRQ.  The vmd irq_domain provides the operations
 * and irq_chip to set this up.
 */
static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
        struct vmd_irq *vmdirq = data->chip_data;
        struct vmd_irq_list *irq = vmdirq->irq;
        struct vmd_dev *vmd = irq_data_get_irq_handler_data(data);

        msg->address_hi = MSI_ADDR_BASE_HI;
        msg->address_lo = MSI_ADDR_BASE_LO |
                          MSI_ADDR_DEST_ID(index_from_irqs(vmd, irq));
        msg->data = 0;
}
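
/*
 * Worked example (assuming the x86 msidef.h encoding, where the destination
 * ID field sits at bits 19:12 of the address): a child IRQ mapped to VMD
 * vector index 3 gets address_lo = 0xfee00000 | (3 << 12) = 0xfee03000,
 * which lets the VMD endpoint identify which of its own MSI-X vectors to
 * signal.
 */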

/*
 * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops.
 */
static void vmd_irq_enable(struct irq_data *data)
{
        struct vmd_irq *vmdirq = data->chip_data;
        unsigned long flags;

        raw_spin_lock_irqsave(&list_lock, flags);
        WARN_ON(vmdirq->enabled);
        list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
        vmdirq->enabled = true;
        raw_spin_unlock_irqrestore(&list_lock, flags);

        data->chip->irq_unmask(data);
}

static void vmd_irq_disable(struct irq_data *data)
{
        struct vmd_irq *vmdirq = data->chip_data;
        unsigned long flags;

        data->chip->irq_mask(data);

        raw_spin_lock_irqsave(&list_lock, flags);
        if (vmdirq->enabled) {
                list_del_rcu(&vmdirq->node);
                vmdirq->enabled = false;
        }
        raw_spin_unlock_irqrestore(&list_lock, flags);
}

/*
 * XXX: Stubbed until we develop an acceptable way to not create conflicts
 * with other devices sharing the same vector.
 */
static int vmd_irq_set_affinity(struct irq_data *data,
                                const struct cpumask *dest, bool force)
{
        return -EINVAL;
}

static struct irq_chip vmd_msi_controller = {
        .name                   = "VMD-MSI",
        .irq_enable             = vmd_irq_enable,
        .irq_disable            = vmd_irq_disable,
        .irq_compose_msi_msg    = vmd_compose_msi_msg,
        .irq_set_affinity       = vmd_irq_set_affinity,
};

static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info,
                                     msi_alloc_info_t *arg)
{
        return 0;
}

/*
 * XXX: We can be even smarter selecting the best IRQ once we solve the
 * affinity problem.
 */
static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
{
        int i, best = 1;
        unsigned long flags;

        if (vmd->msix_count == 1)
                return &vmd->irqs[0];

        /*
         * Whitelist for fast-interrupt handlers. All others will share the
         * "slow" interrupt vector.
         */
        switch (msi_desc_to_pci_dev(desc)->class) {
        case PCI_CLASS_STORAGE_EXPRESS:
                break;
        default:
                return &vmd->irqs[0];
        }

        raw_spin_lock_irqsave(&list_lock, flags);
        for (i = 1; i < vmd->msix_count; i++)
                if (vmd->irqs[i].count < vmd->irqs[best].count)
                        best = i;
        vmd->irqs[best].count++;
        raw_spin_unlock_irqrestore(&list_lock, flags);

        return &vmd->irqs[best];
}
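
/*
 * Allocation policy in short: vector 0 is the shared "slow" vector for every
 * device class except NVMe (PCI_CLASS_STORAGE_EXPRESS); NVMe interrupts are
 * spread over vectors 1..msix_count-1 by picking the least-loaded one.
 */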

static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
                        unsigned int virq, irq_hw_number_t hwirq,
                        msi_alloc_info_t *arg)
{
        struct msi_desc *desc = arg->desc;
        struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
        struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
        unsigned int index, vector;

        if (!vmdirq)
                return -ENOMEM;

        INIT_LIST_HEAD(&vmdirq->node);
        vmdirq->irq = vmd_next_irq(vmd, desc);
        vmdirq->virq = virq;
        index = index_from_irqs(vmd, vmdirq->irq);
        vector = pci_irq_vector(vmd->dev, index);

        irq_domain_set_info(domain, virq, vector, info->chip, vmdirq,
                            handle_untracked_irq, vmd, NULL);
        return 0;
}

static void vmd_msi_free(struct irq_domain *domain,
                        struct msi_domain_info *info, unsigned int virq)
{
        struct vmd_irq *vmdirq = irq_get_chip_data(virq);
        unsigned long flags;

        synchronize_srcu(&vmdirq->irq->srcu);

        /* XXX: Potential optimization to rebalance */
        raw_spin_lock_irqsave(&list_lock, flags);
        vmdirq->irq->count--;
        raw_spin_unlock_irqrestore(&list_lock, flags);

        kfree(vmdirq);
}
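
/*
 * Teardown ordering note: vmd_irq_disable() drops the entry from the demux
 * list with list_del_rcu(), and the synchronize_srcu() above waits out any
 * vmd_irq() handler still walking that list under srcu_read_lock() before
 * the vmd_irq structure is freed.
 */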

static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev,
                           int nvec, msi_alloc_info_t *arg)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct vmd_dev *vmd = vmd_from_bus(pdev->bus);

        if (nvec > vmd->msix_count)
                return vmd->msix_count;

        memset(arg, 0, sizeof(*arg));
        return 0;
}

static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
{
        arg->desc = desc;
}

static struct msi_domain_ops vmd_msi_domain_ops = {
        .get_hwirq      = vmd_get_hwirq,
        .msi_init       = vmd_msi_init,
        .msi_free       = vmd_msi_free,
        .msi_prepare    = vmd_msi_prepare,
        .set_desc       = vmd_set_desc,
};

static struct msi_domain_info vmd_msi_domain_info = {
        .flags          = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
                          MSI_FLAG_PCI_MSIX,
        .ops            = &vmd_msi_domain_ops,
        .chip           = &vmd_msi_controller,
};
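
/*
 * These ops and the chip above are stitched together in vmd_enable_domain():
 * pci_msi_create_irq_domain() builds the MSI domain on top of
 * x86_vector_domain, and dev_set_msi_domain() attaches it to the VMD root
 * bus so every child device's MSI-X allocation flows through
 * vmd_msi_init()/vmd_msi_free().
 */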

static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus,
                                  unsigned int devfn, int reg, int len)
{
        char __iomem *addr = vmd->cfgbar +
                             ((bus->number - vmd->busn_start) << 20) +
                             (devfn << 12) + reg;

        if ((addr - vmd->cfgbar) + len >=
            resource_size(&vmd->dev->resource[VMD_CFGBAR]))
                return NULL;

        return addr;
}
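
/*
 * CFGBAR layout example (the ECAM-style encoding used above): a device on
 * bus busn_start + 1 with devfn 0x08 (device 1, function 0) and reg 0x10
 * maps to cfgbar offset (1 << 20) + (0x08 << 12) + 0x10 = 0x108010.
 */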

/*
 * CPU may deadlock if config space is not serialized on some versions of this
 * hardware, so all config space access is done under a spinlock.
 */
static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg,
                        int len, u32 *value)
{
        struct vmd_dev *vmd = vmd_from_bus(bus);
        char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
        unsigned long flags;
        int ret = 0;

        if (!addr)
                return -EFAULT;

        spin_lock_irqsave(&vmd->cfg_lock, flags);
        switch (len) {
        case 1:
                *value = readb(addr);
                break;
        case 2:
                *value = readw(addr);
                break;
        case 4:
                *value = readl(addr);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&vmd->cfg_lock, flags);
        return ret;
}

/*
 * VMD h/w converts non-posted config writes to posted memory writes. The
 * read-back in this function forces the completion so it returns only after
 * the config space was written, as expected.
 */
static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
                         int len, u32 value)
{
        struct vmd_dev *vmd = vmd_from_bus(bus);
        char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
        unsigned long flags;
        int ret = 0;

        if (!addr)
                return -EFAULT;

        spin_lock_irqsave(&vmd->cfg_lock, flags);
        switch (len) {
        case 1:
                writeb(value, addr);
                readb(addr);
                break;
        case 2:
                writew(value, addr);
                readw(addr);
                break;
        case 4:
                writel(value, addr);
                readl(addr);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&vmd->cfg_lock, flags);
        return ret;
}

static struct pci_ops vmd_ops = {
        .read           = vmd_pci_read,
        .write          = vmd_pci_write,
};

static void vmd_attach_resources(struct vmd_dev *vmd)
{
        vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
        vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2];
}

static void vmd_detach_resources(struct vmd_dev *vmd)
{
        vmd->dev->resource[VMD_MEMBAR1].child = NULL;
        vmd->dev->resource[VMD_MEMBAR2].child = NULL;
}

/*
 * VMD domains start at 0x10000 to not clash with ACPI _SEG domains.
 * Per ACPI r6.0, sec 6.5.6, _SEG returns an integer, of which the lower
 * 16 bits are the PCI Segment Group (domain) number.  Other bits are
 * currently reserved.
 */
static int vmd_find_free_domain(void)
{
        int domain = 0xffff;
        struct pci_bus *bus = NULL;

        while ((bus = pci_find_next_bus(bus)) != NULL)
                domain = max_t(int, domain, pci_domain_nr(bus));
        return domain + 1;
}
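
/*
 * Example: if every host bridge already present reports an ACPI _SEG domain
 * <= 0xffff, the scan leaves domain at 0xffff and the first VMD instance is
 * assigned segment 0x10000; a second instance would then get 0x10001.
 */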

static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
{
        struct pci_sysdata *sd = &vmd->sysdata;
        struct fwnode_handle *fn;
        struct resource *res;
        u32 upper_bits;
        unsigned long flags;
        LIST_HEAD(resources);
        resource_size_t offset[2] = {0};
        resource_size_t membar2_offset = 0x2000;
        struct pci_bus *child;

        /*
         * Shadow registers may exist in certain VMD device IDs which allow
         * guests to correctly assign host physical addresses to the root ports
         * and child devices. These registers will either return the host value
         * or 0, depending on an enable bit in the VMD device.
         */
        if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
                u32 vmlock;
                int ret;

                membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
                ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock);
                if (ret || vmlock == ~0)
                        return -ENODEV;

                if (MB2_SHADOW_EN(vmlock)) {
                        void __iomem *membar2;

                        membar2 = pci_iomap(vmd->dev, VMD_MEMBAR2, 0);
                        if (!membar2)
                                return -ENOMEM;
                        offset[0] = vmd->dev->resource[VMD_MEMBAR1].start -
                                        (readq(membar2 + MB2_SHADOW_OFFSET) &
                                         PCI_BASE_ADDRESS_MEM_MASK);
                        offset[1] = vmd->dev->resource[VMD_MEMBAR2].start -
                                        (readq(membar2 + MB2_SHADOW_OFFSET + 8) &
                                         PCI_BASE_ADDRESS_MEM_MASK);
                        pci_iounmap(vmd->dev, membar2);
                }
        }

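        /*
         * Vendor-specific capability layout, as consumed just below (offsets
         * relative to the capability position): the dword at +4 carries the
         * ASCII magic "SHDW", dwords at +8/+12 the low/high halves of the
         * MEMBAR1 host base, and dwords at +16/+20 those of MEMBAR2.
         */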
        if (features & VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP) {
                int pos = pci_find_capability(vmd->dev, PCI_CAP_ID_VNDR);
                u32 reg, regu;

                pci_read_config_dword(vmd->dev, pos + 4, &reg);

                /* "SHDW" */
                if (pos && reg == 0x53484457) {
                        pci_read_config_dword(vmd->dev, pos + 8, &reg);
                        pci_read_config_dword(vmd->dev, pos + 12, &regu);
                        offset[0] = vmd->dev->resource[VMD_MEMBAR1].start -
                                        (((u64) regu << 32 | reg) &
                                         PCI_BASE_ADDRESS_MEM_MASK);

                        pci_read_config_dword(vmd->dev, pos + 16, &reg);
                        pci_read_config_dword(vmd->dev, pos + 20, &regu);
                        offset[1] = vmd->dev->resource[VMD_MEMBAR2].start -
                                        (((u64) regu << 32 | reg) &
                                         PCI_BASE_ADDRESS_MEM_MASK);
                }
        }

        /*
         * Certain VMD devices may have a root port configuration option which
         * limits the bus range to 0-127, 128-255, or 224-255.
         */
        if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
                u16 reg16;

                pci_read_config_word(vmd->dev, PCI_REG_VMCAP, &reg16);
                if (BUS_RESTRICT_CAP(reg16)) {
                        pci_read_config_word(vmd->dev, PCI_REG_VMCONFIG,
                                             &reg16);

                        switch (BUS_RESTRICT_CFG(reg16)) {
                        case 1:
                                vmd->busn_start = 128;
                                break;
                        case 2:
                                vmd->busn_start = 224;
                                break;
                        case 3:
                                pci_err(vmd->dev, "Unknown Bus Offset Setting\n");
                                return -ENODEV;
                        default:
                                break;
                        }
                }
        }

        res = &vmd->dev->resource[VMD_CFGBAR];
        vmd->resources[0] = (struct resource) {
                .name  = "VMD CFGBAR",
                .start = vmd->busn_start,
                .end   = vmd->busn_start + (resource_size(res) >> 20) - 1,
                .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
        };

        /*
         * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can
         * put 32-bit resources in the window.
         *
         * There's no hardware reason why a 64-bit window *couldn't*
         * contain a 32-bit resource, but pbus_size_mem() computes the
         * bridge window size assuming a 64-bit window will contain no
         * 32-bit resources.  __pci_assign_resource() enforces that
         * artificial restriction to make sure everything will fit.
         *
         * The only way we could use a 64-bit non-prefetchable MEMBAR is
         * if its address is <4GB so that we can convert it to a 32-bit
         * resource.  To be visible to the host OS, all VMD endpoints must
         * be initially configured by platform BIOS, which includes setting
         * up these resources.  We can assume the device is configured
         * according to the platform needs.
         */
        res = &vmd->dev->resource[VMD_MEMBAR1];
        upper_bits = upper_32_bits(res->end);
        flags = res->flags & ~IORESOURCE_SIZEALIGN;
        if (!upper_bits)
                flags &= ~IORESOURCE_MEM_64;
        vmd->resources[1] = (struct resource) {
                .name  = "VMD MEMBAR1",
                .start = res->start,
                .end   = res->end,
                .flags = flags,
                .parent = res,
        };

        res = &vmd->dev->resource[VMD_MEMBAR2];
        upper_bits = upper_32_bits(res->end);
        flags = res->flags & ~IORESOURCE_SIZEALIGN;
        if (!upper_bits)
                flags &= ~IORESOURCE_MEM_64;
        vmd->resources[2] = (struct resource) {
                .name  = "VMD MEMBAR2",
                .start = res->start + membar2_offset,
                .end   = res->end,
                .flags = flags,
                .parent = res,
        };

        sd->vmd_dev = vmd->dev;
        sd->domain = vmd_find_free_domain();
        if (sd->domain < 0)
                return sd->domain;

        sd->node = pcibus_to_node(vmd->dev->bus);

        fn = irq_domain_alloc_named_id_fwnode("VMD-MSI", vmd->sysdata.domain);
        if (!fn)
                return -ENODEV;

        vmd->irq_domain = pci_msi_create_irq_domain(fn, &vmd_msi_domain_info,
                                                    x86_vector_domain);
        if (!vmd->irq_domain) {
                irq_domain_free_fwnode(fn);
                return -ENODEV;
        }

        pci_add_resource(&resources, &vmd->resources[0]);
        pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]);
        pci_add_resource_offset(&resources, &vmd->resources[2], offset[1]);

        vmd->bus = pci_create_root_bus(&vmd->dev->dev, vmd->busn_start,
                                       &vmd_ops, sd, &resources);
        if (!vmd->bus) {
                pci_free_resource_list(&resources);
                irq_domain_remove(vmd->irq_domain);
                irq_domain_free_fwnode(fn);
                return -ENODEV;
        }

        vmd_attach_resources(vmd);
        dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain);

        pci_scan_child_bus(vmd->bus);
        pci_assign_unassigned_bus_resources(vmd->bus);

        /*
         * VMD root buses are virtual, don't return true on pci_is_pcie(),
         * and would fail pcie_bus_configure_settings() early. Run it on each
         * of the real root ports instead.
         */
        list_for_each_entry(child, &vmd->bus->children, node)
                pcie_bus_configure_settings(child);

        pci_bus_add_devices(vmd->bus);

        WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj,
                               "domain"), "Can't create symlink to domain\n");
        return 0;
}

static irqreturn_t vmd_irq(int irq, void *data)
{
        struct vmd_irq_list *irqs = data;
        struct vmd_irq *vmdirq;
        int idx;

        idx = srcu_read_lock(&irqs->srcu);
        list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node)
                generic_handle_irq(vmdirq->virq);
        srcu_read_unlock(&irqs->srcu, idx);

        return IRQ_HANDLED;
}

static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
        struct vmd_dev *vmd;
        int i, err;

        if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20))
                return -ENOMEM;

        vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL);
        if (!vmd)
                return -ENOMEM;

        vmd->dev = dev;
        err = pcim_enable_device(dev);
        if (err < 0)
                return err;

        vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0);
        if (!vmd->cfgbar)
                return -ENOMEM;

        pci_set_master(dev);
        if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) &&
            dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32)))
                return -ENODEV;

        vmd->msix_count = pci_msix_vec_count(dev);
        if (vmd->msix_count < 0)
                return -ENODEV;

        vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count,
                                        PCI_IRQ_MSIX);
        if (vmd->msix_count < 0)
                return vmd->msix_count;

        vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs),
                                 GFP_KERNEL);
        if (!vmd->irqs)
                return -ENOMEM;

        for (i = 0; i < vmd->msix_count; i++) {
                err = init_srcu_struct(&vmd->irqs[i].srcu);
                if (err)
                        return err;

                INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
                err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
                                       vmd_irq, IRQF_NO_THREAD,
                                       "vmd", &vmd->irqs[i]);
                if (err)
                        return err;
        }

        spin_lock_init(&vmd->cfg_lock);
        pci_set_drvdata(dev, vmd);
        err = vmd_enable_domain(vmd, (unsigned long) id->driver_data);
        if (err)
                return err;

        dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n",
                 vmd->sysdata.domain);
        return 0;
}

static void vmd_cleanup_srcu(struct vmd_dev *vmd)
{
        int i;

        for (i = 0; i < vmd->msix_count; i++)
                cleanup_srcu_struct(&vmd->irqs[i].srcu);
}

static void vmd_remove(struct pci_dev *dev)
{
        struct vmd_dev *vmd = pci_get_drvdata(dev);
        struct fwnode_handle *fn = vmd->irq_domain->fwnode;

        sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
        pci_stop_root_bus(vmd->bus);
        pci_remove_root_bus(vmd->bus);
        vmd_cleanup_srcu(vmd);
        vmd_detach_resources(vmd);
        irq_domain_remove(vmd->irq_domain);
        irq_domain_free_fwnode(fn);
}

#ifdef CONFIG_PM_SLEEP
static int vmd_suspend(struct device *dev)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct vmd_dev *vmd = pci_get_drvdata(pdev);
        int i;

        for (i = 0; i < vmd->msix_count; i++)
                devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]);

        pci_save_state(pdev);
        return 0;
}

static int vmd_resume(struct device *dev)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct vmd_dev *vmd = pci_get_drvdata(pdev);
        int err, i;

        for (i = 0; i < vmd->msix_count; i++) {
                err = devm_request_irq(dev, pci_irq_vector(pdev, i),
                                       vmd_irq, IRQF_NO_THREAD,
                                       "vmd", &vmd->irqs[i]);
                if (err)
                        return err;
        }

        pci_restore_state(pdev);
        return 0;
}
#endif
static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume);

static const struct pci_device_id vmd_ids[] = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_201D),
                .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP,},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_28C0),
                .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW |
                                VMD_FEAT_HAS_BUS_RESTRICTIONS,},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x467f),
                .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP |
                                VMD_FEAT_HAS_BUS_RESTRICTIONS,},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4c3d),
                .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP |
                                VMD_FEAT_HAS_BUS_RESTRICTIONS,},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_9A0B),
                .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP |
                                VMD_FEAT_HAS_BUS_RESTRICTIONS,},
        {0,}
};
MODULE_DEVICE_TABLE(pci, vmd_ids);

static struct pci_driver vmd_drv = {
        .name           = "vmd",
        .id_table       = vmd_ids,
        .probe          = vmd_probe,
        .remove         = vmd_remove,
        .driver         = {
                .pm     = &vmd_dev_pm_ops,
        },
};
module_pci_driver(vmd_drv);

MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
MODULE_VERSION("0.6");