linux/drivers/pci/controller/vmd.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Volume Management Device driver
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/pci.h>
#include <linux/srcu.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>

#include <asm/irqdomain.h>
#include <asm/device.h>
#include <asm/msi.h>
#include <asm/msidef.h>

#define VMD_CFGBAR      0
#define VMD_MEMBAR1     2
#define VMD_MEMBAR2     4

#define PCI_REG_VMCAP           0x40
#define BUS_RESTRICT_CAP(vmcap) (vmcap & 0x1)
#define PCI_REG_VMCONFIG        0x44
#define BUS_RESTRICT_CFG(vmcfg) ((vmcfg >> 8) & 0x3)
#define PCI_REG_VMLOCK          0x70
#define MB2_SHADOW_EN(vmlock)   (vmlock & 0x2)

#define MB2_SHADOW_OFFSET       0x2000
#define MB2_SHADOW_SIZE         16

enum vmd_features {
        /*
         * Device may contain registers which hint at the physical location of
         * the membars, in order to allow proper address translation during
         * resource assignment to enable guest virtualization.
         */
        VMD_FEAT_HAS_MEMBAR_SHADOW      = (1 << 0),

        /*
         * Device may provide root port configuration information which limits
         * bus numbering.
         */
        VMD_FEAT_HAS_BUS_RESTRICTIONS   = (1 << 1),
};

/*
 * Lock for manipulating VMD IRQ lists.
 */
static DEFINE_RAW_SPINLOCK(list_lock);

/**
 * struct vmd_irq - private data to map driver IRQ to the VMD shared vector
 * @node:       list item for parent traversal.
 * @irq:        back pointer to the parent vmd_irq_list.
 * @enabled:    true if driver enabled IRQ
 * @virq:       the virtual IRQ value provided to the requesting driver.
 *
 * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
 * a VMD IRQ using this structure.
 */
struct vmd_irq {
        struct list_head        node;
        struct vmd_irq_list     *irq;
        bool                    enabled;
        unsigned int            virq;
};

/**
 * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
 * @irq_list:   the list of IRQs the VMD vector demuxes to.
 * @srcu:       SRCU struct for local synchronization.
 * @count:      number of child IRQs assigned to this vector; used to track
 *              sharing.
 */
struct vmd_irq_list {
        struct list_head        irq_list;
        struct srcu_struct      srcu;
        unsigned int            count;
};

struct vmd_dev {
        struct pci_dev          *dev;

        spinlock_t              cfg_lock;
        char __iomem            *cfgbar;

        int msix_count;
        struct vmd_irq_list     *irqs;

        struct pci_sysdata      sysdata;
        struct resource         resources[3];
        struct irq_domain       *irq_domain;
        struct pci_bus          *bus;
        u8                      busn_start;

#ifdef CONFIG_X86_DEV_DMA_OPS
        struct dma_map_ops      dma_ops;
        struct dma_domain       dma_domain;
#endif
};

static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
{
        return container_of(bus->sysdata, struct vmd_dev, sysdata);
}

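/*
 * Derive the VMD MSI-X vector index of a vmd_irq_list entry from its position
 * in the vmd->irqs array.
 */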
static inline unsigned int index_from_irqs(struct vmd_dev *vmd,
                                           struct vmd_irq_list *irqs)
{
        return irqs - vmd->irqs;
}

/*
 * Drivers managing a device in a VMD domain allocate their own IRQs as before,
 * but the MSI entry for the hardware it's driving will be programmed with a
 * destination ID for the VMD MSI-X table.  The VMD muxes interrupts in its
 * domain into one of its own, and the VMD driver de-muxes these for the
 * handlers sharing that VMD IRQ.  The vmd irq_domain provides the operations
 * and irq_chip to set this up.
 */
static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
        struct vmd_irq *vmdirq = data->chip_data;
        struct vmd_irq_list *irq = vmdirq->irq;
        struct vmd_dev *vmd = irq_data_get_irq_handler_data(data);

        msg->address_hi = MSI_ADDR_BASE_HI;
        msg->address_lo = MSI_ADDR_BASE_LO |
                          MSI_ADDR_DEST_ID(index_from_irqs(vmd, irq));
        msg->data = 0;
}

/*
 * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops.
 */
static void vmd_irq_enable(struct irq_data *data)
{
        struct vmd_irq *vmdirq = data->chip_data;
        unsigned long flags;

        raw_spin_lock_irqsave(&list_lock, flags);
        WARN_ON(vmdirq->enabled);
        list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
        vmdirq->enabled = true;
        raw_spin_unlock_irqrestore(&list_lock, flags);

        data->chip->irq_unmask(data);
}

static void vmd_irq_disable(struct irq_data *data)
{
        struct vmd_irq *vmdirq = data->chip_data;
        unsigned long flags;

        data->chip->irq_mask(data);

        raw_spin_lock_irqsave(&list_lock, flags);
        if (vmdirq->enabled) {
                list_del_rcu(&vmdirq->node);
                vmdirq->enabled = false;
        }
        raw_spin_unlock_irqrestore(&list_lock, flags);
}

/*
 * XXX: Stubbed until we develop acceptable way to not create conflicts with
 * other devices sharing the same vector.
 */
static int vmd_irq_set_affinity(struct irq_data *data,
                                const struct cpumask *dest, bool force)
{
        return -EINVAL;
}

static struct irq_chip vmd_msi_controller = {
        .name                   = "VMD-MSI",
        .irq_enable             = vmd_irq_enable,
        .irq_disable            = vmd_irq_disable,
        .irq_compose_msi_msg    = vmd_compose_msi_msg,
        .irq_set_affinity       = vmd_irq_set_affinity,
};

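/*
 * Every child MSI reports hwirq 0 here; the actual VMD vector is chosen later
 * in vmd_msi_init() via vmd_next_irq().
 */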
static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info,
                                     msi_alloc_info_t *arg)
{
        return 0;
}

/*
 * XXX: We can be even smarter selecting the best IRQ once we solve the
 * affinity problem.
 */
static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
{
        int i, best = 1;
        unsigned long flags;

        if (vmd->msix_count == 1)
                return &vmd->irqs[0];

        /*
         * White list for fast-interrupt handlers. All others will share the
         * "slow" interrupt vector.
         */
        switch (msi_desc_to_pci_dev(desc)->class) {
        case PCI_CLASS_STORAGE_EXPRESS:
                break;
        default:
                return &vmd->irqs[0];
        }

        raw_spin_lock_irqsave(&list_lock, flags);
        for (i = 1; i < vmd->msix_count; i++)
                if (vmd->irqs[i].count < vmd->irqs[best].count)
                        best = i;
        vmd->irqs[best].count++;
        raw_spin_unlock_irqrestore(&list_lock, flags);

        return &vmd->irqs[best];
}

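/*
 * Allocate the per-IRQ bookkeeping, pick a VMD vector for this child MSI via
 * vmd_next_irq(), and bind the virq to the Linux IRQ of that parent vector.
 */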
static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
                        unsigned int virq, irq_hw_number_t hwirq,
                        msi_alloc_info_t *arg)
{
        struct msi_desc *desc = arg->desc;
        struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
        struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
        unsigned int index, vector;

        if (!vmdirq)
                return -ENOMEM;

        INIT_LIST_HEAD(&vmdirq->node);
        vmdirq->irq = vmd_next_irq(vmd, desc);
        vmdirq->virq = virq;
        index = index_from_irqs(vmd, vmdirq->irq);
        vector = pci_irq_vector(vmd->dev, index);

        irq_domain_set_info(domain, virq, vector, info->chip, vmdirq,
                            handle_untracked_irq, vmd, NULL);
        return 0;
}

static void vmd_msi_free(struct irq_domain *domain,
                        struct msi_domain_info *info, unsigned int virq)
{
        struct vmd_irq *vmdirq = irq_get_chip_data(virq);
        unsigned long flags;

        synchronize_srcu(&vmdirq->irq->srcu);

        /* XXX: Potential optimization to rebalance */
        raw_spin_lock_irqsave(&list_lock, flags);
        vmdirq->irq->count--;
        raw_spin_unlock_irqrestore(&list_lock, flags);

        kfree(vmdirq);
}

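/*
 * Cap the number of vectors a child device may request at the number of VMD
 * MSI-X vectors actually available.
 */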
static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev,
                           int nvec, msi_alloc_info_t *arg)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct vmd_dev *vmd = vmd_from_bus(pdev->bus);

        if (nvec > vmd->msix_count)
                return vmd->msix_count;

        memset(arg, 0, sizeof(*arg));
        return 0;
}

static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
{
        arg->desc = desc;
}

static struct msi_domain_ops vmd_msi_domain_ops = {
        .get_hwirq      = vmd_get_hwirq,
        .msi_init       = vmd_msi_init,
        .msi_free       = vmd_msi_free,
        .msi_prepare    = vmd_msi_prepare,
        .set_desc       = vmd_set_desc,
};

static struct msi_domain_info vmd_msi_domain_info = {
        .flags          = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
                          MSI_FLAG_PCI_MSIX,
        .ops            = &vmd_msi_domain_ops,
        .chip           = &vmd_msi_controller,
};

#ifdef CONFIG_X86_DEV_DMA_OPS
/*
 * VMD replaces the requester ID with its own.  DMA mappings for devices in a
 * VMD domain need to be mapped for the VMD, not the device requiring
 * the mapping.
 */
static struct device *to_vmd_dev(struct device *dev)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct vmd_dev *vmd = vmd_from_bus(pdev->bus);

        return &vmd->dev->dev;
}

static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr,
                       gfp_t flag, unsigned long attrs)
{
        return dma_alloc_attrs(to_vmd_dev(dev), size, addr, flag, attrs);
}

static void vmd_free(struct device *dev, size_t size, void *vaddr,
                     dma_addr_t addr, unsigned long attrs)
{
        return dma_free_attrs(to_vmd_dev(dev), size, vaddr, addr, attrs);
}

static int vmd_mmap(struct device *dev, struct vm_area_struct *vma,
                    void *cpu_addr, dma_addr_t addr, size_t size,
                    unsigned long attrs)
{
        return dma_mmap_attrs(to_vmd_dev(dev), vma, cpu_addr, addr, size,
                        attrs);
}

static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt,
                           void *cpu_addr, dma_addr_t addr, size_t size,
                           unsigned long attrs)
{
        return dma_get_sgtable_attrs(to_vmd_dev(dev), sgt, cpu_addr, addr, size,
                        attrs);
}

static dma_addr_t vmd_map_page(struct device *dev, struct page *page,
                               unsigned long offset, size_t size,
                               enum dma_data_direction dir,
                               unsigned long attrs)
{
        return dma_map_page_attrs(to_vmd_dev(dev), page, offset, size, dir,
                        attrs);
}

static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
                           enum dma_data_direction dir, unsigned long attrs)
{
        dma_unmap_page_attrs(to_vmd_dev(dev), addr, size, dir, attrs);
}

static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents,
                      enum dma_data_direction dir, unsigned long attrs)
{
        return dma_map_sg_attrs(to_vmd_dev(dev), sg, nents, dir, attrs);
}

static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
                         enum dma_data_direction dir, unsigned long attrs)
{
        dma_unmap_sg_attrs(to_vmd_dev(dev), sg, nents, dir, attrs);
}

static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
                                    size_t size, enum dma_data_direction dir)
{
        dma_sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir);
}

static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr,
                                       size_t size, enum dma_data_direction dir)
{
        dma_sync_single_for_device(to_vmd_dev(dev), addr, size, dir);
}

static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
                                int nents, enum dma_data_direction dir)
{
        dma_sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir);
}

static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                                   int nents, enum dma_data_direction dir)
{
        dma_sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir);
}

static int vmd_dma_supported(struct device *dev, u64 mask)
{
        return dma_supported(to_vmd_dev(dev), mask);
}

static u64 vmd_get_required_mask(struct device *dev)
{
        return dma_get_required_mask(to_vmd_dev(dev));
}

static void vmd_teardown_dma_ops(struct vmd_dev *vmd)
{
        struct dma_domain *domain = &vmd->dma_domain;

        if (get_dma_ops(&vmd->dev->dev))
                del_dma_domain(domain);
}

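/*
 * Copy a dma_map_ops callback into the VMD shim only if the parent device's
 * ops actually implement it.
 */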
#define ASSIGN_VMD_DMA_OPS(source, dest, fn)    \
        do {                                    \
                if (source->fn)                 \
                        dest->fn = vmd_##fn;    \
        } while (0)

static void vmd_setup_dma_ops(struct vmd_dev *vmd)
{
        const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev);
        struct dma_map_ops *dest = &vmd->dma_ops;
        struct dma_domain *domain = &vmd->dma_domain;

        domain->domain_nr = vmd->sysdata.domain;
        domain->dma_ops = dest;

        if (!source)
                return;
        ASSIGN_VMD_DMA_OPS(source, dest, alloc);
        ASSIGN_VMD_DMA_OPS(source, dest, free);
        ASSIGN_VMD_DMA_OPS(source, dest, mmap);
        ASSIGN_VMD_DMA_OPS(source, dest, get_sgtable);
        ASSIGN_VMD_DMA_OPS(source, dest, map_page);
        ASSIGN_VMD_DMA_OPS(source, dest, unmap_page);
        ASSIGN_VMD_DMA_OPS(source, dest, map_sg);
        ASSIGN_VMD_DMA_OPS(source, dest, unmap_sg);
        ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_cpu);
        ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device);
        ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu);
        ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device);
        ASSIGN_VMD_DMA_OPS(source, dest, dma_supported);
        ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask);
        add_dma_domain(domain);
}
#undef ASSIGN_VMD_DMA_OPS
#else
static void vmd_teardown_dma_ops(struct vmd_dev *vmd) {}
static void vmd_setup_dma_ops(struct vmd_dev *vmd) {}
#endif

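/*
 * Locate a device's config space within the VMD CFGBAR.  The BAR is laid out
 * ECAM-style: 1MB per bus and 4KB per function, starting at vmd->busn_start.
 */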
static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus,
                                  unsigned int devfn, int reg, int len)
{
        char __iomem *addr = vmd->cfgbar +
                             ((bus->number - vmd->busn_start) << 20) +
                             (devfn << 12) + reg;

        if ((addr - vmd->cfgbar) + len >=
            resource_size(&vmd->dev->resource[VMD_CFGBAR]))
                return NULL;

        return addr;
}

/*
 * CPU may deadlock if config space is not serialized on some versions of this
 * hardware, so all config space access is done under a spinlock.
 */
static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg,
                        int len, u32 *value)
{
        struct vmd_dev *vmd = vmd_from_bus(bus);
        char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
        unsigned long flags;
        int ret = 0;

        if (!addr)
                return -EFAULT;

        spin_lock_irqsave(&vmd->cfg_lock, flags);
        switch (len) {
        case 1:
                *value = readb(addr);
                break;
        case 2:
                *value = readw(addr);
                break;
        case 4:
                *value = readl(addr);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&vmd->cfg_lock, flags);
        return ret;
}

/*
 * VMD h/w converts non-posted config writes to posted memory writes. The
 * read-back in this function forces the completion so it returns only after
 * the config space was written, as expected.
 */
static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
                         int len, u32 value)
{
        struct vmd_dev *vmd = vmd_from_bus(bus);
        char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
        unsigned long flags;
        int ret = 0;

        if (!addr)
                return -EFAULT;

        spin_lock_irqsave(&vmd->cfg_lock, flags);
        switch (len) {
        case 1:
                writeb(value, addr);
                readb(addr);
                break;
        case 2:
                writew(value, addr);
                readw(addr);
                break;
        case 4:
                writel(value, addr);
                readl(addr);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&vmd->cfg_lock, flags);
        return ret;
}

static struct pci_ops vmd_ops = {
        .read           = vmd_pci_read,
        .write          = vmd_pci_write,
};

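/*
 * Link the VMD domain's memory windows into the resource tree as children of
 * the VMD device's own MEMBARs (and unlink them again on teardown).
 */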
static void vmd_attach_resources(struct vmd_dev *vmd)
{
        vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
        vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2];
}

static void vmd_detach_resources(struct vmd_dev *vmd)
{
        vmd->dev->resource[VMD_MEMBAR1].child = NULL;
        vmd->dev->resource[VMD_MEMBAR2].child = NULL;
}

/*
 * VMD domains start at 0x10000 to not clash with ACPI _SEG domains.
 * Per ACPI r6.0, sec 6.5.6, _SEG returns an integer, of which the lower
 * 16 bits are the PCI Segment Group (domain) number.  Other bits are
 * currently reserved.
 */
static int vmd_find_free_domain(void)
{
        int domain = 0xffff;
        struct pci_bus *bus = NULL;

        while ((bus = pci_find_next_bus(bus)) != NULL)
                domain = max_t(int, domain, pci_domain_nr(bus));
        return domain + 1;
}

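/*
 * Set up the VMD-owned PCI domain: translate membar shadow offsets and bus
 * restrictions where the hardware supports them, build the CFGBAR/MEMBAR
 * resource windows, create the MSI-X remapping IRQ domain, and enumerate the
 * child bus.
 */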
static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
{
        struct pci_sysdata *sd = &vmd->sysdata;
        struct fwnode_handle *fn;
        struct resource *res;
        u32 upper_bits;
        unsigned long flags;
        LIST_HEAD(resources);
        resource_size_t offset[2] = {0};
        resource_size_t membar2_offset = 0x2000;
        struct pci_bus *child;

        /*
         * Shadow registers may exist in certain VMD device ids which allow
         * guests to correctly assign host physical addresses to the root ports
         * and child devices. These registers will either return the host value
         * or 0, depending on an enable bit in the VMD device.
         */
        if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
                u32 vmlock;
                int ret;

                membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
                ret = pci_read_config_dword(vmd->dev, PCI_REG_VMLOCK, &vmlock);
                if (ret || vmlock == ~0)
                        return -ENODEV;

                if (MB2_SHADOW_EN(vmlock)) {
                        void __iomem *membar2;

                        membar2 = pci_iomap(vmd->dev, VMD_MEMBAR2, 0);
                        if (!membar2)
                                return -ENOMEM;
                        offset[0] = vmd->dev->resource[VMD_MEMBAR1].start -
                                        readq(membar2 + MB2_SHADOW_OFFSET);
                        offset[1] = vmd->dev->resource[VMD_MEMBAR2].start -
                                        readq(membar2 + MB2_SHADOW_OFFSET + 8);
                        pci_iounmap(vmd->dev, membar2);
                }
        }

        /*
         * Certain VMD devices may have a root port configuration option which
         * limits the bus range to 0-127, 128-255, or 224-255.
         */
        if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
                u16 reg16;

                pci_read_config_word(vmd->dev, PCI_REG_VMCAP, &reg16);
                if (BUS_RESTRICT_CAP(reg16)) {
                        pci_read_config_word(vmd->dev, PCI_REG_VMCONFIG,
                                             &reg16);

                        switch (BUS_RESTRICT_CFG(reg16)) {
                        case 1:
                                vmd->busn_start = 128;
                                break;
                        case 2:
                                vmd->busn_start = 224;
                                break;
                        case 3:
                                pci_err(vmd->dev, "Unknown Bus Offset Setting\n");
                                return -ENODEV;
                        default:
                                break;
                        }
                }
        }

        res = &vmd->dev->resource[VMD_CFGBAR];
        vmd->resources[0] = (struct resource) {
                .name  = "VMD CFGBAR",
                .start = vmd->busn_start,
                .end   = vmd->busn_start + (resource_size(res) >> 20) - 1,
                .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
        };

        /*
         * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can
         * put 32-bit resources in the window.
         *
         * There's no hardware reason why a 64-bit window *couldn't*
         * contain a 32-bit resource, but pbus_size_mem() computes the
         * bridge window size assuming a 64-bit window will contain no
         * 32-bit resources.  __pci_assign_resource() enforces that
         * artificial restriction to make sure everything will fit.
         *
         * The only way we could use a 64-bit non-prefetchable MEMBAR is
         * if its address is <4GB so that we can convert it to a 32-bit
         * resource.  To be visible to the host OS, all VMD endpoints must
         * be initially configured by platform BIOS, which includes setting
         * up these resources.  We can assume the device is configured
         * according to the platform needs.
         */
        res = &vmd->dev->resource[VMD_MEMBAR1];
        upper_bits = upper_32_bits(res->end);
        flags = res->flags & ~IORESOURCE_SIZEALIGN;
        if (!upper_bits)
                flags &= ~IORESOURCE_MEM_64;
        vmd->resources[1] = (struct resource) {
                .name  = "VMD MEMBAR1",
                .start = res->start,
                .end   = res->end,
                .flags = flags,
                .parent = res,
        };

        res = &vmd->dev->resource[VMD_MEMBAR2];
        upper_bits = upper_32_bits(res->end);
        flags = res->flags & ~IORESOURCE_SIZEALIGN;
        if (!upper_bits)
                flags &= ~IORESOURCE_MEM_64;
        vmd->resources[2] = (struct resource) {
                .name  = "VMD MEMBAR2",
                .start = res->start + membar2_offset,
                .end   = res->end,
                .flags = flags,
                .parent = res,
        };

        sd->vmd_dev = vmd->dev;
        sd->domain = vmd_find_free_domain();
        if (sd->domain < 0)
                return sd->domain;

        sd->node = pcibus_to_node(vmd->dev->bus);

        fn = irq_domain_alloc_named_id_fwnode("VMD-MSI", vmd->sysdata.domain);
        if (!fn)
                return -ENODEV;

        vmd->irq_domain = pci_msi_create_irq_domain(fn, &vmd_msi_domain_info,
                                                    x86_vector_domain);
        irq_domain_free_fwnode(fn);
        if (!vmd->irq_domain)
                return -ENODEV;

        pci_add_resource(&resources, &vmd->resources[0]);
        pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]);
        pci_add_resource_offset(&resources, &vmd->resources[2], offset[1]);

        vmd->bus = pci_create_root_bus(&vmd->dev->dev, vmd->busn_start,
                                       &vmd_ops, sd, &resources);
        if (!vmd->bus) {
                pci_free_resource_list(&resources);
                irq_domain_remove(vmd->irq_domain);
                return -ENODEV;
        }

        vmd_attach_resources(vmd);
        vmd_setup_dma_ops(vmd);
        dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain);

        pci_scan_child_bus(vmd->bus);
        pci_assign_unassigned_bus_resources(vmd->bus);

        /*
         * VMD root buses are virtual and don't return true on pci_is_pcie(),
         * so pcie_bus_configure_settings() would bail out early. Run it on
         * each of the real root ports instead.
         */
        list_for_each_entry(child, &vmd->bus->children, node)
                pcie_bus_configure_settings(child);

        pci_bus_add_devices(vmd->bus);

        WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj,
                               "domain"), "Can't create symlink to domain\n");
        return 0;
}

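/*
 * Interrupt handler for one VMD MSI-X vector: walk the list of child IRQs
 * mapped to this vector under SRCU and invoke each of their handlers.
 */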
static irqreturn_t vmd_irq(int irq, void *data)
{
        struct vmd_irq_list *irqs = data;
        struct vmd_irq *vmdirq;
        int idx;

        idx = srcu_read_lock(&irqs->srcu);
        list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node)
                generic_handle_irq(vmdirq->virq);
        srcu_read_unlock(&irqs->srcu, idx);

        return IRQ_HANDLED;
}

static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
        struct vmd_dev *vmd;
        int i, err;

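        /* The CFGBAR must cover at least one bus (1MB of config space). */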
        if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20))
                return -ENOMEM;

        vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL);
        if (!vmd)
                return -ENOMEM;

        vmd->dev = dev;
        err = pcim_enable_device(dev);
        if (err < 0)
                return err;

        vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0);
        if (!vmd->cfgbar)
                return -ENOMEM;

        pci_set_master(dev);
        if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) &&
            dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32)))
                return -ENODEV;

        vmd->msix_count = pci_msix_vec_count(dev);
        if (vmd->msix_count < 0)
                return -ENODEV;

        vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count,
                                        PCI_IRQ_MSIX);
        if (vmd->msix_count < 0)
                return vmd->msix_count;

        vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs),
                                 GFP_KERNEL);
        if (!vmd->irqs)
                return -ENOMEM;

        for (i = 0; i < vmd->msix_count; i++) {
                err = init_srcu_struct(&vmd->irqs[i].srcu);
                if (err)
                        return err;

                INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
                err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
                                       vmd_irq, IRQF_NO_THREAD,
                                       "vmd", &vmd->irqs[i]);
                if (err)
                        return err;
        }

        spin_lock_init(&vmd->cfg_lock);
        pci_set_drvdata(dev, vmd);
        err = vmd_enable_domain(vmd, (unsigned long) id->driver_data);
        if (err)
                return err;

        dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n",
                 vmd->sysdata.domain);
        return 0;
}

static void vmd_cleanup_srcu(struct vmd_dev *vmd)
{
        int i;

        for (i = 0; i < vmd->msix_count; i++)
                cleanup_srcu_struct(&vmd->irqs[i].srcu);
}

static void vmd_remove(struct pci_dev *dev)
{
        struct vmd_dev *vmd = pci_get_drvdata(dev);

        sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
        pci_stop_root_bus(vmd->bus);
        pci_remove_root_bus(vmd->bus);
        vmd_cleanup_srcu(vmd);
        vmd_teardown_dma_ops(vmd);
        vmd_detach_resources(vmd);
        irq_domain_remove(vmd->irq_domain);
}

#ifdef CONFIG_PM_SLEEP
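/*
 * Free the per-vector demux IRQs across suspend and re-request them on
 * resume, around saving/restoring the VMD device's PCI state.
 */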
static int vmd_suspend(struct device *dev)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct vmd_dev *vmd = pci_get_drvdata(pdev);
        int i;

        for (i = 0; i < vmd->msix_count; i++)
                devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]);

        pci_save_state(pdev);
        return 0;
}

static int vmd_resume(struct device *dev)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct vmd_dev *vmd = pci_get_drvdata(pdev);
        int err, i;

        for (i = 0; i < vmd->msix_count; i++) {
                err = devm_request_irq(dev, pci_irq_vector(pdev, i),
                                       vmd_irq, IRQF_NO_THREAD,
                                       "vmd", &vmd->irqs[i]);
                if (err)
                        return err;
        }

        pci_restore_state(pdev);
        return 0;
}
#endif
static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume);

static const struct pci_device_id vmd_ids[] = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_201D),},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_28C0),
                .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW |
                                VMD_FEAT_HAS_BUS_RESTRICTIONS,},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x467f),
                .driver_data = VMD_FEAT_HAS_BUS_RESTRICTIONS,},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4c3d),
                .driver_data = VMD_FEAT_HAS_BUS_RESTRICTIONS,},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_9A0B),
                .driver_data = VMD_FEAT_HAS_BUS_RESTRICTIONS,},
        {0,}
};
MODULE_DEVICE_TABLE(pci, vmd_ids);

static struct pci_driver vmd_drv = {
        .name           = "vmd",
        .id_table       = vmd_ids,
        .probe          = vmd_probe,
        .remove         = vmd_remove,
        .driver         = {
                .pm     = &vmd_dev_pm_ops,
        },
};
module_pci_driver(vmd_drv);

MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
MODULE_VERSION("0.6");