linux/arch/x86/pci/xen.c
<<
>>
Prefs
   1/*
   2 * Xen PCI - handle PCI (INTx) and MSI infrastructure calls for PV, HVM and
   3 * initial domain support. We also handle the DSDT _PRT callbacks for GSI's
   4 * used in HVM and initial domain mode (PV does not parse ACPI, so it has no
   5 * concept of GSIs). Under PV we hook under the pcibios API for IRQs and
   6 * 0xcf8 PCI configuration read/write.
   7 *
   8 *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
   9 *           Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
  10 *           Stefano Stabellini <stefano.stabellini@eu.citrix.com>
  11 */
  12#include <linux/module.h>
  13#include <linux/init.h>
  14#include <linux/pci.h>
  15#include <linux/acpi.h>
  16
  17#include <linux/io.h>
  18#include <asm/io_apic.h>
  19#include <asm/pci_x86.h>
  20
  21#include <asm/xen/hypervisor.h>
  22
  23#include <xen/features.h>
  24#include <xen/events.h>
  25#include <asm/xen/pci.h>
  26#include <asm/i8259.h>
  27
  28static int xen_pcifront_enable_irq(struct pci_dev *dev)
  29{
  30        int rc;
  31        int share = 1;
  32        int pirq;
  33        u8 gsi;
  34
  35        rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
  36        if (rc < 0) {
  37                dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
  38                         rc);
  39                return rc;
  40        }
  41        /* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/
  42        pirq = gsi;
  43
  44        if (gsi < nr_legacy_irqs())
  45                share = 0;
  46
  47        rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront");
  48        if (rc < 0) {
  49                dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n",
  50                         gsi, pirq, rc);
  51                return rc;
  52        }
  53
  54        dev->irq = rc;
  55        dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
  56        return 0;
  57}
  58
  59#ifdef CONFIG_ACPI
  60static int xen_register_pirq(u32 gsi, int gsi_override, int triggering,
  61                             bool set_pirq)
  62{
  63        int rc, pirq = -1, irq = -1;
  64        struct physdev_map_pirq map_irq;
  65        int shareable = 0;
  66        char *name;
  67
  68        irq = xen_irq_from_gsi(gsi);
  69        if (irq > 0)
  70                return irq;
  71
  72        if (set_pirq)
  73                pirq = gsi;
  74
  75        map_irq.domid = DOMID_SELF;
  76        map_irq.type = MAP_PIRQ_TYPE_GSI;
  77        map_irq.index = gsi;
  78        map_irq.pirq = pirq;
  79
  80        rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
  81        if (rc) {
  82                printk(KERN_WARNING "xen map irq failed %d\n", rc);
  83                return -1;
  84        }
  85
  86        if (triggering == ACPI_EDGE_SENSITIVE) {
  87                shareable = 0;
  88                name = "ioapic-edge";
  89        } else {
  90                shareable = 1;
  91                name = "ioapic-level";
  92        }
  93
  94        if (gsi_override >= 0)
  95                gsi = gsi_override;
  96
  97        irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name);
  98        if (irq < 0)
  99                goto out;
 100
 101        printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", map_irq.pirq, irq, gsi);
 102out:
 103        return irq;
 104}
 105
 106static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
 107                                     int trigger, int polarity)
 108{
 109        if (!xen_hvm_domain())
 110                return -1;
 111
 112        return xen_register_pirq(gsi, -1 /* no GSI override */, trigger,
 113                                 false /* no mapping of GSI to PIRQ */);
 114}
 115
 116#ifdef CONFIG_XEN_DOM0
 117static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity)
 118{
 119        int rc, irq;
 120        struct physdev_setup_gsi setup_gsi;
 121
 122        if (!xen_pv_domain())
 123                return -1;
 124
 125        printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
 126                        gsi, triggering, polarity);
 127
 128        irq = xen_register_pirq(gsi, gsi_override, triggering, true);
 129
 130        setup_gsi.gsi = gsi;
 131        setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
 132        setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
 133
 134        rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
 135        if (rc == -EEXIST)
 136                printk(KERN_INFO "Already setup the GSI :%d\n", gsi);
 137        else if (rc) {
 138                printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n",
 139                                gsi, rc);
 140        }
 141
 142        return irq;
 143}
 144
 145static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
 146                                 int trigger, int polarity)
 147{
 148        return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity);
 149}
 150#endif
 151#endif
 152
 153#if defined(CONFIG_PCI_MSI)
 154#include <linux/msi.h>
 155#include <asm/msidef.h>
 156
 157struct xen_pci_frontend_ops *xen_pci_frontend;
 158EXPORT_SYMBOL_GPL(xen_pci_frontend);
 159
 160static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 161{
 162        int irq, ret, i;
 163        struct msi_desc *msidesc;
 164        int *v;
 165
 166        if (type == PCI_CAP_ID_MSI && nvec > 1)
 167                return 1;
 168
 169        v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL);
 170        if (!v)
 171                return -ENOMEM;
 172
 173        if (type == PCI_CAP_ID_MSIX)
 174                ret = xen_pci_frontend_enable_msix(dev, v, nvec);
 175        else
 176                ret = xen_pci_frontend_enable_msi(dev, v);
 177        if (ret)
 178                goto error;
 179        i = 0;
 180        list_for_each_entry(msidesc, &dev->msi_list, list) {
 181                irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i],
 182                                               (type == PCI_CAP_ID_MSI) ? nvec : 1,
 183                                               (type == PCI_CAP_ID_MSIX) ?
 184                                               "pcifront-msi-x" :
 185                                               "pcifront-msi",
 186                                                DOMID_SELF);
 187                if (irq < 0) {
 188                        ret = irq;
 189                        goto free;
 190                }
 191                i++;
 192        }
 193        kfree(v);
 194        return 0;
 195
 196error:
 197        dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
 198free:
 199        kfree(v);
 200        return ret;
 201}
 202
 203#define XEN_PIRQ_MSI_DATA  (MSI_DATA_TRIGGER_EDGE | \
 204                MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0))
 205
 206static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
 207                struct msi_msg *msg)
 208{
 209        /* We set vector == 0 to tell the hypervisor we don't care about it,
 210         * but we want a pirq setup instead.
 211         * We use the dest_id field to pass the pirq that we want. */
 212        msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(pirq);
 213        msg->address_lo =
 214                MSI_ADDR_BASE_LO |
 215                MSI_ADDR_DEST_MODE_PHYSICAL |
 216                MSI_ADDR_REDIRECTION_CPU |
 217                MSI_ADDR_DEST_ID(pirq);
 218
 219        msg->data = XEN_PIRQ_MSI_DATA;
 220}
 221
 222static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 223{
 224        int irq, pirq;
 225        struct msi_desc *msidesc;
 226        struct msi_msg msg;
 227
 228        if (type == PCI_CAP_ID_MSI && nvec > 1)
 229                return 1;
 230
 231        list_for_each_entry(msidesc, &dev->msi_list, list) {
 232                __read_msi_msg(msidesc, &msg);
 233                pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
 234                        ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
 235                if (msg.data != XEN_PIRQ_MSI_DATA ||
 236                    xen_irq_from_pirq(pirq) < 0) {
 237                        pirq = xen_allocate_pirq_msi(dev, msidesc);
 238                        if (pirq < 0) {
 239                                irq = -ENODEV;
 240                                goto error;
 241                        }
 242                        xen_msi_compose_msg(dev, pirq, &msg);
 243                        __write_msi_msg(msidesc, &msg);
 244                        dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
 245                } else {
 246                        dev_dbg(&dev->dev,
 247                                "xen: msi already bound to pirq=%d\n", pirq);
 248                }
 249                irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
 250                                               (type == PCI_CAP_ID_MSI) ? nvec : 1,
 251                                               (type == PCI_CAP_ID_MSIX) ?
 252                                               "msi-x" : "msi",
 253                                               DOMID_SELF);
 254                if (irq < 0)
 255                        goto error;
 256                dev_dbg(&dev->dev,
 257                        "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
 258        }
 259        return 0;
 260
 261error:
 262        dev_err(&dev->dev,
 263                "Xen PCI frontend has not registered MSI/MSI-X support!\n");
 264        return irq;
 265}
 266
 267#ifdef CONFIG_XEN_DOM0
 268static bool __read_mostly pci_seg_supported = true;
 269
 270static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 271{
 272        int ret = 0;
 273        struct msi_desc *msidesc;
 274
 275        list_for_each_entry(msidesc, &dev->msi_list, list) {
 276                struct physdev_map_pirq map_irq;
 277                domid_t domid;
 278
 279                domid = ret = xen_find_device_domain_owner(dev);
 280                /* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
 281                 * hence check ret value for < 0. */
 282                if (ret < 0)
 283                        domid = DOMID_SELF;
 284
 285                memset(&map_irq, 0, sizeof(map_irq));
 286                map_irq.domid = domid;
 287                map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
 288                map_irq.index = -1;
 289                map_irq.pirq = -1;
 290                map_irq.bus = dev->bus->number |
 291                              (pci_domain_nr(dev->bus) << 16);
 292                map_irq.devfn = dev->devfn;
 293
 294                if (type == PCI_CAP_ID_MSI && nvec > 1) {
 295                        map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI;
 296                        map_irq.entry_nr = nvec;
 297                } else if (type == PCI_CAP_ID_MSIX) {
 298                        int pos;
 299                        u32 table_offset, bir;
 300
 301                        pos = dev->msix_cap;
 302                        pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
 303                                              &table_offset);
 304                        bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
 305
 306                        map_irq.table_base = pci_resource_start(dev, bir);
 307                        map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
 308                }
 309
 310                ret = -EINVAL;
 311                if (pci_seg_supported)
 312                        ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
 313                                                    &map_irq);
 314                if (type == PCI_CAP_ID_MSI && nvec > 1 && ret) {
 315                        /*
 316                         * If MAP_PIRQ_TYPE_MULTI_MSI is not available
 317                         * there's nothing else we can do in this case.
 318                         * Just set ret > 0 so driver can retry with
 319                         * single MSI.
 320                         */
 321                        ret = 1;
 322                        goto out;
 323                }
 324                if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
 325                        map_irq.type = MAP_PIRQ_TYPE_MSI;
 326                        map_irq.index = -1;
 327                        map_irq.pirq = -1;
 328                        map_irq.bus = dev->bus->number;
 329                        ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
 330                                                    &map_irq);
 331                        if (ret != -EINVAL)
 332                                pci_seg_supported = false;
 333                }
 334                if (ret) {
 335                        dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
 336                                 ret, domid);
 337                        goto out;
 338                }
 339
 340                ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq,
 341                                               (type == PCI_CAP_ID_MSI) ? nvec : 1,
 342                                               (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi",
 343                                               domid);
 344                if (ret < 0)
 345                        goto out;
 346        }
 347        ret = 0;
 348out:
 349        return ret;
 350}
 351
 352static void xen_initdom_restore_msi_irqs(struct pci_dev *dev)
 353{
 354        int ret = 0;
 355
 356        if (pci_seg_supported) {
 357                struct physdev_pci_device restore_ext;
 358
 359                restore_ext.seg = pci_domain_nr(dev->bus);
 360                restore_ext.bus = dev->bus->number;
 361                restore_ext.devfn = dev->devfn;
 362                ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext,
 363                                        &restore_ext);
 364                if (ret == -ENOSYS)
 365                        pci_seg_supported = false;
 366                WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret);
 367        }
 368        if (!pci_seg_supported) {
 369                struct physdev_restore_msi restore;
 370
 371                restore.bus = dev->bus->number;
 372                restore.devfn = dev->devfn;
 373                ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
 374                WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
 375        }
 376}
 377#endif
 378
 379static void xen_teardown_msi_irqs(struct pci_dev *dev)
 380{
 381        struct msi_desc *msidesc;
 382
 383        msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
 384        if (msidesc->msi_attrib.is_msix)
 385                xen_pci_frontend_disable_msix(dev);
 386        else
 387                xen_pci_frontend_disable_msi(dev);
 388
 389        /* Free the IRQ's and the msidesc using the generic code. */
 390        default_teardown_msi_irqs(dev);
 391}
 392
 393static void xen_teardown_msi_irq(unsigned int irq)
 394{
 395        xen_destroy_irq(irq);
 396}
 397static u32 xen_nop_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
 398{
 399        return 0;
 400}
 401static u32 xen_nop_msix_mask_irq(struct msi_desc *desc, u32 flag)
 402{
 403        return 0;
 404}
 405#endif
 406
 407int __init pci_xen_init(void)
 408{
 409        if (!xen_pv_domain() || xen_initial_domain())
 410                return -ENODEV;
 411
 412        printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");
 413
 414        pcibios_set_cache_line_size();
 415
 416        pcibios_enable_irq = xen_pcifront_enable_irq;
 417        pcibios_disable_irq = NULL;
 418
 419#ifdef CONFIG_ACPI
 420        /* Keep ACPI out of the picture */
 421        acpi_noirq = 1;
 422#endif
 423
 424#ifdef CONFIG_PCI_MSI
 425        x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
 426        x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
 427        x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
 428        x86_msi.msi_mask_irq = xen_nop_msi_mask_irq;
 429        x86_msi.msix_mask_irq = xen_nop_msix_mask_irq;
 430#endif
 431        return 0;
 432}
 433
 434int __init pci_xen_hvm_init(void)
 435{
 436        if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
 437                return 0;
 438
 439#ifdef CONFIG_ACPI
 440        /*
 441         * We don't want to change the actual ACPI delivery model,
 442         * just how GSIs get registered.
 443         */
 444        __acpi_register_gsi = acpi_register_gsi_xen_hvm;
 445#endif
 446
 447#ifdef CONFIG_PCI_MSI
 448        x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
 449        x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
 450#endif
 451        return 0;
 452}
 453
 454#ifdef CONFIG_XEN_DOM0
 455static __init void xen_setup_acpi_sci(void)
 456{
 457        int rc;
 458        int trigger, polarity;
 459        int gsi = acpi_sci_override_gsi;
 460        int irq = -1;
 461        int gsi_override = -1;
 462
 463        if (!gsi)
 464                return;
 465
 466        rc = acpi_get_override_irq(gsi, &trigger, &polarity);
 467        if (rc) {
 468                printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi"
 469                                " sci, rc=%d\n", rc);
 470                return;
 471        }
 472        trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
 473        polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
 474
 475        printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "
 476                        "polarity=%d\n", gsi, trigger, polarity);
 477
 478        /* Before we bind the GSI to a Linux IRQ, check whether
 479         * we need to override it with bus_irq (IRQ) value. Usually for
 480         * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so:
 481         *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
 482         * but there are oddballs where the IRQ != GSI:
 483         *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level)
 484         * which ends up being: gsi_to_irq[9] == 20
 485         * (which is what acpi_gsi_to_irq ends up calling when starting the
 486         * the ACPI interpreter and keels over since IRQ 9 has not been
 487         * setup as we had setup IRQ 20 for it).
 488         */
 489        if (acpi_gsi_to_irq(gsi, &irq) == 0) {
 490                /* Use the provided value if it's valid. */
 491                if (irq >= 0)
 492                        gsi_override = irq;
 493        }
 494
 495        gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity);
 496        printk(KERN_INFO "xen: acpi sci %d\n", gsi);
 497
 498        return;
 499}
 500
 501int __init pci_xen_initial_domain(void)
 502{
 503        int irq;
 504
 505#ifdef CONFIG_PCI_MSI
 506        x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
 507        x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
 508        x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
 509        x86_msi.msi_mask_irq = xen_nop_msi_mask_irq;
 510        x86_msi.msix_mask_irq = xen_nop_msix_mask_irq;
 511#endif
 512        xen_setup_acpi_sci();
 513        __acpi_register_gsi = acpi_register_gsi_xen;
 514        /* Pre-allocate legacy irqs */
 515        for (irq = 0; irq < nr_legacy_irqs(); irq++) {
 516                int trigger, polarity;
 517
 518                if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
 519                        continue;
 520
 521                xen_register_pirq(irq, -1 /* no GSI override */,
 522                        trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE,
 523                        true /* Map GSI to PIRQ */);
 524        }
 525        if (0 == nr_ioapics) {
 526                for (irq = 0; irq < nr_legacy_irqs(); irq++)
 527                        xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic");
 528        }
 529        return 0;
 530}
 531
 532struct xen_device_domain_owner {
 533        domid_t domain;
 534        struct pci_dev *dev;
 535        struct list_head list;
 536};
 537
 538static DEFINE_SPINLOCK(dev_domain_list_spinlock);
 539static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
 540
 541static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
 542{
 543        struct xen_device_domain_owner *owner;
 544
 545        list_for_each_entry(owner, &dev_domain_list, list) {
 546                if (owner->dev == dev)
 547                        return owner;
 548        }
 549        return NULL;
 550}
 551
 552int xen_find_device_domain_owner(struct pci_dev *dev)
 553{
 554        struct xen_device_domain_owner *owner;
 555        int domain = -ENODEV;
 556
 557        spin_lock(&dev_domain_list_spinlock);
 558        owner = find_device(dev);
 559        if (owner)
 560                domain = owner->domain;
 561        spin_unlock(&dev_domain_list_spinlock);
 562        return domain;
 563}
 564EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
 565
 566int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
 567{
 568        struct xen_device_domain_owner *owner;
 569
 570        owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
 571        if (!owner)
 572                return -ENODEV;
 573
 574        spin_lock(&dev_domain_list_spinlock);
 575        if (find_device(dev)) {
 576                spin_unlock(&dev_domain_list_spinlock);
 577                kfree(owner);
 578                return -EEXIST;
 579        }
 580        owner->domain = domain;
 581        owner->dev = dev;
 582        list_add_tail(&owner->list, &dev_domain_list);
 583        spin_unlock(&dev_domain_list_spinlock);
 584        return 0;
 585}
 586EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
 587
 588int xen_unregister_device_domain_owner(struct pci_dev *dev)
 589{
 590        struct xen_device_domain_owner *owner;
 591
 592        spin_lock(&dev_domain_list_spinlock);
 593        owner = find_device(dev);
 594        if (!owner) {
 595                spin_unlock(&dev_domain_list_spinlock);
 596                return -ENODEV;
 597        }
 598        list_del(&owner->list);
 599        spin_unlock(&dev_domain_list_spinlock);
 600        kfree(owner);
 601        return 0;
 602}
 603EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
 604#endif
 605