linux/arch/x86/pci/xen.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Xen PCI - handle PCI (INTx) and MSI infrastructure calls for PV, HVM and
   4 * initial domain support. We also handle the DSDT _PRT callbacks for GSI's
   5 * used in HVM and initial domain mode (PV does not parse ACPI, so it has no
   6 * concept of GSIs). Under PV we hook under the pcibios API for IRQs and
   7 * 0xcf8 PCI configuration read/write.
   8 *
   9 *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
  10 *           Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
  11 *           Stefano Stabellini <stefano.stabellini@eu.citrix.com>
  12 */
  13#include <linux/export.h>
  14#include <linux/init.h>
  15#include <linux/pci.h>
  16#include <linux/acpi.h>
  17
  18#include <linux/io.h>
  19#include <asm/io_apic.h>
  20#include <asm/pci_x86.h>
  21
  22#include <asm/xen/hypervisor.h>
  23
  24#include <xen/features.h>
  25#include <xen/events.h>
  26#include <asm/xen/pci.h>
  27#include <asm/xen/cpuid.h>
  28#include <asm/apic.h>
  29#include <asm/acpi.h>
  30#include <asm/i8259.h>
  31
  32static int xen_pcifront_enable_irq(struct pci_dev *dev)
  33{
  34        int rc;
  35        int share = 1;
  36        int pirq;
  37        u8 gsi;
  38
  39        rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
  40        if (rc < 0) {
  41                dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
  42                         rc);
  43                return rc;
  44        }
  45        /* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/
  46        pirq = gsi;
  47
  48        if (gsi < nr_legacy_irqs())
  49                share = 0;
  50
  51        rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront");
  52        if (rc < 0) {
  53                dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n",
  54                         gsi, pirq, rc);
  55                return rc;
  56        }
  57
  58        dev->irq = rc;
  59        dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
  60        return 0;
  61}
  62
  63#ifdef CONFIG_ACPI
  64static int xen_register_pirq(u32 gsi, int triggering, bool set_pirq)
  65{
  66        int rc, pirq = -1, irq;
  67        struct physdev_map_pirq map_irq;
  68        int shareable = 0;
  69        char *name;
  70
  71        irq = xen_irq_from_gsi(gsi);
  72        if (irq > 0)
  73                return irq;
  74
  75        if (set_pirq)
  76                pirq = gsi;
  77
  78        map_irq.domid = DOMID_SELF;
  79        map_irq.type = MAP_PIRQ_TYPE_GSI;
  80        map_irq.index = gsi;
  81        map_irq.pirq = pirq;
  82
  83        rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
  84        if (rc) {
  85                printk(KERN_WARNING "xen map irq failed %d\n", rc);
  86                return -1;
  87        }
  88
  89        if (triggering == ACPI_EDGE_SENSITIVE) {
  90                shareable = 0;
  91                name = "ioapic-edge";
  92        } else {
  93                shareable = 1;
  94                name = "ioapic-level";
  95        }
  96
  97        irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name);
  98        if (irq < 0)
  99                goto out;
 100
 101        printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", map_irq.pirq, irq, gsi);
 102out:
 103        return irq;
 104}
 105
 106static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
 107                                     int trigger, int polarity)
 108{
 109        if (!xen_hvm_domain())
 110                return -1;
 111
 112        return xen_register_pirq(gsi, trigger,
 113                                 false /* no mapping of GSI to PIRQ */);
 114}
 115
 116#ifdef CONFIG_XEN_DOM0
 117static int xen_register_gsi(u32 gsi, int triggering, int polarity)
 118{
 119        int rc, irq;
 120        struct physdev_setup_gsi setup_gsi;
 121
 122        if (!xen_pv_domain())
 123                return -1;
 124
 125        printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
 126                        gsi, triggering, polarity);
 127
 128        irq = xen_register_pirq(gsi, triggering, true);
 129
 130        setup_gsi.gsi = gsi;
 131        setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
 132        setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
 133
 134        rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
 135        if (rc == -EEXIST)
 136                printk(KERN_INFO "Already setup the GSI :%d\n", gsi);
 137        else if (rc) {
 138                printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n",
 139                                gsi, rc);
 140        }
 141
 142        return irq;
 143}
 144
 145static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
 146                                 int trigger, int polarity)
 147{
 148        return xen_register_gsi(gsi, trigger, polarity);
 149}
 150#endif
 151#endif
 152
 153#if defined(CONFIG_PCI_MSI)
 154#include <linux/msi.h>
 155#include <asm/msidef.h>
 156
 157struct xen_pci_frontend_ops *xen_pci_frontend;
 158EXPORT_SYMBOL_GPL(xen_pci_frontend);
 159
 160static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 161{
 162        int irq, ret, i;
 163        struct msi_desc *msidesc;
 164        int *v;
 165
 166        if (type == PCI_CAP_ID_MSI && nvec > 1)
 167                return 1;
 168
 169        v = kcalloc(max(1, nvec), sizeof(int), GFP_KERNEL);
 170        if (!v)
 171                return -ENOMEM;
 172
 173        if (type == PCI_CAP_ID_MSIX)
 174                ret = xen_pci_frontend_enable_msix(dev, v, nvec);
 175        else
 176                ret = xen_pci_frontend_enable_msi(dev, v);
 177        if (ret)
 178                goto error;
 179        i = 0;
 180        for_each_pci_msi_entry(msidesc, dev) {
 181                irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i],
 182                                               (type == PCI_CAP_ID_MSI) ? nvec : 1,
 183                                               (type == PCI_CAP_ID_MSIX) ?
 184                                               "pcifront-msi-x" :
 185                                               "pcifront-msi",
 186                                                DOMID_SELF);
 187                if (irq < 0) {
 188                        ret = irq;
 189                        goto free;
 190                }
 191                i++;
 192        }
 193        kfree(v);
 194        return 0;
 195
 196error:
 197        if (ret == -ENOSYS)
 198                dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
 199        else if (ret)
 200                dev_err(&dev->dev, "Xen PCI frontend error: %d!\n", ret);
 201free:
 202        kfree(v);
 203        return ret;
 204}
 205
 206#define XEN_PIRQ_MSI_DATA  (MSI_DATA_TRIGGER_EDGE | \
 207                MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0))
 208
 209static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
 210                struct msi_msg *msg)
 211{
 212        /* We set vector == 0 to tell the hypervisor we don't care about it,
 213         * but we want a pirq setup instead.
 214         * We use the dest_id field to pass the pirq that we want. */
 215        msg->address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(pirq);
 216        msg->address_lo =
 217                MSI_ADDR_BASE_LO |
 218                MSI_ADDR_DEST_MODE_PHYSICAL |
 219                MSI_ADDR_REDIRECTION_CPU |
 220                MSI_ADDR_DEST_ID(pirq);
 221
 222        msg->data = XEN_PIRQ_MSI_DATA;
 223}
 224
 225static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 226{
 227        int irq, pirq;
 228        struct msi_desc *msidesc;
 229        struct msi_msg msg;
 230
 231        if (type == PCI_CAP_ID_MSI && nvec > 1)
 232                return 1;
 233
 234        for_each_pci_msi_entry(msidesc, dev) {
 235                pirq = xen_allocate_pirq_msi(dev, msidesc);
 236                if (pirq < 0) {
 237                        irq = -ENODEV;
 238                        goto error;
 239                }
 240                xen_msi_compose_msg(dev, pirq, &msg);
 241                __pci_write_msi_msg(msidesc, &msg);
 242                dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
 243                irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
 244                                               (type == PCI_CAP_ID_MSI) ? nvec : 1,
 245                                               (type == PCI_CAP_ID_MSIX) ?
 246                                               "msi-x" : "msi",
 247                                               DOMID_SELF);
 248                if (irq < 0)
 249                        goto error;
 250                dev_dbg(&dev->dev,
 251                        "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
 252        }
 253        return 0;
 254
 255error:
 256        dev_err(&dev->dev, "Failed to create MSI%s! ret=%d!\n",
 257                type == PCI_CAP_ID_MSI ? "" : "-X", irq);
 258        return irq;
 259}
 260
 261#ifdef CONFIG_XEN_DOM0
 262static bool __read_mostly pci_seg_supported = true;
 263
 264static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 265{
 266        int ret = 0;
 267        struct msi_desc *msidesc;
 268
 269        for_each_pci_msi_entry(msidesc, dev) {
 270                struct physdev_map_pirq map_irq;
 271                domid_t domid;
 272
 273                domid = ret = xen_find_device_domain_owner(dev);
 274                /* N.B. Casting int's -ENODEV to uint16_t results in 0xFFED,
 275                 * hence check ret value for < 0. */
 276                if (ret < 0)
 277                        domid = DOMID_SELF;
 278
 279                memset(&map_irq, 0, sizeof(map_irq));
 280                map_irq.domid = domid;
 281                map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
 282                map_irq.index = -1;
 283                map_irq.pirq = -1;
 284                map_irq.bus = dev->bus->number |
 285                              (pci_domain_nr(dev->bus) << 16);
 286                map_irq.devfn = dev->devfn;
 287
 288                if (type == PCI_CAP_ID_MSI && nvec > 1) {
 289                        map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI;
 290                        map_irq.entry_nr = nvec;
 291                } else if (type == PCI_CAP_ID_MSIX) {
 292                        int pos;
 293                        unsigned long flags;
 294                        u32 table_offset, bir;
 295
 296                        pos = dev->msix_cap;
 297                        pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
 298                                              &table_offset);
 299                        bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
 300                        flags = pci_resource_flags(dev, bir);
 301                        if (!flags || (flags & IORESOURCE_UNSET))
 302                                return -EINVAL;
 303
 304                        map_irq.table_base = pci_resource_start(dev, bir);
 305                        map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
 306                }
 307
 308                ret = -EINVAL;
 309                if (pci_seg_supported)
 310                        ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
 311                                                    &map_irq);
 312                if (type == PCI_CAP_ID_MSI && nvec > 1 && ret) {
 313                        /*
 314                         * If MAP_PIRQ_TYPE_MULTI_MSI is not available
 315                         * there's nothing else we can do in this case.
 316                         * Just set ret > 0 so driver can retry with
 317                         * single MSI.
 318                         */
 319                        ret = 1;
 320                        goto out;
 321                }
 322                if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
 323                        map_irq.type = MAP_PIRQ_TYPE_MSI;
 324                        map_irq.index = -1;
 325                        map_irq.pirq = -1;
 326                        map_irq.bus = dev->bus->number;
 327                        ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
 328                                                    &map_irq);
 329                        if (ret != -EINVAL)
 330                                pci_seg_supported = false;
 331                }
 332                if (ret) {
 333                        dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
 334                                 ret, domid);
 335                        goto out;
 336                }
 337
 338                ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq,
 339                                               (type == PCI_CAP_ID_MSI) ? nvec : 1,
 340                                               (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi",
 341                                               domid);
 342                if (ret < 0)
 343                        goto out;
 344        }
 345        ret = 0;
 346out:
 347        return ret;
 348}
 349
 350static void xen_initdom_restore_msi_irqs(struct pci_dev *dev)
 351{
 352        int ret = 0;
 353
 354        if (pci_seg_supported) {
 355                struct physdev_pci_device restore_ext;
 356
 357                restore_ext.seg = pci_domain_nr(dev->bus);
 358                restore_ext.bus = dev->bus->number;
 359                restore_ext.devfn = dev->devfn;
 360                ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext,
 361                                        &restore_ext);
 362                if (ret == -ENOSYS)
 363                        pci_seg_supported = false;
 364                WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret);
 365        }
 366        if (!pci_seg_supported) {
 367                struct physdev_restore_msi restore;
 368
 369                restore.bus = dev->bus->number;
 370                restore.devfn = dev->devfn;
 371                ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
 372                WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
 373        }
 374}
 375#endif
 376
 377static void xen_teardown_msi_irqs(struct pci_dev *dev)
 378{
 379        struct msi_desc *msidesc;
 380
 381        msidesc = first_pci_msi_entry(dev);
 382        if (msidesc->msi_attrib.is_msix)
 383                xen_pci_frontend_disable_msix(dev);
 384        else
 385                xen_pci_frontend_disable_msi(dev);
 386
 387        /* Free the IRQ's and the msidesc using the generic code. */
 388        default_teardown_msi_irqs(dev);
 389}
 390
 391static void xen_teardown_msi_irq(unsigned int irq)
 392{
 393        xen_destroy_irq(irq);
 394}
 395
 396#endif
 397
 398int __init pci_xen_init(void)
 399{
 400        if (!xen_pv_domain() || xen_initial_domain())
 401                return -ENODEV;
 402
 403        printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n");
 404
 405        pcibios_set_cache_line_size();
 406
 407        pcibios_enable_irq = xen_pcifront_enable_irq;
 408        pcibios_disable_irq = NULL;
 409
 410        /* Keep ACPI out of the picture */
 411        acpi_noirq_set();
 412
 413#ifdef CONFIG_PCI_MSI
 414        x86_msi.setup_msi_irqs = xen_setup_msi_irqs;
 415        x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
 416        x86_msi.teardown_msi_irqs = xen_teardown_msi_irqs;
 417        pci_msi_ignore_mask = 1;
 418#endif
 419        return 0;
 420}
 421
 422#ifdef CONFIG_PCI_MSI
 423void __init xen_msi_init(void)
 424{
 425        if (!disable_apic) {
 426                /*
 427                 * If hardware supports (x2)APIC virtualization (as indicated
 428                 * by hypervisor's leaf 4) then we don't need to use pirqs/
 429                 * event channels for MSI handling and instead use regular
 430                 * APIC processing
 431                 */
 432                uint32_t eax = cpuid_eax(xen_cpuid_base() + 4);
 433
 434                if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) ||
 435                    ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && boot_cpu_has(X86_FEATURE_APIC)))
 436                        return;
 437        }
 438
 439        x86_msi.setup_msi_irqs = xen_hvm_setup_msi_irqs;
 440        x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
 441}
 442#endif
 443
 444int __init pci_xen_hvm_init(void)
 445{
 446        if (!xen_have_vector_callback || !xen_feature(XENFEAT_hvm_pirqs))
 447                return 0;
 448
 449#ifdef CONFIG_ACPI
 450        /*
 451         * We don't want to change the actual ACPI delivery model,
 452         * just how GSIs get registered.
 453         */
 454        __acpi_register_gsi = acpi_register_gsi_xen_hvm;
 455        __acpi_unregister_gsi = NULL;
 456#endif
 457
 458#ifdef CONFIG_PCI_MSI
 459        /*
 460         * We need to wait until after x2apic is initialized
 461         * before we can set MSI IRQ ops.
 462         */
 463        x86_platform.apic_post_init = xen_msi_init;
 464#endif
 465        return 0;
 466}
 467
 468#ifdef CONFIG_XEN_DOM0
 469int __init pci_xen_initial_domain(void)
 470{
 471        int irq;
 472
 473#ifdef CONFIG_PCI_MSI
 474        x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
 475        x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
 476        x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
 477        pci_msi_ignore_mask = 1;
 478#endif
 479        __acpi_register_gsi = acpi_register_gsi_xen;
 480        __acpi_unregister_gsi = NULL;
 481        /*
 482         * Pre-allocate the legacy IRQs.  Use NR_LEGACY_IRQS here
 483         * because we don't have a PIC and thus nr_legacy_irqs() is zero.
 484         */
 485        for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
 486                int trigger, polarity;
 487
 488                if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
 489                        continue;
 490
 491                xen_register_pirq(irq,
 492                        trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE,
 493                        true /* Map GSI to PIRQ */);
 494        }
 495        if (0 == nr_ioapics) {
 496                for (irq = 0; irq < nr_legacy_irqs(); irq++)
 497                        xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic");
 498        }
 499        return 0;
 500}
 501
 502struct xen_device_domain_owner {
 503        domid_t domain;
 504        struct pci_dev *dev;
 505        struct list_head list;
 506};
 507
 508static DEFINE_SPINLOCK(dev_domain_list_spinlock);
 509static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list);
 510
 511static struct xen_device_domain_owner *find_device(struct pci_dev *dev)
 512{
 513        struct xen_device_domain_owner *owner;
 514
 515        list_for_each_entry(owner, &dev_domain_list, list) {
 516                if (owner->dev == dev)
 517                        return owner;
 518        }
 519        return NULL;
 520}
 521
 522int xen_find_device_domain_owner(struct pci_dev *dev)
 523{
 524        struct xen_device_domain_owner *owner;
 525        int domain = -ENODEV;
 526
 527        spin_lock(&dev_domain_list_spinlock);
 528        owner = find_device(dev);
 529        if (owner)
 530                domain = owner->domain;
 531        spin_unlock(&dev_domain_list_spinlock);
 532        return domain;
 533}
 534EXPORT_SYMBOL_GPL(xen_find_device_domain_owner);
 535
 536int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain)
 537{
 538        struct xen_device_domain_owner *owner;
 539
 540        owner = kzalloc(sizeof(struct xen_device_domain_owner), GFP_KERNEL);
 541        if (!owner)
 542                return -ENODEV;
 543
 544        spin_lock(&dev_domain_list_spinlock);
 545        if (find_device(dev)) {
 546                spin_unlock(&dev_domain_list_spinlock);
 547                kfree(owner);
 548                return -EEXIST;
 549        }
 550        owner->domain = domain;
 551        owner->dev = dev;
 552        list_add_tail(&owner->list, &dev_domain_list);
 553        spin_unlock(&dev_domain_list_spinlock);
 554        return 0;
 555}
 556EXPORT_SYMBOL_GPL(xen_register_device_domain_owner);
 557
 558int xen_unregister_device_domain_owner(struct pci_dev *dev)
 559{
 560        struct xen_device_domain_owner *owner;
 561
 562        spin_lock(&dev_domain_list_spinlock);
 563        owner = find_device(dev);
 564        if (!owner) {
 565                spin_unlock(&dev_domain_list_spinlock);
 566                return -ENODEV;
 567        }
 568        list_del(&owner->list);
 569        spin_unlock(&dev_domain_list_spinlock);
 570        kfree(owner);
 571        return 0;
 572}
 573EXPORT_SYMBOL_GPL(xen_unregister_device_domain_owner);
 574#endif
 575